Fix crash for ShaderCompilerService (#8626)

This change fixes a crash that occurs in ShaderCompilerService under a certain condition described below. Functions are called in the order described below. 1. `ShaderCompilerService::initialize(...)` is called when a new GL program is created. There are cases where the corresponding TickOp may still be alive at the end of this method. 2. `OpenGLProgram::~OpenGLProgram()` is called when the program is immediately destroyed without being used. This deletes gl.program. 3. `ShaderCompilerService::tick()` is called later, and it references the dangling gl object `gl.program` in the lambda function, and it crashes. This change also includes refactoring the class `ShaderCompilerService` to make code simpler and less error-prone. TEST = Tested on Desktop, Android, and Web by running sample apps BUGS = [394319326, 407090622]
2025-04-22 17:31:40 +00:00
parent ca3ff7e08e
commit c3542b135e
3 changed files with 393 additions and 403 deletions
--- a/filament/backend/src/opengl/OpenGLProgram.cpp
+++ b/filament/backend/src/opengl/OpenGLProgram.cpp
@@ -85,6 +85,7 @@ OpenGLProgram::~OpenGLProgram() noexcept {
        delete lazyInitializationData;

        ShaderCompilerService::terminate(mToken);
+        assert_invariant(!mToken);
    }

    delete [] mUniformsRecords;
--- a/filament/backend/src/opengl/ShaderCompilerService.cpp
+++ b/filament/backend/src/opengl/ShaderCompilerService.cpp
@@ -17,11 +17,15 @@
 #include "ShaderCompilerService.h"

 #include "BlobCacheKey.h"
+#include "CallbackManager.h"
+#include "CompilerThreadPool.h"
 #include "OpenGLBlobCache.h"
 #include "OpenGLDriver.h"

+#include <iterator>
 #include <private/backend/BackendUtils.h>

+#include <backend/DriverEnums.h>
 #include <backend/Program.h>

 #include <utils/compiler.h>
@@ -34,9 +38,9 @@
 #include <utils/Panic.h>
 #include <utils/Systrace.h>

+#include <algorithm>
 #include <array>
 #include <cctype>
-#include <chrono>
 #include <mutex>
 #include <memory>
 #include <string>
@@ -47,6 +51,7 @@

 #include <stddef.h>
 #include <stdint.h>
+#include <string.h>

 namespace filament::backend {

@@ -54,17 +59,17 @@ using namespace utils;

 // ------------------------------------------------------------------------------------------------

-static inline std::string to_string(bool b) noexcept { return b ? "true" : "false"; }
-static inline std::string to_string(int i) noexcept { return std::to_string(i); }
-static inline std::string to_string(float f) noexcept { return "float(" + std::to_string(f) + ")"; }
+static std::string to_string(bool const b) { return b ? "true" : "false"; }
+static std::string to_string(int const i) { return std::to_string(i); }
+static std::string to_string(float const f) { return "float(" + std::to_string(f) + ")"; }

 static void logCompilationError(io::ostream& out, ShaderStage shaderType, const char* name,
        GLuint shaderId, CString const& sourceCode) noexcept;
 static void logProgramLinkError(io::ostream& out, char const* name, GLuint program) noexcept;

-static void process_GOOGLE_cpp_style_line_directive(OpenGLContext& context, char* source,
+static void process_GOOGLE_cpp_style_line_directive(OpenGLContext const& context, char* source,
        size_t len) noexcept;
-static void process_OVR_multiview2(OpenGLContext& context, int32_t eyeCount, char* source,
+static void process_OVR_multiview2(OpenGLContext const& context, int32_t eyeCount, char* source,
        size_t len) noexcept;
 static std::string_view process_ARB_shading_language_packing(OpenGLContext& context) noexcept;
 static std::array<std::string_view, 3> splitShaderSource(std::string_view source) noexcept;
@@ -72,20 +77,15 @@ static std::array<std::string_view, 3> splitShaderSource(std::string_view source
 // ------------------------------------------------------------------------------------------------

 struct ShaderCompilerService::OpenGLProgramToken : ProgramToken {
-    struct ProgramData {
-        GLuint program{};
-        shaders_t shaders{};
-    };
-
    ~OpenGLProgramToken() override;

-    OpenGLProgramToken(ShaderCompilerService& compiler, utils::CString const& name) noexcept
-            : compiler(compiler), name(name) {
+    OpenGLProgramToken(ShaderCompilerService& compiler, CString const& name) noexcept
+            : compiler(compiler), name(name), handle(compiler.issueCallbackHandle()) {
    }

    ShaderCompilerService& compiler;
-    utils::CString const& name;
-    utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> attributes;
+    CString const& name;
+    FixedCapacityVector<std::pair<CString, uint8_t>> attributes;
    shaders_source_t shaderSourceCode;
    void* user = nullptr;
    struct {
@@ -93,48 +93,34 @@ struct ShaderCompilerService::OpenGLProgramToken : ProgramToken {
        GLuint program = 0;
    } gl; // 12 bytes

-
-    // Sets the programData, typically from the compiler thread, and signal the main thread.
-    // This is similar to std::promise::set_value.
-    void set(ProgramData const& data) noexcept {
+    // Used in THREAD_POOL mode. The job from ThreadPool should call this when the token is ready to
+    // be used. It sends a signal to the engine thread being blocked upon the `wait` call, so that
+    // the engine thread resumes its processing with the token.
+    void signal() noexcept {
        std::unique_lock const l(lock);
-        programData = data;
        signaled = true;
        cond.notify_one();
    }

-    // Get the programBinary, wait if necessary.
-    // This is similar to std::future::get
-    ProgramData const& get() const noexcept {
-        std::unique_lock l(lock);
-        cond.wait(l, [this](){ return signaled; });
-        return programData;
-    }
-
+    // Used in THREAD_POOL mode. The engine thread should call this before accessing token's fields.
+    // This may block until the token is ready to be used.
    void wait() const noexcept {
        std::unique_lock l(lock);
-        cond.wait(l, [this](){ return signaled; });
-    }
-
-    // Checks if the programBinary is ready.
-    // This is similar to std::future::wait_for(0s)
-    bool isReady() const noexcept {
-        std::unique_lock l(lock);
-        using namespace std::chrono_literals;
-        return cond.wait_for(l, 0s, [this](){ return signaled; });
+        cond.wait(l, [this] { return signaled; });
    }

    CallbackManager::Handle handle{};
    BlobCacheKey key;
-    mutable utils::Mutex lock;
-    mutable utils::Condition cond;
-    ProgramData programData;
-    bool signaled = false;

-    bool canceled = false; // not part of the signaling
+    // Used for the `THREAD_POOL` mode.
+    mutable Mutex lock;
+    mutable Condition cond;
+    bool signaled = false;
 };

-ShaderCompilerService::OpenGLProgramToken::~OpenGLProgramToken() = default;
+ShaderCompilerService::OpenGLProgramToken::~OpenGLProgramToken() {
+    compiler.submitCallbackHandle(handle);
+}

 /* static */ void ShaderCompilerService::setUserData(const program_token_t& token,
        void* user) noexcept {
@@ -189,9 +175,9 @@ void ShaderCompilerService::init() noexcept {
    if (mMode == Mode::THREAD_POOL) {
        // - on Adreno there is a single compiler object. We can't use a pool > 1
        //   also glProgramBinary blocks if other threads are compiling.
-        // - on Mali shader compilation can be multi-threaded, but program linking happens on
+        // - on Mali shader compilation can be multithreaded, but program linking happens on
        //   a single service thread, so we don't bother using more than one thread either.
-        // - on PowerVR shader compilation and linking can be multi-threaded.
+        // - on PowerVR shader compilation and linking can be multithreaded.
        //   How many threads should we use?
        // - on macOS (M1 MacBook Pro/Ventura) there is global lock around all GL APIs when using
        //   a shared context, so parallel shader compilation yields no benefit.
@@ -220,7 +206,7 @@ void ShaderCompilerService::init() noexcept {

        mShaderCompilerThreadCount = poolSize;
        mCompilerThreadPool.init(mShaderCompilerThreadCount,
-                [&platform = mDriver.mPlatform, priority]() {
+                [&platform = mDriver.mPlatform, priority] {
                    // give the thread a name
                    JobSystem::setThreadName("CompilerThreadPool");
                    // run at a slightly lower priority than other filament threads
@@ -228,7 +214,7 @@ void ShaderCompilerService::init() noexcept {
                    // create a gl context current to this thread
                    platform.createContext(true);
                },
-                [&platform = mDriver.mPlatform]() {
+                [&platform = mDriver.mPlatform] {
                    // release context and thread state
                    platform.releaseContext();
                });
@@ -249,119 +235,65 @@ void ShaderCompilerService::terminate() noexcept {
 }

 ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
-        utils::CString const& name, Program&& program) {
+        CString const& name, Program&& program) {
    auto& gl = mDriver.getContext();

+    // Create a token. A callback condition (handle) is internally created upon token creation.
    auto token = std::make_shared<OpenGLProgramToken>(*this, name);
    if (UTILS_UNLIKELY(gl.isES2())) {
        token->attributes = std::move(program.getAttributes());
    }

+    // Try retrieving the cached program blob if available.
    token->gl.program = mBlobCache.retrieve(&token->key, mDriver.mPlatform, program);
    if (token->gl.program) {
        return token;
    }

-    token->handle = mCallbackManager.get();
-
+    // Initiate program compilation.
    CompilerPriorityQueue const priorityQueue = program.getPriorityQueue();
    switch (mMode) {
        case Mode::THREAD_POOL: {
-            // queue a compile job
            mCompilerThreadPool.queue(priorityQueue, token,
                    [this, &gl, program = std::move(program), token]() mutable {
-                        // compile the shaders
-                        shaders_t shaders{};
-                        compileShaders(gl,
-                                std::move(program.getShadersSource()),
-                                program.getSpecializationConstants(),
-                                program.isMultiview(),
-                                shaders,
-                                token->shaderSourceCode);
-
-                        // link the program
-                        GLuint const glProgram = linkProgram(gl, shaders, token->attributes);
-
-                        OpenGLProgramToken::ProgramData programData;
-                        programData.shaders = shaders;
-
-                        // We need to query the link status here to guarantee that the
-                        // program is compiled and linked now (we don't want this to be
-                        // deferred to later). We don't care about the result at this point.
-                        GLint status = GL_FALSE;
-                        glGetProgramiv(glProgram, GL_LINK_STATUS, &status);
-                        programData.program = glProgram;
-
-                        // we don't need to check for success here, it'll be done on the
-                        // main thread side.
-                        token->set(programData);
-
-                        mCallbackManager.put(token->handle);
-
-                        // caching must be the last thing we do
-                        if (token->key && status == GL_TRUE) {
-                            // Attempt to cache. This calls glGetProgramBinary.
-                            mBlobCache.insert(mDriver.mPlatform, token->key, glProgram);
-                        }
+                        compileShaders(gl, std::move(program.getShadersSource()),
+                                program.getSpecializationConstants(), program.isMultiview(), token);
+                        linkProgram(gl, token);
+                        // Now `token->gl.program` must be populated, so we signal the completion
+                        // of the linking. We don't need to check the result of the program here
+                        // because it'll be done in the engine thread.
+                        token->signal();
+                        // We try caching the program blob after sending the signal. This allows us
+                        // to unblock the engine thread as soon as the token is ready while
+                        // performing an expensive caching operation still in the pool.
+                        tryCachingProgram(mBlobCache, mDriver.mPlatform, token);
                    });
            break;
        }

        case Mode::SYNCHRONOUS:
        case Mode::ASYNCHRONOUS: {
-            // this cannot fail because we check compilation status after linking the program
-            // shaders[] is filled with id of shader stages present.
-            compileShaders(gl,
-                    std::move(program.getShadersSource()),
-                    program.getSpecializationConstants(),
-                    program.isMultiview(),
-                    token->gl.shaders,
-                    token->shaderSourceCode);
+            compileShaders(gl, std::move(program.getShadersSource()),
+                    program.getSpecializationConstants(), program.isMultiview(), token);

            runAtNextTick(priorityQueue, token, [this, token](Job const&) {
                assert_invariant(mMode != Mode::THREAD_POOL);
                if (mMode == Mode::ASYNCHRONOUS) {
-                    // don't attempt to link this program if all shaders are not done compiling
-                    GLint status;
+                    // Check link completion if link was initiated.
                    if (token->gl.program) {
-                        glGetProgramiv(token->gl.program, GL_COMPLETION_STATUS, &status);
-                        if (status == GL_FALSE) {
-                            return false;
-                        }
-                    } else {
-                        for (auto shader: token->gl.shaders) {
-                            if (shader) {
-                                glGetShaderiv(shader, GL_COMPLETION_STATUS, &status);
-                                if (status == GL_FALSE) {
-                                    return false;
-                                }
-                            }
-                        }
+                        return isLinkCompleted(token);
                    }
-                }
-
-                if (!token->gl.program) {
-                    // link the program, this also cannot fail because status is checked later.
-                    token->gl.program = linkProgram(mDriver.getContext(),
-                            token->gl.shaders, token->attributes);
-                    if (mMode == Mode::ASYNCHRONOUS) {
-                        // wait until the link finishes...
+                    // Link hasn't been initiated, then check compile completion.
+                    if (!isCompileCompleted(token)) {
                        return false;
                    }
                }
-
-                assert_invariant(token->gl.program);
-
-                mCallbackManager.put(token->handle);
-
-                if (token->key) {
-                    // TODO: technically we don't have to cache right now. Is it advantageous to
-                    //       do this later, maybe depending on CPU usage?
-                    // attempt to cache if we don't have a thread pool (otherwise it's done
-                    // by the pool).
-                    mBlobCache.insert(mDriver.mPlatform, token->key, token->gl.program);
+                if (!token->gl.program) {
+                    linkProgram(mDriver.getContext(), token);
+                    if (mMode == Mode::ASYNCHRONOUS) {
+                        return false;// Wait until the link finishes.
+                    }
                }
-
                return true;
            });
            break;
@@ -375,7 +307,7 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
    return token;
 }

-GLuint ShaderCompilerService::getProgram(ShaderCompilerService::program_token_t& token) {
+GLuint ShaderCompilerService::getProgram(program_token_t& token) {
    GLuint const program = initialize(token);
    assert_invariant(token == nullptr);
 #if !FILAMENT_ENABLE_MATDBG
@@ -384,43 +316,29 @@ GLuint ShaderCompilerService::getProgram(ShaderCompilerService::program_token_t&
    return program;
 }

+/*
+ * Cancel program compilation. This function is responsible for cleaning up the ongoing
+ * compilation & link process. If the process is already completed by calling `initialize(token)`,
+ * this function is not called.
+ */
 /* static */ void ShaderCompilerService::terminate(program_token_t& token) {
-    assert_invariant(token);

-    token->canceled = true;
-
-    bool const isTickOpCanceled = token->compiler.cancelTickOp(token);
+    assert_invariant(token);// This function should be called when the token is still alive.

    if (token->compiler.mMode == Mode::THREAD_POOL) {
-        auto job = token->compiler.mCompilerThreadPool.dequeue(token);
+        auto const job = token->compiler.mCompilerThreadPool.dequeue(token);
        if (!job) {
-            // The job is being executed right now. We need to wait for it to finish to avoid a
-            // race.
+            // It's likely that the job was already completed. But it may be still being
+            // executed at this moment. Just try waiting for it to avoid a race.
            token->wait();
-        } else {
-            // The job has not been executed, but we still need to inform the callback manager in
-            // order for future callbacks to be successfully called.
-            token->compiler.mCallbackManager.put(token->handle);
-        }
-    } else if (isTickOpCanceled) {
-        // Since the tick op was canceled, we need to .put the token here.
-        token->compiler.mCallbackManager.put(token->handle);
-    }
-
-    for (GLuint& shader: token->gl.shaders) {
-        if (shader) {
-            if (token->gl.program) {
-                glDetachShader(token->gl.program, shader);
-            }
-            glDeleteShader(shader);
-            shader = 0;
        }
    }
-    if (token->gl.program) {
-        glDeleteProgram(token->gl.program);
-    }

-    token.reset();
+    cleanupProgramAndShaders(token);
+
+    // Cleanup the token.
+    token->compiler.cancelTickOp(token);
+    token = nullptr;// This will submit a callback condition (handle) to the callback manager.
 }

 void ShaderCompilerService::tick() {
@@ -430,8 +348,16 @@ void ShaderCompilerService::tick() {
    }
 }

+CallbackManager::Handle ShaderCompilerService::issueCallbackHandle() const noexcept {
+    return mCallbackManager.get();
+}
+
+void ShaderCompilerService::submitCallbackHandle(CallbackManager::Handle handle) noexcept {
+    mCallbackManager.put(handle);
+}
+
 void ShaderCompilerService::notifyWhenAllProgramsAreReady(
-        CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
+        CallbackHandler* handler, CallbackHandler::Callback const callback, void* user) {
    if (callback) {
        mCallbackManager.setCallback(handler, callback, user);
    }
@@ -439,117 +365,137 @@ void ShaderCompilerService::notifyWhenAllProgramsAreReady(

 // ------------------------------------------------------------------------------------------------

-/* static */ void ShaderCompilerService::getProgramFromCompilerPool(
-        program_token_t& token) noexcept {
-    OpenGLProgramToken::ProgramData const& programData{ token->get() };
-    if (!token->canceled) {
-        token->gl.shaders = programData.shaders;
-        token->gl.program = programData.program;
-    }
-}
+GLuint ShaderCompilerService::initialize(program_token_t& token) {

-GLuint ShaderCompilerService::initialize(program_token_t& token) noexcept {
    SYSTRACE_CALL();
-    if (!token->gl.program) {
-        switch (mMode) {
-            case Mode::THREAD_POOL: {
-                // we need this program right now, remove it from the queue
-                auto job = mCompilerThreadPool.dequeue(token);
-                if (job) {
-                    // if we were able to remove it, we execute the job now, otherwise it means
-                    // it's being executed right now.
-                    job();
-                }

-                if (!token->canceled) {
-                    token->compiler.cancelTickOp(token);
-                }
+    assert_invariant(token);// This function should be called when the token is still alive.

-                // Block until we get the program from the pool. Generally this wouldn't block
-                // because we just compiled the program above, when executing job.
-                ShaderCompilerService::getProgramFromCompilerPool(token);
-                break;
-            }
-
-            case Mode::ASYNCHRONOUS: {
-                // we force the program link -- which might stall, either here or below in
-                // checkProgramStatus(), but we don't have a choice, we need to use the program now.
-                token->compiler.cancelTickOp(token);
-
-                token->gl.program =
-                        linkProgram(mDriver.getContext(), token->gl.shaders, token->attributes);
-
-                assert_invariant(token->gl.program);
-
-                mCallbackManager.put(token->handle);
-
-                if (token->key) {
-                    mBlobCache.insert(mDriver.mPlatform, token->key, token->gl.program);
-                }
-                break;
-            }
-
-            case Mode::SYNCHRONOUS: {
-                // if we don't have a program yet, block until we get it.
-                tick();
-                break;
-            }
-
-            case Mode::UNDEFINED: {
-                assert_invariant(false);
-            }
-        }
-    }
-
-    // by this point we must have a GL program
+    ensureTokenIsReady(token);
    assert_invariant(token->gl.program);

-    GLuint program = 0;
+    // Check status of program linking. If it failed, errors will be logged.
+    bool const linked = checkLinkStatusAndCleanupShaders(token);

-    // check status of program linking and shader compilation, logs error and free all resources
-    // in case of error.
-    bool const success = checkProgramStatus(token);
-
-    // Unless we have matdbg, we panic if a program is invalid. Otherwise, we'd get a UB.
-    // The compilation error has been logged to log.e by this point.
-    FILAMENT_CHECK_POSTCONDITION(FILAMENT_ENABLE_MATDBG || success)
+    // We panic if it failed to create the program.
+    FILAMENT_CHECK_POSTCONDITION(linked)
            << "OpenGL program " << token->name.c_str_safe() << " failed to link or compile";

-    if (UTILS_LIKELY(success)) {
-        program = token->gl.program;
-        // no need to keep the shaders around
-        UTILS_NOUNROLL
-        for (GLuint& shader: token->gl.shaders) {
-            if (shader) {
-                glDetachShader(program, shader);
-                glDeleteShader(shader);
-                shader = 0;
-            }
-        }
+    // The program is successfully created. Try caching the program blob. In the THREAD_POOL mode,
+    // caching is performed in the pool.
+    if (mMode != Mode::THREAD_POOL) {
+        tryCachingProgram(mBlobCache, mDriver.mPlatform, token);
    }

-    // and destroy all temporary init data
-    token = nullptr;
+    GLuint const program = token->gl.program;
+
+    // Cleanup the token.
+    token->compiler.cancelTickOp(token);
+    token = nullptr;// This will submit a callback condition (handle) to the callback manager.

    return program;
 }

+void ShaderCompilerService::ensureTokenIsReady(program_token_t const& token) {
+    if (token->gl.program) {
+        return;// It's ready.
+    }
+
+    switch (mMode) {
+        case Mode::THREAD_POOL: {
+            // We need this program right now, make sure the job is finished.
+            if (auto job = mCompilerThreadPool.dequeue(token)) {
+                job();// The job hasn't started yet, so execute it now.
+            }
+
+            // This may block if the job was already taken by a thread ahead of the `dequeue`
+            // above and currently being executed. Otherwise, the job must have already been
+            // completed by this point from either the code above or the other thread.
+            token->wait();
+            break;
+        }
+
+        case Mode::ASYNCHRONOUS: {
+            // Technically the shader compilation may not have finished yet. To deal with the case,
+            // ideally, we should wait here until the compilation is finished. However, for now, we
+            // just log warnings here instead of repeatedly checking compile status. If this turns
+            // out to be a real issue later, we would need to consider doing the canonical way.
+            if (!isCompileCompleted(token)) {
+                slog.w << "Shader compilation for OpenGL program " << token->name.c_str_safe()
+                       << " is not completed yet. The following program link may not succeed.";
+            }
+
+            linkProgram(mDriver.getContext(), token);
+            break;
+        }
+
+        case Mode::SYNCHRONOUS: {
+            // We must not have called the TickOp yet until now. Call now to have
+            // `token->gl.program` ready to use.
+            tick();
+            break;
+        }
+
+        case Mode::UNDEFINED: {
+            assert_invariant(false);
+        }
+    }
+}
+
+// ------------------------------------------------------------------------------------------------
+
+void ShaderCompilerService::runAtNextTick(CompilerPriorityQueue priority,
+        program_token_t const& token, Job job) noexcept {
+    // insert items in order of priority and at the end of the range
+    auto& ops = mRunAtNextTickOps;
+    auto const pos = std::lower_bound(ops.begin(), ops.end(), priority,
+            [](ContainerType const& lhs, CompilerPriorityQueue const priorityQueue) {
+                return std::get<0>(lhs) < priorityQueue;
+            });
+    ops.emplace(pos, priority, token, std::move(job));
+
+    SYSTRACE_CONTEXT();
+    SYSTRACE_VALUE32("ShaderCompilerService Jobs", mRunAtNextTickOps.size());
+}
+
+bool ShaderCompilerService::cancelTickOp(program_token_t const& token) noexcept {
+    // We do a linear search here, but this is rare, and we know the list is pretty small.
+    auto& ops = mRunAtNextTickOps;
+    auto const pos = std::find_if(ops.begin(), ops.end(), [&](const auto& item) {
+        return std::get<1>(item) == token;
+    });
+    if (pos != ops.end()) {
+        ops.erase(pos);
+        return true;
+    }
+    SYSTRACE_CONTEXT();
+    SYSTRACE_VALUE32("ShaderCompilerService Jobs", ops.size());
+    return false;
+}
+
+void ShaderCompilerService::executeTickOps() noexcept {
+    auto& ops = mRunAtNextTickOps;
+    auto it = ops.begin();
+    while (it != ops.end()) {
+        Job const& job = std::get<2>(*it);
+        bool const remove = job.fn(job);
+        if (remove) {
+            it = ops.erase(it);
+        } else {
+            ++it;
+        }
+    }
+    SYSTRACE_CONTEXT();
+    SYSTRACE_VALUE32("ShaderCompilerService Jobs", ops.size());
+}

-/*
- * Compile shaders in the ShaderSource. This cannot fail because compilation failures are not
- * checked until after the program is linked.
- * This always returns the GL shader IDs or zero a shader stage is not present.
- */
 /* static */ void ShaderCompilerService::compileShaders(OpenGLContext& context,
        Program::ShaderSource shadersSource,
-        utils::FixedCapacityVector<Program::SpecializationConstant> const& specializationConstants,
-        bool multiview,
-        shaders_t& outShaders,
-        UTILS_UNUSED_IN_RELEASE shaders_source_t& outShaderSourceCode) noexcept {
-
+        FixedCapacityVector<Program::SpecializationConstant> const& specializationConstants,
+        bool multiview, program_token_t const& token) noexcept {
    SYSTRACE_CALL();

-    auto appendSpecConstantString = +[](std::string& s, Program::SpecializationConstant const& sc) {
+    auto const appendSpecConstantString = +[](std::string& s, Program::SpecializationConstant const& sc) {
        s += "#define SPIRV_CROSS_CONSTANT_ID_" + std::to_string(sc.id) + ' ';
        s += std::visit([](auto&& arg) { return to_string(arg); }, sc.value);
        s += '\n';
@@ -558,7 +504,7 @@ GLuint ShaderCompilerService::initialize(program_token_t& token) noexcept {

    std::string specializationConstantString;
    int32_t numViews = 2;
-    for (auto const& sc : specializationConstants) {
+    for (auto const& sc: specializationConstants) {
        appendSpecConstantString(specializationConstantString, sc);
        if (sc.id == 8) {
            // This constant must match
@@ -596,7 +542,7 @@ GLuint ShaderCompilerService::initialize(program_token_t& token) noexcept {
        if (UTILS_LIKELY(!shadersSource[i].empty())) {
            Program::ShaderBlob& shader = shadersSource[i];
            char* shader_src = reinterpret_cast<char*>(shader.data());
-            size_t shader_len = shader.size();
+            size_t const shader_len = shader.size();

            // remove GOOGLE_cpp_style_line_directive
            process_GOOGLE_cpp_style_line_directive(context, shader_src, shader_len);
@@ -619,178 +565,191 @@ GLuint ShaderCompilerService::initialize(program_token_t& token) noexcept {
            }

            std::array<std::string_view, 5> sources = {
-                version,
-                prolog,
-                specializationConstantString,
-                packingFunctions,
-                { body.data(), body.size() - 1 }  // null-terminated
+                version, prolog, specializationConstantString, packingFunctions,
+                { body.data(), body.size() - 1 }// null-terminated
            };

            // Some of the sources may be zero-length. Remove them as to avoid passing lengths of
            // zero to glShaderSource(). glShaderSource should work with lengths of zero, but some
            // drivers instead interpret zero as a sentinel for a null-terminated string.
-            auto partitionPoint = std::stable_partition(
-                    sources.begin(), sources.end(), [](std::string_view s) { return !s.empty(); });
-            size_t count = std::distance(sources.begin(), partitionPoint);
+            auto const partitionPoint = std::stable_partition(sources.begin(), sources.end(),
+                    [](std::string_view s) { return !s.empty(); });
+            size_t const count = std::distance(sources.begin(), partitionPoint);

            std::array<const char*, 5> shaderStrings;
            std::array<GLint, 5> lengths;
-            for (size_t i = 0; i < count; i++) {
-                shaderStrings[i] = sources[i].data();
-                lengths[i] = sources[i].size();
+            for (size_t j = 0; j < count; j++) {
+                shaderStrings[j] = sources[j].data();
+                lengths[j] = GLint(sources[j].size());
            }

            GLuint const shaderId = glCreateShader(glShaderType);
-            glShaderSource(shaderId, count, shaderStrings.data(), lengths.data());
-
+            glShaderSource(shaderId, GLsizei(count), shaderStrings.data(), lengths.data());
            glCompileShader(shaderId);
-
 #ifndef NDEBUG
            // for debugging we return the original shader source (without the modifications we
            // made here), otherwise the line numbers wouldn't match.
-            outShaderSourceCode[i] = { shader_src, shader_len };
+            token->shaderSourceCode[i] = { shader_src, shader_len };
 #endif
-
-            outShaders[i] = shaderId;
+            token->gl.shaders[i] = shaderId;
        }
    }
 }

-/*
- * Create a program from the given shader IDs and links it. This cannot fail because errors
- * are checked later. This always returns a valid GL program ID (which doesn't mean the
- * program itself is valid).
- */
-/* static */ GLuint ShaderCompilerService::linkProgram(OpenGLContext& context,
-        shaders_t const& shaders,
-        utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> const& attributes) noexcept {
+/* static */ bool ShaderCompilerService::isCompileCompleted(program_token_t const& token) noexcept {
+    GLenum param = GL_COMPLETION_STATUS;
+    if (UTILS_UNLIKELY(token->compiler.mMode != Mode::ASYNCHRONOUS)) {
+        param = GL_COMPILE_STATUS;
+    }

+    for (auto shader: token->gl.shaders) {
+        if (!shader) {
+            continue;
+        }
+        GLint status;
+        glGetShaderiv(shader, param, &status);
+        if (status == GL_FALSE) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/* static */ void ShaderCompilerService::checkCompileStatus(program_token_t const& token) noexcept {
    SYSTRACE_CALL();

+    UTILS_NOUNROLL
+    for (size_t i = 0; i < Program::SHADER_TYPE_COUNT; i++) {
+        const GLuint shader = token->gl.shaders[i];
+        if (!shader) {
+            continue;// We're not using this shader stage.
+        }
+        // GL_COMPILE_STATUS may block until the compilation is completed.
+        GLint status;
+        glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
+        if (UTILS_LIKELY(status == GL_TRUE)) {
+            continue;// Succeeded in compilation.
+        }
+        // Something went wrong. Log the error message.
+        const ShaderStage type = static_cast<ShaderStage>(i);
+        logCompilationError(slog.e, type, token->name.c_str_safe(), shader,
+                token->shaderSourceCode[i]);
+    }
+}
+
+/* static */ void ShaderCompilerService::linkProgram(OpenGLContext const& context,
+        program_token_t const& token) noexcept {
+    SYSTRACE_CALL();
+
+    // Shader compilation should be completed by now. Check the status and log errors on failure.
+    checkCompileStatus(token);
+
+    // Link program
    GLuint const program = glCreateProgram();
-    for (auto shader : shaders) {
+    for (auto const shader: token->gl.shaders) {
        if (shader) {
            glAttachShader(program, shader);
        }
    }
-
    if (UTILS_UNLIKELY(context.isES2())) {
-        for (auto const& [ name, loc ] : attributes) {
+        for (auto const& [name, loc]: token->attributes) {
            glBindAttribLocation(program, loc, name.c_str());
        }
    }
-
    glLinkProgram(program);
-
-    return program;
+    token->gl.program = program;
 }

-// ------------------------------------------------------------------------------------------------
-
-void ShaderCompilerService::runAtNextTick(CompilerPriorityQueue priority,
-        const program_token_t& token, Job job) noexcept {
-    // insert items in order of priority and at the end of the range
-    auto& ops = mRunAtNextTickOps;
-    auto const pos = std::lower_bound(ops.begin(), ops.end(), priority,
-            [](ContainerType const& lhs, CompilerPriorityQueue priorityQueue) {
-                return std::get<0>(lhs) < priorityQueue;
-            });
-    ops.emplace(pos, priority, token, std::move(job));
-
-    SYSTRACE_CONTEXT();
-    SYSTRACE_VALUE32("ShaderCompilerService Jobs", mRunAtNextTickOps.size());
-}
-
-bool ShaderCompilerService::cancelTickOp(program_token_t token) noexcept {
-    // We do a linear search here, but this is rare, and we know the list is pretty small.
-    auto& ops = mRunAtNextTickOps;
-    auto pos = std::find_if(ops.begin(), ops.end(), [&](const auto& item) {
-        return std::get<1>(item) == token;
-    });
-    if (pos != ops.end()) {
-        ops.erase(pos);
-        return true;
-    }
-    SYSTRACE_CONTEXT();
-    SYSTRACE_VALUE32("ShaderCompilerService Jobs", ops.size());
-    return false;
-}
-
-void ShaderCompilerService::executeTickOps() noexcept {
-    auto& ops = mRunAtNextTickOps;
-    auto it = ops.begin();
-    while (it != ops.end()) {
-        Job const& job = std::get<2>(*it);
-        bool const remove = job.fn(job);
-        if (remove) {
-            it = ops.erase(it);
-        } else {
-            ++it;
-        }
-    }
-    SYSTRACE_CONTEXT();
-    SYSTRACE_VALUE32("ShaderCompilerService Jobs", ops.size());
-}
-
-// ------------------------------------------------------------------------------------------------
-
-/*
- * Checks a program link status and logs errors and frees resources on failure.
- * Returns true on success.
- */
-/* static */ bool ShaderCompilerService::checkProgramStatus(program_token_t const& token) noexcept {
-
-    SYSTRACE_CALL();
-
+/* static */ bool ShaderCompilerService::isLinkCompleted(program_token_t const& token) noexcept {
    assert_invariant(token->gl.program);

-    GLint status;
-    glGetProgramiv(token->gl.program, GL_LINK_STATUS, &status);
-    if (UTILS_LIKELY(status == GL_TRUE)) {
-        return true;
+    GLenum param = GL_COMPLETION_STATUS;
+    if (UTILS_UNLIKELY(token->compiler.mMode != Mode::ASYNCHRONOUS)) {
+        param = GL_LINK_STATUS;
    }

-    // only if the link fails, we check the compilation status
+    GLint status;
+    glGetProgramiv(token->gl.program, param, &status);
+    return (status == GL_TRUE);
+}
+
+/* static */ bool ShaderCompilerService::checkLinkStatusAndCleanupShaders(
+        program_token_t const& token) noexcept {
+    SYSTRACE_CALL();
+    assert_invariant(token->gl.program);
+
+    bool linked = true;
+    GLint status;
+    // GL_LINK_STATUS may block until the link is completed.
+    glGetProgramiv(token->gl.program, GL_LINK_STATUS, &status);
+    if (UTILS_UNLIKELY(status != GL_TRUE)) {
+        // Something went wrong. Log the error message.
+        logProgramLinkError(slog.e, token->name.c_str_safe(), token->gl.program);
+        linked = false;
+    }
+    // No need to keep the shaders around regardless of the result of the program linking.
    UTILS_NOUNROLL
-    for (size_t i = 0; i < Program::SHADER_TYPE_COUNT; i++) {
-        const ShaderStage type = static_cast<ShaderStage>(i);
-        const GLuint shader = token->gl.shaders[i];
+    for (GLuint& shader: token->gl.shaders) {
        if (shader) {
-            glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
-            if (status != GL_TRUE) {
-                logCompilationError(slog.e, type,
-                        token->name.c_str_safe(), shader, token->shaderSourceCode[i]);
-            }
            glDetachShader(token->gl.program, shader);
            glDeleteShader(shader);
-            token->gl.shaders[i] = 0;
+            shader = 0;
        }
    }
-    // log the link error as well
-    logProgramLinkError(slog.e, token->name.c_str_safe(), token->gl.program);
-    glDeleteProgram(token->gl.program);
-    token->gl.program = 0;
-    return false;
+    return linked;
+}
+
+/* static */ void ShaderCompilerService::tryCachingProgram(OpenGLBlobCache& cache,
+        OpenGLPlatform& platform, program_token_t const& token) noexcept {
+    if (!token->key || !token->gl.program) {
+        return; // Invalid params
+    }
+    GLint status = GL_FALSE;
+    glGetProgramiv(token->gl.program, GL_LINK_STATUS, &status);
+    if (status == GL_FALSE) {
+        return;// Link failure
+    }
+
+    cache.insert(platform, token->key, token->gl.program);
+}
+
+/* static */ void ShaderCompilerService::cleanupProgramAndShaders(
+        program_token_t const& token) noexcept {
+    for (GLuint& shader: token->gl.shaders) {
+        if (!shader) {
+            continue;
+        }
+        if (token->gl.program) {
+            glDetachShader(token->gl.program, shader);
+        }
+        glDeleteShader(shader);
+        shader = 0;
+    }
+    if (token->gl.program) {
+        glDeleteProgram(token->gl.program);
+        token->gl.program = 0;
+    }
 }

 // ------------------------------------------------------------------------------------------------

 UTILS_NOINLINE
 /* static */ void logCompilationError(io::ostream& out, ShaderStage shaderType, const char* name,
-        GLuint shaderId, UTILS_UNUSED_IN_RELEASE CString const& sourceCode) noexcept {
+        GLuint const shaderId, UTILS_UNUSED_IN_RELEASE CString const& sourceCode) noexcept {

-    auto to_string = [](ShaderStage type) -> const char* {
-        switch (type) {
-            case ShaderStage::VERTEX:
-                return "vertex";
-            case ShaderStage::FRAGMENT:
-                return "fragment";
-            case ShaderStage::COMPUTE:
-                return "compute";
-        }
-    };
+    { // scope for the temporary string storage
+        auto to_string = [](ShaderStage type) -> const char* {
+            switch (type) {
+                case ShaderStage::VERTEX:
+                    return "vertex";
+                case ShaderStage::FRAGMENT:
+                    return "fragment";
+                case ShaderStage::COMPUTE:
+                    return "compute";
+            }
+            return "unknown";
+        };

-    {// scope for the temporary string storage
        GLint length = 0;
        glGetShaderiv(shaderId, GL_INFO_LOG_LENGTH, &length);

@@ -837,8 +796,8 @@ UTILS_NOINLINE

 // If usages of the Google-style line directive are present, remove them, as some
 // drivers don't allow the quotation marks. This source modification happens in-place.
-/* static */ void process_GOOGLE_cpp_style_line_directive(OpenGLContext& context, char* source,
-        size_t len) noexcept {
+/* static */ void process_GOOGLE_cpp_style_line_directive(OpenGLContext const& context,
+        char* source, size_t len) noexcept {
    if (!context.ext.GOOGLE_cpp_style_line_directive) {
        if (UTILS_UNLIKELY(requestsGoogleLineDirectivesExtension({ source, len }))) {
            removeGoogleLineDirectives(source, len);// length is unaffected
@@ -850,13 +809,13 @@ UTILS_NOINLINE
 // necessary for OpenGL because OpenGL relies on the number specified in shader files to determine
 // the number of views, which is assumed as a single digit, for multiview.
 // This source modification happens in-place.
-/* static */ void process_OVR_multiview2(OpenGLContext& context, int32_t eyeCount, char* source,
-        size_t len) noexcept {
+/* static */ void process_OVR_multiview2(OpenGLContext const& context, int32_t const eyeCount,
+    char* source, size_t const len) noexcept {
    // We don't use regular expression in favor of performance.
    if (context.ext.OVR_multiview2) {
        const std::string_view shader{ source, len };
-        const std::string_view layout = "layout";
-        const std::string_view num_views = "num_views";
+        constexpr std::string_view layout = "layout";
+        constexpr std::string_view num_views = "num_views";
        size_t found = 0;
        while (true) {
            found = shader.find(layout, found);
@@ -1011,20 +970,20 @@ mediump vec4 unpackSnorm4x8(highp uint v) {
 // - extensions
 // - everything else
 /* static */ std::array<std::string_view, 3> splitShaderSource(std::string_view source) noexcept {
-    auto version_start = source.find("#version");
+    auto const version_start = source.find("#version");
    assert_invariant(version_start != std::string_view::npos);

-    auto version_eol = source.find('\n', version_start) + 1;
+    auto const version_eol = source.find('\n', version_start) + 1;
    assert_invariant(version_eol != std::string_view::npos);

-    auto prolog_start = version_eol;
+    auto const prolog_start = version_eol;
    auto prolog_eol = source.rfind("\n#extension");// last #extension line
    if (prolog_eol == std::string_view::npos) {
        prolog_eol = prolog_start;
    } else {
        prolog_eol = source.find('\n', prolog_eol + 1) + 1;
    }
-    auto body_start = prolog_eol;
+    auto const body_start = prolog_eol;

    std::string_view const version = source.substr(version_start, version_eol - version_start);
    std::string_view const prolog = source.substr(prolog_start, prolog_eol - prolog_start);
--- a/filament/backend/src/opengl/ShaderCompilerService.h
+++ b/filament/backend/src/opengl/ShaderCompilerService.h
@@ -24,23 +24,23 @@
 #include "OpenGLBlobCache.h"

 #include <backend/CallbackHandler.h>
+#include <backend/DriverEnums.h>
 #include <backend/Program.h>

 #include <utils/CString.h>
 #include <utils/FixedCapacityVector.h>
-#include <utils/Invocable.h>
 #include <utils/JobSystem.h>

-#include <atomic>
-#include <condition_variable>
-#include <deque>
+#include <array>
 #include <functional>
 #include <memory>
 #include <mutex>
-#include <thread>
+#include <tuple>
 #include <utility>
 #include <vector>

+#include <stdint.h>
+
 namespace filament::backend {

 class OpenGLDriver;
@@ -84,6 +84,7 @@ public:
    void tick();

    // Destroys a valid token and all associated resources. Used to "cancel" a program compilation.
+    // This function is not called if `initialize(token)` has already been invoked.
    static void terminate(program_token_t& token);

    // stores a user data pointer in the token
@@ -92,6 +93,12 @@ public:
    // retrieves the user data pointer stored in the token
    static void* getUserData(const program_token_t& token) noexcept;

+    // Issue one callback handle.
+    CallbackManager::Handle issueCallbackHandle() const noexcept;
+
+    // Return a callback handle to the callback manager.
+    void submitCallbackHandle(CallbackManager::Handle handle) noexcept;
+
    // call the callback when all active programs are ready
    void notifyWhenAllProgramsAreReady(
            CallbackHandler* handler, CallbackHandler::Callback callback, void* user);
@@ -99,7 +106,7 @@ public:
 private:
    struct Job {
        template<typename FUNC>
-        Job(FUNC&& fn) : fn(std::forward<FUNC>(fn)) {}
+        Job(FUNC&& fn) : fn(std::forward<FUNC>(fn)) {} // NOLINT(*-explicit-constructor)
        Job(std::function<bool(Job const& job)> fn,
                CallbackHandler* handler, void* user, CallbackHandler::Callback callback)
                : fn(std::move(fn)), handler(handler), user(user), callback(callback) {
@@ -128,26 +135,49 @@ private:
    using ContainerType = std::tuple<CompilerPriorityQueue, program_token_t, Job>;
    std::vector<ContainerType> mRunAtNextTickOps;

-    GLuint initialize(ShaderCompilerService::program_token_t& token) noexcept;
+    GLuint initialize(program_token_t& token);
+    void ensureTokenIsReady(program_token_t const& token);

-    static void getProgramFromCompilerPool(program_token_t& token) noexcept;
-
-    static void compileShaders(
-            OpenGLContext& context,
-            Program::ShaderSource shadersSource,
-            utils::FixedCapacityVector<Program::SpecializationConstant> const& specializationConstants,
-            bool multiview, shaders_t& outShaders, shaders_source_t& outShaderSourceCode) noexcept;
-
-    static GLuint linkProgram(OpenGLContext& context, shaders_t const& shaders,
-            utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> const& attributes) noexcept;
-
-    static bool checkProgramStatus(program_token_t const& token) noexcept;
-
-    void runAtNextTick(CompilerPriorityQueue priority,
-            const program_token_t& token, Job job) noexcept;
+    void runAtNextTick(CompilerPriorityQueue priority, program_token_t const& token,
+            Job job) noexcept;
    void executeTickOps() noexcept;
-    bool cancelTickOp(program_token_t token) noexcept;
-    // order of insertion is important
+    bool cancelTickOp(program_token_t const& token) noexcept;
+
+    // Compile shaders with the given `shadersSource`. `gl.shaders` is populated with a shader ID
+    // for every non-empty stage after this method. But this doesn't necessarily mean the shaders
+    // are successfully compiled. Errors can be checked by calling `checkCompileStatus` later.
+    static void compileShaders(OpenGLContext& context, Program::ShaderSource shadersSource,
+            utils::FixedCapacityVector<Program::SpecializationConstant> const&
+                    specializationConstants,
+            bool multiview, program_token_t const& token) noexcept;
+
+    // Check if the shader compilation is completed. You may want to call this when the extension
+    // `KHR_parallel_shader_compile` is enabled.
+    static bool isCompileCompleted(program_token_t const& token) noexcept;
+
+    // Check compilation status of the shaders and log errors on failure.
+    static void checkCompileStatus(program_token_t const& token) noexcept;
+
+    // Create a program by linking the compiled shaders. `gl.program` is always populated with a
+    // valid program ID after this method. But this doesn't necessarily mean the program is
+    // successfully linked. Errors can be checked by calling `checkLinkStatusAndCleanupShaders`
+    // later.
+    static void linkProgram(OpenGLContext const& context, program_token_t const& token) noexcept;
+
+    // Check if the program link is completed. You may want to call this when the extension
+    // `KHR_parallel_shader_compile` is enabled.
+    static bool isLinkCompleted(program_token_t const& token) noexcept;
+
+    // Check link status of the program and log errors on failure. Return the result of the link.
+    // Also clean up shaders regardless of the result.
+    static bool checkLinkStatusAndCleanupShaders(program_token_t const& token) noexcept;
+
+    // Try caching the program if we haven't done it yet. Cache it only when the program is valid.
+    static void tryCachingProgram(OpenGLBlobCache& cache, OpenGLPlatform& platform,
+            program_token_t const& token) noexcept;
+
+    // Clean up GL resources (shaders and program) held by the token.
+    static void cleanupProgramAndShaders(program_token_t const& token) noexcept;
 };

 } // namespace filament::backend