vk: fix leaks for descriptor sets/layouts

vk: Use new descriptor set caching
- Use new descriptor set and layout caching - Remove descriptor set related code in VulkanPipelineCache FIXES=248594812,325157400
2024-04-09 11:36:57 -07:00 · 2024-04-09 11:36:50 -07:00 · 2024-04-08 20:49:02 +00:00 · 2024-04-05 17:50:07 -07:00 · 2024-04-05 17:49:56 -07:00 · 2024-04-04 10:15:43 -07:00
307 changed files with 11829 additions and 5026 deletions
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -129,7 +129,7 @@ jobs:
      - name: Sign sample-gltf-viewer
        run: |
          echo "${APK_KEYSTORE_BASE64}" > filament.jks.base64
-          base64 --decode filament.jks.base64 > filament.jks
+          base64 --decode -i filament.jks.base64 > filament.jks
          BUILD_TOOLS_VERSION=$(ls ${ANDROID_HOME}/build-tools | sort -V | tail -n 1)
          APKSIGNER=${ANDROID_HOME}/build-tools/${BUILD_TOOLS_VERSION}/apksigner
          IN_FILE="out/sample-gltf-viewer-release.apk"
@@ -205,7 +205,7 @@ jobs:
          TAG: ${{ steps.git_ref.outputs.tag }}
        run: |
          build\windows\build-github.bat release
-          move out\filament-windows.tgz out\filament-$Env:TAG-windows.tgz
+          move out\filament-windows.tgz out\filament-%TAG%-windows.tgz
        shell: cmd
      - uses: actions/github-script@v6
        env:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,6 +45,8 @@ option(FILAMENT_ENABLE_TSAN "Enable Thread Sanitizer" OFF)

 option(FILAMENT_ENABLE_FEATURE_LEVEL_0 "Enable Feature Level 0" ON)

+option(FILAMENT_ENABLE_MULTIVIEW "Enable multiview for Filament" OFF)
+
 set(FILAMENT_NDK_VERSION "" CACHE STRING
    "Android NDK version or version prefix to be used when building for Android."
 )
@@ -531,6 +533,21 @@ else()
    option(FILAMENT_DISABLE_MATOPT "Disable material optimizations" ON)
 endif()

+# Stereoscopic type ("instanced" or "multiview") of the prebuilt shader files in gltfio and samples.
+# This does not affect the filament library. Allowed values are published via the STRINGS property.
+set(FILAMENT_SAMPLES_STEREO_TYPE "instanced" CACHE STRING
+    "Stereoscopic type that shader files in gltfio and samples are built for.")
+set_property(CACHE FILAMENT_SAMPLES_STEREO_TYPE PROPERTY STRINGS "instanced" "multiview")
+string(TOLOWER "${FILAMENT_SAMPLES_STEREO_TYPE}" FILAMENT_SAMPLES_STEREO_TYPE)
+if (NOT FILAMENT_SAMPLES_STEREO_TYPE STREQUAL "instanced" AND NOT FILAMENT_SAMPLES_STEREO_TYPE STREQUAL "multiview")
+    message(FATAL_ERROR "Invalid stereo type: \"${FILAMENT_SAMPLES_STEREO_TYPE}\"; choose either \"instanced\" or \"multiview\".")
+endif ()
+
+# Compiling samples for multiview implies enabling multiview feature as well.
+if (FILAMENT_SAMPLES_STEREO_TYPE STREQUAL "multiview")
+    set(FILAMENT_ENABLE_MULTIVIEW ON)
+endif ()
+
 # ==================================================================================================
 # Material compilation flags
 # ==================================================================================================
@@ -706,7 +723,7 @@ function(get_resgen_vars ARCHIVE_DIR ARCHIVE_NAME)
        set(RESGEN_OUTPUTS "${OUTPUTS}" PARENT_SCOPE)
        set(RESGEN_FLAGS -qx ${ARCHIVE_DIR} -p ${ARCHIVE_NAME} PARENT_SCOPE)
        set(RESGEN_SOURCE "${ARCHIVE_DIR}/${ARCHIVE_NAME}${ASM_SUFFIX}.S" PARENT_SCOPE)
-        set(RESGEN_SOURCE_FLAGS "-I${ARCHIVE_DIR} ${ASM_ARCH_FLAG}" PARENT_SCOPE)
+        set(RESGEN_SOURCE_FLAGS "-I'${ARCHIVE_DIR}' ${ASM_ARCH_FLAG}" PARENT_SCOPE)
    endif()
 endfunction()

--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ repositories {
 }

 dependencies {
-    implementation 'com.google.android.filament:filament-android:1.50.1'
+    implementation 'com.google.android.filament:filament-android:1.51.3'
 }
 ```

@@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`:
 iOS projects can use CocoaPods to install the latest release:

 ```shell
-pod 'Filament', '~> 1.50.1'
+pod 'Filament', '~> 1.51.3'
 ```

 ### Snapshots
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -7,6 +7,42 @@ A new header is inserted each time a *tag* is created.
 Instead, if you are authoring a PR for the main branch, add your release note to
 [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md).

+## v1.51.4
+
+
+## v1.51.3
+
+
+## v1.51.2
+
+- engine: Add experimental APIs `Engine::builder::paused()` and `Engine::setPaused()`
+
+## v1.51.1
+
+
+## v1.51.0
+
+- materials: add support for post-lighting mix factor (b/328498606) [⚠️ **New Material Version**]
+
+## v1.50.6
+
+- Add new API `SwapChain::getFrameScheduledCallback`
+- vulkan: fixed validation error VUID-vkAcquireNextImageKHR-semaphore-01779
+- opengl: Add support for protected content swapchains and contexts
+
+## v1.50.5
+
+- android: NDK 26.1.10909125 is used by default
+- android: Minimum API level on Android is now API 21 instead of API 19. This allows the use of OpenGL ES 3.1
+- rendering: New PBR Neutral tone mapper, designed to preserve materials color appearance
+- android: Change default frameRateOptions.interval to 1.0
+
+## v1.50.4
+
+
+## v1.50.3
+
+
 ## v1.50.2


--- a/android/build.gradle
+++ b/android/build.gradle
@@ -80,7 +80,7 @@ buildscript {

    ext.versions = [
        'jdk': 17,
-        'minSdk': 19,
+        'minSdk': 21,
        'targetSdk': 34,
        'compileSdk': 34,
        'kotlin': '1.9.21',
--- a/android/filament-android/src/main/cpp/Engine.cpp
+++ b/android/filament-android/src/main/cpp/Engine.cpp
@@ -391,6 +391,13 @@ Java_com_google_android_filament_Engine_nFlush(JNIEnv*, jclass,
    engine->flush();
 }

+extern "C" JNIEXPORT void JNICALL
+Java_com_google_android_filament_Engine_nSetPaused(JNIEnv*, jclass,
+        jlong nativeEngine, jboolean paused) {
+    Engine* engine = (Engine*) nativeEngine;
+    engine->setPaused(paused);
+}
+
 // Managers...

 extern "C" JNIEXPORT jlong JNICALL
@@ -484,7 +491,11 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
 extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBuilderConfig(JNIEnv*,
        jclass, jlong nativeBuilder, jlong commandBufferSizeMB, jlong perRenderPassArenaSizeMB,
        jlong driverHandleArenaSizeMB, jlong minCommandBufferSizeMB, jlong perFrameCommandsSizeMB,
-        jlong jobSystemThreadCount, jlong stereoscopicEyeCount) {
+        jlong jobSystemThreadCount,
+        jlong textureUseAfterFreePoolSize, jboolean disableParallelShaderCompile,
+        jint stereoscopicType, jlong stereoscopicEyeCount,
+        jlong resourceAllocatorCacheSizeMB, jlong resourceAllocatorCacheMaxAge,
+        jboolean disableHandleUseAfterFreeCheck) {
    Engine::Builder* builder = (Engine::Builder*) nativeBuilder;
    Engine::Config config = {
            .commandBufferSizeMB = (uint32_t) commandBufferSizeMB,
@@ -493,7 +504,13 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
            .minCommandBufferSizeMB = (uint32_t) minCommandBufferSizeMB,
            .perFrameCommandsSizeMB = (uint32_t) perFrameCommandsSizeMB,
            .jobSystemThreadCount = (uint32_t) jobSystemThreadCount,
+            .textureUseAfterFreePoolSize = (uint32_t) textureUseAfterFreePoolSize,
+            .disableParallelShaderCompile = (bool) disableParallelShaderCompile,
+            .stereoscopicType = (Engine::StereoscopicType) stereoscopicType,
            .stereoscopicEyeCount = (uint8_t) stereoscopicEyeCount,
+            .resourceAllocatorCacheSizeMB = (uint32_t) resourceAllocatorCacheSizeMB,
+            .resourceAllocatorCacheMaxAge = (uint8_t) resourceAllocatorCacheMaxAge,
+            .disableHandleUseAfterFreeCheck = (bool) disableHandleUseAfterFreeCheck,
    };
    builder->config(&config);
 }
@@ -510,6 +527,12 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
    builder->sharedContext((void*) sharedContext);
 }

+extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBuilderPaused(
+        JNIEnv*, jclass, jlong nativeBuilder, jboolean paused) {
+    Engine::Builder* builder = (Engine::Builder*) nativeBuilder;
+    builder->paused((bool) paused);
+}
+
 extern "C" JNIEXPORT jlong JNICALL
 Java_com_google_android_filament_Engine_nBuilderBuild(JNIEnv*, jclass, jlong nativeBuilder) {
    Engine::Builder* builder = (Engine::Builder*) nativeBuilder;
--- a/android/filament-android/src/main/cpp/RenderableManager.cpp
+++ b/android/filament-android/src/main/cpp/RenderableManager.cpp
@@ -104,6 +104,14 @@ Java_com_google_android_filament_RenderableManager_nBuilderGeometry__JIIJJIIII(J
            (size_t) count);
 }

+// Sets the geometry type on the Builder; `type` is the ordinal of the Java GeometryType enum.
+extern "C" JNIEXPORT void JNICALL
+Java_com_google_android_filament_RenderableManager_nBuilderGeometryType(JNIEnv*, jclass,
+        jlong nativeBuilder, jint type) {
+    RenderableManager::Builder *builder = (RenderableManager::Builder *) nativeBuilder;
+    builder->geometryType((RenderableManager::Builder::GeometryType)type);
+}
+
 extern "C"
 JNIEXPORT void JNICALL
 Java_com_google_android_filament_RenderableManager_nBuilderMaterial(JNIEnv*, jclass,
--- a/android/filament-android/src/main/cpp/SwapChain.cpp
+++ b/android/filament-android/src/main/cpp/SwapChain.cpp
@@ -34,7 +34,15 @@ Java_com_google_android_filament_SwapChain_nSetFrameCompletedCallback(JNIEnv* en
 }

 extern "C" JNIEXPORT jboolean JNICALL
-Java_com_google_android_filament_SwapChain_nIsSRGBSwapChainSupported(JNIEnv *, jclass, jlong nativeEngine) {
+Java_com_google_android_filament_SwapChain_nIsSRGBSwapChainSupported(
+        JNIEnv *, jclass, jlong nativeEngine) {
    Engine* engine = (Engine*) nativeEngine;
-    return (bool)SwapChain::isSRGBSwapChainSupported(*engine);
+    return (jboolean)SwapChain::isSRGBSwapChainSupported(*engine);
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_SwapChain_nIsProtectedContentSupported(
+        JNIEnv *, jclass, jlong nativeEngine) {
+    Engine* engine = (Engine*) nativeEngine;
+    return (jboolean)SwapChain::isProtectedContentSupported(*engine);
 }
--- a/android/filament-android/src/main/cpp/ToneMapper.cpp
+++ b/android/filament-android/src/main/cpp/ToneMapper.cpp
@@ -47,6 +47,11 @@ Java_com_google_android_filament_ToneMapper_nCreateFilmicToneMapper(JNIEnv*, jcl
    return (jlong) new FilmicToneMapper();
 }

+extern "C" JNIEXPORT jlong JNICALL
+Java_com_google_android_filament_ToneMapper_nCreatePBRNeutralToneMapper(JNIEnv*, jclass) {
+    return (jlong) new PBRNeutralToneMapper();
+}
+
 extern "C" JNIEXPORT jlong JNICALL
 Java_com_google_android_filament_ToneMapper_nCreateAgxToneMapper(JNIEnv*, jclass, jint look) {
    return (jlong) new AgxToneMapper(AgxToneMapper::AgxLook(look));
--- a/android/filament-android/src/main/java/com/google/android/filament/Engine.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/Engine.java
@@ -158,6 +158,16 @@ public class Engine {
        FEATURE_LEVEL_3,
    };

+    /**
+     * The type of technique for stereoscopic rendering
+     */
+    public enum StereoscopicType {
+        /** Stereoscopic rendering is performed using instanced rendering technique. */
+        INSTANCED,
+        /** Stereoscopic rendering is performed using the multiview feature from the graphics backend. */
+        MULTIVIEW,
+    };
+
    /**
     * Constructs <code>Engine</code> objects using a builder pattern.
     */
@@ -211,7 +221,11 @@ public class Engine {
            nSetBuilderConfig(mNativeBuilder, config.commandBufferSizeMB,
                    config.perRenderPassArenaSizeMB, config.driverHandleArenaSizeMB,
                    config.minCommandBufferSizeMB, config.perFrameCommandsSizeMB,
-                    config.jobSystemThreadCount, config.stereoscopicEyeCount);
+                    config.jobSystemThreadCount,
+                    config.textureUseAfterFreePoolSize, config.disableParallelShaderCompile,
+                    config.stereoscopicType.ordinal(), config.stereoscopicEyeCount,
+                    config.resourceAllocatorCacheSizeMB, config.resourceAllocatorCacheMaxAge,
+                    config.disableHandleUseAfterFreeCheck);
            return this;
        }

@@ -226,6 +240,20 @@ public class Engine {
            return this;
        }

+        /**
+         * Sets the initial paused state of the rendering thread.
+         *
+         * <p>Warning: This is an experimental API. See {@link Engine#setPaused(boolean)} for
+         * caveats.
+         *
+         * @param paused Whether to start the rendering thread paused.
+         * @return A reference to this Builder for chaining calls.
+         */
+        public Builder paused(boolean paused) {
+            nSetBuilderPaused(mNativeBuilder, paused);
+            return this;
+        }
+
        /**
         * Creates an instance of Engine
         *
@@ -348,6 +376,35 @@ public class Engine {
         */
        public long jobSystemThreadCount = 0;

+        /**
+         * Number of most-recently destroyed textures to track for use-after-free.
+         *
+         * This will cause the backend to throw an exception when a texture is freed but still bound
+         * to a SamplerGroup and used in a draw call. 0 disables completely.
+         *
+         * Currently only respected by the Metal backend.
+         */
+        public long textureUseAfterFreePoolSize = 0;
+
+        /**
+         * Set to `true` to forcibly disable parallel shader compilation in the backend.
+         * Currently only honored by the GL backend.
+         */
+        public boolean disableParallelShaderCompile = false;
+
+        /**
+         * The type of technique for stereoscopic rendering.
+         *
+         * This setting determines the algorithm used when stereoscopic rendering is enabled. This
+         * decision applies to the entire Engine for the lifetime of the Engine. E.g., multiple
+         * Views created from the Engine must use the same stereoscopic type.
+         *
+         * Each view can enable stereoscopic rendering via the StereoscopicOptions::enable flag.
+         *
+         * @see View#setStereoscopicOptions
+         */
+        public StereoscopicType stereoscopicType = StereoscopicType.INSTANCED;
+
        /**
         * The number of eyes to render when stereoscopic rendering is enabled. Supported values are
         * between 1 and Engine#getMaxStereoscopicEyes() (inclusive).
@@ -356,6 +413,21 @@ public class Engine {
         * @see Engine#getMaxStereoscopicEyes
         */
        public long stereoscopicEyeCount = 2;
+
+        /*
+         * @Deprecated This value is no longer used.
+         */
+        public long resourceAllocatorCacheSizeMB = 64;
+
+        /*
+         * This value determines how many frames texture entries are kept in the cache.
+         */
+        public long resourceAllocatorCacheMaxAge = 2;
+
+        /*
+         * Disable backend handles use-after-free checks.
+         */
+        public boolean disableHandleUseAfterFreeCheck = false;
    }

    private Engine(long nativeEngine, Config config) {
@@ -602,7 +674,7 @@ public class Engine {
     */
    @NonNull
    public SwapChain createSwapChain(@NonNull Object surface) {
-        return createSwapChain(surface, SwapChain.CONFIG_DEFAULT);
+        return createSwapChain(surface, SwapChainFlags.CONFIG_DEFAULT);
    }

    /**
@@ -610,15 +682,15 @@ public class Engine {
     *
     * @param surface on Android, <b>must be</b> an instance of {@link android.view.Surface}
     *
-     * @param flags configuration flags, see {@link SwapChain}
+     * @param flags configuration flags, see {@link SwapChainFlags}
     *
     * @return a newly created {@link SwapChain} object
     *
     * @exception IllegalStateException can be thrown if the SwapChain couldn't be created
     *
-     * @see SwapChain#CONFIG_DEFAULT
-     * @see SwapChain#CONFIG_TRANSPARENT
-     * @see SwapChain#CONFIG_READABLE
+     * @see SwapChainFlags#CONFIG_DEFAULT
+     * @see SwapChainFlags#CONFIG_TRANSPARENT
+     * @see SwapChainFlags#CONFIG_READABLE
     *
     */
    @NonNull
@@ -636,21 +708,22 @@ public class Engine {
     *
     * @param width  width of the rendering buffer
     * @param height height of the rendering buffer
-     * @param flags  configuration flags, see {@link SwapChain}
+     * @param flags  configuration flags, see {@link SwapChainFlags}
     *
     * @return a newly created {@link SwapChain} object
     *
     * @exception IllegalStateException can be thrown if the SwapChain couldn't be created
     *
-     * @see SwapChain#CONFIG_DEFAULT
-     * @see SwapChain#CONFIG_TRANSPARENT
-     * @see SwapChain#CONFIG_READABLE
+     * @see SwapChainFlags#CONFIG_DEFAULT
+     * @see SwapChainFlags#CONFIG_TRANSPARENT
+     * @see SwapChainFlags#CONFIG_READABLE
     *
     */
    @NonNull
    public SwapChain createSwapChain(int width, int height, long flags) {
        if (width >= 0 && height >= 0) {
-            long nativeSwapChain = nCreateSwapChainHeadless(getNativeObject(), width, height, flags);
+            long nativeSwapChain =
+                nCreateSwapChainHeadless(getNativeObject(), width, height, flags);
            if (nativeSwapChain == 0) throw new IllegalStateException("Couldn't create SwapChain");
            return new SwapChain(nativeSwapChain, null);
        }
@@ -662,11 +735,12 @@ public class Engine {
     *
     * @param surface a properly initialized {@link NativeSurface}
     *
-     * @param flags configuration flags, see {@link SwapChain}
+     * @param flags configuration flags, see {@link SwapChainFlags}
     *
     * @return a newly created {@link SwapChain} object
     *
-     * @exception IllegalStateException can be thrown if the {@link SwapChain} couldn't be created
+     * @exception IllegalStateException can be thrown if the {@link SwapChain} couldn't be
+     *            created
     */
    @NonNull
    public SwapChain createSwapChainFromNativeSurface(@NonNull NativeSurface surface, long flags) {
@@ -1135,6 +1209,22 @@ public class Engine {
        nFlush(getNativeObject());
    }

+    /**
+     * Pause or resume the rendering thread.
+     *
+     * <p>Warning: This is an experimental API. In particular, note the following caveats.
+     *
+     * <ul><li>
+     * Buffer callbacks will never be called as long as the rendering thread is paused.
+     * Do not rely on a buffer callback to unpause the thread.
+     * </li><li>
+     * While the rendering thread is paused, rendering commands will continue to be queued until the
+     * buffer limit is reached. When the limit is reached, the program will abort.
+     * </li></ul>
+     */
+    public void setPaused(boolean paused) {
+        nSetPaused(getNativeObject(), paused);
+    }

    @UsedByReflection("TextureHelper.java")
    public long getNativeObject() {
@@ -1209,6 +1299,7 @@ public class Engine {
    private static native void nDestroyEntity(long nativeEngine, int entity);
    private static native void nFlushAndWait(long nativeEngine);
    private static native void nFlush(long nativeEngine);
+    private static native void nSetPaused(long nativeEngine, boolean paused);
    private static native long nGetTransformManager(long nativeEngine);
    private static native long nGetLightManager(long nativeEngine);
    private static native long nGetRenderableManager(long nativeEngine);
@@ -1227,8 +1318,12 @@ public class Engine {
    private static native void nSetBuilderConfig(long nativeBuilder, long commandBufferSizeMB,
            long perRenderPassArenaSizeMB, long driverHandleArenaSizeMB,
            long minCommandBufferSizeMB, long perFrameCommandsSizeMB, long jobSystemThreadCount,
-            long stereoscopicEyeCount);
+            long textureUseAfterFreePoolSize, boolean disableParallelShaderCompile,
+            int stereoscopicType, long stereoscopicEyeCount,
+            long resourceAllocatorCacheSizeMB, long resourceAllocatorCacheMaxAge,
+            boolean disableHandleUseAfterFreeCheck);
    private static native void nSetBuilderFeatureLevel(long nativeBuilder, int ordinal);
    private static native void nSetBuilderSharedContext(long nativeBuilder, long sharedContext);
+    private static native void nSetBuilderPaused(long nativeBuilder, boolean paused);
    private static native long nBuilderBuild(long nativeBuilder);
 }
--- a/android/filament-android/src/main/java/com/google/android/filament/RenderableManager.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/RenderableManager.java
@@ -175,6 +175,32 @@ public class RenderableManager {
            return this;
        }

+        /**
+         * Type of geometry for a Renderable
+         */
+        public enum GeometryType {
+            /** dynamic geometry has no restriction */
+            DYNAMIC,
+            /** bounds and world space transform are immutable */
+            STATIC_BOUNDS,
+            /** skinning/morphing not allowed and Vertex/IndexBuffer are immutable */
+            STATIC
+        }
+
+        /**
+         * Specifies the geometry type of this renderable. With {@link GeometryType#DYNAMIC}
+         * there is no restriction. With {@link GeometryType#STATIC_BOUNDS} the renderable's
+         * bounds and world space transform are assumed immutable; changing the transform via
+         * the TransformManager can lead to corrupted graphics. With {@link GeometryType#STATIC}
+         * skinning and morphing are not allowed and the Vertex/IndexBuffer are immutable.
+         * @param type the geometry type of this renderable.
+         */
+        @NonNull
+        public Builder geometryType(GeometryType type) {
+            nBuilderGeometryType(mNativeBuilder, type.ordinal());
+            return this;
+        }
+
        /**
         * Binds a material instance to the specified primitive.
         *
@@ -964,6 +990,7 @@ public class RenderableManager {
    private static native void nBuilderGeometry(long nativeBuilder, int index, int value, long nativeVertexBuffer, long nativeIndexBuffer);
    private static native void nBuilderGeometry(long nativeBuilder, int index, int value, long nativeVertexBuffer, long nativeIndexBuffer, int offset, int count);
    private static native void nBuilderGeometry(long nativeBuilder, int index, int value, long nativeVertexBuffer, long nativeIndexBuffer, int offset, int minIndex, int maxIndex, int count);
+    private static native void nBuilderGeometryType(long nativeBuilder, int type);
    private static native void nBuilderMaterial(long nativeBuilder, int index, long nativeMaterialInstance);
    private static native void nBuilderBlendOrder(long nativeBuilder, int index, int blendOrder);
    private static native void nBuilderGlobalBlendOrderEnabled(long nativeBuilder, int index, boolean enabled);
--- a/android/filament-android/src/main/java/com/google/android/filament/Renderer.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/Renderer.java
@@ -101,7 +101,7 @@ public class Renderer {
        /**
         * Desired frame interval in unit of 1 / DisplayInfo.refreshRate.
         */
-        public float interval = 1.0f / 60.0f;
+        public float interval = 1.0f;

        /**
         * Additional headroom for the GPU as a ratio of the targetFrameTime.
--- a/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/SwapChain.java
@@ -68,77 +68,30 @@ public class SwapChain {
    private final Object mSurface;
    private long mNativeObject;

-    public static final long CONFIG_DEFAULT = 0x0;
-
-    /**
-     * This flag indicates that the <code>SwapChain</code> must be allocated with an
-     * alpha-channel.
-     */
-    public static final long CONFIG_TRANSPARENT = 0x1;
-
-    /**
-     * This flag indicates that the <code>SwapChain</code> may be used as a source surface
-     * for reading back render results.  This config must be set when creating
-     * any <code>SwapChain</code>  that will be used as the source for a blit operation.
-     *
-     * @see Renderer#copyFrame
-     */
-    public static final long CONFIG_READABLE = 0x2;
-
-    /**
-     * Indicates that the native X11 window is an XCB window rather than an XLIB window.
-     * This is ignored on non-Linux platforms and in builds that support only one X11 API.
-     */
-    public static final long CONFIG_ENABLE_XCB = 0x4;
-
-    /**
-     * Indicates that the SwapChain must automatically perform linear to sRGB encoding.
-     *
-     * This flag is ignored if isSRGBSwapChainSupported() is false.
-     *
-     * When using this flag, post-processing should be disabled.
-     *
-     * @see SwapChain#isSRGBSwapChainSupported
-     * @see View#setPostProcessingEnabled
-     */
-    public static final long CONFIG_SRGB_COLORSPACE = 0x10;
-
-    /**
-     * Indicates that this SwapChain should allocate a stencil buffer in addition to a depth buffer.
-     *
-     * This flag is necessary when using View::setStencilBufferEnabled and rendering directly into
-     * the SwapChain (when post-processing is disabled).
-     *
-     * The specific format of the stencil buffer depends on platform support. The following pixel
-     * formats are tried, in order of preference:
-     *
-     * Depth only (without CONFIG_HAS_STENCIL_BUFFER):
-     * - DEPTH32F
-     * - DEPTH24
-     *
-     * Depth + stencil (with CONFIG_HAS_STENCIL_BUFFER):
-     * - DEPTH32F_STENCIL8
-     * - DEPTH24F_STENCIL8
-     *
-     * Note that enabling the stencil buffer may hinder depth precision and should only be used if
-     * necessary.
-     *
-     * @see View#setStencilBufferEnabled
-     * @see View#setPostProcessingEnabled
-     */
-    public static final long CONFIG_HAS_STENCIL_BUFFER = 0x20;
-
    SwapChain(long nativeSwapChain, Object surface) {
        mNativeObject = nativeSwapChain;
        mSurface = surface;
    }

    /**
-     * Return whether createSwapChain supports the SWAP_CHAIN_CONFIG_SRGB_COLORSPACE flag.
+     * Return whether createSwapChain supports the CONFIG_PROTECTED_CONTENT flag.
     * The default implementation returns false.
     *
     * @param engine A reference to the filament Engine
-     * @return true if SWAP_CHAIN_CONFIG_SRGB_COLORSPACE is supported, false otherwise.
+     * @return true if CONFIG_PROTECTED_CONTENT is supported, false otherwise.
+     * @see SwapChainFlags#CONFIG_PROTECTED_CONTENT
+     */
+    public static boolean isProtectedContentSupported(@NonNull Engine engine) {
+        return nIsProtectedContentSupported(engine.getNativeObject());
+    }
+
+    /**
+     * Return whether createSwapChain supports the CONFIG_SRGB_COLORSPACE flag.
+     * The default implementation returns false.
+     *
+     * @param engine A reference to the filament Engine
+     * @return true if CONFIG_SRGB_COLORSPACE is supported, false otherwise.
+     * @see SwapChainFlags#CONFIG_SRGB_COLORSPACE
     */
    public static boolean isSRGBSwapChainSupported(@NonNull Engine engine) {
        return nIsSRGBSwapChainSupported(engine.getNativeObject());
@@ -186,4 +139,5 @@ public class SwapChain {

    private static native void nSetFrameCompletedCallback(long nativeSwapChain, Object handler, Runnable callback);
    private static native boolean nIsSRGBSwapChainSupported(long nativeEngine);
+    private static native boolean nIsProtectedContentSupported(long nativeEngine);
 }
--- a/android/filament-android/src/main/java/com/google/android/filament/SwapChainFlags.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/SwapChainFlags.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.android.filament;
+
+// Note: SwapChainFlags is kept separate from SwapChain so that UiHelper does not need to depend
+// on SwapChain. This allows clients to use UiHelper without requiring all of Filament's Java
+// classes.
+
+/**
+ * Flags that a <code>SwapChain</code> can be created with to control behavior.
+ *
+ * @see Engine#createSwapChain
+ * @see Engine#createSwapChainFromNativeSurface
+ */
+public final class SwapChainFlags {
+
+    public static final long CONFIG_DEFAULT = 0x0;
+
+    /**
+     * This flag indicates that the <code>SwapChain</code> must be allocated with an
+     * alpha-channel.
+     */
+    public static final long CONFIG_TRANSPARENT = 0x1;
+
+    /**
+     * This flag indicates that the <code>SwapChain</code> may be used as a source surface
+     * for reading back render results.  This config must be set when creating
+     * any <code>SwapChain</code>  that will be used as the source for a blit operation.
+     *
+     * @see Renderer#copyFrame
+     */
+    public static final long CONFIG_READABLE = 0x2;
+
+    /**
+     * Indicates that the native X11 window is an XCB window rather than an XLIB window.
+     * This is ignored on non-Linux platforms and in builds that support only one X11 API.
+     */
+    public static final long CONFIG_ENABLE_XCB = 0x4;
+
+    /**
+     * Indicates that the SwapChain must automatically perform linear to sRGB encoding.
+     *
+     * This flag is ignored if isSRGBSwapChainSupported() is false.
+     *
+     * When using this flag, post-processing should be disabled.
+     *
+     * @see SwapChain#isSRGBSwapChainSupported
+     * @see View#setPostProcessingEnabled
+     */
+    public static final long CONFIG_SRGB_COLORSPACE = 0x10;
+
+    /**
+     * Indicates that this SwapChain should allocate a stencil buffer in addition to a depth buffer.
+     *
+     * This flag is necessary when using View::setStencilBufferEnabled and rendering directly into
+     * the SwapChain (when post-processing is disabled).
+     *
+     * The specific format of the stencil buffer depends on platform support. The following pixel
+     * formats are tried, in order of preference:
+     *
+     * Depth only (without CONFIG_HAS_STENCIL_BUFFER):
+     * - DEPTH32F
+     * - DEPTH24
+     *
+     * Depth + stencil (with CONFIG_HAS_STENCIL_BUFFER):
+     * - DEPTH32F_STENCIL8
+     * - DEPTH24F_STENCIL8
+     *
+     * Note that enabling the stencil buffer may hinder depth precision and should only be used if
+     * necessary.
+     *
+     * @see View#setStencilBufferEnabled
+     * @see View#setPostProcessingEnabled
+     */
+    public static final long CONFIG_HAS_STENCIL_BUFFER = 0x20;
+
+    /**
+     * The SwapChain contains protected content. Only supported when isProtectedContentSupported()
+     * is true.
+     */
+    public static final long CONFIG_PROTECTED_CONTENT   = 0x40;
+}
+
--- a/android/filament-android/src/main/java/com/google/android/filament/ToneMapper.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/ToneMapper.java
@@ -16,12 +16,14 @@ package com.google.android.filament;
 * <li>Configurable tone mapping operators</li>
 * <ul>
 *   <li>GenericToneMapper</li>
+ *   <li>AgXToneMapper</li>
 * </ul>
 * <li>Fixed-aesthetic tone mapping operators</li>
 * <ul>
 *   <li>ACESToneMapper</li>
 *   <li>ACESLegacyToneMapper</li>
 *   <li>FilmicToneMapper</li>
+ *   <li>PBRNeutralToneMapper</li>
 * </ul>
 * <li>Debug/validation tone mapping operators</li>
 * <ul>
@@ -100,11 +102,21 @@ public class ToneMapper {
        }
    }

+    /**
+     * Khronos PBR Neutral tone mapping operator. This tone mapper was designed
+     * to preserve the appearance of materials across lighting conditions while
+     * avoiding artifacts in the highlights in high dynamic range conditions.
+     */
+    public static class PBRNeutralToneMapper extends ToneMapper {
+        public PBRNeutralToneMapper() {
+            super(nCreatePBRNeutralToneMapper());
+        }
+    }
+
    /**
     * AgX tone mapping operator.
     */
    public static class Agx extends ToneMapper {
-
        public enum AgxLook {
            /**
             * Base contrast with no look applied
@@ -233,6 +245,7 @@ public class ToneMapper {
    private static native long nCreateACESToneMapper();
    private static native long nCreateACESLegacyToneMapper();
    private static native long nCreateFilmicToneMapper();
+    private static native long nCreatePBRNeutralToneMapper();
    private static native long nCreateAgxToneMapper(int look);
    private static native long nCreateGenericToneMapper(
            float contrast, float midGrayIn, float midGrayOut, float hdrMax);
--- a/android/filament-android/src/main/java/com/google/android/filament/android/UiHelper.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/android/UiHelper.java
@@ -27,7 +27,7 @@ import android.view.SurfaceHolder;
 import android.view.SurfaceView;
 import android.view.TextureView;

-import com.google.android.filament.SwapChain;
+import com.google.android.filament.SwapChainFlags;

 /**
 * UiHelper is a simple class that can manage either a SurfaceView, TextureView, or a SurfaceHolder
@@ -538,7 +538,7 @@ public class UiHelper {
     * the options set on this UiHelper.
     */
    public long getSwapChainFlags() {
-        return isOpaque() ? SwapChain.CONFIG_DEFAULT : SwapChain.CONFIG_TRANSPARENT;
+        return isOpaque() ? SwapChainFlags.CONFIG_DEFAULT : SwapChainFlags.CONFIG_TRANSPARENT;
    }

    /**
--- a/android/gltfio-android/CMakeLists.txt
+++ b/android/gltfio-android/CMakeLists.txt
@@ -81,9 +81,17 @@ set(GLTFIO_SRCS
        ${GLTFIO_DIR}/src/TangentsJob.cpp
        ${GLTFIO_DIR}/src/TangentsJob.h
        ${GLTFIO_DIR}/src/UbershaderProvider.cpp
+        ${GLTFIO_DIR}/src/Utility.cpp
+        ${GLTFIO_DIR}/src/Utility.h
        ${GLTFIO_DIR}/src/Wireframe.cpp
        ${GLTFIO_DIR}/src/Wireframe.h
        ${GLTFIO_DIR}/src/downcast.h
+        ${GLTFIO_DIR}/src/extended/AssetLoaderExtended.h
+        ${GLTFIO_DIR}/src/extended/TangentsJobExtended.cpp
+        ${GLTFIO_DIR}/src/extended/TangentsJobExtended.h
+        ${GLTFIO_DIR}/src/extended/TangentSpaceMeshWrapper.cpp
+        ${GLTFIO_DIR}/src/extended/TangentSpaceMeshWrapper.h
+

        src/main/cpp/Animator.cpp
        src/main/cpp/AssetLoader.cpp
--- a/android/gradle.properties
+++ b/android/gradle.properties
@@ -1,5 +1,5 @@
 GROUP=com.google.android.filament
-VERSION_NAME=1.50.1
+VERSION_NAME=1.51.3

 POM_DESCRIPTION=Real-time physically based rendering engine for Android.

--- a/android/samples/sample-gltf-viewer/build.gradle
+++ b/android/samples/sample-gltf-viewer/build.gradle
@@ -30,7 +30,7 @@ android {
    compileSdkVersion versions.compileSdk
    defaultConfig {
        applicationId "com.google.android.filament.gltf"
-        minSdkVersion 19
+        minSdkVersion versions.minSdk
        targetSdkVersion versions.targetSdk
    }

--- a/build.sh
+++ b/build.sh
@@ -516,7 +516,7 @@ function build_android {
            if [[ "${BUILD_ANDROID_SAMPLES}" == "true" ]]; then
                for sample in ${ANDROID_SAMPLES}; do
                    echo "Installing out/${sample}-debug.apk"
-                    cp samples/${sample}/build/outputs/apk/debug/${sample}-debug-unsigned.apk \
+                    cp samples/${sample}/build/outputs/apk/debug/${sample}-debug.apk \
                        ../out/${sample}-debug.apk
                done
            fi
--- a/build/android/ndk.version
+++ b/build/android/ndk.version
@@ -1 +1 @@
-25.1.8937393
+26.1.10909125
--- a/build/linux/combine-static-libs.sh
+++ b/build/linux/combine-static-libs.sh
@@ -125,7 +125,13 @@ if [[ "${has_universal}" == "true" ]]; then

        arch_output="${OUTPUT_PATH%.a}_${arch}.a"
        arch_outputs+=("$arch_output")
-        combine_static_libs "$arch_output" $(find "$(pwd)/${archs_temp_dir}/${arch}" -iname '*.a')
+        # Gather the per-arch archives NUL-delimited so paths with whitespace stay intact.
+        archives=()
+        while IFS=  read -r -d $'\0'; do
+            archives+=("$REPLY")
+        done < <(find "$(pwd)/${archs_temp_dir}/${arch}" -iname '*.a' -print0)
+        # Expand the whole array; a bare "$archives" yields only its first element.
+        combine_static_libs "$arch_output" "${archives[@]}"
    done

    # Finally, combine the single-architecture archives into a universal binary.
--- a/build/toolchain-arm7-linux-android.cmake
+++ b/build/toolchain-arm7-linux-android.cmake
@@ -21,7 +21,7 @@ set(CMAKE_SYSTEM_NAME Linux)
 set(CMAKE_SYSTEM_VERSION 1)

 # android
-set(API_LEVEL 19)
+set(API_LEVEL 21)

 # architecture
 set(ARCH armv7a-linux-androideabi)
--- a/build/toolchain-x86-linux-android.cmake
+++ b/build/toolchain-x86-linux-android.cmake
@@ -21,7 +21,7 @@ set(CMAKE_SYSTEM_NAME Linux)
 set(CMAKE_SYSTEM_VERSION 1)

 # android
-set(API_LEVEL 19)
+set(API_LEVEL 21)

 # architecture
 set(ARCH i686-linux-android)
--- a/docs/Materials.md.html
+++ b/docs/Materials.md.html
@@ -1397,7 +1397,7 @@ Type
 :    `string`

 Value
-:     Any of `opaque`, `transparent`, `fade`, `add`, `masked`, `multiply`, `screen`. Defaults to `opaque`.
+:     Any of `opaque`, `transparent`, `fade`, `add`, `masked`, `multiply`, `screen`, `custom`. Defaults to `opaque`.

 Description
 :     Defines how/if the rendered object is blended with the content of the render target.
@@ -1420,6 +1420,7 @@ Description
      of the material's output defines whether a fragment is discarded or not. Additionally,
      ALPHA_TO_COVERAGE is enabled for non-translucent views. See the maskThreshold section for more
      information.
+    - **Custom**: blending is enabled, but the blending function is user specified. See `blendFunction`.

 !!! Note
    When `blending` is set to `masked`, alpha to coverage is automatically enabled for the material.
@@ -1432,6 +1433,36 @@ material {
 }
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+### Blending and transparency: blendFunction
+
+Type
+:    `object`
+
+Fields
+:     `srcRGB`, `srcA`, `dstRGB`, `dstA`
+
+Description
+:       - *srcRGB*: source function applied to the RGB channels
+        - *srcA*: source function applied to the alpha channel
+        - *dstRGB*: destination function applied to the RGB channels
+        - *dstA*: destination function applied to the alpha channel
+        The possible values for each function are one of `zero`, `one`, `srcColor`, `oneMinusSrcColor`,
+               `dstColor`, `oneMinusDstColor`, `srcAlpha`, `oneMinusSrcAlpha`, `dstAlpha`,
+               `oneMinusDstAlpha`, `srcAlphaSaturate`
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ JSON
+material {
+    blending : custom,
+    blendFunction :
+    {
+        srcRGB: one,
+        srcA: one,
+        dstRGB: oneMinusSrcColor,
+        dstA: oneMinusSrcAlpha
+    }
+ }
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 ### Blending and transparency: postLightingBlending

 Type
--- a/filament/CMakeLists.txt
+++ b/filament/CMakeLists.txt
@@ -66,6 +66,7 @@ set(SRCS
        src/Froxelizer.cpp
        src/Frustum.cpp
        src/HwRenderPrimitiveFactory.cpp
+        src/HwVertexBufferInfoFactory.cpp
        src/IndexBuffer.cpp
        src/IndirectLight.cpp
        src/InstanceBuffer.cpp
@@ -137,6 +138,7 @@ set(SRCS

 set(PRIVATE_HDRS
        src/Allocators.h
+        src/Bimap.h
        src/BufferPoolAllocator.h
        src/ColorSpaceUtils.h
        src/Culler.h
@@ -147,6 +149,7 @@ set(PRIVATE_HDRS
        src/FrameSkipper.h
        src/Froxelizer.h
        src/HwRenderPrimitiveFactory.h
+        src/HwVertexBufferInfoFactory.h
        src/Intersections.h
        src/MaterialParser.h
        src/PerViewUniforms.h
@@ -212,6 +215,7 @@ set(MATERIAL_SRCS
        src/materials/antiAliasing/fxaa.mat
        src/materials/antiAliasing/taa.mat
        src/materials/blitLow.mat
+        src/materials/blitArray.mat
        src/materials/bloom/bloomDownsample.mat
        src/materials/bloom/bloomDownsample2x.mat
        src/materials/bloom/bloomDownsample9.mat
@@ -248,8 +252,13 @@ set(MATERIAL_SRCS
 )

 set(MATERIAL_FL0_SRCS
-        src/materials/defaultMaterial0.mat
-        src/materials/skybox0.mat
+        src/materials/defaultMaterial.mat
+        src/materials/skybox.mat
+)
+
+set(MATERIAL_MULTIVIEW_SRCS
+        src/materials/defaultMaterial.mat
+        src/materials/skybox.mat
 )

 # Embed the binary resource blob for materials.
@@ -281,6 +290,11 @@ if (FILAMENT_ENABLE_FEATURE_LEVEL_0)
    add_definitions(-DFILAMENT_ENABLE_FEATURE_LEVEL_0)
 endif()

+# Whether to include MULTIVIEW materials.
+if (FILAMENT_ENABLE_MULTIVIEW)
+    add_definitions(-DFILAMENT_ENABLE_MULTIVIEW)
+endif()
+
 # ==================================================================================================
 # Definitions
 # ==================================================================================================
@@ -311,33 +325,42 @@ foreach (mat_src ${MATERIAL_SRCS})
    get_filename_component(localname "${mat_src}" NAME_WE)
    get_filename_component(fullname "${mat_src}" ABSOLUTE)
    set(output_path "${MATERIAL_DIR}/${localname}.filamat")
-
    add_custom_command(
            OUTPUT ${output_path}
            COMMAND matc ${MATC_BASE_FLAGS} -o ${output_path} ${fullname}
            MAIN_DEPENDENCY ${fullname}
            DEPENDS matc
-            COMMENT "Compiling material ${mat_src} to ${output_path}"
+            COMMENT "Compiling material ${fullname} to ${output_path}"
    )
    list(APPEND MATERIAL_BINS ${output_path})
-endforeach()
-
-if (FILAMENT_ENABLE_FEATURE_LEVEL_0)
-    foreach (mat_src ${MATERIAL_FL0_SRCS})
-        get_filename_component(localname "${mat_src}" NAME_WE)
-        get_filename_component(fullname "${mat_src}" ABSOLUTE)
-        set(output_path "${MATERIAL_DIR}/${localname}.filamat")

+    list(FIND MATERIAL_FL0_SRCS ${mat_src} index)
+    if (${index} GREATER -1 AND FILAMENT_ENABLE_FEATURE_LEVEL_0)
+        string(REGEX REPLACE "[.]filamat$" "_fl0.filamat" output_path_fl0 ${output_path})
        add_custom_command(
-                OUTPUT ${output_path}
-                COMMAND matc ${MATC_BASE_FLAGS} -o ${output_path} ${fullname}
+                OUTPUT ${output_path_fl0}
+                COMMAND matc ${MATC_BASE_FLAGS} -PfeatureLevel=0 -o ${output_path_fl0} ${fullname}
                MAIN_DEPENDENCY ${fullname}
                DEPENDS matc
-                COMMENT "Compiling material ${mat_src} to ${output_path}"
+                COMMENT "Compiling material ${fullname} to ${output_path_fl0}"
        )
-        list(APPEND MATERIAL_BINS ${output_path})
-    endforeach ()
-endif ()
+        list(APPEND MATERIAL_BINS ${output_path_fl0})
+    endif ()
+
+    list(FIND MATERIAL_MULTIVIEW_SRCS ${mat_src} index)
+    if (${index} GREATER -1 AND FILAMENT_ENABLE_MULTIVIEW)
+        string(REGEX REPLACE "[.]filamat$" "_multiview.filamat" output_path_multiview ${output_path})
+        add_custom_command(
+                OUTPUT ${output_path_multiview}
+                COMMAND matc ${MATC_BASE_FLAGS} -PstereoscopicType=multiview -o ${output_path_multiview} ${fullname}
+                MAIN_DEPENDENCY ${fullname}
+                DEPENDS matc
+                COMMENT "Compiling material ${fullname} to ${output_path_multiview}"
+        )
+        list(APPEND MATERIAL_BINS ${output_path_multiview})
+    endif ()
+
+endforeach()

 # Additional dependencies on included files for materials

--- a/filament/backend/CMakeLists.txt
+++ b/filament/backend/CMakeLists.txt
@@ -71,6 +71,8 @@ if (FILAMENT_SUPPORTS_OPENGL AND NOT FILAMENT_USE_EXTERNAL_GLES3 AND NOT FILAMEN
            include/backend/platforms/OpenGLPlatform.h
            src/opengl/gl_headers.cpp
            src/opengl/gl_headers.h
+            src/opengl/GLBufferObject.h
+            src/opengl/GLTexture.h
            src/opengl/GLUtils.cpp
            src/opengl/GLUtils.h
            src/opengl/OpenGLBlobCache.cpp
@@ -166,6 +168,10 @@ endif()
 if (FILAMENT_SUPPORTS_VULKAN)
    list(APPEND SRCS
            include/backend/platforms/VulkanPlatform.h
+            src/vulkan/caching/VulkanDescriptorSetManager.cpp
+            src/vulkan/caching/VulkanDescriptorSetManager.h
+            src/vulkan/caching/VulkanPipelineLayoutCache.cpp
+            src/vulkan/caching/VulkanPipelineLayoutCache.h
            src/vulkan/platform/VulkanPlatform.cpp
            src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp
            src/vulkan/platform/VulkanPlatformSwapChainImpl.h
--- a/filament/backend/include/backend/DriverEnums.h
+++ b/filament/backend/include/backend/DriverEnums.h
@@ -81,7 +81,14 @@ static constexpr uint64_t SWAP_CHAIN_CONFIG_SRGB_COLORSPACE     = 0x10;
 /**
 * Indicates that the SwapChain should also contain a stencil component.
 */
-static constexpr uint64_t SWAP_CHAIN_HAS_STENCIL_BUFFER         = 0x20;
+static constexpr uint64_t SWAP_CHAIN_CONFIG_HAS_STENCIL_BUFFER  = 0x20;
+static constexpr uint64_t SWAP_CHAIN_HAS_STENCIL_BUFFER         = SWAP_CHAIN_CONFIG_HAS_STENCIL_BUFFER;
+
+/**
+ * The SwapChain contains protected content. Currently only supported by OpenGLPlatform and
+ * only when OpenGLPlatform::isProtectedContextSupported() is true.
+ */
+static constexpr uint64_t SWAP_CHAIN_CONFIG_PROTECTED_CONTENT   = 0x40;


 static constexpr size_t MAX_VERTEX_ATTRIBUTE_COUNT  = 16;   // This is guaranteed by OpenGL ES.
@@ -129,6 +136,12 @@ enum class Backend : uint8_t {
    NOOP = 4,     //!< Selects the no-op driver for testing purposes.
 };

+enum class TimerQueryResult : int8_t {
+    ERROR = -1,     // an error occurred, result won't be available
+    NOT_READY = 0,  // result not ready yet
+    AVAILABLE = 1,  // result is available
+};
+
 static constexpr const char* backendToString(Backend backend) {
    switch (backend) {
        case Backend::NOOP:
@@ -659,16 +672,17 @@ enum class TextureFormat : uint16_t {
 };

 //! Bitmask describing the intended Texture Usage
-enum class TextureUsage : uint8_t {
-    NONE                = 0x00,
-    COLOR_ATTACHMENT    = 0x01,                     //!< Texture can be used as a color attachment
-    DEPTH_ATTACHMENT    = 0x02,                     //!< Texture can be used as a depth attachment
-    STENCIL_ATTACHMENT  = 0x04,                     //!< Texture can be used as a stencil attachment
-    UPLOADABLE          = 0x08,                     //!< Data can be uploaded into this texture (default)
-    SAMPLEABLE          = 0x10,                     //!< Texture can be sampled (default)
-    SUBPASS_INPUT       = 0x20,                     //!< Texture can be used as a subpass input
-    BLIT_SRC            = 0x40,                     //!< Texture can be used the source of a blit()
-    BLIT_DST            = 0x80,                     //!< Texture can be used the destination of a blit()
+enum class TextureUsage : uint16_t {
+    NONE                = 0x0000,
+    COLOR_ATTACHMENT    = 0x0001,            //!< Texture can be used as a color attachment
+    DEPTH_ATTACHMENT    = 0x0002,            //!< Texture can be used as a depth attachment
+    STENCIL_ATTACHMENT  = 0x0004,            //!< Texture can be used as a stencil attachment
+    UPLOADABLE          = 0x0008,            //!< Data can be uploaded into this texture (default)
+    SAMPLEABLE          = 0x0010,            //!< Texture can be sampled (default)
+    SUBPASS_INPUT       = 0x0020,            //!< Texture can be used as a subpass input
+    BLIT_SRC            = 0x0040,            //!< Texture can be used as the source of a blit()
+    BLIT_DST            = 0x0080,            //!< Texture can be used as the destination of a blit()
+    PROTECTED           = 0x0100,            //!< Texture can be used for protected content
    DEFAULT             = UPLOADABLE | SAMPLEABLE   //!< Default texture usage
 };

@@ -1172,11 +1186,27 @@ struct StencilState {

    //! Stencil operations for front-facing polygons
    StencilOperations front = {
-            .stencilFunc = StencilFunction::A, .ref = 0, .readMask = 0xff, .writeMask = 0xff };
+            .stencilFunc = StencilFunction::A,
+            .stencilOpStencilFail = StencilOperation::KEEP,
+            .padding0 = 0,
+            .stencilOpDepthFail = StencilOperation::KEEP,
+            .stencilOpDepthStencilPass = StencilOperation::KEEP,
+            .padding1 = 0,
+            .ref = 0,
+            .readMask = 0xff,
+            .writeMask = 0xff };

    //! Stencil operations for back-facing polygons
    StencilOperations back  = {
-            .stencilFunc = StencilFunction::A, .ref = 0, .readMask = 0xff, .writeMask = 0xff };
+            .stencilFunc = StencilFunction::A,
+            .stencilOpStencilFail = StencilOperation::KEEP,
+            .padding0 = 0,
+            .stencilOpDepthFail = StencilOperation::KEEP,
+            .stencilOpDepthStencilPass = StencilOperation::KEEP,
+            .padding1 = 0,
+            .ref = 0,
+            .readMask = 0xff,
+            .writeMask = 0xff };

    //! Whether stencil-buffer writes are enabled
    bool stencilWrite = false;
@@ -1212,6 +1242,14 @@ enum class Workaround : uint16_t {
    DISABLE_THREAD_AFFINITY
 };

+//! The type of technique for stereoscopic rendering
+enum class StereoscopicType : uint8_t {
+    // Stereoscopic rendering is performed using instanced rendering technique.
+    INSTANCED,
+    // Stereoscopic rendering is performed using the multiview feature from the graphics backend.
+    MULTIVIEW,
+};
+
 } // namespace filament::backend

 template<> struct utils::EnableBitMaskOperators<filament::backend::ShaderStageFlags>
--- a/filament/backend/include/backend/Handle.h
+++ b/filament/backend/include/backend/Handle.h
@@ -39,6 +39,7 @@ struct HwStream;
 struct HwSwapChain;
 struct HwTexture;
 struct HwTimerQuery;
+struct HwVertexBufferInfo;
 struct HwVertexBuffer;

 /*
@@ -62,14 +63,6 @@ public:
    // clear the handle, this doesn't free associated resources
    void clear() noexcept { object = nullid; }

-    // compare handles
-    bool operator==(const HandleBase& rhs) const noexcept { return object == rhs.object; }
-    bool operator!=(const HandleBase& rhs) const noexcept { return object != rhs.object; }
-    bool operator<(const HandleBase& rhs) const noexcept { return object < rhs.object; }
-    bool operator<=(const HandleBase& rhs) const noexcept { return object <= rhs.object; }
-    bool operator>(const HandleBase& rhs) const noexcept { return object > rhs.object; }
-    bool operator>=(const HandleBase& rhs) const noexcept { return object >= rhs.object; }
-
    // get this handle's handleId
    HandleId getId() const noexcept { return object; }

@@ -101,6 +94,14 @@ struct Handle : public HandleBase {

    explicit Handle(HandleId id) noexcept : HandleBase(id) { }

+    // compare handles of the same type
+    bool operator==(const Handle& rhs) const noexcept { return getId() == rhs.getId(); }
+    bool operator!=(const Handle& rhs) const noexcept { return getId() != rhs.getId(); }
+    bool operator<(const Handle& rhs) const noexcept { return getId() < rhs.getId(); }
+    bool operator<=(const Handle& rhs) const noexcept { return getId() <= rhs.getId(); }
+    bool operator>(const Handle& rhs) const noexcept { return getId() > rhs.getId(); }
+    bool operator>=(const Handle& rhs) const noexcept { return getId() >= rhs.getId(); }
+
    // type-safe Handle cast
    template<typename B, typename = std::enable_if_t<std::is_base_of<T, B>::value> >
    Handle(Handle<B> const& base) noexcept : HandleBase(base) { } // NOLINT(hicpp-explicit-conversions,google-explicit-constructor)
@@ -114,18 +115,19 @@ private:

 // Types used by the command stream
 // (we use this renaming because the macro-system doesn't deal well with "<" and ">")
-using BufferObjectHandle    = Handle<HwBufferObject>;
-using FenceHandle           = Handle<HwFence>;
-using IndexBufferHandle     = Handle<HwIndexBuffer>;
-using ProgramHandle         = Handle<HwProgram>;
-using RenderPrimitiveHandle = Handle<HwRenderPrimitive>;
-using RenderTargetHandle    = Handle<HwRenderTarget>;
-using SamplerGroupHandle    = Handle<HwSamplerGroup>;
-using StreamHandle          = Handle<HwStream>;
-using SwapChainHandle       = Handle<HwSwapChain>;
-using TextureHandle         = Handle<HwTexture>;
-using TimerQueryHandle      = Handle<HwTimerQuery>;
-using VertexBufferHandle    = Handle<HwVertexBuffer>;
+using BufferObjectHandle     = Handle<HwBufferObject>;
+using FenceHandle            = Handle<HwFence>;
+using IndexBufferHandle      = Handle<HwIndexBuffer>;
+using ProgramHandle          = Handle<HwProgram>;
+using RenderPrimitiveHandle  = Handle<HwRenderPrimitive>;
+using RenderTargetHandle     = Handle<HwRenderTarget>;
+using SamplerGroupHandle     = Handle<HwSamplerGroup>;
+using StreamHandle           = Handle<HwStream>;
+using SwapChainHandle        = Handle<HwSwapChain>;
+using TextureHandle          = Handle<HwTexture>;
+using TimerQueryHandle       = Handle<HwTimerQuery>;
+using VertexBufferHandle     = Handle<HwVertexBuffer>;
+using VertexBufferInfoHandle = Handle<HwVertexBufferInfo>;

 } // namespace filament::backend

--- a/filament/backend/include/backend/PipelineState.h
+++ b/filament/backend/include/backend/PipelineState.h
@@ -29,11 +29,13 @@ namespace filament::backend {
 //! \privatesection

 struct PipelineState {
-    Handle<HwProgram> program;
-    RasterState rasterState;
-    StencilState stencilState;
-    PolygonOffset polygonOffset;
-    Viewport scissor{ 0, 0, (uint32_t)INT32_MAX, (uint32_t)INT32_MAX };
+    Handle<HwProgram> program;                                              //  4
+    Handle<HwVertexBufferInfo> vertexBufferInfo;                            //  4
+    RasterState rasterState;                                                //  4
+    StencilState stencilState;                                              // 12
+    PolygonOffset polygonOffset;                                            //  8
+    PrimitiveType primitiveType = PrimitiveType::TRIANGLES;                 //  1
+    uint8_t padding[3] = {};                                                //  3
 };

 } // namespace filament::backend
--- a/filament/backend/include/backend/Platform.h
+++ b/filament/backend/include/backend/Platform.h
@@ -23,6 +23,7 @@
 #include <utils/Invocable.h>

 #include <stddef.h>
+#include <stdint.h>

 namespace filament::backend {

@@ -56,9 +57,14 @@ public:

        /**
         * Set to `true` to forcibly disable parallel shader compilation in the backend.
-         * Currently only honored by the GL backend.
+         * Currently only honored by the GL and Metal backends.
         */
        bool disableParallelShaderCompile = false;
+
+        /**
+         * Disable backend handles use-after-free checks.
+         */
+        bool disableHandleUseAfterFreeCheck = false;
    };

    Platform() noexcept;
@@ -84,7 +90,7 @@ public:
     *
     * @return nullptr on failure, or a pointer to the newly created driver.
     */
-    virtual backend::Driver* createDriver(void* sharedContext,
+    virtual backend::Driver* UTILS_NULLABLE createDriver(void* UTILS_NULLABLE sharedContext,
            const DriverConfig& driverConfig) noexcept = 0;

    /**
@@ -102,7 +108,8 @@ public:
     * cache.
     */
    using InsertBlobFunc = utils::Invocable<
-            void(const void* key, size_t keySize, const void* value, size_t valueSize)>;
+            void(const void* UTILS_NONNULL key, size_t keySize,
+                    const void* UTILS_NONNULL value, size_t valueSize)>;

    /*
     * RetrieveBlobFunc is an Invocable to an application-provided function that a
@@ -110,7 +117,8 @@ public:
     * cache.
     */
    using RetrieveBlobFunc = utils::Invocable<
-            size_t(const void* key, size_t keySize, void* value, size_t valueSize)>;
+            size_t(const void* UTILS_NONNULL key, size_t keySize,
+                    void* UTILS_NONNULL value, size_t valueSize)>;

    /**
     * Sets the callback functions that the backend can use to interact with caching functionality
@@ -163,7 +171,8 @@ public:
     * @param value         pointer to the beginning of the value data that is to be inserted
     * @param valueSize     specifies the size in byte of the data pointed to by <value>
     */
-    void insertBlob(const void* key, size_t keySize, const void* value, size_t valueSize);
+    void insertBlob(const void* UTILS_NONNULL key, size_t keySize,
+            const void* UTILS_NONNULL value, size_t valueSize);

    /**
     * To retrieve the binary value associated with a given key from the cache, a
@@ -182,11 +191,43 @@ public:
     * @return             If the cache contains a value associated with the given key then the
     *                     size of that binary value in bytes is returned. Otherwise 0 is returned.
     */
-    size_t retrieveBlob(const void* key, size_t keySize, void* value, size_t valueSize);
+    size_t retrieveBlob(const void* UTILS_NONNULL key, size_t keySize,
+            void* UTILS_NONNULL value, size_t valueSize);
+
+    using DebugUpdateStatFunc = utils::Invocable<void(const char* UTILS_NONNULL key, uint64_t value)>;
+
+    /**
+     * Sets the callback function that the backend can use to update backend-specific statistics
+     * to aid with debugging. This callback is guaranteed to be called on the Filament driver
+     * thread.
+     *
+     * @param debugUpdateStat   an Invocable that updates debug statistics
+     */
+    void setDebugUpdateStatFunc(DebugUpdateStatFunc&& debugUpdateStat) noexcept;
+
+    /**
+     * @return true if debugUpdateStat is valid.
+     */
+    bool hasDebugUpdateStatFunc() const noexcept;
+
+    /**
+     * To track backend-specific statistics, the backend implementation can call the
+     * application-provided callback function debugUpdateStatFunc to associate or update a value
+     * with a given key. It is possible for this function to be called multiple times with the
+     * same key, in which case newer values should overwrite older values.
+     *
+     * This function is guaranteed to be called only on a single thread, the Filament driver
+     * thread.
+     *
+     * @param key          a null-terminated C-string with the key of the debug statistic
+     * @param value        the updated value of key
+     */
+    void debugUpdateStat(const char* UTILS_NONNULL key, uint64_t value);

 private:
    InsertBlobFunc mInsertBlob;
    RetrieveBlobFunc mRetrieveBlob;
+    DebugUpdateStatFunc mDebugUpdateStat;
 };

 } // namespace filament
--- a/filament/backend/include/backend/Program.h
+++ b/filament/backend/include/backend/Program.h
@@ -116,6 +116,8 @@ public:

    Program& cacheId(uint64_t cacheId) noexcept;

+    Program& multiview(bool multiview) noexcept;
+
    ShaderSource const& getShadersSource() const noexcept { return mShadersSource; }
    ShaderSource& getShadersSource() noexcept { return mShadersSource; }

@@ -143,6 +145,8 @@ public:

    uint64_t getCacheId() const noexcept { return mCacheId; }

+    bool isMultiview() const noexcept { return mMultiview; }
+
    CompilerPriorityQueue getPriorityQueue() const noexcept { return mPriorityQueue; }

 private:
@@ -158,6 +162,11 @@ private:
    utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> mAttributes;
    std::array<UniformInfo, Program::UNIFORM_BINDING_COUNT> mBindingUniformInfo;
    CompilerPriorityQueue mPriorityQueue = CompilerPriorityQueue::HIGH;
+    // Indicates the current engine was initialized with multiview stereo, and the variant for this
+    // program contains STE flag. This is referred to later by the OpenGL shader compiler to
+    // determine whether shader code replacement for the num_views should be performed.
+    // This variable could be promoted as a more generic variable later if other similar needs occur.
+    bool mMultiview = false;
 };

 } // namespace filament::backend
--- a/filament/backend/include/backend/TargetBufferInfo.h
+++ b/filament/backend/include/backend/TargetBufferInfo.h
@@ -32,6 +32,10 @@ struct TargetBufferInfo {
    // texture to be used as render target
    Handle<HwTexture> handle;

+    // starting layer index for multiview. This value is only used when the `layerCount` for the
+    // render target is greater than 1.
+    uint8_t baseViewIndex = 0;
+
    // level to be used
    uint8_t level = 0;

@@ -80,7 +84,7 @@ public:

    // this is here for backward compatibility
    MRT(Handle<HwTexture> handle, uint8_t level, uint16_t layer) noexcept
-            : mInfos{{ handle, level, layer }} {
+            : mInfos{{ handle, 0, level, layer }} {
    }
 };

--- a/filament/backend/include/backend/platforms/OpenGLPlatform.h
+++ b/filament/backend/include/backend/platforms/OpenGLPlatform.h
@@ -21,6 +21,10 @@
 #include <backend/DriverEnums.h>
 #include <backend/Platform.h>

+#include <utils/compiler.h>
+#include <utils/Invocable.h>
+
+#include <stddef.h>
 #include <stdint.h>

 namespace filament::backend {
@@ -41,8 +45,8 @@ protected:
     * Derived classes can use this to instantiate the default OpenGLDriver backend.
     * This is typically called from your implementation of createDriver()
     */
-    static Driver* createDefaultDriver(OpenGLPlatform* platform,
-            void* sharedContext, const DriverConfig& driverConfig);
+    static Driver* UTILS_NULLABLE createDefaultDriver(OpenGLPlatform* UTILS_NONNULL platform,
+            void* UTILS_NULLABLE sharedContext, const DriverConfig& driverConfig);

    ~OpenGLPlatform() noexcept override;

@@ -60,6 +64,22 @@ public:
     */
    virtual void terminate() noexcept = 0;

+    /**
+     * Return whether createSwapChain supports the SWAP_CHAIN_CONFIG_SRGB_COLORSPACE flag.
+     * The default implementation returns false.
+     *
+     * @return true if SWAP_CHAIN_CONFIG_SRGB_COLORSPACE is supported, false otherwise.
+     */
+    virtual bool isSRGBSwapChainSupported() const noexcept;
+
+    /**
+     * Return whether protected contexts are supported by this backend.
+     * If protected contexts are supported, the SWAP_CHAIN_CONFIG_PROTECTED_CONTENT flag can be
+     * used when creating a SwapChain.
+     * The default implementation returns false.
+     */
+    virtual bool isProtectedContextSupported() const noexcept;
+
    /**
     * Called by the driver to create a SwapChain for this driver.
     *
@@ -69,15 +89,8 @@ public:
     * @return              The driver's SwapChain object.
     *
     */
-    virtual SwapChain* createSwapChain(void* nativeWindow, uint64_t flags) noexcept = 0;
-
-    /**
-     * Return whether createSwapChain supports the SWAP_CHAIN_CONFIG_SRGB_COLORSPACE flag.
-     * The default implementation returns false.
-     *
-     * @return true if SWAP_CHAIN_CONFIG_SRGB_COLORSPACE is supported, false otherwise.
-     */
-    virtual bool isSRGBSwapChainSupported() const noexcept;
+    virtual SwapChain* UTILS_NULLABLE createSwapChain(
+            void* UTILS_NULLABLE nativeWindow, uint64_t flags) noexcept = 0;

    /**
     * Called by the driver create a headless SwapChain.
@@ -90,13 +103,14 @@ public:
     * TODO: we need a more generic way of passing construction parameters
     *       A void* might be enough.
     */
-    virtual SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept = 0;
+    virtual SwapChain* UTILS_NULLABLE createSwapChain(
+            uint32_t width, uint32_t height, uint64_t flags) noexcept = 0;

    /**
     * Called by the driver to destroys the SwapChain
     * @param swapChain SwapChain to be destroyed.
     */
-    virtual void destroySwapChain(SwapChain* swapChain) noexcept = 0;
+    virtual void destroySwapChain(SwapChain* UTILS_NONNULL swapChain) noexcept = 0;

    /**
     * Returns the set of buffers that must be preserved up to the call to commit().
@@ -109,28 +123,80 @@ public:
     * @return buffer that must be preserved
     * @see commit()
     */
-    virtual TargetBufferFlags getPreservedFlags(SwapChain* swapChain) noexcept;
+    virtual TargetBufferFlags getPreservedFlags(SwapChain* UTILS_NONNULL swapChain) noexcept;
+
+    /**
+     * Returns true if the swapchain is protected
+     */
+    virtual bool isSwapChainProtected(Platform::SwapChain* UTILS_NONNULL swapChain) noexcept;

    /**
     * Called by the driver to establish the default FBO. The default implementation returns 0.
-      * @return a GLuint casted to a uint32_t that is an OpenGL framebuffer object.
+     *
+     * This method can be called either on the regular or protected OpenGL contexts and can return
+     * a different or identical name, since these names exist in different namespaces.
+     *
+     * @return a GLuint casted to a uint32_t that is an OpenGL framebuffer object.
     */
-    virtual uint32_t createDefaultRenderTarget() noexcept;
+    virtual uint32_t getDefaultFramebufferObject() noexcept;
+
+
+    /**
+     * Type of contexts available
+     */
+    enum class ContextType {
+        NONE,           //!< No current context
+        UNPROTECTED,    //!< current context is unprotected
+        PROTECTED       //!< current context supports protected content
+    };
+
+    /**
+     * Returns the type of the context currently in use. This value is updated by makeCurrent()
+     * and therefore can be cached between calls. ContextType::PROTECTED can only be returned
+     * if isProtectedContextSupported() is true.
+     * @return ContextType
+     */
+    virtual ContextType getCurrentContextType() const noexcept;
+
+    /**
+     * Binds the requested context to the current thread and drawSwapChain to the default FBO
+     * returned by getDefaultFramebufferObject().
+     *
+     * @param type type of context to bind to the current thread.
+     * @param drawSwapChain SwapChain to draw to. It must be bound to the default FBO.
+     * @param readSwapChain SwapChain to read from (for operation like `glBlitFramebuffer`)
+     * @return true on success, false on error.
+     */
+    virtual bool makeCurrent(ContextType type,
+            SwapChain* UTILS_NONNULL drawSwapChain,
+            SwapChain* UTILS_NONNULL readSwapChain) noexcept = 0;

    /**
     * Called by the driver to make the OpenGL context active on the calling thread and bind
-     * the drawSwapChain to the default render target (FBO) created with createDefaultRenderTarget.
+     * the drawSwapChain to the default FBO returned by getDefaultFramebufferObject().
+     * The context used is either the default context or the protected context. When a context
+     * change is necessary, the preContextChange and postContextChange callbacks are called,
+     * before and after the context change respectively. postContextChange is given the index
+     * of the new context (0 for default and 1 for protected).
+     * The default implementation just calls makeCurrent(getCurrentContextType(), SwapChain*, SwapChain*).
+     *
     * @param drawSwapChain SwapChain to draw to. It must be bound to the default FBO.
     * @param readSwapChain SwapChain to read from (for operation like `glBlitFramebuffer`)
+     * @param preContextChange called before the context changes
+     * @param postContextChange called after the context changes
     */
-    virtual void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept = 0;
+    virtual void makeCurrent(
+            SwapChain* UTILS_NONNULL drawSwapChain,
+            SwapChain* UTILS_NONNULL readSwapChain,
+            utils::Invocable<void()> preContextChange,
+            utils::Invocable<void(size_t index)> postContextChange) noexcept;

    /**
     * Called by the driver once the current frame finishes drawing. Typically, this should present
     * the drawSwapChain. This is for example where `eglMakeCurrent()` would be called.
     * @param swapChain the SwapChain to present.
     */
-    virtual void commit(SwapChain* swapChain) noexcept = 0;
+    virtual void commit(SwapChain* UTILS_NONNULL swapChain) noexcept = 0;

    /**
     * Set the time the next committed buffer should be presented to the user at.
@@ -155,14 +221,14 @@ public:
     *
     * @return A Fence object. The default implementation returns nullptr.
     */
-    virtual Fence* createFence() noexcept;
+    virtual Fence* UTILS_NULLABLE createFence() noexcept;

    /**
     * Destroys a Fence object. The default implementation does nothing.
     *
     * @param fence Fence to destroy.
     */
-    virtual void destroyFence(Fence* fence) noexcept;
+    virtual void destroyFence(Fence* UTILS_NONNULL fence) noexcept;

    /**
     * Waits on a Fence.
@@ -172,7 +238,7 @@ public:
     * @return Whether the fence signaled or timed out. See backend::FenceStatus.
     *         The default implementation always return backend::FenceStatus::ERROR.
     */
-    virtual backend::FenceStatus waitFence(Fence* fence, uint64_t timeout) noexcept;
+    virtual backend::FenceStatus waitFence(Fence* UTILS_NONNULL fence, uint64_t timeout) noexcept;


    // --------------------------------------------------------------------------------------------
@@ -186,13 +252,13 @@ public:
     * @param nativeStream The native stream, this parameter depends on the concrete implementation.
     * @return A new Stream object.
     */
-    virtual Stream* createStream(void* nativeStream) noexcept;
+    virtual Stream* UTILS_NULLABLE createStream(void* UTILS_NULLABLE nativeStream) noexcept;

    /**
     * Destroys a Stream.
     * @param stream Stream to destroy.
     */
-    virtual void destroyStream(Stream* stream) noexcept;
+    virtual void destroyStream(Stream* UTILS_NONNULL stream) noexcept;

    /**
     * The specified stream takes ownership of the texture (tname) object
@@ -202,20 +268,21 @@ public:
     * @param stream Stream to take ownership of the texture
     * @param tname  GL texture id to "bind" to the Stream.
     */
-    virtual void attach(Stream* stream, intptr_t tname) noexcept;
+    virtual void attach(Stream* UTILS_NONNULL stream, intptr_t tname) noexcept;

    /**
     * Destroys the texture associated to the stream
     * @param stream Stream to detach from its texture
     */
-    virtual void detach(Stream* stream) noexcept;
+    virtual void detach(Stream* UTILS_NONNULL stream) noexcept;

    /**
     * Updates the content of the texture attached to the stream.
     * @param stream Stream to update
     * @param timestamp Output parameter: Timestamp of the image bound to the texture.
     */
-    virtual void updateTexImage(Stream* stream, int64_t* timestamp) noexcept;
+    virtual void updateTexImage(Stream* UTILS_NONNULL stream,
+            int64_t* UTILS_NONNULL timestamp) noexcept;


    // --------------------------------------------------------------------------------------------
@@ -228,13 +295,13 @@ public:
     *         implementation could just return { 0, GL_TEXTURE_2D } at this point. The actual
     *         values can be delayed until setExternalImage.
     */
-    virtual ExternalTexture *createExternalImageTexture() noexcept;
+    virtual ExternalTexture* UTILS_NULLABLE createExternalImageTexture() noexcept;

    /**
     * Destroys an external texture handle and associated data.
     * @param texture a pointer to the handle to destroy.
     */
-    virtual void destroyExternalImage(ExternalTexture* texture) noexcept;
+    virtual void destroyExternalImage(ExternalTexture* UTILS_NONNULL texture) noexcept;

    // called on the application thread to allow Filament to take ownership of the image

@@ -247,7 +314,7 @@ public:
     * @param externalImage A token representing the platform's external image.
     * @see destroyExternalImage
     */
-    virtual void retainExternalImage(void* externalImage) noexcept;
+    virtual void retainExternalImage(void* UTILS_NONNULL externalImage) noexcept;

    /**
     * Called to bind the platform-specific externalImage to an ExternalTexture.
@@ -261,7 +328,8 @@ public:
     * @param texture an in/out pointer to ExternalTexture, id and target can be updated if necessary.
     * @return true on success, false on error.
     */
-    virtual bool setExternalImage(void* externalImage, ExternalTexture* texture) noexcept;
+    virtual bool setExternalImage(void* UTILS_NONNULL externalImage,
+            ExternalTexture* UTILS_NONNULL texture) noexcept;

    /**
     * The method allows platforms to convert a user-supplied external image object into a new type
--- a/filament/backend/include/backend/platforms/PlatformCocoaGL.h
+++ b/filament/backend/include/backend/platforms/PlatformCocoaGL.h
@@ -57,7 +57,7 @@ protected:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
    void commit(SwapChain* swapChain) noexcept override;
    OpenGLPlatform::ExternalTexture* createExternalImageTexture() noexcept override;
    void destroyExternalImage(ExternalTexture* texture) noexcept override;
--- a/filament/backend/include/backend/platforms/PlatformCocoaTouchGL.h
+++ b/filament/backend/include/backend/platforms/PlatformCocoaTouchGL.h
@@ -45,7 +45,7 @@ public:

    void terminate() noexcept override;

-    uint32_t createDefaultRenderTarget() noexcept override;
+    uint32_t getDefaultFramebufferObject() noexcept override;

    bool isExtraContextSupported() const noexcept override;
    void createContext(bool shared) override;
@@ -53,7 +53,7 @@ public:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
    void commit(SwapChain* swapChain) noexcept override;

    OpenGLPlatform::ExternalTexture* createExternalImageTexture() noexcept override;
--- a/filament/backend/include/backend/platforms/PlatformEGL.h
+++ b/filament/backend/include/backend/platforms/PlatformEGL.h
@@ -25,6 +25,8 @@
 #include <EGL/eglext.h>
 #include <EGL/eglplatform.h>

+#include <utils/Invocable.h>
+
 #include <initializer_list>
 #include <utility>
 #include <vector>
@@ -41,15 +43,11 @@ class PlatformEGL : public OpenGLPlatform {
 public:

    PlatformEGL() noexcept;
-    bool isExtraContextSupported() const noexcept override;
-    void createContext(bool shared) override;
-    void releaseContext() noexcept override;

    // Return true if we're on an OpenGL platform (as opposed to OpenGL ES). false by default.
    virtual bool isOpenGL() const noexcept;

 protected:
-
    // --------------------------------------------------------------------------------------------
    // Helper for EGL configs and attributes parameters

@@ -89,13 +87,30 @@ protected:
    // --------------------------------------------------------------------------------------------
    // OpenGLPlatform Interface

+    bool isExtraContextSupported() const noexcept override;
+    void createContext(bool shared) override;
+    void releaseContext() noexcept override;
+
    void terminate() noexcept override;

+    bool isProtectedContextSupported() const noexcept override;
+
    bool isSRGBSwapChainSupported() const noexcept override;
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool isSwapChainProtected(SwapChain* swapChain) noexcept override;
+
+    ContextType getCurrentContextType() const noexcept override;
+
+    bool makeCurrent(ContextType type,
+            SwapChain* drawSwapChain,
+            SwapChain* readSwapChain) noexcept override;
+
+    void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain,
+            utils::Invocable<void()> preContextChange,
+            utils::Invocable<void(size_t index)> postContextChange) noexcept override;
+
    void commit(SwapChain* swapChain) noexcept override;

    bool canCreateFence() noexcept override;
@@ -122,16 +137,27 @@ protected:
    static void clearGlError() noexcept;

    /**
-     * Always use this instead of eglMakeCurrent().
+     * Always use this instead of eglMakeCurrent(), as it tracks some state.
     */
-    EGLBoolean makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) noexcept;
+
+    EGLContext getContextForType(ContextType type) const noexcept;
+
+    // makes the draw and read surface current without changing the current context
+    EGLBoolean makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) noexcept {
+        return egl.makeCurrent(drawSurface, readSurface);
+    }
+
+    // makes context current and sets the draw and read surfaces to mEGLDummySurface
+    EGLBoolean makeCurrent(EGLContext context) noexcept {
+        return egl.makeCurrent(context, mEGLDummySurface, mEGLDummySurface);
+    }

    // TODO: this should probably use getters instead.
    EGLDisplay mEGLDisplay = EGL_NO_DISPLAY;
    EGLContext mEGLContext = EGL_NO_CONTEXT;
-    EGLSurface mCurrentDrawSurface = EGL_NO_SURFACE;
-    EGLSurface mCurrentReadSurface = EGL_NO_SURFACE;
+    EGLContext mEGLContextProtected = EGL_NO_CONTEXT;
    EGLSurface mEGLDummySurface = EGL_NO_SURFACE;
+    ContextType mCurrentContextType = ContextType::NONE;
    // mEGLConfig is valid only if ext.egl.KHR_no_config_context is false
    EGLConfig mEGLConfig = EGL_NO_CONFIG_KHR;
    Config mContextAttribs;
@@ -148,13 +174,38 @@ protected:
            bool KHR_gl_colorspace = false;
            bool KHR_no_config_context = false;
            bool KHR_surfaceless_context = false;
+            bool EXT_protected_content = false;
        } egl;
    } ext;

+    struct SwapChainEGL : public Platform::SwapChain {
+        EGLSurface sur = EGL_NO_SURFACE;
+        Config attribs{};
+        EGLNativeWindowType nativeWindow{};
+        EGLConfig config{};
+        uint64_t flags{};
+    };
+
    void initializeGlExtensions() noexcept;

 protected:
    EGLConfig findSwapChainConfig(uint64_t flags, bool window, bool pbuffer) const;
+
+private:
+    class EGL {
+        EGLDisplay& mEGLDisplay;
+        EGLSurface mCurrentDrawSurface = EGL_NO_SURFACE;
+        EGLSurface mCurrentReadSurface = EGL_NO_SURFACE;
+        EGLContext mCurrentContext = EGL_NO_CONTEXT;
+    public:
+        explicit EGL(EGLDisplay& dpy) : mEGLDisplay(dpy) {}
+        EGLBoolean makeCurrent(EGLContext context,
+                EGLSurface drawSurface, EGLSurface readSurface) noexcept;
+
+        EGLBoolean makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) noexcept {
+            return makeCurrent(mCurrentContext, drawSurface, readSurface);
+        }
+    } egl{ mEGLDisplay };
 };

 } // namespace filament::backend
--- a/filament/backend/include/backend/platforms/PlatformGLX.h
+++ b/filament/backend/include/backend/platforms/PlatformGLX.h
@@ -51,7 +51,7 @@ protected:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
    void commit(SwapChain* swapChain) noexcept override;

 private:
--- a/filament/backend/include/backend/platforms/PlatformWGL.h
+++ b/filament/backend/include/backend/platforms/PlatformWGL.h
@@ -53,7 +53,7 @@ protected:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
    void commit(SwapChain* swapChain) noexcept override;

 protected:
--- a/filament/backend/include/backend/platforms/PlatformWebGL.h
+++ b/filament/backend/include/backend/platforms/PlatformWebGL.h
@@ -46,7 +46,7 @@ protected:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
    void commit(SwapChain* swapChain) noexcept override;
 };

--- a/filament/backend/include/private/backend/CircularBuffer.h
+++ b/filament/backend/include/private/backend/CircularBuffer.h
@@ -17,7 +17,10 @@
 #ifndef TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H
 #define TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H

+#include <utils/debug.h>
+
 #include <stddef.h>
+#include <stdint.h>

 namespace filament::backend {

@@ -37,28 +40,36 @@ public:

    ~CircularBuffer() noexcept;

-    // allocates 'size' bytes in the circular buffer and returns a pointer to the memory
-    // return the current head and moves it forward by size bytes
-    inline void* allocate(size_t size) noexcept {
+    static size_t getBlockSize() noexcept { return sPageSize; }
+
+    // Total size of circular buffer. This is a constant.
+    size_t size() const noexcept { return mSize; }
+
+    // Allocates `s` bytes in the circular buffer and returns a pointer to the memory. All
+    // allocations must not exceed size() bytes.
+    inline void* allocate(size_t s) noexcept {
+        // We can never allocate more than size().
+        assert_invariant(getUsed() + s <= size());
        char* const cur = static_cast<char*>(mHead);
-        mHead = cur + size;
+        mHead = cur + s;
        return cur;
    }

-    // Total size of circular buffer
-    size_t size() const noexcept { return mSize; }
-
-    // returns true if the buffer is empty (e.g. after calling flush)
+    // Returns true if the buffer is empty, i.e. no allocations were made since
+    // the last call to getBuffer().
    bool empty() const noexcept { return mTail == mHead; }

-    void* getHead() const noexcept { return mHead; }
+    // Returns the size used since the last call to getBuffer()
+    size_t getUsed() const noexcept { return intptr_t(mHead) - intptr_t(mTail); }

-    void* getTail() const noexcept { return mTail; }
-
-    // call at least once every getRequiredSize() bytes allocated from the buffer
-    void circularize() noexcept;
-
-    static size_t getBlockSize() noexcept { return sPageSize; }
+    // Retrieves the current allocated range and frees it. It is the responsibility of the caller
+    // to make sure the returned range is no longer in use by the time allocate() allocates
+    // (size() - getUsed()) bytes.
+    struct Range {
+        void* tail;
+        void* head;
+    };
+    Range getBuffer() noexcept;

 private:
    void* alloc(size_t size) noexcept;
@@ -66,10 +77,10 @@ private:

    // pointer to the beginning of the circular buffer (constant)
    void* mData = nullptr;
-    int mUsesAshmem = -1;
+    int mAshmemFd = -1;

    // size of the circular buffer (constant)
-    size_t mSize = 0;
+    size_t const mSize;

    // pointer to the beginning of recorded data
    void* mTail = nullptr;
--- a/filament/backend/include/private/backend/CommandBufferQueue.h
+++ b/filament/backend/include/private/backend/CommandBufferQueue.h
@@ -33,7 +33,7 @@ namespace filament::backend {
 * A producer-consumer command queue that uses a CircularBuffer as main storage
 */
 class CommandBufferQueue {
-    struct Slice {
+    struct Range {
        void* begin;
        void* end;
    };
@@ -46,29 +46,33 @@ class CommandBufferQueue {

    mutable utils::Mutex mLock;
    mutable utils::Condition mCondition;
-    mutable std::vector<Slice> mCommandBuffersToExecute;
+    mutable std::vector<Range> mCommandBuffersToExecute;
    size_t mFreeSpace = 0;
    size_t mHighWatermark = 0;
    uint32_t mExitRequested = 0;
+    bool mPaused = false;

    static constexpr uint32_t EXIT_REQUESTED = 0x31415926;

 public:
    // requiredSize: guaranteed available space after flush()
-    CommandBufferQueue(size_t requiredSize, size_t bufferSize);
+    CommandBufferQueue(size_t requiredSize, size_t bufferSize, bool paused);
    ~CommandBufferQueue();

-    CircularBuffer& getCircularBuffer() { return mCircularBuffer; }
+    CircularBuffer& getCircularBuffer() noexcept { return mCircularBuffer; }
+    CircularBuffer const& getCircularBuffer() const noexcept { return mCircularBuffer; }
+
+    size_t getCapacity() const noexcept { return mRequiredSize; }

    size_t getHighWatermark() const noexcept { return mHighWatermark; }

    // wait for commands to be available and returns an array containing these commands
-    std::vector<Slice> waitForCommands() const;
+    std::vector<Range> waitForCommands() const;

    // return the memory used by this command buffer to the circular buffer
    // WARNING: releaseBuffer() must be called in sequence of the Slices returned by
    // waitForCommands()
-    void releaseBuffer(Slice const& buffer);
+    void releaseBuffer(Range const& buffer);

    // all commands buffers (Slices) written to this point are returned by waitForCommand(). This
    // call blocks until the CircularBuffer has at least mRequiredSize bytes available.
@@ -77,6 +81,9 @@ public:
    // returns from waitForCommands() immediately.
    void requestExit();

+    // suspend or unsuspend the queue.
+    void setPaused(bool paused);
+
    bool isExitRequested() const;
 };

--- a/filament/backend/include/private/backend/CommandStream.h
+++ b/filament/backend/include/private/backend/CommandStream.h
@@ -134,7 +134,7 @@ struct CommandType<void (Driver::*)(ARGS...)> {

    public:
        template<typename M, typename D>
-        static inline void execute(M&& method, D&& driver, CommandBase* base, intptr_t* next) noexcept {
+        static inline void execute(M&& method, D&& driver, CommandBase* base, intptr_t* next) {
            Command* self = static_cast<Command*>(base);
            *next = align(sizeof(Command));
 #if DEBUG_COMMAND_STREAM
@@ -168,7 +168,7 @@ struct CommandType<void (Driver::*)(ARGS...)> {

 class CustomCommand : public CommandBase {
    std::function<void()> mCommand;
-    static void execute(Driver&, CommandBase* base, intptr_t* next) noexcept;
+    static void execute(Driver&, CommandBase* base, intptr_t* next);
 public:
    inline CustomCommand(CustomCommand&& rhs) = default;
    inline explicit CustomCommand(std::function<void()> cmd)
@@ -213,6 +213,8 @@ public:
    CommandStream(CommandStream const& rhs) noexcept = delete;
    CommandStream& operator=(CommandStream const& rhs) noexcept = delete;

+    CircularBuffer const& getCircularBuffer() const noexcept { return mCurrentBuffer; }
+
 public:
 #define DECL_DRIVER_API(methodName, paramsDecl, params)                                         \
    inline void methodName(paramsDecl) {                                                        \
--- a/filament/backend/include/private/backend/Driver.h
+++ b/filament/backend/include/private/backend/Driver.h
@@ -76,7 +76,7 @@ public:
    // the fn function will execute a batch of driver commands
    // this gives the driver a chance to wrap their execution in a meaningful manner
    // the default implementation simply calls fn
-    virtual void execute(std::function<void(void)> const& fn) noexcept;
+    virtual void execute(std::function<void(void)> const& fn);

    // This is called on debug build, or when enabled manually on the backend thread side.
    virtual void debugCommandBegin(CommandStream* cmds,
--- a/filament/backend/include/private/backend/DriverAPI.inc
+++ b/filament/backend/include/private/backend/DriverAPI.inc
@@ -167,12 +167,15 @@ DECL_DRIVER_API_0(resetState)
 * -----------------------
 */

-DECL_DRIVER_API_R_N(backend::VertexBufferHandle, createVertexBuffer,
+DECL_DRIVER_API_R_N(backend::VertexBufferInfoHandle, createVertexBufferInfo,
        uint8_t, bufferCount,
        uint8_t, attributeCount,
-        uint32_t, vertexCount,
        backend::AttributeArray, attributes)

+DECL_DRIVER_API_R_N(backend::VertexBufferHandle, createVertexBuffer,
+        uint32_t, vertexCount,
+        backend::VertexBufferInfoHandle, vbih)
+
 DECL_DRIVER_API_R_N(backend::IndexBufferHandle, createIndexBuffer,
        backend::ElementType, elementType,
        uint32_t, indexCount,
@@ -224,11 +227,7 @@ DECL_DRIVER_API_R_N(backend::SamplerGroupHandle, createSamplerGroup,
 DECL_DRIVER_API_R_N(backend::RenderPrimitiveHandle, createRenderPrimitive,
        backend::VertexBufferHandle, vbh,
        backend::IndexBufferHandle, ibh,
-        backend::PrimitiveType, pt,
-        uint32_t, offset,
-        uint32_t, minIndex,
-        uint32_t, maxIndex,
-        uint32_t, count)
+        backend::PrimitiveType, pt)

 DECL_DRIVER_API_R_N(backend::ProgramHandle, createProgram,
        backend::Program&&, program)
@@ -240,6 +239,7 @@ DECL_DRIVER_API_R_N(backend::RenderTargetHandle, createRenderTarget,
        uint32_t, width,
        uint32_t, height,
        uint8_t, samples,
+        uint8_t, layerCount,
        backend::MRT, color,
        backend::TargetBufferInfo, depth,
        backend::TargetBufferInfo, stencil)
@@ -264,6 +264,7 @@ DECL_DRIVER_API_R_0(backend::TimerQueryHandle, createTimerQuery)
 */

 DECL_DRIVER_API_N(destroyVertexBuffer,    backend::VertexBufferHandle, vbh)
+DECL_DRIVER_API_N(destroyVertexBufferInfo,backend::VertexBufferInfoHandle, vbih)
 DECL_DRIVER_API_N(destroyIndexBuffer,     backend::IndexBufferHandle, ibh)
 DECL_DRIVER_API_N(destroyBufferObject,    backend::BufferObjectHandle, ibh)
 DECL_DRIVER_API_N(destroyRenderPrimitive, backend::RenderPrimitiveHandle, rph)
@@ -298,14 +299,16 @@ DECL_DRIVER_API_SYNCHRONOUS_0(bool, isFrameBufferFetchMultiSampleSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isFrameTimeSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isAutoDepthResolveSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isSRGBSwapChainSupported)
-DECL_DRIVER_API_SYNCHRONOUS_0(bool, isStereoSupported)
+DECL_DRIVER_API_SYNCHRONOUS_0(bool, isProtectedContentSupported)
+DECL_DRIVER_API_SYNCHRONOUS_N(bool, isStereoSupported, backend::StereoscopicType, stereoscopicType)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isParallelShaderCompileSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, isDepthStencilResolveSupported)
+DECL_DRIVER_API_SYNCHRONOUS_0(bool, isProtectedTexturesSupported)
 DECL_DRIVER_API_SYNCHRONOUS_0(uint8_t, getMaxDrawBuffers)
 DECL_DRIVER_API_SYNCHRONOUS_0(size_t, getMaxUniformBufferSize)
 DECL_DRIVER_API_SYNCHRONOUS_0(math::float2, getClipSpaceParams)
 DECL_DRIVER_API_SYNCHRONOUS_N(void, setupExternalImage, void*, image)
-DECL_DRIVER_API_SYNCHRONOUS_N(bool, getTimerQueryValue, backend::TimerQueryHandle, query, uint64_t*, elapsedTime)
+DECL_DRIVER_API_SYNCHRONOUS_N(backend::TimerQueryResult, getTimerQueryValue, backend::TimerQueryHandle, query, uint64_t*, elapsedTime)
 DECL_DRIVER_API_SYNCHRONOUS_N(bool, isWorkaroundNeeded, backend::Workaround, workaround)
 DECL_DRIVER_API_SYNCHRONOUS_0(backend::FeatureLevel, getFeatureLevel)

@@ -491,15 +494,32 @@ DECL_DRIVER_API_N(blit,
        math::uint2, srcOrigin,
        math::uint2, size)

+DECL_DRIVER_API_N(bindPipeline,
+        backend::PipelineState, state)
+
+DECL_DRIVER_API_N(bindRenderPrimitive,
+        backend::RenderPrimitiveHandle, rph)
+
+DECL_DRIVER_API_N(draw2,
+        uint32_t, indexOffset,
+        uint32_t, indexCount,
+        uint32_t, instanceCount)
+
 DECL_DRIVER_API_N(draw,
        backend::PipelineState, state,
        backend::RenderPrimitiveHandle, rph,
+        uint32_t, indexOffset,
+        uint32_t, indexCount,
        uint32_t, instanceCount)

+
 DECL_DRIVER_API_N(dispatchCompute,
        backend::ProgramHandle, program,
        math::uint3, workGroupCount)

+DECL_DRIVER_API_N(scissor,
+        Viewport, scissor)
+

 #pragma clang diagnostic pop

--- a/filament/backend/include/private/backend/HandleAllocator.h
+++ b/filament/backend/include/private/backend/HandleAllocator.h
@@ -24,36 +24,32 @@
 #include <utils/compiler.h>
 #include <utils/debug.h>
 #include <utils/ostream.h>
+#include <utils/Panic.h>

 #include <tsl/robin_map.h>

+#include <cstddef>
 #include <exception>
 #include <type_traits>
 #include <unordered_map>
+#include <utility>

 #include <stddef.h>
 #include <stdint.h>

-#if !defined(NDEBUG) && UTILS_HAS_RTTI
-#   define HANDLE_TYPE_SAFETY 1
-#else
-#   define HANDLE_TYPE_SAFETY 0
-#endif
-
-#define HandleAllocatorGL  HandleAllocator<16, 64, 208>
-#define HandleAllocatorVK  HandleAllocator<16, 64, 880>
-#define HandleAllocatorMTL HandleAllocator<16, 64, 584>
+#define HandleAllocatorGL  HandleAllocator<32,  64, 136>    // ~4520 / pool / MiB
+#define HandleAllocatorVK  HandleAllocator<64, 160, 312>    // ~1820 / pool / MiB
+#define HandleAllocatorMTL HandleAllocator<32,  48, 552>    // ~1660 / pool / MiB

 namespace filament::backend {

 /*
 * A utility class to efficiently allocate and manage Handle<>
 */
-template <size_t P0, size_t P1, size_t P2>
+template<size_t P0, size_t P1, size_t P2>
 class HandleAllocator {
 public:
-
-    HandleAllocator(const char* name, size_t size) noexcept;
+    HandleAllocator(const char* name, size_t size, bool disableUseAfterFreeCheck) noexcept;
    HandleAllocator(HandleAllocator const& rhs) = delete;
    HandleAllocator& operator=(HandleAllocator const& rhs) = delete;
    ~HandleAllocator();
@@ -69,15 +65,10 @@ public:
     *
     */
    template<typename D, typename ... ARGS>
-    Handle<D> allocateAndConstruct(ARGS&& ... args) noexcept {
-        Handle<D> h{ allocateHandle<sizeof(D)>() };
+    Handle<D> allocateAndConstruct(ARGS&& ... args) {
+        Handle<D> h{ allocateHandle<D>() };
        D* addr = handle_cast<D*>(h);
        new(addr) D(std::forward<ARGS>(args)...);
-#if HANDLE_TYPE_SAFETY
-        mLock.lock();
-        mHandleTypeId[addr] = typeid(D).name();
-        mLock.unlock();
-#endif
        return h;
    }

@@ -93,13 +84,7 @@ public:
     */
    template<typename D>
    Handle<D> allocate() noexcept {
-        Handle<D> h{ allocateHandle<sizeof(D)>() };
-#if HANDLE_TYPE_SAFETY
-        D* addr = handle_cast<D*>(h);
-        mLock.lock();
-        mHandleTypeId[addr] = typeid(D).name();
-        mLock.unlock();
-#endif
+        Handle<D> h{ allocateHandle<D>() };
        return h;
    }

@@ -112,21 +97,14 @@ public:
     */
    template<typename D, typename B, typename ... ARGS>
    typename std::enable_if_t<std::is_base_of_v<B, D>, D>*
-    destroyAndConstruct(Handle<B> const& handle, ARGS&& ... args) noexcept {
+    destroyAndConstruct(Handle<B> const& handle, ARGS&& ... args) {
        assert_invariant(handle);
        D* addr = handle_cast<D*>(const_cast<Handle<B>&>(handle));
        assert_invariant(addr);
-
        // currently we implement construct<> with dtor+ctor, we could use operator= also
        // but all our dtors are trivial, ~D() is actually a noop.
        addr->~D();
        new(addr) D(std::forward<ARGS>(args)...);
-
-#if HANDLE_TYPE_SAFETY
-        mLock.lock();
-        mHandleTypeId[addr] = typeid(D).name();
-        mLock.unlock();
-#endif
        return addr;
    }

@@ -143,12 +121,6 @@ public:
        D* addr = handle_cast<D*>(const_cast<Handle<B>&>(handle));
        assert_invariant(addr);
        new(addr) D(std::forward<ARGS>(args)...);
-
-#if HANDLE_TYPE_SAFETY
-        mLock.lock();
-        mHandleTypeId[addr] = typeid(D).name();
-        mLock.unlock();
-#endif
        return addr;
    }

@@ -164,19 +136,8 @@ public:
    void deallocate(Handle<B>& handle, D const* p) noexcept {
        // allow to destroy the nullptr, similarly to operator delete
        if (p) {
-#if HANDLE_TYPE_SAFETY
-            mLock.lock();
-            auto typeId = mHandleTypeId[p];
-            mHandleTypeId.erase(p);
-            mLock.unlock();
-            if (UTILS_UNLIKELY(typeId != typeid(D).name())) {
-                utils::slog.e << "Destroying handle " << handle.getId() << ", type " << typeid(D).name()
-                       << ", but handle's actual type is " << typeId << utils::io::endl;
-                std::terminate();
-            }
-#endif
            p->~D();
-            deallocateHandle<sizeof(D)>(handle.getId());
+            deallocateHandle<D>(handle.getId());
        }
    }

@@ -202,9 +163,21 @@ public:
    inline typename std::enable_if_t<
            std::is_pointer_v<Dp> &&
            std::is_base_of_v<B, typename std::remove_pointer_t<Dp>>, Dp>
-    handle_cast(Handle<B>& handle) noexcept {
+    handle_cast(Handle<B>& handle) {
        assert_invariant(handle);
-        void* const p = handleToPointer(handle.getId());
+        auto [p, tag] = handleToPointer(handle.getId());
+
+        if (isPoolHandle(handle.getId())) {
+            // check for use after free
+            if (UTILS_UNLIKELY(!mUseAfterFreeCheckDisabled)) {
+                uint8_t const age = (tag & HANDLE_AGE_MASK) >> HANDLE_AGE_SHIFT;
+                auto const pNode = static_cast<typename Allocator::Node*>(p);
+                uint8_t const expectedAge = pNode[-1].age;
+                ASSERT_POSTCONDITION(expectedAge == age,
+                        "use-after-free of Handle with id=%d", handle.getId());
+            }
+        }
+
        return static_cast<Dp>(p);
    }

@@ -212,36 +185,66 @@ public:
    inline typename std::enable_if_t<
            std::is_pointer_v<Dp> &&
            std::is_base_of_v<B, typename std::remove_pointer_t<Dp>>, Dp>
-    handle_cast(Handle<B> const& handle) noexcept {
+    handle_cast(Handle<B> const& handle) {
        return handle_cast<Dp>(const_cast<Handle<B>&>(handle));
    }

-
 private:

-    // template <int P0, int P1, int P2>
+    template<typename D>
+    static constexpr size_t getBucketSize() noexcept {
+        if constexpr (sizeof(D) <= P0) { return P0; }
+        if constexpr (sizeof(D) <= P1) { return P1; }
+        static_assert(sizeof(D) <= P2);
+        return P2;
+    }
+
    class Allocator {
        friend class HandleAllocator;
-        utils::PoolAllocator<P0, 16>   mPool0;
-        utils::PoolAllocator<P1, 16>   mPool1;
-        utils::PoolAllocator<P2, 16>   mPool2;
+        static constexpr size_t MIN_ALIGNMENT = alignof(std::max_align_t);
+        struct Node { uint8_t age; };
+        // Note: using the `extra` parameter of PoolAllocator<>, even with a 1-byte structure,
+        // generally increases all pool allocations by 8-bytes because of alignment restrictions.
+        template<size_t SIZE>
+        using Pool = utils::PoolAllocator<SIZE, MIN_ALIGNMENT, sizeof(Node)>;
+        Pool<P0> mPool0;
+        Pool<P1> mPool1;
+        Pool<P2> mPool2;
        UTILS_UNUSED_IN_RELEASE const utils::AreaPolicy::HeapArea& mArea;
+        bool mUseAfterFreeCheckDisabled;
    public:
-        static constexpr size_t MIN_ALIGNMENT_SHIFT = 4;
-        explicit Allocator(const utils::AreaPolicy::HeapArea& area);
+        explicit Allocator(const utils::AreaPolicy::HeapArea& area, bool disableUseAfterFreeCheck);
+
+        static constexpr size_t getAlignment() noexcept { return MIN_ALIGNMENT; }

        // this is in fact always called with a constexpr size argument
-        [[nodiscard]] inline void* alloc(size_t size, size_t, size_t extra) noexcept {
+        [[nodiscard]] inline void* alloc(size_t size, size_t, size_t, uint8_t* outAge) noexcept {
            void* p = nullptr;
-                 if (size <= mPool0.getSize()) p = mPool0.alloc(size, 16, extra);
-            else if (size <= mPool1.getSize()) p = mPool1.alloc(size, 16, extra);
-            else if (size <= mPool2.getSize()) p = mPool2.alloc(size, 16, extra);
+            if      (size <= mPool0.getSize()) p = mPool0.alloc(size);
+            else if (size <= mPool1.getSize()) p = mPool1.alloc(size);
+            else if (size <= mPool2.getSize()) p = mPool2.alloc(size);
+            if (UTILS_LIKELY(p)) {
+                Node const* const pNode = static_cast<Node const*>(p);
+                // we are guaranteed to have at least sizeof(Node) bytes of extra storage before
+                // the allocation address.
+                *outAge = pNode[-1].age;
+            }
            return p;
        }

        // this is in fact always called with a constexpr size argument
-        inline void free(void* p, size_t size) noexcept {
+        inline void free(void* p, size_t size, uint8_t age) noexcept {
            assert_invariant(p >= mArea.begin() && (char*)p + size <= (char*)mArea.end());
+
+            // check for double-free: the age carried by the handle must match the block's age
+            Node* const pNode = static_cast<Node*>(p);
+            uint8_t& expectedAge = pNode[-1].age;
+            if (UTILS_UNLIKELY(!mUseAfterFreeCheckDisabled)) {
+                ASSERT_POSTCONDITION(expectedAge == age,
+                        "double-free of Handle of size %zu at %p", size, p);
+            }
+            expectedAge = (expectedAge + 1) & 0xF; // fixme: 0xF hard-codes the 4-bit handle age
+
            if (size <= mPool0.getSize()) { mPool0.free(p); return; }
            if (size <= mPool1.getSize()) { mPool1.free(p); return; }
            if (size <= mPool2.getSize()) { mPool2.free(p); return; }
@@ -263,24 +266,16 @@ private:
    // allocateHandle()/deallocateHandle() selects the pool to use at compile-time based on the
    // allocation size this is always inlined, because all these do is to call
    // allocateHandleInPool()/deallocateHandleFromPool() with the right pool size.
-    template<size_t SIZE>
+    template<typename D>
    HandleBase::HandleId allocateHandle() noexcept {
-        if constexpr (SIZE <= P0) { return allocateHandleInPool<P0>(); }
-        if constexpr (SIZE <= P1) { return allocateHandleInPool<P1>(); }
-        static_assert(SIZE <= P2);
-        return allocateHandleInPool<P2>();
+        constexpr size_t BUCKET_SIZE = getBucketSize<D>();
+        return allocateHandleInPool<BUCKET_SIZE>();
    }

-    template<size_t SIZE>
+    template<typename D>
    void deallocateHandle(HandleBase::HandleId id) noexcept {
-        if constexpr (SIZE <= P0) {
-            deallocateHandleFromPool<P0>(id);
-        } else if constexpr (SIZE <= P1) {
-            deallocateHandleFromPool<P1>(id);
-        } else {
-            static_assert(SIZE <= P2);
-            deallocateHandleFromPool<P2>(id);
-        }
+        constexpr size_t BUCKET_SIZE = getBucketSize<D>();
+        deallocateHandleFromPool<BUCKET_SIZE>(id);
    }

    // allocateHandleInPool()/deallocateHandleFromPool() is NOT inlined, which will cause three
@@ -289,9 +284,11 @@ private:
    template<size_t SIZE>
    UTILS_NOINLINE
    HandleBase::HandleId allocateHandleInPool() noexcept {
-        void* p = mHandleArena.alloc(SIZE);
+        uint8_t age;
+        void* p = mHandleArena.alloc(SIZE, alignof(std::max_align_t), 0, &age);
        if (UTILS_LIKELY(p)) {
-            return pointerToHandle(p);
+            uint32_t const tag = (uint32_t(age) << HANDLE_AGE_SHIFT) & HANDLE_AGE_MASK;
+            return arenaPointerToHandle(p, tag);
        } else {
            return allocateHandleSlow(SIZE);
        }
@@ -301,42 +298,51 @@ private:
    UTILS_NOINLINE
    void deallocateHandleFromPool(HandleBase::HandleId id) noexcept {
        if (UTILS_LIKELY(isPoolHandle(id))) {
-            void* p = handleToPointer(id);
-            mHandleArena.free(p, SIZE);
+            auto [p, tag] = handleToPointer(id);
+            uint8_t const age = (tag & HANDLE_AGE_MASK) >> HANDLE_AGE_SHIFT;
+            mHandleArena.free(p, SIZE, age);
        } else {
            deallocateHandleSlow(id, SIZE);
        }
    }

-    static constexpr uint32_t HEAP_HANDLE_FLAG = 0x80000000u;
+    // we store a 4-bit age in each pool handle (bits 27-30, see HANDLE_AGE_MASK)
+    static constexpr uint32_t HANDLE_HEAP_FLAG      = 0x80000000u;      // pool vs heap handle
+    static constexpr uint32_t HANDLE_AGE_MASK       = 0x78000000u;      // handle's age
+    static constexpr uint32_t HANDLE_INDEX_MASK     = 0x07FFFFFFu;      // handle index
+    static constexpr uint32_t HANDLE_TAG_MASK       = HANDLE_AGE_MASK;
+    static constexpr uint32_t HANDLE_AGE_SHIFT      = 27;

    static bool isPoolHandle(HandleBase::HandleId id) noexcept {
-        return (id & HEAP_HANDLE_FLAG) == 0u;
+        return (id & HANDLE_HEAP_FLAG) == 0u;
    }

-    HandleBase::HandleId allocateHandleSlow(size_t size) noexcept;
+    HandleBase::HandleId allocateHandleSlow(size_t size);
    void deallocateHandleSlow(HandleBase::HandleId id, size_t size) noexcept;

    // We inline this because it's just 4 instructions in the fast case
-    inline void* handleToPointer(HandleBase::HandleId id) const noexcept {
+    inline std::pair<void*, uint32_t> handleToPointer(HandleBase::HandleId id) const noexcept {
        // note: the null handle will end-up returning nullptr b/c it'll be handled as
        // a non-pool handle.
        if (UTILS_LIKELY(isPoolHandle(id))) {
            char* const base = (char*)mHandleArena.getArea().begin();
-            size_t offset = id << Allocator::MIN_ALIGNMENT_SHIFT;
-            return static_cast<void*>(base + offset);
+            uint32_t const tag = id & HANDLE_TAG_MASK;
+            size_t const offset = (id & HANDLE_INDEX_MASK) * Allocator::getAlignment();
+            return { static_cast<void*>(base + offset), tag };
        }
-        return handleToPointerSlow(id);
+        return { handleToPointerSlow(id), 0 };
    }

    void* handleToPointerSlow(HandleBase::HandleId id) const noexcept;

    // We inline this because it's just 3 instructions
-    inline HandleBase::HandleId pointerToHandle(void* p) const noexcept {
+    inline HandleBase::HandleId arenaPointerToHandle(void* p, uint32_t tag) const noexcept {
        char* const base = (char*)mHandleArena.getArea().begin();
-        size_t offset = (char*)p - base;
-        auto id = HandleBase::HandleId(offset >> Allocator::MIN_ALIGNMENT_SHIFT);
-        assert_invariant((id & HEAP_HANDLE_FLAG) == 0);
+        size_t const offset = (char*)p - base;
+        assert_invariant((offset % Allocator::getAlignment()) == 0);
+        auto id = HandleBase::HandleId(offset / Allocator::getAlignment());
+        id |= tag & HANDLE_TAG_MASK;
+        assert_invariant((id & HANDLE_HEAP_FLAG) == 0);
        return id;
    }

@@ -346,9 +352,7 @@ private:
    mutable utils::Mutex mLock;
    tsl::robin_map<HandleBase::HandleId, void*> mOverflowMap;
    HandleBase::HandleId mId = 0;
-#if HANDLE_TYPE_SAFETY
-    mutable std::unordered_map<const void*, const char*> mHandleTypeId;
-#endif
+    bool mUseAfterFreeCheckDisabled = false;
 };

 } // namespace filament::backend
--- a/filament/backend/src/CircularBuffer.cpp
+++ b/filament/backend/src/CircularBuffer.cpp
@@ -16,6 +16,14 @@

 #include "private/backend/CircularBuffer.h"

+#include <utils/Log.h>
+#include <utils/Panic.h>
+#include <utils/architecture.h>
+#include <utils/ashmem.h>
+#include <utils/compiler.h>
+#include <utils/debug.h>
+#include <utils/ostream.h>
+
 #if !defined(WIN32) && !defined(__EMSCRIPTEN__) && !defined(IOS)
 #    include <sys/mman.h>
 #    include <unistd.h>
@@ -24,23 +32,20 @@
 #    define HAS_MMAP 0
 #endif

+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
 #include <stdio.h>

-#include <utils/architecture.h>
-#include <utils/ashmem.h>
-#include <utils/debug.h>
-#include <utils/Log.h>
-#include <utils/Panic.h>
-
 using namespace utils;

 namespace filament::backend {

 size_t CircularBuffer::sPageSize = arch::getPageSize();

-CircularBuffer::CircularBuffer(size_t size) {
+CircularBuffer::CircularBuffer(size_t size)
+    : mSize(size) {
    mData = alloc(size);
-    mSize = size;
    mTail = mData;
    mHead = mData;
 }
@@ -85,7 +90,7 @@ void* CircularBuffer::alloc(size_t size) noexcept {
                            MAP_PRIVATE, fd, (off_t)size);
                    if (vaddr_guard != MAP_FAILED && (vaddr_guard == (char*)vaddr_shadow + size)) {
                        // woo-hoo success!
-                        mUsesAshmem = fd;
+                        mAshmemFd = fd;
                        data = vaddr;
                    }
                }
@@ -93,7 +98,7 @@ void* CircularBuffer::alloc(size_t size) noexcept {
        }
    }

-    if (UTILS_UNLIKELY(mUsesAshmem < 0)) {
+    if (UTILS_UNLIKELY(mAshmemFd < 0)) {
        // ashmem failed
        if (vaddr_guard != MAP_FAILED) {
            munmap(vaddr_guard, size);
@@ -137,9 +142,9 @@ void CircularBuffer::dealloc() noexcept {
    if (mData) {
        size_t const BLOCK_SIZE = getBlockSize();
        munmap(mData, mSize * 2 + BLOCK_SIZE);
-        if (mUsesAshmem >= 0) {
-            close(mUsesAshmem);
-            mUsesAshmem = -1;
+        if (mAshmemFd >= 0) {
+            close(mAshmemFd);
+            mAshmemFd = -1;
        }
    }
 #else
@@ -149,23 +154,37 @@ void CircularBuffer::dealloc() noexcept {
 }


-void CircularBuffer::circularize() noexcept {
-    if (mUsesAshmem > 0) {
-        intptr_t const overflow = intptr_t(mHead) - (intptr_t(mData) + ssize_t(mSize));
-        if (overflow >= 0) {
-            assert_invariant(size_t(overflow) <= mSize);
-            mHead = (void *) (intptr_t(mData) + overflow);
-            #ifndef NDEBUG
-            memset(mData, 0xA5, size_t(overflow));
-            #endif
-        }
-    } else {
-        // Only circularize if mHead if in the second buffer.
-        if (intptr_t(mHead) - intptr_t(mData) > ssize_t(mSize)) {
+CircularBuffer::Range CircularBuffer::getBuffer() noexcept {
+    Range const range{ .tail = mTail, .head = mHead };
+
+    char* const pData = static_cast<char*>(mData);
+    char const* const pEnd = pData + mSize;
+    char const* const pHead = static_cast<char const*>(mHead);
+    if (UTILS_UNLIKELY(pHead >= pEnd)) {
+        size_t const overflow = pHead - pEnd;
+        if (UTILS_LIKELY(mAshmemFd >= 0)) { // 0 is a valid fd; only negative means no ashmem
+            assert_invariant(overflow <= mSize);
+            mHead = static_cast<void*>(pData + overflow);
+            // Data         Tail  End   Head              [virtual]
+            //  v             v    v     v
+            //  +-------------:----+-----:--------------+
+            //  |             :    |     :              |
+            //  +-----:------------+--------------------+
+            //       Head          |<------ copy ------>| [physical]
+        } else {
+            // Data         Tail  End   Head
+            //  v             v    v     v
+            //  +-------------:----+-----:--------------+
+            //  |             :    |     :              |
+            //  +-----|------------+-----|--------------+
+            //        |<---------------->|
+            //           sliding window
            mHead = mData;
        }
    }
    mTail = mHead;
+
+    return range;
 }

 } // namespace filament::backend
--- a/filament/backend/src/CommandBufferQueue.cpp
+++ b/filament/backend/src/CommandBufferQueue.cpp
@@ -15,23 +15,35 @@
 */

 #include "private/backend/CommandBufferQueue.h"
+#include "private/backend/CircularBuffer.h"
+#include "private/backend/CommandStream.h"

+#include <utils/compiler.h>
 #include <utils/Log.h>
-#include <utils/Systrace.h>
+#include <utils/Mutex.h>
+#include <utils/ostream.h>
 #include <utils/Panic.h>
+#include <utils/Systrace.h>
 #include <utils/debug.h>

-#include "private/backend/BackendUtils.h"
-#include "private/backend/CommandStream.h"
+#include <algorithm>
+#include <mutex>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+#include <stddef.h>
+#include <stdint.h>

 using namespace utils;

 namespace filament::backend {

-CommandBufferQueue::CommandBufferQueue(size_t requiredSize, size_t bufferSize)
+CommandBufferQueue::CommandBufferQueue(size_t requiredSize, size_t bufferSize, bool paused)
        : mRequiredSize((requiredSize + (CircularBuffer::getBlockSize() - 1u)) & ~(CircularBuffer::getBlockSize() -1u)),
          mCircularBuffer(bufferSize),
-          mFreeSpace(mCircularBuffer.size()) {
+          mFreeSpace(mCircularBuffer.size()),
+          mPaused(paused) {
    assert_invariant(mCircularBuffer.size() > requiredSize);
 }

@@ -45,6 +57,16 @@ void CommandBufferQueue::requestExit() {
    mCondition.notify_one();
 }

+void CommandBufferQueue::setPaused(bool paused) {
+    std::lock_guard<utils::Mutex> const lock(mLock); // mLock guards the update of mPaused
+    if (paused) {
+        mPaused = true;
+    } else {
+        mPaused = false;
+        mCondition.notify_one(); // wake a thread blocked on mCondition so it re-checks mPaused
+    }
+}
+
 bool CommandBufferQueue::isExitRequested() const {
    std::lock_guard<utils::Mutex> const lock(mLock);
    ASSERT_PRECONDITION( mExitRequested == 0 || mExitRequested == EXIT_REQUESTED,
@@ -65,55 +87,61 @@ void CommandBufferQueue::flush() noexcept {
    // always guaranteed to have enough space for the NoopCommand
    new(circularBuffer.allocate(sizeof(NoopCommand))) NoopCommand(nullptr);

-    // end of this slice
-    void* const head = circularBuffer.getHead();
+    const size_t requiredSize = mRequiredSize;

-    // beginning of this slice
-    void* const tail = circularBuffer.getTail();
+    // get the current buffer
+    auto const [begin, end] = circularBuffer.getBuffer();

-    // size of this slice
-    uint32_t const used = uint32_t(intptr_t(head) - intptr_t(tail));
+    assert_invariant(circularBuffer.empty());

-    circularBuffer.circularize();
+    // size of the current buffer
+    size_t const used = std::distance(
+            static_cast<char const*>(begin), static_cast<char const*>(end));

    std::unique_lock<utils::Mutex> lock(mLock);
-    mCommandBuffersToExecute.push_back({ tail, head });
+    mCommandBuffersToExecute.push_back({ begin, end });
+    mCondition.notify_one();

    // circular buffer is too small, we corrupted the stream
    ASSERT_POSTCONDITION(used <= mFreeSpace,
            "Backend CommandStream overflow. Commands are corrupted and unrecoverable.\n"
            "Please increase minCommandBufferSizeMB inside the Config passed to Engine::create.\n"
-            "Space used at this time: %u bytes",
-            (unsigned)used);
+            "Space used at this time: %u bytes, overflow: %u bytes",
+            (unsigned)used, unsigned(used - mFreeSpace));

    // wait until there is enough space in the buffer
    mFreeSpace -= used;
-    const size_t requiredSize = mRequiredSize;
+    if (UTILS_UNLIKELY(mFreeSpace < requiredSize)) {
+

 #ifndef NDEBUG
-    size_t totalUsed = circularBuffer.size() - mFreeSpace;
-    mHighWatermark = std::max(mHighWatermark, totalUsed);
-    if (UTILS_UNLIKELY(totalUsed > requiredSize)) {
-        slog.d << "CommandStream used too much space: " << totalUsed
-            << ", out of " << requiredSize << " (will block)" << io::endl;
-    }
+        size_t const totalUsed = circularBuffer.size() - mFreeSpace;
+        slog.d << "CommandStream used too much space (will block): "
+                << "needed space " << requiredSize << " out of " << mFreeSpace
+                << ", totalUsed=" << totalUsed << ", current=" << used
+                << ", queue size=" << mCommandBuffersToExecute.size() << " buffers"
+                << io::endl;
+
+        mHighWatermark = std::max(mHighWatermark, totalUsed);
 #endif

-    mCondition.notify_one();
-    if (UTILS_LIKELY(mFreeSpace < requiredSize)) {
        SYSTRACE_NAME("waiting: CircularBuffer::flush()");
+        ASSERT_POSTCONDITION(!mPaused,
+                "CommandStream is full, but since the rendering thread is paused, "
+                "the buffer cannot flush and we will deadlock. Instead, abort.");
        mCondition.wait(lock, [this, requiredSize]() -> bool {
+            // TODO: on macOS, we need to call pumpEvents from time to time
            return mFreeSpace >= requiredSize;
        });
    }
 }

-std::vector<CommandBufferQueue::Slice> CommandBufferQueue::waitForCommands() const {
+std::vector<CommandBufferQueue::Range> CommandBufferQueue::waitForCommands() const {
    if (!UTILS_HAS_THREADING) {
        return std::move(mCommandBuffersToExecute);
    }
    std::unique_lock<utils::Mutex> lock(mLock);
-    while (mCommandBuffersToExecute.empty() && !mExitRequested) {
+    while ((mCommandBuffersToExecute.empty() || mPaused) && !mExitRequested) {
        mCondition.wait(lock);
    }

@@ -123,7 +151,7 @@ std::vector<CommandBufferQueue::Slice> CommandBufferQueue::waitForCommands() con
    return std::move(mCommandBuffersToExecute);
 }

-void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Slice const& buffer) {
+void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Range const& buffer) {
    std::lock_guard<utils::Mutex> const lock(mLock);
    mFreeSpace += uintptr_t(buffer.end) - uintptr_t(buffer.begin);
    mCondition.notify_one();
--- a/filament/backend/src/CommandStream.cpp
+++ b/filament/backend/src/CommandStream.cpp
@@ -149,7 +149,7 @@ void CommandType<void (Driver::*)(ARGS...)>::Command<METHOD>::log() noexcept  {

 // ------------------------------------------------------------------------------------------------

-void CustomCommand::execute(Driver&, CommandBase* base, intptr_t* next) noexcept {
+void CustomCommand::execute(Driver&, CommandBase* base, intptr_t* next) {
    *next = CustomCommand::align(sizeof(CustomCommand));
    static_cast<CustomCommand*>(base)->mCommand();
    static_cast<CustomCommand*>(base)->~CustomCommand();
--- a/filament/backend/src/Driver.cpp
+++ b/filament/backend/src/Driver.cpp
@@ -214,7 +214,7 @@ size_t Driver::getElementTypeSize(ElementType type) noexcept {

 Driver::~Driver() noexcept = default;

-void Driver::execute(std::function<void(void)> const& fn) noexcept {
+void Driver::execute(std::function<void(void)> const& fn) {
    fn();
 }

--- a/filament/backend/src/DriverBase.h
+++ b/filament/backend/src/DriverBase.h
@@ -49,24 +49,28 @@ struct AcquiredImage;
 struct HwBase {
 };

-struct HwVertexBuffer : public HwBase {
-    AttributeArray attributes{};          // 8 * MAX_VERTEX_ATTRIBUTE_COUNT
-    uint32_t vertexCount{};               //   4
+
+struct HwVertexBufferInfo : public HwBase {
    uint8_t bufferCount{};                //   1
    uint8_t attributeCount{};             //   1
-    bool padding{};                       //   1
-    uint8_t bufferObjectsVersion{};       //   1 -> total struct is 136 bytes
-
-    HwVertexBuffer() noexcept = default;
-    HwVertexBuffer(uint8_t bufferCount, uint8_t attributeCount, uint32_t elementCount,
-            AttributeArray const& attributes) noexcept
-            : attributes(attributes),
-              vertexCount(elementCount),
-              bufferCount(bufferCount),
+    bool padding[2]{};                    //   2
+    HwVertexBufferInfo() noexcept = default;
+    HwVertexBufferInfo(uint8_t bufferCount, uint8_t attributeCount) noexcept
+            : bufferCount(bufferCount),
              attributeCount(attributeCount) {
    }
 };

+struct HwVertexBuffer : public HwBase {
+    uint32_t vertexCount{};               //   4
+    uint8_t bufferObjectsVersion{0xff};   //   1
+    bool padding[3]{};                    //   3
+    HwVertexBuffer() noexcept = default;
+    explicit HwVertexBuffer(uint32_t vertextCount) noexcept
+            : vertexCount(vertextCount) {
+    }
+};
+
 struct HwBufferObject : public HwBase {
    uint32_t byteCount{};

@@ -88,11 +92,6 @@ struct HwIndexBuffer : public HwBase {
 };

 struct HwRenderPrimitive : public HwBase {
-    uint32_t offset{};
-    uint32_t minIndex{};
-    uint32_t maxIndex{};
-    uint32_t count{};
-    uint32_t maxVertexCount{};
    PrimitiveType type = PrimitiveType::TRIANGLES;
 };

@@ -114,7 +113,9 @@ struct HwTexture : public HwBase {
    uint8_t levels : 4;  // This allows up to 15 levels (max texture size of 32768 x 32768)
    uint8_t samples : 4; // Sample count per pixel (should always be a power of 2)
    TextureFormat format{};
+    uint8_t reserved0 = 0;
    TextureUsage usage{};
+    uint16_t reserved1 = 0;
    HwStream* hwStream = nullptr;

    HwTexture() noexcept : levels{}, samples{} {}
--- a/filament/backend/src/HandleAllocator.cpp
+++ b/filament/backend/src/HandleAllocator.cpp
@@ -16,9 +16,22 @@

 #include "private/backend/HandleAllocator.h"

+#include <backend/Handle.h>
+
+#include <utils/Allocator.h>
+#include <utils/Log.h>
 #include <utils/Panic.h>
+#include <utils/compiler.h>
+#include <utils/debug.h>
+#include <utils/ostream.h>
+
+#include <algorithm>
+#include <exception>
+#include <limits>
+#include <mutex>

 #include <stdlib.h>
+#include <string.h>

 namespace filament::backend {

@@ -26,23 +39,47 @@ using namespace utils;

 template <size_t P0, size_t P1, size_t P2>
 UTILS_NOINLINE
-HandleAllocator<P0, P1, P2>::Allocator::Allocator(AreaPolicy::HeapArea const& area)
-        : mArea(area) {
-    // TODO: we probably need a better way to set the size of these pools
-    const size_t unit = area.size() / 32;
-    const size_t offsetPool1 =      unit;
-    const size_t offsetPool2 = 16 * unit;
-    char* const p = (char*)area.begin();
-    mPool0 = PoolAllocator< P0, 16>(p, p + offsetPool1);
-    mPool1 = PoolAllocator< P1, 16>(p + offsetPool1, p + offsetPool2);
-    mPool2 = PoolAllocator< P2, 16>(p + offsetPool2, area.end());
+HandleAllocator<P0, P1, P2>::Allocator::Allocator(AreaPolicy::HeapArea const& area,
+        bool disableUseAfterFreeCheck)
+        : mArea(area),
+          mUseAfterFreeCheckDisabled(disableUseAfterFreeCheck) {
+
+    // The largest handle this allocator can generate currently depends on the architecture's
+    // min alignment, typically 8 or 16 bytes.
+    // e.g. On Android armv8, the alignment is 16 bytes, so for a 1 MiB heap, the largest handle
+    //      index will be 65536. Note that this is not the same as the number of handles (which
+    //      will always be less).
+    // Because our maximum representable handle currently is 0x07FFFFFF, the maximum sensible
+    // heap size is 2 GiB, which amounts to 7.6 million handles per pool (in the GL case).
+    size_t const maxHeapSize = std::min(area.size(), HANDLE_INDEX_MASK * getAlignment());
+
+    if (UTILS_UNLIKELY(maxHeapSize != area.size())) {
+        slog.w << "HandleAllocator heap size reduced to "
+               << maxHeapSize << " from " << area.size() << io::endl;
+    }
+
+    // make sure we start with a clean arena. This is needed to ensure that all blocks start
+    // with an age of 0.
+    memset(area.data(), 0, maxHeapSize);
+
+    // size the different pools so that they can all contain the same number of handles
+    size_t const count = maxHeapSize / (P0 + P1 + P2);
+    char* const p0 = static_cast<char*>(area.begin());
+    char* const p1 = p0 + count * P0;
+    char* const p2 = p1 + count * P1;
+
+    mPool0 = Pool<P0>(p0, count * P0);
+    mPool1 = Pool<P1>(p1, count * P1);
+    mPool2 = Pool<P2>(p2, count * P2);
 }

 // ------------------------------------------------------------------------------------------------

 template <size_t P0, size_t P1, size_t P2>
-HandleAllocator<P0, P1, P2>::HandleAllocator(const char* name, size_t size) noexcept
-    : mHandleArena(name, size) {
+HandleAllocator<P0, P1, P2>::HandleAllocator(const char* name, size_t size,
+        bool disableUseAfterFreeCheck) noexcept
+    : mHandleArena(name, size, disableUseAfterFreeCheck),
+      mUseAfterFreeCheckDisabled(disableUseAfterFreeCheck) {
 }

 template <size_t P0, size_t P1, size_t P2>
@@ -70,14 +107,20 @@ void* HandleAllocator<P0, P1, P2>::handleToPointerSlow(HandleBase::HandleId id)
 }

 template <size_t P0, size_t P1, size_t P2>
-HandleBase::HandleId HandleAllocator<P0, P1, P2>::allocateHandleSlow(size_t size) noexcept {
+HandleBase::HandleId HandleAllocator<P0, P1, P2>::allocateHandleSlow(size_t size) {
    void* p = ::malloc(size);
    std::unique_lock lock(mLock);
-    HandleBase::HandleId id = (++mId) | HEAP_HANDLE_FLAG;
+
+    HandleBase::HandleId id = (++mId) | HANDLE_HEAP_FLAG;
+
+    ASSERT_POSTCONDITION(mId < HANDLE_HEAP_FLAG,
+            "No more Handle ids available! This can happen if HandleAllocator arena has been full"
+            " for a while. Please increase FILAMENT_OPENGL_HANDLE_ARENA_SIZE_IN_MB");
+
    mOverflowMap.emplace(id, p);
    lock.unlock();

-    if (UTILS_UNLIKELY(id == (HEAP_HANDLE_FLAG|1u))) { // meaning id was zero
+    if (UTILS_UNLIKELY(id == (HANDLE_HEAP_FLAG | 1u))) { // meaning id was zero
        PANIC_LOG("HandleAllocator arena is full, using slower system heap. Please increase "
                  "the appropriate constant (e.g. FILAMENT_OPENGL_HANDLE_ARENA_SIZE_IN_MB).");
    }
@@ -86,7 +129,7 @@ HandleBase::HandleId HandleAllocator<P0, P1, P2>::allocateHandleSlow(size_t size

 template <size_t P0, size_t P1, size_t P2>
 void HandleAllocator<P0, P1, P2>::deallocateHandleSlow(HandleBase::HandleId id, size_t) noexcept {
-    assert_invariant(id & HEAP_HANDLE_FLAG);
+    assert_invariant(id & HANDLE_HEAP_FLAG);
    void* p = nullptr;
    auto& overflowMap = mOverflowMap;

--- a/filament/backend/src/Platform.cpp
+++ b/filament/backend/src/Platform.cpp
@@ -53,4 +53,18 @@ size_t Platform::retrieveBlob(void const* key, size_t keySize, void* value, size
    return 0;
 }

+void Platform::setDebugUpdateStatFunc(DebugUpdateStatFunc&& debugUpdateStat) noexcept {
+    mDebugUpdateStat = std::move(debugUpdateStat);
+}
+
+bool Platform::hasDebugUpdateStatFunc() const noexcept {
+    return bool(mDebugUpdateStat);
+}
+
+void Platform::debugUpdateStat(const char* key, uint64_t value) {
+    if (mDebugUpdateStat) {
+        mDebugUpdateStat(key, value);
+    }
+}
+
 } // namespace filament::backend
--- a/filament/backend/src/Program.cpp
+++ b/filament/backend/src/Program.cpp
@@ -91,6 +91,11 @@ Program& Program::cacheId(uint64_t cacheId) noexcept {
    return *this;
 }

+Program& Program::multiview(bool multiview) noexcept {
+    mMultiview = multiview;
+    return *this;
+}
+
 io::ostream& operator<<(io::ostream& out, const Program& builder) {
    out << "Program{";
    builder.mLogger(out);
--- a/filament/backend/src/metal/MetalBuffer.h
+++ b/filament/backend/src/metal/MetalBuffer.h
@@ -18,7 +18,6 @@
 #define TNT_FILAMENT_DRIVER_METALBUFFER_H

 #include "MetalContext.h"
-#include "MetalBufferPool.h"

 #include <backend/DriverEnums.h>

@@ -28,9 +27,50 @@

 #include <utility>
 #include <memory>
+#include <atomic>

 namespace filament::backend {

+class TrackedMetalBuffer { // holds an id<MTLBuffer> and counts non-nil instances in aliveBuffers
+public:
+    TrackedMetalBuffer() noexcept : mBuffer(nil) {}
+    TrackedMetalBuffer(id<MTLBuffer> buffer) noexcept : mBuffer(buffer) {
+        if (buffer) { // only a non-nil buffer counts as alive
+            aliveBuffers++;
+        }
+    }
+    ~TrackedMetalBuffer() {
+        if (mBuffer) { // whatever buffer is held at destruction stops being alive
+            aliveBuffers--;
+        }
+    }
+
+    TrackedMetalBuffer(TrackedMetalBuffer&&) = delete;
+    TrackedMetalBuffer(TrackedMetalBuffer const&) = delete;
+    TrackedMetalBuffer& operator=(TrackedMetalBuffer const&) = delete;
+
+    TrackedMetalBuffer& operator=(TrackedMetalBuffer&& rhs) noexcept {
+        swap(rhs); // rhs now holds our previous buffer; its destructor adjusts the count
+        return *this;
+    }
+
+    id<MTLBuffer> get() const noexcept { return mBuffer; }
+    operator bool() const noexcept { return bool(mBuffer); }
+
+    static uint64_t getAliveBuffers() { return aliveBuffers; } // current value of the counter
+
+private:
+    void swap(TrackedMetalBuffer& other) noexcept {
+        id<MTLBuffer> temp = mBuffer;
+        mBuffer = other.mBuffer;
+        other.mBuffer = temp;
+    }
+
+    id<MTLBuffer> mBuffer;
+
+    static std::atomic<uint64_t> aliveBuffers; // ++ in the constructor, -- in the destructor
+};
+
 class MetalBuffer {
 public:

@@ -82,7 +122,7 @@ public:

 private:

-    id<MTLBuffer> mBuffer = nil;
+    TrackedMetalBuffer mBuffer;
    size_t mBufferSize = 0;
    void* mCpuBuffer = nullptr;
    MetalContext& mContext;
@@ -151,7 +191,7 @@ public:
            // finishes executing.
            mAuxBuffer = [mDevice newBufferWithLength:mSlotSizeBytes options:mBufferOptions];
            assert_invariant(mAuxBuffer);
-            return {mAuxBuffer, 0};
+            return {mAuxBuffer.get(), 0};
        }
        mCurrentSlot = (mCurrentSlot + 1) % mSlotCount;
        mOccupiedSlots->fetch_add(1, std::memory_order_relaxed);
@@ -180,9 +220,9 @@ public:
     */
    std::pair<id<MTLBuffer>, NSUInteger> getCurrentAllocation() const {
        if (UTILS_UNLIKELY(mAuxBuffer)) {
-            return { mAuxBuffer, 0 };
+            return { mAuxBuffer.get(), 0 };
        }
-        return { mBuffer, mCurrentSlot * mSlotSizeBytes };
+        return { mBuffer.get(), mCurrentSlot * mSlotSizeBytes };
    }

    bool canAccomodateLayout(MTLSizeAndAlign layout) const {
@@ -191,8 +231,8 @@ public:

 private:
    id<MTLDevice> mDevice;
-    id<MTLBuffer> mBuffer;
-    id<MTLBuffer> mAuxBuffer;
+    TrackedMetalBuffer mBuffer;
+    TrackedMetalBuffer mAuxBuffer;

    MTLResourceOptions mBufferOptions;

--- a/filament/backend/src/metal/MetalBuffer.mm
+++ b/filament/backend/src/metal/MetalBuffer.mm
@@ -15,12 +15,15 @@
 */

 #include "MetalBuffer.h"
+#include "MetalBufferPool.h"

 #include "MetalContext.h"

 namespace filament {
 namespace backend {

+std::atomic<uint64_t> TrackedMetalBuffer::aliveBuffers = 0;
+
 MetalBuffer::MetalBuffer(MetalContext& context, BufferObjectBinding bindingType, BufferUsage usage,
        size_t size, bool forceGpuBuffer) : mBufferSize(size), mContext(context) {
    // If the buffer is less than 4K in size and is updated frequently, we don't use an explicit
@@ -61,7 +64,7 @@ void MetalBuffer::copyIntoBuffer(void* src, size_t size, size_t byteOffset) {
    // Acquire a staging buffer to hold the contents of this update.
    MetalBufferPool* bufferPool = mContext.bufferPool;
    const MetalBufferPoolEntry* const staging = bufferPool->acquireBuffer(size);
-    memcpy(staging->buffer.contents, src, size);
+    memcpy(staging->buffer.get().contents, src, size);

    // The blit below requires that byteOffset be a multiple of 4.
    ASSERT_PRECONDITION(!(byteOffset & 0x3u), "byteOffset must be a multiple of 4");
@@ -70,9 +73,9 @@ void MetalBuffer::copyIntoBuffer(void* src, size_t size, size_t byteOffset) {
    id<MTLCommandBuffer> cmdBuffer = getPendingCommandBuffer(&mContext);
    id<MTLBlitCommandEncoder> blitEncoder = [cmdBuffer blitCommandEncoder];
    blitEncoder.label = @"Buffer upload blit";
-    [blitEncoder copyFromBuffer:staging->buffer
+    [blitEncoder copyFromBuffer:staging->buffer.get()
                   sourceOffset:0
-                       toBuffer:mBuffer
+                       toBuffer:mBuffer.get()
              destinationOffset:byteOffset
                           size:size];
    [blitEncoder endEncoding];
@@ -93,7 +96,7 @@ id<MTLBuffer> MetalBuffer::getGpuBufferForDraw(id<MTLCommandBuffer> cmdBuffer) n
        return nil;
    }
    assert_invariant(mBuffer);
-    return mBuffer;
+    return mBuffer.get();
 }

 void MetalBuffer::bindBuffers(id<MTLCommandBuffer> cmdBuffer, id<MTLCommandEncoder> encoder,
--- a/filament/backend/src/metal/MetalBufferPool.h
+++ b/filament/backend/src/metal/MetalBufferPool.h
@@ -19,6 +19,8 @@

 #include <Metal/Metal.h>

+#include "MetalBuffer.h"
+
 #include <map>
 #include <mutex>
 #include <unordered_set>
@@ -30,7 +32,7 @@ struct MetalContext;

 // Immutable POD representing a shared CPU-GPU buffer.
 struct MetalBufferPoolEntry {
-    id<MTLBuffer> buffer;
+    TrackedMetalBuffer buffer;
    size_t capacity;
    mutable uint64_t lastAccessed;
    mutable uint32_t referenceCount;
--- a/filament/backend/src/metal/MetalBufferPool.mm
+++ b/filament/backend/src/metal/MetalBufferPool.mm
@@ -45,12 +45,12 @@ MetalBufferPoolEntry const* MetalBufferPool::acquireBuffer(size_t numBytes) {
    id<MTLBuffer> buffer = [mContext.device newBufferWithLength:numBytes
                                                        options:MTLResourceStorageModeShared];
    ASSERT_POSTCONDITION(buffer, "Could not allocate Metal staging buffer of size %zu.", numBytes);
-    MetalBufferPoolEntry* stage = new MetalBufferPoolEntry({
+    MetalBufferPoolEntry* stage = new MetalBufferPoolEntry {
        .buffer = buffer,
        .capacity = numBytes,
        .lastAccessed = mCurrentFrame,
        .referenceCount = 1
-    });
+    };
    mUsedStages.insert(stage);

    return stage;
--- a/filament/backend/src/metal/MetalContext.h
+++ b/filament/backend/src/metal/MetalContext.h
@@ -99,6 +99,7 @@ struct MetalContext {
    std::array<BufferState, MAX_SSBO_COUNT> ssboState;
    CullModeStateTracker cullModeState;
    WindingStateTracker windingState;
+    Handle<HwRenderPrimitive> currentRenderPrimitive;

    // State caches.
    DepthStencilStateCache depthStencilStateCache;
--- a/filament/backend/src/metal/MetalDriver.h
+++ b/filament/backend/src/metal/MetalDriver.h
@@ -133,12 +133,9 @@ private:
        mHandleAllocator.deallocate(handle, p);
    }

-    inline void setRenderPrimitiveBuffer(Handle<HwRenderPrimitive> rph,
+    inline void setRenderPrimitiveBuffer(Handle<HwRenderPrimitive> rph, PrimitiveType pt,
            Handle<HwVertexBuffer> vbh, Handle<HwIndexBuffer> ibh);

-    inline void setRenderPrimitiveRange(Handle<HwRenderPrimitive> rph, PrimitiveType pt,
-            uint32_t offset, uint32_t minIndex, uint32_t maxIndex, uint32_t count);
-
    void finalizeSamplerGroup(MetalSamplerGroup* sg);
    void enumerateBoundBuffers(BufferObjectBinding bindingType,
            const std::function<void(const BufferState&, MetalBuffer*, uint32_t)>& f);
--- a/filament/backend/src/metal/MetalDriver.mm
+++ b/filament/backend/src/metal/MetalDriver.mm
@@ -20,6 +20,7 @@
 #include "metal/MetalDriver.h"

 #include "MetalBlitter.h"
+#include "MetalBufferPool.h"
 #include "MetalContext.h"
 #include "MetalDriverFactory.h"
 #include "MetalEnums.h"
@@ -36,6 +37,7 @@

 #include <utils/Log.h>
 #include <utils/Panic.h>
+#include <utils/sstream.h>

 #include <algorithm>

@@ -43,6 +45,42 @@ namespace filament {
 namespace backend {

 Driver* MetalDriverFactory::create(MetalPlatform* const platform, const Platform::DriverConfig& driverConfig) {
+#if 0
+    // this is useful for development, but too verbose even for debug builds
+    // For reference on a 64-bits machine in Release mode:
+    //    MetalTimerQuery              :  16       few
+    //    HwStream                     :  24       few
+    //    MetalRenderPrimitive         :  24       many
+    //    MetalVertexBuffer            :  32       moderate
+    // -- less than or equal to 32 bytes
+    //    MetalIndexBuffer             :  40       moderate
+    //    MetalFence                   :  48       few
+    //    MetalBufferObject            :  48       many
+    // -- less than or equal to 48 bytes
+    //    MetalSamplerGroup            : 112       few
+    //    MetalProgram                 : 152       moderate
+    //    MetalTexture                 : 152       moderate
+    //    MetalSwapChain               : 184       few
+    //    MetalRenderTarget            : 272       few
+    //    MetalVertexBufferInfo        : 552       moderate
+    // -- less than or equal to 552 bytes
+
+    utils::slog.d
+           << "\nMetalSwapChain: " << sizeof(MetalSwapChain)
+           << "\nMetalBufferObject: " << sizeof(MetalBufferObject)
+           << "\nMetalVertexBuffer: " << sizeof(MetalVertexBuffer)
+           << "\nMetalVertexBufferInfo: " << sizeof(MetalVertexBufferInfo)
+           << "\nMetalIndexBuffer: " << sizeof(MetalIndexBuffer)
+           << "\nMetalSamplerGroup: " << sizeof(MetalSamplerGroup)
+           << "\nMetalRenderPrimitive: " << sizeof(MetalRenderPrimitive)
+           << "\nMetalTexture: " << sizeof(MetalTexture)
+           << "\nMetalTimerQuery: " << sizeof(MetalTimerQuery)
+           << "\nHwStream: " << sizeof(HwStream)
+           << "\nMetalRenderTarget: " << sizeof(MetalRenderTarget)
+           << "\nMetalFence: " << sizeof(MetalFence)
+           << "\nMetalProgram: " << sizeof(MetalProgram)
+           << utils::io::endl;
+#endif
    return MetalDriver::create(platform, driverConfig);
 }

@@ -62,7 +100,9 @@ Dispatcher MetalDriver::getDispatcher() const noexcept {
 MetalDriver::MetalDriver(MetalPlatform* platform, const Platform::DriverConfig& driverConfig) noexcept
        : mPlatform(*platform),
          mContext(new MetalContext(driverConfig.textureUseAfterFreePoolSize)),
-          mHandleAllocator("Handles", driverConfig.handleArenaSize) {
+          mHandleAllocator("Handles",
+                  driverConfig.handleArenaSize,
+                  driverConfig.disableHandleUseAfterFreeCheck) {
    mContext->driver = this;

    mContext->device = mPlatform.createDevice();
@@ -144,7 +184,10 @@ MetalDriver::MetalDriver(MetalPlatform* platform, const Platform::DriverConfig&
        mContext->eventListener = [[MTLSharedEventListener alloc] initWithDispatchQueue:queue];
    }

-    mContext->shaderCompiler = new MetalShaderCompiler(mContext->device, *this);
+    const MetalShaderCompiler::Mode compilerMode = driverConfig.disableParallelShaderCompile
+            ? MetalShaderCompiler::Mode::SYNCHRONOUS
+            : MetalShaderCompiler::Mode::ASYNCHRONOUS;
+    mContext->shaderCompiler = new MetalShaderCompiler(mContext->device, *this, compilerMode);
    mContext->shaderCompiler->init();

 #if defined(FILAMENT_METAL_PROFILING)
@@ -173,6 +216,9 @@ void MetalDriver::beginFrame(int64_t monotonic_clock_ns, uint32_t frameId) {
 #if defined(FILAMENT_METAL_PROFILING)
    os_signpost_interval_begin(mContext->log, mContext->signpostId, "Frame encoding", "%{public}d", frameId);
 #endif
+    if (mPlatform.hasDebugUpdateStatFunc()) {
+        mPlatform.debugUpdateStat("filament.metal.alive_buffers", TrackedMetalBuffer::getAliveBuffers());
+    }
 }

 void MetalDriver::setFrameScheduledCallback(Handle<HwSwapChain> sch,
@@ -245,10 +291,16 @@ void MetalDriver::finish(int) {
    [oneOffBuffer waitUntilCompleted];
 }

-void MetalDriver::createVertexBufferR(Handle<HwVertexBuffer> vbh, uint8_t bufferCount,
-        uint8_t attributeCount, uint32_t vertexCount, AttributeArray attributes) {
-    construct_handle<MetalVertexBuffer>(vbh, *mContext, bufferCount,
-            attributeCount, vertexCount, attributes);
+void MetalDriver::createVertexBufferInfoR(Handle<HwVertexBufferInfo> vbih, uint8_t bufferCount,
+        uint8_t attributeCount, AttributeArray attributes) {
+    construct_handle<MetalVertexBufferInfo>(vbih, *mContext,
+            bufferCount, attributeCount, attributes);
+}
+
+void MetalDriver::createVertexBufferR(Handle<HwVertexBuffer> vbh,
+        uint32_t vertexCount, Handle<HwVertexBufferInfo> vbih) {
+    MetalVertexBufferInfo const* const vbi = handle_cast<const MetalVertexBufferInfo>(vbih);
+    construct_handle<MetalVertexBuffer>(vbh, *mContext, vertexCount, vbi->bufferCount, vbih);
 }

 void MetalDriver::createIndexBufferR(Handle<HwIndexBuffer> ibh, ElementType elementType,
@@ -316,11 +368,9 @@ void MetalDriver::createSamplerGroupR(

 void MetalDriver::createRenderPrimitiveR(Handle<HwRenderPrimitive> rph,
        Handle<HwVertexBuffer> vbh, Handle<HwIndexBuffer> ibh,
-        PrimitiveType pt, uint32_t offset,
-        uint32_t minIndex, uint32_t maxIndex, uint32_t count) {
+        PrimitiveType pt) {
    construct_handle<MetalRenderPrimitive>(rph);
-    MetalDriver::setRenderPrimitiveBuffer(rph, vbh, ibh);
-    MetalDriver::setRenderPrimitiveRange(rph, pt, offset, minIndex, maxIndex, count);
+    MetalDriver::setRenderPrimitiveBuffer(rph, pt, vbh, ibh);
 }

 void MetalDriver::createProgramR(Handle<HwProgram> rph, Program&& program) {
@@ -333,7 +383,7 @@ void MetalDriver::createDefaultRenderTargetR(Handle<HwRenderTarget> rth, int dum

 void MetalDriver::createRenderTargetR(Handle<HwRenderTarget> rth,
        TargetBufferFlags targetBufferFlags, uint32_t width, uint32_t height,
-        uint8_t samples, MRT color,
+        uint8_t samples, uint8_t layerCount, MRT color,
        TargetBufferInfo depth, TargetBufferInfo stencil) {
    ASSERT_PRECONDITION(!isInRenderPass(mContext),
            "createRenderTarget must be called outside of a render pass.");
@@ -406,6 +456,10 @@ void MetalDriver::createTimerQueryR(Handle<HwTimerQuery> tqh, int) {
    // nothing to do, timer query was constructed in createTimerQueryS
 }

+Handle<HwVertexBufferInfo> MetalDriver::createVertexBufferInfoS() noexcept {
+    return alloc_handle<MetalVertexBufferInfo>();
+}
+
 Handle<HwVertexBuffer> MetalDriver::createVertexBufferS() noexcept {
    return alloc_handle<MetalVertexBuffer>();
 }
@@ -470,6 +524,12 @@ Handle<HwTimerQuery> MetalDriver::createTimerQueryS() noexcept {
    return alloc_and_construct_handle<MetalTimerQuery, HwTimerQuery>();
 }

+void MetalDriver::destroyVertexBufferInfo(Handle<HwVertexBufferInfo> vbih) {
+    if (vbih) {
+        destruct_handle<MetalVertexBufferInfo>(vbih);
+    }
+}
+
 void MetalDriver::destroyVertexBuffer(Handle<HwVertexBuffer> vbh) {
    if (vbh) {
        destruct_handle<MetalVertexBuffer>(vbh);
@@ -722,18 +782,37 @@ bool MetalDriver::isSRGBSwapChainSupported() {
    return false;
 }
 
-bool MetalDriver::isStereoSupported() {
-    return true;
+bool MetalDriver::isProtectedContentSupported() {
+    // the SWAP_CHAIN_CONFIG_PROTECTED_CONTENT flag is not supported
+    return false;
+}
+
+// Reports whether the given stereoscopic rendering technique is available on the Metal backend.
+bool MetalDriver::isStereoSupported(backend::StereoscopicType stereoscopicType) {
+    switch (stereoscopicType) {
+    case backend::StereoscopicType::INSTANCED:
+        return true;
+    case backend::StereoscopicType::MULTIVIEW:
+        // TODO: implement multiview feature in Metal.
+        return false;
+    }
+    // Not reachable for valid enumerators; keeps the function from flowing off its end (which is
+    // undefined behavior) if an out-of-range value is ever passed in.
+    return false;
 }

 bool MetalDriver::isParallelShaderCompileSupported() {
-    return true;
+    return mContext->shaderCompiler->isParallelShaderCompileSupported();
 }

 bool MetalDriver::isDepthStencilResolveSupported() {
    return false;
 }

+bool MetalDriver::isProtectedTexturesSupported() {
+    return false;
+}
+
 bool MetalDriver::isWorkaroundNeeded(Workaround workaround) {
    switch (workaround) {
        case Workaround::SPLIT_EASU:
@@ -860,9 +935,10 @@ void MetalDriver::setExternalImagePlane(Handle<HwTexture> th, void* image, uint3
 void MetalDriver::setExternalStream(Handle<HwTexture> th, Handle<HwStream> sh) {
 }

-bool MetalDriver::getTimerQueryValue(Handle<HwTimerQuery> tqh, uint64_t* elapsedTime) {
+TimerQueryResult MetalDriver::getTimerQueryValue(Handle<HwTimerQuery> tqh, uint64_t* elapsedTime) {
    auto* tq = handle_cast<MetalTimerQuery>(tqh);
-    return mContext->timerQueryImpl->getQueryResult(tq, elapsedTime);
+    return mContext->timerQueryImpl->getQueryResult(tq, elapsedTime) ?
+           TimerQueryResult::AVAILABLE : TimerQueryResult::NOT_READY;
 }

 void MetalDriver::generateMipmaps(Handle<HwTexture> th) {
@@ -1029,23 +1105,14 @@ void MetalDriver::endRenderPass(int dummy) {
    mContext->currentRenderPassEncoder = nil;
 }

-void MetalDriver::setRenderPrimitiveBuffer(Handle<HwRenderPrimitive> rph,
+void MetalDriver::setRenderPrimitiveBuffer(Handle<HwRenderPrimitive> rph, PrimitiveType pt,
        Handle<HwVertexBuffer> vbh, Handle<HwIndexBuffer> ibh) {
    auto primitive = handle_cast<MetalRenderPrimitive>(rph);
    auto vertexBuffer = handle_cast<MetalVertexBuffer>(vbh);
    auto indexBuffer = handle_cast<MetalIndexBuffer>(ibh);
-    primitive->setBuffers(vertexBuffer, indexBuffer);
-}
-
-void MetalDriver::setRenderPrimitiveRange(Handle<HwRenderPrimitive> rph,
-        PrimitiveType pt, uint32_t offset, uint32_t minIndex, uint32_t maxIndex,
-        uint32_t count) {
-    auto primitive = handle_cast<MetalRenderPrimitive>(rph);
+    MetalVertexBufferInfo const* const vbi = handle_cast<MetalVertexBufferInfo>(vertexBuffer->vbih);
+    primitive->setBuffers(vbi, vertexBuffer, indexBuffer);
    primitive->type = pt;
-    primitive->offset = offset * primitive->indexBuffer->elementSize;
-    primitive->count = count;
-    primitive->minIndex = minIndex;
-    primitive->maxIndex = maxIndex > minIndex ? maxIndex : primitive->maxVertexCount - 1;
 }

 void MetalDriver::makeCurrent(Handle<HwSwapChain> schDraw, Handle<HwSwapChain> schRead) {
@@ -1547,10 +1614,13 @@ void MetalDriver::finalizeSamplerGroup(MetalSamplerGroup* samplerGroup) {
    }
 }

-void MetalDriver::draw(PipelineState ps, Handle<HwRenderPrimitive> rph, uint32_t instanceCount) {
+void MetalDriver::bindPipeline(PipelineState ps) {
    ASSERT_PRECONDITION(mContext->currentRenderPassEncoder != nullptr,
-            "Attempted to draw without a valid command encoder.");
-    auto primitive = handle_cast<MetalRenderPrimitive>(rph);
+            "bindPipeline() without a valid command encoder.");
+
+    MetalVertexBufferInfo const* const vbi =
+            handle_cast<MetalVertexBufferInfo>(ps.vertexBufferInfo);
+
    auto program = handle_cast<MetalProgram>(ps.program);
    const auto& rs = ps.rasterState;

@@ -1564,7 +1634,7 @@ void MetalDriver::draw(PipelineState ps, Handle<HwRenderPrimitive> rph, uint32_t
        return;
    }

-    ASSERT_PRECONDITION(bool(functions), "Attempting to draw with an invalid Metal program.");
+    functions.validate();

    auto [fragment, vertex] = functions.getRasterFunctions();

@@ -1591,7 +1661,7 @@ void MetalDriver::draw(PipelineState ps, Handle<HwRenderPrimitive> rph, uint32_t
    MetalPipelineState const pipelineState {
        .vertexFunction = vertex,
        .fragmentFunction = fragment,
-        .vertexDescription = primitive->vertexDescription,
+        .vertexDescription = vbi->vertexDescription,
        .colorAttachmentPixelFormat = {
            colorPixelFormat[0],
            colorPixelFormat[1],
@@ -1685,50 +1755,6 @@ void MetalDriver::draw(PipelineState ps, Handle<HwRenderPrimitive> rph, uint32_t
        mContext->currentPolygonOffset = ps.polygonOffset;
    }

-    // Set scissor-rectangle.
-    // In order to do this, we compute the intersection between:
-    //  1. the scissor rectangle
-    //  2. the render target attachment dimensions (important, as the scissor can't be set larger)
-    // fmax/min are used below to guard against NaN and because the MTLViewport/MTLRegion
-    // coordinates are doubles.
-    MTLRegion scissor = mContext->currentRenderTarget->getRegionFromClientRect(ps.scissor);
-    const float sleft = scissor.origin.x, sright = scissor.origin.x + scissor.size.width;
-    const float stop = scissor.origin.y, sbottom = scissor.origin.y + scissor.size.height;
-
-    // Attachment extent
-    const auto attachmentSize = mContext->currentRenderTarget->getAttachmentSize();
-    const float aleft = 0.0f, atop = 0.0f;
-    const float aright = static_cast<float>(attachmentSize.x);
-    const float abottom = static_cast<float>(attachmentSize.y);
-
-    const auto left   = std::fmax(sleft, aleft);
-    const auto right  = std::fmin(sright, aright);
-    const auto top    = std::fmax(stop, atop);
-    const auto bottom = std::fmin(sbottom, abottom);
-
-    MTLScissorRect scissorRect = {
-        .x      = static_cast<NSUInteger>(left),
-        .y      = static_cast<NSUInteger>(top),
-        .width  = static_cast<NSUInteger>(right  - left),
-        .height = static_cast<NSUInteger>(bottom - top)
-    };
-
-    [mContext->currentRenderPassEncoder setScissorRect:scissorRect];
-
-    // Bind uniform buffers.
-    MetalBuffer* uniformsToBind[Program::UNIFORM_BINDING_COUNT] = { nil };
-    NSUInteger offsets[Program::UNIFORM_BINDING_COUNT] = { 0 };
-
-    enumerateBoundBuffers(BufferObjectBinding::UNIFORM,
-            [&uniformsToBind, &offsets](const BufferState& state, MetalBuffer* buffer,
-                    uint32_t index) {
-        uniformsToBind[index] = buffer;
-        offsets[index] = state.offset;
-    });
-    MetalBuffer::bindBuffers(getPendingCommandBuffer(mContext), mContext->currentRenderPassEncoder,
-            UNIFORM_BUFFER_BINDING_START, MetalBuffer::Stage::VERTEX | MetalBuffer::Stage::FRAGMENT,
-            uniformsToBind, offsets, Program::UNIFORM_BINDING_COUNT);
-
    // Bind sampler groups (argument buffers).
    for (size_t s = 0; s < Program::SAMPLER_BINDING_COUNT; s++) {
        MetalSamplerGroup* const samplerGroup = mContext->samplerBindings[s];
@@ -1759,19 +1785,29 @@ void MetalDriver::draw(PipelineState ps, Handle<HwRenderPrimitive> rph, uint32_t
                                                          atIndex:(SAMPLER_GROUP_BINDING_START + s)];
        }
    }
+}
+
+void MetalDriver::bindRenderPrimitive(Handle<HwRenderPrimitive> rph) {
+    ASSERT_PRECONDITION(mContext->currentRenderPassEncoder != nullptr,
+            "bindRenderPrimitive() without a valid command encoder.");

    // Bind the user vertex buffers.
-
    MetalBuffer* vertexBuffers[MAX_VERTEX_BUFFER_COUNT] = {};
    size_t vertexBufferOffsets[MAX_VERTEX_BUFFER_COUNT] = {};
    size_t maxBufferIndex = 0;

+    MetalRenderPrimitive const* const primitive = handle_cast<MetalRenderPrimitive>(rph);
+    MetalVertexBufferInfo const* const vbi =
+            handle_cast<MetalVertexBufferInfo>(primitive->vertexBuffer->vbih);
+
+    mContext->currentRenderPrimitive = rph;
+
    auto vb = primitive->vertexBuffer;
-    for (auto m : primitive->bufferMapping) {
+    for (auto m : vbi->bufferMapping) {
        assert_invariant(
                m.bufferArgumentIndex >= USER_VERTEX_BUFFER_BINDING_START &&
                m.bufferArgumentIndex < USER_VERTEX_BUFFER_BINDING_START + MAX_VERTEX_BUFFER_COUNT);
-        size_t vertexBufferIndex = m.bufferArgumentIndex - USER_VERTEX_BUFFER_BINDING_START;
+        size_t const vertexBufferIndex = m.bufferArgumentIndex - USER_VERTEX_BUFFER_BINDING_START;
        vertexBuffers[vertexBufferIndex] = vb->buffers[m.sourceBufferIndex];
        maxBufferIndex = std::max(maxBufferIndex, vertexBufferIndex);
    }
@@ -1786,19 +1822,50 @@ void MetalDriver::draw(PipelineState ps, Handle<HwRenderPrimitive> rph, uint32_t
    [mContext->currentRenderPassEncoder setVertexBytes:bytes
                                                length:16
                                               atIndex:ZERO_VERTEX_BUFFER_BINDING];
+}
+
+void MetalDriver::draw2(uint32_t indexOffset, uint32_t indexCount, uint32_t instanceCount) {
+    ASSERT_PRECONDITION(mContext->currentRenderPassEncoder != nullptr,
+            "draw() without a valid command encoder.");
+
+    // Bind uniform buffers.
+    MetalBuffer* uniformsToBind[Program::UNIFORM_BINDING_COUNT] = { nil };
+    NSUInteger offsets[Program::UNIFORM_BINDING_COUNT] = { 0 };
+
+    enumerateBoundBuffers(BufferObjectBinding::UNIFORM,
+            [&uniformsToBind, &offsets](const BufferState& state, MetalBuffer* buffer,
+                    uint32_t index) {
+                uniformsToBind[index] = buffer;
+                offsets[index] = state.offset;
+            });
+    MetalBuffer::bindBuffers(getPendingCommandBuffer(mContext), mContext->currentRenderPassEncoder,
+            UNIFORM_BUFFER_BINDING_START, MetalBuffer::Stage::VERTEX | MetalBuffer::Stage::FRAGMENT,
+            uniformsToBind, offsets, Program::UNIFORM_BINDING_COUNT);
+
+    auto primitive = handle_cast<MetalRenderPrimitive>(mContext->currentRenderPrimitive);

    MetalIndexBuffer* indexBuffer = primitive->indexBuffer;

    id<MTLCommandBuffer> cmdBuffer = getPendingCommandBuffer(mContext);
    id<MTLBuffer> metalIndexBuffer = indexBuffer->buffer.getGpuBufferForDraw(cmdBuffer);
    [mContext->currentRenderPassEncoder drawIndexedPrimitives:getMetalPrimitiveType(primitive->type)
-                                                   indexCount:primitive->count
+                                                   indexCount:indexCount
                                                    indexType:getIndexType(indexBuffer->elementSize)
                                                  indexBuffer:metalIndexBuffer
-                                            indexBufferOffset:primitive->offset
+                                            indexBufferOffset:indexOffset * primitive->indexBuffer->elementSize
                                                instanceCount:instanceCount];
 }

+void MetalDriver::draw(PipelineState ps, Handle<HwRenderPrimitive> rph,
+        uint32_t const indexOffset, uint32_t const indexCount, uint32_t const instanceCount) {
+    MetalRenderPrimitive const* const rp = handle_cast<MetalRenderPrimitive>(rph);
+    ps.primitiveType = rp->type;
+    ps.vertexBufferInfo = rp->vertexBuffer->vbih;
+    bindPipeline(ps);
+    bindRenderPrimitive(rph);
+    draw2(indexOffset, indexCount, instanceCount);
+}
+
 void MetalDriver::dispatchCompute(Handle<HwProgram> program, math::uint3 workGroupCount) {
    ASSERT_PRECONDITION(!isInRenderPass(mContext),
            "dispatchCompute must be called outside of a render pass.");
@@ -1868,6 +1935,43 @@ void MetalDriver::dispatchCompute(Handle<HwProgram> program, math::uint3 workGro
    [computeEncoder endEncoding];
 }
 
+// Applies scissorBox to the current render pass encoder, clipped to the bounds of the
+// current render target's attachments.
+void MetalDriver::scissor(Viewport scissorBox) {
+    // Set scissor-rectangle.
+    // In order to do this, we compute the intersection between:
+    //  1. the scissor rectangle
+    //  2. the render target attachment dimensions (important, as the scissor can't be set larger)
+    // fmax/min are used below to guard against NaN and because the MTLViewport/MTLRegion
+    // coordinates are doubles.
+    MTLRegion scissor = mContext->currentRenderTarget->getRegionFromClientRect(scissorBox);
+    const float sleft = scissor.origin.x, sright = scissor.origin.x + scissor.size.width;
+    const float stop = scissor.origin.y, sbottom = scissor.origin.y + scissor.size.height;
+
+    // Attachment extent
+    const auto attachmentSize = mContext->currentRenderTarget->getAttachmentSize();
+    const float aleft = 0.0f, atop = 0.0f;
+    const float aright = static_cast<float>(attachmentSize.x);
+    const float abottom = static_cast<float>(attachmentSize.y);
+
+    // The origin is clamped on both sides. A scissor whose origin lies beyond the attachment
+    // would otherwise give right < left (or bottom < top), and converting that negative extent
+    // to NSUInteger is undefined behavior. Such a scissor collapses to an empty rect instead.
+    const auto left   = std::fmin(std::fmax(sleft, aleft), aright);
+    const auto right  = std::fmin(sright, aright);
+    const auto top    = std::fmin(std::fmax(stop, atop), abottom);
+    const auto bottom = std::fmin(sbottom, abottom);
+
+    MTLScissorRect scissorRect = {
+            .x      = static_cast<NSUInteger>(left),
+            .y      = static_cast<NSUInteger>(top),
+            .width  = static_cast<NSUInteger>(right  - left),
+            .height = static_cast<NSUInteger>(bottom - top)
+    };
+
+    [mContext->currentRenderPassEncoder setScissorRect:scissorRect];
+}
+
 void MetalDriver::beginTimerQuery(Handle<HwTimerQuery> tqh) {
    ASSERT_PRECONDITION(!isInRenderPass(mContext),
            "beginTimerQuery must be called outside of a render pass.");
--- a/filament/backend/src/metal/MetalHandles.h
+++ b/filament/backend/src/metal/MetalHandles.h
@@ -145,28 +145,9 @@ private:
    MetalBuffer buffer;
 };

-struct MetalVertexBuffer : public HwVertexBuffer {
-    MetalVertexBuffer(MetalContext& context, uint8_t bufferCount, uint8_t attributeCount,
-            uint32_t vertexCount, AttributeArray const& attributes);
-
-    utils::FixedCapacityVector<MetalBuffer*> buffers;
-};
-
-struct MetalIndexBuffer : public HwIndexBuffer {
-    MetalIndexBuffer(MetalContext& context, BufferUsage usage, uint8_t elementSize,
-            uint32_t indexCount);
-
-    MetalBuffer buffer;
-};
-
-struct MetalRenderPrimitive : public HwRenderPrimitive {
-    MetalRenderPrimitive();
-    void setBuffers(MetalVertexBuffer* vertexBuffer, MetalIndexBuffer* indexBuffer);
-    // The pointers to MetalVertexBuffer and MetalIndexBuffer are "weak".
-    // The MetalVertexBuffer and MetalIndexBuffer must outlive the MetalRenderPrimitive.
-
-    MetalVertexBuffer* vertexBuffer = nullptr;
-    MetalIndexBuffer* indexBuffer = nullptr;
+struct MetalVertexBufferInfo : public HwVertexBufferInfo {
+    MetalVertexBufferInfo(MetalContext& context,
+            uint8_t bufferCount, uint8_t attributeCount, AttributeArray const& attributes);

    // This struct is used to create the pipeline description to describe vertex assembly.
    VertexDescription vertexDescription = {};
@@ -185,6 +166,32 @@ struct MetalRenderPrimitive : public HwRenderPrimitive {
    utils::FixedCapacityVector<Entry> bufferMapping;
 };

+struct MetalVertexBuffer : public HwVertexBuffer {
+    MetalVertexBuffer(MetalContext& context,
+            uint32_t vertexCount, uint32_t bufferCount, Handle<HwVertexBufferInfo> vbih);
+
+    Handle<HwVertexBufferInfo> vbih;
+    utils::FixedCapacityVector<MetalBuffer*> buffers;
+};
+
+struct MetalIndexBuffer : public HwIndexBuffer {
+    MetalIndexBuffer(MetalContext& context, BufferUsage usage, uint8_t elementSize,
+            uint32_t indexCount);
+
+    MetalBuffer buffer;
+};
+
+struct MetalRenderPrimitive : public HwRenderPrimitive {
+    MetalRenderPrimitive();
+    void setBuffers(MetalVertexBufferInfo const* const vbi,
+            MetalVertexBuffer* vertexBuffer, MetalIndexBuffer* indexBuffer);
+    // The pointers to MetalVertexBuffer and MetalIndexBuffer are "weak".
+    // The MetalVertexBuffer and MetalIndexBuffer must outlive the MetalRenderPrimitive.
+
+    MetalVertexBuffer* vertexBuffer = nullptr;
+    MetalIndexBuffer* indexBuffer = nullptr;
+};
+
 class MetalProgram : public HwProgram {
 public:
    MetalProgram(MetalContext& context, Program&& program) noexcept;
--- a/filament/backend/src/metal/MetalHandles.mm
+++ b/filament/backend/src/metal/MetalHandles.mm
@@ -19,6 +19,7 @@
 #include "MetalBlitter.h"
 #include "MetalEnums.h"
 #include "MetalUtils.h"
+#include "MetalBufferPool.h"

 #include <filament/SwapChain.h>

@@ -245,30 +246,53 @@ void MetalSwapChain::present() {
    }
 }

-struct PresentDrawableData {
-    void* drawable = nullptr;
-    MetalDriver* driver = nullptr;
+#ifndef FILAMENT_RELEASE_PRESENT_DRAWABLE_MAIN_THREAD
+#define FILAMENT_RELEASE_PRESENT_DRAWABLE_MAIN_THREAD 1
+#endif
+
+class PresentDrawableData {
+public:
+    PresentDrawableData() = delete;
+    PresentDrawableData(const PresentDrawableData&) = delete;
+    PresentDrawableData& operator=(const PresentDrawableData&) = delete;
+
+    static PresentDrawableData* create(id<CAMetalDrawable> drawable, MetalDriver* driver) {
+        assert_invariant(driver);
+        return new PresentDrawableData(drawable, driver);
+    }
+
+    static void maybePresentAndDestroyAsync(PresentDrawableData* that, bool shouldPresent) {
+        if (shouldPresent) {
+           [that->mDrawable present];
+        }
+
+#if FILAMENT_RELEASE_PRESENT_DRAWABLE_MAIN_THREAD == 1
+        // mDrawable is acquired on the driver thread. Typically, we would release this object on
+        // the same thread, but after receiving consistent crash reports from within
+        // [CAMetalDrawable dealloc], we suspect this object requires releasing on the main thread.
+        dispatch_async(dispatch_get_main_queue(), ^{ cleanupAndDestroy(that); });
+#else
+        that->mDriver->runAtNextTick([that]() { cleanupAndDestroy(that); });
+#endif
+    }
+
+private:
+    PresentDrawableData(id<CAMetalDrawable> drawable, MetalDriver* driver)
+        : mDrawable(drawable), mDriver(driver) {}
+
+    static void cleanupAndDestroy(PresentDrawableData *that) {
+        that->mDrawable = nil;
+        that->mDriver = nullptr;
+        delete that;
+    }
+
+    id<CAMetalDrawable> mDrawable;
+    MetalDriver* mDriver = nullptr;
 };

 void presentDrawable(bool presentFrame, void* user) {
    auto* presentDrawableData = static_cast<PresentDrawableData*>(user);
-
-    // CFBridgingRelease here is used to balance the CFBridgingRetain inside acquireDrawable.
-    id<CAMetalDrawable> drawable =
-            (id<CAMetalDrawable>)CFBridgingRelease(presentDrawableData->drawable);
-    if (presentFrame) {
-        [drawable present];
-    }
-
-    // Schedule the drawable destruction on the driver thread.
-    void* voidDrawable = (void*) CFBridgingRetain(drawable);
-    MetalDriver* driver = presentDrawableData->driver;
-    driver->runAtNextTick([voidDrawable]() {
-        // The drawable is released here.
-        CFBridgingRelease(voidDrawable);
-    });
-
-    delete presentDrawableData;
+    PresentDrawableData::maybePresentAndDestroyAsync(presentDrawableData, presentFrame);
 }

 void MetalSwapChain::scheduleFrameScheduledCallback() {
@@ -277,21 +301,16 @@ void MetalSwapChain::scheduleFrameScheduledCallback() {
    }

    assert_invariant(drawable);
-    FrameScheduledCallback callback = frameScheduledCallback;
-    // This block strongly captures drawable to keep it alive until the handler executes.
-    // We cannot simply reference this->drawable inside the block because the block would then only
-    // capture the _this_ pointer (MetalSwapChain*) instead of the drawable.
-    id<CAMetalDrawable> d = drawable;
+
+    // Destroy this by calling maybePresentAndDestroyAsync() later.
+    auto* presentData = PresentDrawableData::create(drawable, context.driver);
+
+    FrameScheduledCallback userCallback = frameScheduledCallback;
    void* userData = frameScheduledUserData;
-    MetalDriver* driver = context.driver;
+
    [getPendingCommandBuffer(&context) addScheduledHandler:^(id<MTLCommandBuffer> cb) {
-        // CFBridgingRetain is used here to give the drawable a +1 retain count before
-        // casting it to a void*.
-        auto* presentDrawableData = new PresentDrawableData;
-        presentDrawableData->drawable = (void*) CFBridgingRetain(d);
-        presentDrawableData->driver = driver;
-        PresentCallable callable(presentDrawable, (void*) presentDrawableData);
-        callback(callable, userData);
+        PresentCallable callable(presentDrawable, static_cast<void*>(presentData));
+        userCallback(callable, userData);
    }];
 }

@@ -322,23 +341,12 @@ void MetalBufferObject::updateBufferUnsynchronized(void* data, size_t size, uint
    buffer.copyIntoBufferUnsynchronized(data, size, byteOffset);
 }

-MetalVertexBuffer::MetalVertexBuffer(MetalContext& context, uint8_t bufferCount,
-            uint8_t attributeCount, uint32_t vertexCount, AttributeArray const& attributes)
-    : HwVertexBuffer(bufferCount, attributeCount, vertexCount, attributes), buffers(bufferCount, nullptr) {}
+MetalVertexBufferInfo::MetalVertexBufferInfo(MetalContext& context, uint8_t bufferCount,
+        uint8_t attributeCount, AttributeArray const& attributes)
+        : HwVertexBufferInfo(bufferCount, attributeCount),
+          bufferMapping(utils::FixedCapacityVector<Entry>::with_capacity(MAX_VERTEX_BUFFER_COUNT)) {

-MetalIndexBuffer::MetalIndexBuffer(MetalContext& context, BufferUsage usage, uint8_t elementSize,
-        uint32_t indexCount) : HwIndexBuffer(elementSize, indexCount),
-        buffer(context, BufferObjectBinding::VERTEX, usage, elementSize * indexCount, true) { }
-
-MetalRenderPrimitive::MetalRenderPrimitive()
-    : bufferMapping(utils::FixedCapacityVector<Entry>::with_capacity(MAX_VERTEX_BUFFER_COUNT)) {}
-
-void MetalRenderPrimitive::setBuffers(MetalVertexBuffer* vertexBuffer, MetalIndexBuffer*
-        indexBuffer) {
-    this->vertexBuffer = vertexBuffer;
-    this->indexBuffer = indexBuffer;
-
-    const size_t attributeCount = vertexBuffer->attributes.size();
+    const size_t maxAttributeCount = attributes.size();

    auto& mapping = bufferMapping;
    mapping.clear();
@@ -378,8 +386,8 @@ void MetalRenderPrimitive::setBuffers(MetalVertexBuffer* vertexBuffer, MetalInde
        }
    };

-    for (uint32_t attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++) {
-        const auto& attribute = vertexBuffer->attributes[attributeIndex];
+    for (uint32_t attributeIndex = 0; attributeIndex < maxAttributeCount; attributeIndex++) {
+        const auto& attribute = attributes[attributeIndex];

        // If the attribute is unused, bind it to the zero buffer. It's a Metal error for a shader
        // to read from missing vertex attributes.
@@ -406,6 +414,24 @@ void MetalRenderPrimitive::setBuffers(MetalVertexBuffer* vertexBuffer, MetalInde
    }
 }

+MetalVertexBuffer::MetalVertexBuffer(MetalContext& context,  // context is not used in this body
+        uint32_t vertexCount, uint32_t bufferCount, Handle<HwVertexBufferInfo> vbih)
+    : HwVertexBuffer(vertexCount), vbih(vbih), buffers(bufferCount, nullptr) {
+}  // NOTE(review): buffers(bufferCount, nullptr) presumably yields bufferCount null slots
+
+MetalIndexBuffer::MetalIndexBuffer(MetalContext& context, BufferUsage usage, uint8_t elementSize,
+        uint32_t indexCount) : HwIndexBuffer(elementSize, indexCount),  // size arg: element * count
+        buffer(context, BufferObjectBinding::VERTEX, usage, elementSize * indexCount, true) { }  // NOTE(review): VERTEX binding for an index buffer — confirm intended
+
+MetalRenderPrimitive::MetalRenderPrimitive() {  // empty: bufferMapping is built by MetalVertexBufferInfo
+}
+
+void MetalRenderPrimitive::setBuffers(MetalVertexBufferInfo const* const vbi,  // vbi is unused here
+        MetalVertexBuffer* vertexBuffer, MetalIndexBuffer* indexBuffer) {
+    this->vertexBuffer = vertexBuffer;  // both pointers are stored exactly as given
+    this->indexBuffer = indexBuffer;
+}
+
 MetalProgram::MetalProgram(MetalContext& context, Program&& program) noexcept
    : HwProgram(program.getName()), mContext(context) {

@@ -745,13 +771,13 @@ void MetalTexture::loadWithCopyBuffer(uint32_t level, uint32_t slice, MTLRegion
        PixelBufferDescriptor const& data, const PixelBufferShape& shape) {
    const size_t stagingBufferSize = shape.totalBytes;
    auto entry = context.bufferPool->acquireBuffer(stagingBufferSize);
-    memcpy(entry->buffer.contents,
+    memcpy(entry->buffer.get().contents,
            static_cast<uint8_t*>(data.buffer) + shape.sourceOffset,
            stagingBufferSize);
    id<MTLCommandBuffer> blitCommandBuffer = getPendingCommandBuffer(&context);
    id<MTLBlitCommandEncoder> blitCommandEncoder = [blitCommandBuffer blitCommandEncoder];
    blitCommandEncoder.label = @"Texture upload buffer blit";
-    [blitCommandEncoder copyFromBuffer:entry->buffer
+    [blitCommandEncoder copyFromBuffer:entry->buffer.get()
                          sourceOffset:0
                     sourceBytesPerRow:shape.bytesPerRow
                   sourceBytesPerImage:shape.bytesPerSlice
--- a/filament/backend/src/metal/MetalShaderCompiler.h
+++ b/filament/backend/src/metal/MetalShaderCompiler.h
@@ -30,6 +30,8 @@

 #include <array>
 #include <memory>
+#include <tuple>
+#include <variant>

 namespace filament::backend {

@@ -39,43 +41,81 @@ class MetalShaderCompiler {
    struct MetalProgramToken;

 public:
+    enum class Mode {
+        SYNCHRONOUS,    // createProgram() compiles on the calling thread before returning
+        ASYNCHRONOUS    // createProgram() queues the compile on the compiler thread pool
+    };
+
    class MetalFunctionBundle {
    public:
-        MetalFunctionBundle() = default;
-        MetalFunctionBundle(id<MTLFunction> fragment, id<MTLFunction> vertex)
-            : functions{fragment, vertex} {
+        using Raster = std::tuple<id<MTLFunction>, id<MTLFunction>>;
+        using Compute = id<MTLFunction>;
+        using Error = std::tuple<NSString*, NSString*>; // error message, Program name
+        struct None {};
+
+        MetalFunctionBundle() : mPrograms{None{}} {}
+
+        explicit operator bool() const { return isValid(); }
+
+        bool isValid() const noexcept {  // Raster or Compute only; None and Error are not valid
+            return std::holds_alternative<Raster>(mPrograms) ||
+                std::holds_alternative<Compute>(mPrograms);
+        }
+
+        /**
+         * Throws an NSException if this MetalFunctionBundle either contains an error or is empty.
+         *
+         * If this MetalFunctionBundle contains an error, will throw a MetalCompilationFailure
+         * NSException with the error string and program name passed to
+         * MetalFunctionBundle::error(NSString*, NSString*).
+         *
+         * If this MetalFunctionBundle is empty, will throw a MetalEmptyFunctionBundle NSException.
+         */
+        void validate() const;
+
+        Raster getRasterFunctions() const {  // tuple order is (fragment, vertex), as set by raster()
+            assert_invariant(std::holds_alternative<Raster>(mPrograms));
+            return std::get<Raster>(mPrograms);  // std::get throws std::bad_variant_access otherwise
+        }
+
+        Compute getComputeFunction() const {  // only meaningful when the bundle holds Compute
+            assert_invariant(std::holds_alternative<Compute>(mPrograms));
+            return std::get<Compute>(mPrograms);  // std::get throws std::bad_variant_access otherwise
+        }
+
+        static MetalFunctionBundle none() {  // same state as the default constructor (None)
+            return MetalFunctionBundle(None{});
+        }
+
+        static MetalFunctionBundle raster(id<MTLFunction> fragment, id<MTLFunction> vertex) {
            assert_invariant(fragment && vertex);
            assert_invariant(fragment.functionType == MTLFunctionTypeFragment);
            assert_invariant(vertex.functionType == MTLFunctionTypeVertex);
+            return MetalFunctionBundle(Raster{fragment, vertex});  // stored as (fragment, vertex)
        }
-        explicit MetalFunctionBundle(id<MTLFunction> compute) : functions{compute, nil} {
+
+        static MetalFunctionBundle compute(id<MTLFunction> compute) {  // must be a kernel function
            assert_invariant(compute);
            assert_invariant(compute.functionType == MTLFunctionTypeKernel);
+            return MetalFunctionBundle(Compute{compute});
        }

-        std::pair<id<MTLFunction>, id<MTLFunction>> getRasterFunctions() const noexcept {
-            assert_invariant(functions[0].functionType == MTLFunctionTypeFragment);
-            assert_invariant(functions[1].functionType == MTLFunctionTypeVertex);
-            return {functions[0], functions[1]};
+        static MetalFunctionBundle error(NSString* errorMessage, NSString* programName) {
+            return MetalFunctionBundle(Error{errorMessage, programName});  // read back by validate()
        }

-        id<MTLFunction> getComputeFunction() const noexcept {
-            assert_invariant(functions[0].functionType == MTLFunctionTypeKernel);
-            return functions[0];
-        }
-
-        explicit operator bool() const { return functions[0] != nil; }
-
    private:
-        // Can hold two functions, either:
-        // - fragment and vertex (for rasterization pipelines)
-        // - compute (for compute pipelines)
-        id<MTLFunction> functions[2] = {nil, nil};
+        MetalFunctionBundle(None&& t) : mPrograms(std::move(t)) {}  // private: reached via none()
+        MetalFunctionBundle(Raster&& t) : mPrograms(std::move(t)) {}  // via raster()
+        MetalFunctionBundle(Compute&& t) : mPrograms(std::move(t)) {}  // via compute()
+        MetalFunctionBundle(Error&& t) : mPrograms(std::move(t)) {}  // via error()
+
+        std::variant<Raster, Compute, None, Error> mPrograms;
    };

    using program_token_t = std::shared_ptr<MetalProgramToken>;

-    explicit MetalShaderCompiler(id<MTLDevice> device, MetalDriver& driver);
+    explicit MetalShaderCompiler(id<MTLDevice> device, MetalDriver& driver, Mode mode);

    MetalShaderCompiler(MetalShaderCompiler const& rhs) = delete;
    MetalShaderCompiler(MetalShaderCompiler&& rhs) = delete;
@@ -85,15 +125,15 @@ public:
    void init() noexcept;
    void terminate() noexcept;

-    // Creates a program asynchronously
+    bool isParallelShaderCompileSupported() const noexcept;
+
+    // Creates a program, either synchronously or asynchronously, depending on the Mode
+    // MetalShaderCompiler was constructed with.
    program_token_t createProgram(utils::CString const& name, Program&& program);

    // Returns the functions, blocking if necessary. The Token is destroyed and becomes invalid.
    MetalFunctionBundle getProgram(program_token_t& token);

-    // Destroys a valid token and all associated resources. Used to "cancel" a program compilation.
-    static void terminate(program_token_t& token);
-
    void notifyWhenAllProgramsAreReady(
            CallbackHandler* handler, CallbackHandler::Callback callback, void* user);

@@ -103,6 +143,7 @@ private:
    CompilerThreadPool mCompilerThreadPool;
    id<MTLDevice> mDevice;
    CallbackManager mCallbackManager;
+    Mode mMode;
 };

 } // namespace filament::backend
--- a/filament/backend/src/metal/MetalShaderCompiler.mm
+++ b/filament/backend/src/metal/MetalShaderCompiler.mm
@@ -70,22 +70,31 @@ struct MetalShaderCompiler::MetalProgramToken : ProgramToken {

 MetalShaderCompiler::MetalProgramToken::~MetalProgramToken() = default;

-MetalShaderCompiler::MetalShaderCompiler(id<MTLDevice> device, MetalDriver& driver)
+MetalShaderCompiler::MetalShaderCompiler(id<MTLDevice> device, MetalDriver& driver, Mode mode)
        : mDevice(device),
-          mCallbackManager(driver) {
+          mCallbackManager(driver),
+          mMode(mode) {

 }

 void MetalShaderCompiler::init() noexcept {
-    const uint32_t poolSize = 2;
-    mCompilerThreadPool.init(poolSize, []() {}, []() {});
+    const uint32_t poolSize = 1;
+    if (mMode == Mode::ASYNCHRONOUS) {
+        mCompilerThreadPool.init(poolSize, []() {}, []() {});
+    }
 }

 void MetalShaderCompiler::terminate() noexcept {
-    mCompilerThreadPool.terminate();
+    if (mMode == Mode::ASYNCHRONOUS) {
+        mCompilerThreadPool.terminate();
+    }
    mCallbackManager.terminate();
 }

+bool MetalShaderCompiler::isParallelShaderCompileSupported() const noexcept {
+    return mMode == Mode::ASYNCHRONOUS;  // decided solely by the Mode given to the constructor
+}
+
 /* static */ MetalShaderCompiler::MetalFunctionBundle MetalShaderCompiler::compileProgram(
        const Program& program, id<MTLDevice> device) {
    std::array<id<MTLFunction>, Program::SHADER_TYPE_COUNT> functions = { nil };
@@ -118,13 +127,16 @@ void MetalShaderCompiler::terminate() noexcept {
                                                      options:options
                                                        error:&error];
        if (library == nil) {
+            NSString* errorMessage = @"unknown error";
            if (error) {
                auto description =
                        [error.localizedDescription cStringUsingEncoding:NSUTF8StringEncoding];
                utils::slog.w << description << utils::io::endl;
+                errorMessage = error.localizedDescription;
            }
            PANIC_LOG("Failed to compile Metal program.");
-            return {};
+            NSString* programName = [NSString stringWithFormat:@"%s", program.getName().c_str_safe()];
+            return MetalFunctionBundle::error(errorMessage, programName);
        }

        MTLFunctionConstantValues* constants = [MTLFunctionConstantValues new];
@@ -160,14 +172,15 @@ void MetalShaderCompiler::terminate() noexcept {
    assert_invariant(isRasterizationProgram != isComputeProgram);

    if (isRasterizationProgram) {
-        return {fragmentFunction, vertexFunction};
+        return MetalFunctionBundle::raster(fragmentFunction, vertexFunction);
    }

    if (isComputeProgram) {
-        return MetalFunctionBundle{computeFunction};
+        return MetalFunctionBundle::compute(computeFunction);
    }

-    return {};
+    // Should never reach here.
+    return MetalFunctionBundle::none();
 }

 MetalShaderCompiler::program_token_t MetalShaderCompiler::createProgram(
@@ -176,14 +189,26 @@ MetalShaderCompiler::program_token_t MetalShaderCompiler::createProgram(

    token->handle = mCallbackManager.get();

-    CompilerPriorityQueue const priorityQueue = program.getPriorityQueue();
-    mCompilerThreadPool.queue(priorityQueue, token,
-            [this, name, device = mDevice, program = std::move(program), token]() {
-                MetalFunctionBundle compiledProgram = compileProgram(program, device);
+    switch (mMode) {
+        case Mode::ASYNCHRONOUS: {
+            CompilerPriorityQueue const priorityQueue = program.getPriorityQueue();
+            mCompilerThreadPool.queue(priorityQueue, token,
+                    [this, name, device = mDevice, program = std::move(program), token]() {
+                        MetalFunctionBundle compiledProgram = compileProgram(program, device);
+                        token->set(compiledProgram);
+                        mCallbackManager.put(token->handle);
+                    });

-                token->set(compiledProgram);
-                mCallbackManager.put(token->handle);
-            });
+            break;
+        }
+
+        case Mode::SYNCHRONOUS: {
+            MetalFunctionBundle compiledProgram = compileProgram(program, mDevice);
+            token->set(compiledProgram);
+            mCallbackManager.put(token->handle);
+            break;
+        }
+    }

    return token;
 }
@@ -191,38 +216,46 @@ MetalShaderCompiler::program_token_t MetalShaderCompiler::createProgram(
 MetalShaderCompiler::MetalFunctionBundle MetalShaderCompiler::getProgram(program_token_t& token) {
    assert_invariant(token);

-    if (!token->isReady()) {
-        auto job = mCompilerThreadPool.dequeue(token);
-        if (job) {
-            job();
+    if (mMode == Mode::ASYNCHRONOUS) {
+        if (!token->isReady()) {
+            auto job = mCompilerThreadPool.dequeue(token);
+            if (job) {
+                job();
+            }
        }
    }

+    // The job isn't guaranteed to have finished yet. We may have failed to dequeue it above,
+    // which means it's currently running. In that case get() will block until it finishes.
+
    MetalShaderCompiler::MetalFunctionBundle program = token->get();
-
    token = nullptr;
-
    return program;
 }

-/* static */ void MetalShaderCompiler::terminate(program_token_t& token) {
-    assert_invariant(token);
-
-    auto job = token->compiler.mCompilerThreadPool.dequeue(token);
-    if (!job) {
-        // The job is being executed right now (or has already executed).
-        token->wait();
-    } else {
-        // The job has not executed yet.
-        token->compiler.mCallbackManager.put(token->handle);
-    }
-
-    token.reset();
-}
-
 void MetalShaderCompiler::notifyWhenAllProgramsAreReady(
        CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
    mCallbackManager.setCallback(handler, callback, user);
 }

+UTILS_NOINLINE
+void MetalShaderCompiler::MetalFunctionBundle::validate() const {  // no-op for Raster / Compute
+    if (UTILS_UNLIKELY(std::holds_alternative<Error>(mPrograms))) {
+        auto [errorMessage, programName] = std::get<Error>(mPrograms);  // Error order: message, name
+        NSString* reason =
+                [NSString stringWithFormat:
+                        @"Attempting to draw with an id<MTLFunction> that failed to compile.\n"
+                        @"Program: %@\n"
+                        @"%@", programName, errorMessage];
+        [[NSException exceptionWithName:@"MetalCompilationFailure"
+                                reason:reason
+                              userInfo:nil] raise];
+    } else if (UTILS_UNLIKELY(std::holds_alternative<None>(mPrograms))) {  // empty bundle
+        NSString* reason = @"Attempting to draw with an empty id<MTLFunction>.";
+        [[NSException exceptionWithName:@"MetalEmptyFunctionBundle"
+                                reason:reason
+                              userInfo:nil] raise];
+    }
+}
+
 } // namespace filament::backend
--- a/filament/backend/src/noop/NoopDriver.cpp
+++ b/filament/backend/src/noop/NoopDriver.cpp
@@ -77,6 +77,9 @@ void NoopDriver::finish(int) {
 void NoopDriver::destroyRenderPrimitive(Handle<HwRenderPrimitive> rph) {
 }

+void NoopDriver::destroyVertexBufferInfo(Handle<HwVertexBufferInfo> vbih) {  // no-op: empty body
+}
+
 void NoopDriver::destroyVertexBuffer(Handle<HwVertexBuffer> vbh) {
 }

@@ -174,7 +177,11 @@ bool NoopDriver::isSRGBSwapChainSupported() {
    return false;
 }

-bool NoopDriver::isStereoSupported() {
+bool NoopDriver::isProtectedContentSupported() {
+    return false;
+}
+
+bool NoopDriver::isStereoSupported(backend::StereoscopicType) {
    return false;
 }

@@ -186,6 +193,10 @@ bool NoopDriver::isDepthStencilResolveSupported() {
    return true;
 }

+bool NoopDriver::isProtectedTexturesSupported() {
+    return true;  // NOTE(review): isProtectedContentSupported() returns false — confirm intended
+}
+
 bool NoopDriver::isWorkaroundNeeded(Workaround) {
    return false;
 }
@@ -241,8 +252,8 @@ void NoopDriver::update3DImage(Handle<HwTexture> th,
 void NoopDriver::setupExternalImage(void* image) {
 }

-bool NoopDriver::getTimerQueryValue(Handle<HwTimerQuery> tqh, uint64_t* elapsedTime) {
-    return false;
+TimerQueryResult NoopDriver::getTimerQueryValue(Handle<HwTimerQuery> tqh, uint64_t* elapsedTime) {
+    return TimerQueryResult::ERROR;
 }

 void NoopDriver::setExternalImage(Handle<HwTexture> th, void* image) {
@@ -339,13 +350,26 @@ void NoopDriver::blit(
        math::uint2 size) {
 }

+void NoopDriver::bindPipeline(PipelineState pipelineState) {  // no-op: empty body
+}
+
+void NoopDriver::bindRenderPrimitive(Handle<HwRenderPrimitive> rph) {  // no-op: empty body
+}
+
+void NoopDriver::draw2(uint32_t indexOffset, uint32_t indexCount, uint32_t instanceCount) {  // no-op
+}
+
 void NoopDriver::draw(PipelineState pipelineState, Handle<HwRenderPrimitive> rph,
-        uint32_t instanceCount) {
+        uint32_t indexOffset, uint32_t indexCount, uint32_t instanceCount) {
 }

 void NoopDriver::dispatchCompute(Handle<HwProgram> program, math::uint3 workGroupCount) {
 }

+void NoopDriver::scissor(
+        Viewport scissor) {  // no-op: the rectangle is ignored
+}
+
 void NoopDriver::beginTimerQuery(Handle<HwTimerQuery> tqh) {
 }

--- a/filament/backend/src/opengl/GLBufferObject.h
+++ b/filament/backend/src/opengl/GLBufferObject.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_OPENGL_GLBUFFEROBJECT_H
+#define TNT_FILAMENT_BACKEND_OPENGL_GLBUFFEROBJECT_H
+
+#include "DriverBase.h"
+
+#include "gl_headers.h"
+
+#include <backend/DriverEnums.h>
+
+#include <stdint.h>
+
+namespace filament::backend {
+
+struct GLBufferObject : public HwBufferObject {
+    using HwBufferObject::HwBufferObject;  // gl, usage, bindingType get no defaults via these ctors
+    GLBufferObject(uint32_t size,
+            BufferObjectBinding bindingType, BufferUsage usage) noexcept
+            : HwBufferObject(size), usage(usage), bindingType(bindingType) {
+    }  // note: this constructor does not initialize gl either
+
+    struct {
+        GLuint id;
+        union {  // binding and buffer share storage; only one is meaningful at a time
+            GLenum binding;
+            void* buffer;
+        };
+    } gl;
+    BufferUsage usage;
+    BufferObjectBinding bindingType;
+    uint16_t age = 0;  // NOTE(review): presumably tracked against a state/version counter — confirm
+};
+
+} // namespace filament::backend
+
+#endif //TNT_FILAMENT_BACKEND_OPENGL_GLBUFFEROBJECT_H
--- a/filament/backend/src/opengl/GLTexture.h
+++ b/filament/backend/src/opengl/GLTexture.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_OPENGL_GLTEXTURE_H
+#define TNT_FILAMENT_BACKEND_OPENGL_GLTEXTURE_H
+
+#include "DriverBase.h"
+
+#include "gl_headers.h"
+
+#include <backend/platforms/OpenGLPlatform.h>
+
+#include <stdint.h>
+
+namespace filament::backend {
+
+struct GLTexture : public HwTexture {
+    using HwTexture::HwTexture;
+    struct GL {
+        GL() noexcept : imported(false), sidecarSamples(1), reserved(0) {}  // sets the bit-fields
+        GLuint id = 0;          // texture or renderbuffer id
+        GLenum target = 0;
+        GLenum internalFormat = 0;
+        GLuint sidecarRenderBufferMS = 0;  // multi-sample sidecar renderbuffer
+
+        // texture parameters go here too
+        GLfloat anisotropy = 1.0;
+        int8_t baseLevel = 127;  // initial range is inverted: baseLevel (127) > maxLevel (-1)
+        int8_t maxLevel = -1;
+        uint8_t targetIndex = 0;    // optimization: index corresponding to target
+        bool imported           : 1;  // the three bit-fields below total 1 + 4 + 3 = 8 bits
+        uint8_t sidecarSamples  : 4;  // 4 bits, so values 0..15
+        uint8_t reserved        : 3;
+    } gl;
+
+    OpenGLPlatform::ExternalTexture* externalTexture = nullptr;
+};
+
+
+} // namespace filament::backend
+
+#endif //TNT_FILAMENT_BACKEND_OPENGL_GLTEXTURE_H
--- a/filament/backend/src/opengl/OpenGLContext.cpp
+++ b/filament/backend/src/opengl/OpenGLContext.cpp
@@ -16,10 +16,25 @@

 #include "OpenGLContext.h"

-#include <backend/platforms/OpenGLPlatform.h>
+#include "GLUtils.h"
+#include "OpenGLTimerQuery.h"

+#include <backend/platforms/OpenGLPlatform.h>
+#include <backend/DriverEnums.h>
+
+#include <utils/compiler.h>
+#include <utils/debug.h>
+#include <utils/Log.h>
+#include <utils/ostream.h>
+
+#include <functional>
+#include <string_view>
 #include <utility>

+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
 // change to true to display all GL extensions in the console on start-up
 #define DEBUG_PRINT_EXTENSIONS false

@@ -48,7 +63,9 @@ bool OpenGLContext::queryOpenGLVersion(GLint* major, GLint* minor) noexcept {
 #endif
 }

-OpenGLContext::OpenGLContext() noexcept {
+OpenGLContext::OpenGLContext(OpenGLPlatform& platform) noexcept
+        : mPlatform(platform),
+          mSamplerMap(32) {

    state.vao.p = &mDefaultVAO;

@@ -231,6 +248,58 @@ OpenGLContext::OpenGLContext() noexcept {
        glDebugMessageCallback(cb, nullptr);
    }
 #endif
+
+    mTimerQueryFactory = TimerQueryFactory::init(platform, *this);
+}
+
+OpenGLContext::~OpenGLContext() noexcept {
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+    if (!isES2()) {  // cached sampler objects are only released on non-ES2 contexts
+        for (auto& item: mSamplerMap) {  // item.second is the GL sampler name (see getSamplerSlow)
+            unbindSampler(item.second);
+            glDeleteSamplers(1, &item.second);
+        }
+        mSamplerMap.clear();
+    }
+#endif  // FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+    delete mTimerQueryFactory;  // assigned from TimerQueryFactory::init() in the constructor
+}
+
+void OpenGLContext::destroyWithContext(
+        size_t index, std::function<void(OpenGLContext&)> const& closure) noexcept {
+    if (index == 0) {  // for any other index the closure is neither stored nor invoked
+        // Note: we only need to delay the destruction of objects on the unprotected context
+        // (index 0) because the protected context is always immediately destroyed and all its
+        // active objects and bindings are then automatically destroyed.
+        // TODO: this is only guaranteed for EGLPlatform, but that's the only one we care about.
+        mDestroyWithNormalContext.push_back(closure);  // run later by synchronizeStateAndCache(0)
+    }
+}
+
+void OpenGLContext::unbindEverything() noexcept {  // currently contains no statements
+    // TODO:  we're supposed to unbind everything here so that resources don't get
+    //        stuck in this context (contextIndex) when destroyed in the other context.
+    //        However, because EGLPlatform always immediately destroys the protected context (1),
+    //        the bindings will automatically be severed when we switch back to the default context.
+    //        Since bindings now only exist in one context, we don't have a ref-counting issue to
+    //        worry about.
+}
+
+void OpenGLContext::synchronizeStateAndCache(size_t index) noexcept {
+
+    // if we're just switching back to context 0, run all the pending destructors
+    if (index == 0) {
+        auto list = std::move(mDestroyWithNormalContext);  // iterate a moved-out local, not the member
+        for (auto&& fn: list) {
+            fn(*this);
+        }
+    }
+
+    // the default FBO could be invalid
+    mDefaultFbo[index].reset();  // bindFramebuffer() re-queries it from the platform on next use
+
+    contextIndex = index;  // must be set before resetState(), which runs against the new index
+    resetState();
 }

 void OpenGLContext::setDefaultState() noexcept {
@@ -599,6 +668,7 @@ void OpenGLContext::initExtensionsGLES(Extensions* ext, GLint major, GLint minor
    ext->EXT_disjoint_timer_query = exts.has("GL_EXT_disjoint_timer_query"sv);
    ext->EXT_multisampled_render_to_texture = exts.has("GL_EXT_multisampled_render_to_texture"sv);
    ext->EXT_multisampled_render_to_texture2 = exts.has("GL_EXT_multisampled_render_to_texture2"sv);
+    ext->EXT_protected_textures = exts.has("GL_EXT_protected_textures"sv);
 #endif
    ext->EXT_shader_framebuffer_fetch = exts.has("GL_EXT_shader_framebuffer_fetch"sv);
 #ifndef __EMSCRIPTEN__
@@ -622,6 +692,7 @@ void OpenGLContext::initExtensionsGLES(Extensions* ext, GLint major, GLint minor
    ext->OES_standard_derivatives = exts.has("GL_OES_standard_derivatives"sv);
    ext->OES_texture_npot = exts.has("GL_OES_texture_npot"sv);
    ext->OES_vertex_array_object = exts.has("GL_OES_vertex_array_object"sv);
+    ext->OVR_multiview2 = exts.has("GL_OVR_multiview2"sv);
    ext->WEBGL_compressed_texture_etc = exts.has("WEBGL_compressed_texture_etc"sv);
    ext->WEBGL_compressed_texture_s3tc = exts.has("WEBGL_compressed_texture_s3tc"sv);
    ext->WEBGL_compressed_texture_s3tc_srgb = exts.has("WEBGL_compressed_texture_s3tc_srgb"sv);
@@ -686,6 +757,7 @@ void OpenGLContext::initExtensionsGL(Extensions* ext, GLint major, GLint minor)
    ext->OES_standard_derivatives = true;
    ext->OES_texture_npot = true;
    ext->OES_vertex_array_object = true;
+    ext->OVR_multiview2 = exts.has("GL_OVR_multiview2"sv);
    ext->WEBGL_compressed_texture_etc = false;
    ext->WEBGL_compressed_texture_s3tc = false;
    ext->WEBGL_compressed_texture_s3tc_srgb = false;
@@ -707,6 +779,51 @@ void OpenGLContext::initExtensionsGL(Extensions* ext, GLint major, GLint minor)

 #endif // BACKEND_OPENGL_VERSION_GL

+
+GLuint OpenGLContext::bindFramebuffer(GLenum target, GLuint buffer) noexcept {  // returns bound name
+    if (UTILS_UNLIKELY(buffer == 0)) {
+        // we're binding the default frame buffer, resolve its actual name
+        auto& defaultFboForThisContext = mDefaultFbo[contextIndex];
+        if (UTILS_UNLIKELY(!defaultFboForThisContext.has_value())) {  // not cached yet: ask platform
+            defaultFboForThisContext = GLuint(mPlatform.getDefaultFramebufferObject());
+        }
+        buffer = defaultFboForThisContext.value();
+    }
+    bindFramebufferResolved(target, buffer);
+    return buffer;  // the resolved name, which may differ from the 0 that was passed in
+}
+
+void OpenGLContext::unbindFramebuffer(GLenum target) noexcept {
+    bindFramebufferResolved(target, 0);  // literal 0: skips the default-FBO lookup of bindFramebuffer
+}
+
+void OpenGLContext::bindFramebufferResolved(GLenum target, GLuint buffer) noexcept {
+    switch (target) {  // glBindFramebuffer is only issued when the cached binding differs
+        case GL_FRAMEBUFFER:  // updates both the draw and the read binding
+            if (state.draw_fbo != buffer || state.read_fbo != buffer) {
+                state.draw_fbo = state.read_fbo = buffer;
+                glBindFramebuffer(target, buffer);
+            }
+            break;
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+        case GL_DRAW_FRAMEBUFFER:
+            if (state.draw_fbo != buffer) {
+                state.draw_fbo = buffer;
+                glBindFramebuffer(target, buffer);
+            }
+            break;
+        case GL_READ_FRAMEBUFFER:
+            if (state.read_fbo != buffer) {
+                state.read_fbo = buffer;
+                glBindFramebuffer(target, buffer);
+            }
+            break;
+#endif  // FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+        default:  // any other target is silently ignored
+            break;
+    }
+}
+
 void OpenGLContext::bindBuffer(GLenum target, GLuint buffer) noexcept {
    if (target == GL_ELEMENT_ARRAY_BUFFER) {
        constexpr size_t targetIndex = getIndexForBufferTarget(GL_ELEMENT_ARRAY_BUFFER);
@@ -817,19 +934,53 @@ void OpenGLContext::deleteBuffers(GLsizei n, const GLuint* buffers, GLenum targe
 #endif
 }

-void OpenGLContext::deleteVertexArrays(GLsizei n, const GLuint* arrays) noexcept {
-    procs.deleteVertexArrays(n, arrays);
-    // if one of the destroyed VAO is bound, clear the binding.
-    for (GLsizei i = 0; i < n; ++i) {
-        if (state.vao.p->vao == arrays[i]) {
+void OpenGLContext::deleteVertexArray(GLuint vao) noexcept {
+    if (UTILS_LIKELY(vao)) {
+        procs.deleteVertexArrays(1, &vao);
+        // if the destroyed VAO is bound, clear the binding.
+        if (state.vao.p->vao[contextIndex] == vao) {
            bindVertexArray(nullptr);
-            break;
        }
    }
 }

+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+GLuint OpenGLContext::getSamplerSlow(SamplerParams params) const noexcept {
+    assert_invariant(mSamplerMap.find(params) == mSamplerMap.end());  // params must not be cached yet
+
+    using namespace GLUtils;
+
+    GLuint s;
+    glGenSamplers(1, &s);
+    glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,   (GLint)getTextureFilter(params.filterMin));
+    glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER,   (GLint)getTextureFilter(params.filterMag));
+    glSamplerParameteri(s, GL_TEXTURE_WRAP_S,       (GLint)getWrapMode(params.wrapS));
+    glSamplerParameteri(s, GL_TEXTURE_WRAP_T,       (GLint)getWrapMode(params.wrapT));
+    glSamplerParameteri(s, GL_TEXTURE_WRAP_R,       (GLint)getWrapMode(params.wrapR));
+    glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, (GLint)getTextureCompareMode(params.compareMode));
+    glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, (GLint)getTextureCompareFunc(params.compareFunc));
+
+#if defined(GL_EXT_texture_filter_anisotropic)
+    if (ext.EXT_texture_filter_anisotropic &&
+        !bugs.texture_filter_anisotropic_broken_on_sampler) {
+        GLfloat const anisotropy = float(1u << params.anisotropyLog2);  // 2^anisotropyLog2
+        glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT,
+                std::min(gets.max_anisotropy, anisotropy));  // clamped to gets.max_anisotropy
+    }
+#endif  // GL_EXT_texture_filter_anisotropic
+    CHECK_GL_ERROR(utils::slog.e)
+    mSamplerMap[params] = s;  // written from a const method, so mSamplerMap is presumably mutable
+    return s;
+}
+#endif
+
+
 void OpenGLContext::resetState() noexcept {
    // Force GL state to match the Filament state
+
+    // increase the state version so other parts of the state know to reset
+    state.age++;
+
    if (state.major > 2) {
 #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, state.draw_fbo);
@@ -846,11 +997,8 @@ void OpenGLContext::resetState() noexcept {
    glUseProgram(state.program.use);

    // state.vao
-    if (state.vao.p) {
-        procs.bindVertexArray(state.vao.p->vao);
-    } else {
-        bindVertexArray(nullptr);
-    }
+    state.vao.p = nullptr;
+    bindVertexArray(nullptr);

    // state.raster
    glFrontFace(state.raster.frontFace);
@@ -1006,7 +1154,22 @@ void OpenGLContext::resetState() noexcept {
        state.window.viewport.w
    );
    glDepthRangef(state.window.depthRange.x, state.window.depthRange.y);
-    
+}
+
+void OpenGLContext::createTimerQuery(GLTimerQuery* query) {
+    mTimerQueryFactory->createTimerQuery(query);
+}
+
+void OpenGLContext::destroyTimerQuery(GLTimerQuery* query) {
+    mTimerQueryFactory->destroyTimerQuery(query);
+}
+
+void OpenGLContext::beginTimeElapsedQuery(GLTimerQuery* query) {
+    mTimerQueryFactory->beginTimeElapsedQuery(query);
+}
+
+void OpenGLContext::endTimeElapsedQuery(OpenGLDriver& driver, GLTimerQuery* query) {
+    mTimerQueryFactory->endTimeElapsedQuery(driver, query);
 }

 } // namesapce filament
--- a/filament/backend/src/opengl/OpenGLContext.h
+++ b/filament/backend/src/opengl/OpenGLContext.h
@@ -17,25 +17,39 @@
 #ifndef TNT_FILAMENT_BACKEND_OPENGLCONTEXT_H
 #define TNT_FILAMENT_BACKEND_OPENGLCONTEXT_H

-#include <math/vec4.h>

-#include <utils/CString.h>
-#include <utils/debug.h>
+#include "OpenGLTimerQuery.h"

+#include <backend/platforms/OpenGLPlatform.h>
+
+#include <backend/DriverEnums.h>
 #include <backend/Handle.h>

-#include "GLUtils.h"
+#include "gl_headers.h"
+
+#include <utils/compiler.h>
+#include <utils/bitset.h>
+#include <utils/debug.h>
+
+#include <math/vec2.h>
+#include <math/vec4.h>
+
+#include <tsl/robin_map.h>

 #include <array>
-#include <set>
+#include <functional>
+#include <optional>
 #include <tuple>
-#include <utility>
+#include <vector>
+
+#include <stddef.h>
+#include <stdint.h>

 namespace filament::backend {

 class OpenGLPlatform;

-class OpenGLContext {
+class OpenGLContext final : public TimerQueryFactoryInterface {
 public:
    static constexpr const size_t MAX_TEXTURE_UNIT_COUNT = MAX_SAMPLER_COUNT;
    static constexpr const size_t DUMMY_TEXTURE_BINDING = 7; // highest binding guaranteed to work with ES2
@@ -46,19 +60,29 @@ public:
    struct RenderPrimitive {
        static_assert(MAX_VERTEX_ATTRIBUTE_COUNT <= 16);

-        GLuint vao = 0;                                         // 4
+        GLuint vao[2] = {};                                     // 8
        GLuint elementArray = 0;                                // 4
-        utils::bitset<uint16_t> vertexAttribArray;              // 2
+        mutable utils::bitset<uint16_t> vertexAttribArray;      // 2

-        // If this version number does not match vertexBufferWithObjects->bufferObjectsVersion,
-        // then the VAO needs to be updated.
+        // if this differs from vertexBufferWithObjects->bufferObjectsVersion, this VAO needs to
+        // be updated (see OpenGLDriver::updateVertexArrayObject())
        uint8_t vertexBufferVersion = 0;                        // 1
+
+        // if this differs from OpenGLContext::state.age, this VAO needs to
+        // be updated (see OpenGLDriver::updateVertexArrayObject())
+        uint8_t stateVersion = 0;                               // 1
+
+        // If this differs from OpenGLContext::state.age, this VAO's name needs to be updated.
+        // See OpenGLContext::bindVertexArray()
+        uint8_t nameVersion = 0;                                // 1
+
+        // Size in bytes of indices in the index buffer
        uint8_t indicesSize = 0;                                // 1

        // The optional 32-bit handle to a GLVertexBuffer is necessary only if the referenced
        // VertexBuffer supports buffer objects. If this is zero, then the VBO handles array is
        // immutable.
-        Handle<HwVertexBuffer> vertexBufferWithObjects = {};    // 4
+        Handle<HwVertexBuffer> vertexBufferWithObjects;         // 4

        GLenum getIndicesType() const noexcept {
            return indicesSize == 4 ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT;
@@ -67,7 +91,18 @@ public:

    static bool queryOpenGLVersion(GLint* major, GLint* minor) noexcept;

-    OpenGLContext() noexcept;
+    explicit OpenGLContext(OpenGLPlatform& platform) noexcept;
+    ~OpenGLContext() noexcept final;
+
+    // TimerQueryFactoryInterface -----------------------------------------------------------------
+
+    // note: OpenGLContext being final ensures (clang) these are not called through the vtable
+    void createTimerQuery(GLTimerQuery* query) override;
+    void destroyTimerQuery(GLTimerQuery* query) override;
+    void beginTimeElapsedQuery(GLTimerQuery* query) override;
+    void endTimeElapsedQuery(OpenGLDriver& driver, GLTimerQuery* query) override;
+
+    // --------------------------------------------------------------------------------------------

    template<int MAJOR, int MINOR>
    inline bool isAtLeastGL() const noexcept {
@@ -123,10 +158,11 @@ public:
    inline void bindBufferRange(GLenum target, GLuint index, GLuint buffer,
            GLintptr offset, GLsizeiptr size) noexcept;

-    inline void bindFramebuffer(GLenum target, GLuint buffer) noexcept;
+    GLuint bindFramebuffer(GLenum target, GLuint buffer) noexcept;
+    void unbindFramebuffer(GLenum target) noexcept;

-    inline void enableVertexAttribArray(GLuint index) noexcept;
-    inline void disableVertexAttribArray(GLuint index) noexcept;
+    inline void enableVertexAttribArray(RenderPrimitive const* rp, GLuint index) noexcept;
+    inline void disableVertexAttribArray(RenderPrimitive const* rp, GLuint index) noexcept;
    inline void enable(GLenum cap) noexcept;
    inline void disable(GLenum cap) noexcept;
    inline void frontFace(GLenum mode) noexcept;
@@ -148,7 +184,9 @@ public:
    inline void depthRange(GLclampf near, GLclampf far) noexcept;

    void deleteBuffers(GLsizei n, const GLuint* buffers, GLenum target) noexcept;
-    void deleteVertexArrays(GLsizei n, const GLuint* arrays) noexcept;
+    void deleteVertexArray(GLuint vao) noexcept;
+
+    void destroyWithContext(size_t index, std::function<void(OpenGLContext&)> const& closure) noexcept;

    // glGet*() values
    struct Gets {
@@ -183,6 +221,7 @@ public:
        bool EXT_discard_framebuffer;
        bool EXT_multisampled_render_to_texture2;
        bool EXT_multisampled_render_to_texture;
+        bool EXT_protected_textures;
        bool EXT_shader_framebuffer_fetch;
        bool EXT_texture_compression_bptc;
        bool EXT_texture_compression_etc2;
@@ -205,6 +244,7 @@ public:
        bool OES_standard_derivatives;
        bool OES_texture_npot;
        bool OES_vertex_array_object;
+        bool OVR_multiview2;
        bool WEBGL_compressed_texture_etc;
        bool WEBGL_compressed_texture_s3tc;
        bool WEBGL_compressed_texture_s3tc_srgb;
@@ -290,8 +330,19 @@ public:

    FeatureLevel getFeatureLevel() const noexcept { return mFeatureLevel; }

+    // This is the index of the context in use. Must be 0 or 1. This is used to manage the
+    // OpenGL name of ContainerObjects within each context.
+    uint32_t contextIndex = 0;
+
    // Try to keep the State structure sorted by data-access patterns
    struct State {
+        State() noexcept = default;
+        // make sure we don't copy this state by accident
+        State(State const& rhs) = delete;
+        State(State&& rhs) noexcept = delete;
+        State& operator=(State const& rhs) = delete;
+        State& operator=(State&& rhs) noexcept = delete;
+
        GLint major = 0;
        GLint minor = 0;

@@ -396,6 +447,7 @@ public:
            vec4gli viewport { 0 };
            vec2glf depthRange { 0.0f, 1.0f };
        } window;
+        uint8_t age = 0;
    } state;

    struct Procs {
@@ -415,9 +467,47 @@ public:
        void (* maxShaderCompilerThreadsKHR)(GLuint count);
    } procs{};

+    void unbindEverything() noexcept;
+    void synchronizeStateAndCache(size_t index) noexcept;
+    void setEs2UniformBinding(size_t index, GLuint id, void const* data, uint16_t age) noexcept {
+        mUniformBindings[index] = { id, data, age };
+    }
+    auto getEs2UniformBinding(size_t index) const noexcept {
+        return mUniformBindings[index];
+    }
+
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+    GLuint getSamplerSlow(SamplerParams sp) const noexcept;
+
+    inline GLuint getSampler(SamplerParams sp) const noexcept { // cached lookup by params
+        assert_invariant(!sp.padding0); // all padding fields are required to be zero
+        assert_invariant(!sp.padding1); // NOTE(review): presumably because padding takes part
+        assert_invariant(!sp.padding2); // in the map's hash/equality - confirm
+        auto& samplerMap = mSamplerMap;
+        auto pos = samplerMap.find(sp);
+        if (UTILS_UNLIKELY(pos == samplerMap.end())) {
+            return getSamplerSlow(sp); // miss: take the out-of-line slow path
+        }
+        return pos->second; // hit: GL sampler name stored in the map
+    }
+#endif
+
+
 private:
+    OpenGLPlatform& mPlatform;
    ShaderModel mShaderModel = ShaderModel::MOBILE;
    FeatureLevel mFeatureLevel = FeatureLevel::FEATURE_LEVEL_1;
+    TimerQueryFactoryInterface* mTimerQueryFactory = nullptr;
+    std::vector<std::function<void(OpenGLContext&)>> mDestroyWithNormalContext;
+    RenderPrimitive mDefaultVAO;
+    std::optional<GLuint> mDefaultFbo[2];
+    std::array<
+            std::tuple<GLuint, void const*, uint16_t>,
+            CONFIG_UNIFORM_BINDING_COUNT> mUniformBindings = {};
+    mutable tsl::robin_map<SamplerParams, GLuint,
+            SamplerParams::Hasher, SamplerParams::EqualTo> mSamplerMap;
+
+    void bindFramebufferResolved(GLenum target, GLuint buffer) noexcept;

    const std::array<std::tuple<bool const&, char const*, char const*>, sizeof(bugs)> mBugDatabase{{
            {   bugs.disable_glFlush,
@@ -470,8 +560,6 @@ private:
                    ""},
    }};

-    RenderPrimitive mDefaultVAO;
-
    // this is chosen to minimize code size
 #if defined(BACKEND_OPENGL_VERSION_GLES)
    static void initExtensionsGLES(Extensions* ext, GLint major, GLint minor) noexcept;
@@ -629,11 +717,26 @@ void OpenGLContext::depthRange(GLclampf near, GLclampf far) noexcept {
 void OpenGLContext::bindVertexArray(RenderPrimitive const* p) noexcept {
    RenderPrimitive* vao = p ? const_cast<RenderPrimitive *>(p) : &mDefaultVAO;
    update_state(state.vao.p, vao, [&]() {
-        procs.bindVertexArray(vao->vao);
+
+        // See if we need to create a name for this VAO on the fly, this would happen if:
+        // - we're not the default VAO, because its name is always 0
+        // - our name is 0, this could happen if this VAO was created in the "other" context
+        // - the nameVersion is out of date *and* we're on the protected context, in this case:
+        //      - the name must be stale from a previous use of this context because we always
+        //        destroy the protected context when we're done with it.
+        bool const recreateVaoName = p != &mDefaultVAO &&
+                ((vao->vao[contextIndex] == 0) ||
+                        (vao->nameVersion != state.age && contextIndex == 1));
+        if (UTILS_UNLIKELY(recreateVaoName)) {
+            vao->nameVersion = state.age;
+            procs.genVertexArrays(1, &vao->vao[contextIndex]);
+        }
+
+        procs.bindVertexArray(vao->vao[contextIndex]);
        // update GL_ELEMENT_ARRAY_BUFFER, which is updated by glBindVertexArray
        size_t const targetIndex = getIndexForBufferTarget(GL_ELEMENT_ARRAY_BUFFER);
        state.buffers.genericBinding[targetIndex] = vao->elementArray;
-        if (UTILS_UNLIKELY(bugs.vao_doesnt_store_element_array_buffer_binding)) {
+        if (UTILS_UNLIKELY(bugs.vao_doesnt_store_element_array_buffer_binding || recreateVaoName)) {
            // This shouldn't be needed, but it looks like some drivers don't do the implicit
            // glBindBuffer().
            glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vao->elementArray);
@@ -671,33 +774,6 @@ void OpenGLContext::bindBufferRange(GLenum target, GLuint index, GLuint buffer,
 #endif
 }

-void OpenGLContext::bindFramebuffer(GLenum target, GLuint buffer) noexcept {
-    switch (target) {
-        case GL_FRAMEBUFFER:
-            if (state.draw_fbo != buffer || state.read_fbo != buffer) {
-                state.draw_fbo = state.read_fbo = buffer;
-                glBindFramebuffer(target, buffer);
-            }
-            break;
-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-        case GL_DRAW_FRAMEBUFFER:
-            if (state.draw_fbo != buffer) {
-                state.draw_fbo = buffer;
-                glBindFramebuffer(target, buffer);
-            }
-            break;
-        case GL_READ_FRAMEBUFFER:
-            if (state.read_fbo != buffer) {
-                state.read_fbo = buffer;
-                glBindFramebuffer(target, buffer);
-            }
-            break;
-#endif
-        default:
-            break;
-    }
-}
-
 void OpenGLContext::bindTexture(GLuint unit, GLuint target, GLuint texId, size_t targetIndex) noexcept {
    assert_invariant(targetIndex == getIndexForTextureTarget(target));
    assert_invariant(targetIndex < TEXTURE_TARGET_COUNT);
@@ -717,20 +793,22 @@ void OpenGLContext::useProgram(GLuint program) noexcept {
    });
 }

-void OpenGLContext::enableVertexAttribArray(GLuint index) noexcept {
-    assert_invariant(state.vao.p);
-    assert_invariant(index < state.vao.p->vertexAttribArray.size());
-    if (UTILS_UNLIKELY(!state.vao.p->vertexAttribArray[index])) {
-        state.vao.p->vertexAttribArray.set(index);
+void OpenGLContext::enableVertexAttribArray(RenderPrimitive const* rp, GLuint index) noexcept {
+    assert_invariant(rp);
+    assert_invariant(index < rp->vertexAttribArray.size());
+    bool const force = rp->stateVersion != state.age;
+    if (UTILS_UNLIKELY(force || !rp->vertexAttribArray[index])) {
+        rp->vertexAttribArray.set(index);
        glEnableVertexAttribArray(index);
    }
 }

-void OpenGLContext::disableVertexAttribArray(GLuint index) noexcept {
-    assert_invariant(state.vao.p);
-    assert_invariant(index < state.vao.p->vertexAttribArray.size());
-    if (UTILS_UNLIKELY(state.vao.p->vertexAttribArray[index])) {
-        state.vao.p->vertexAttribArray.unset(index);
+void OpenGLContext::disableVertexAttribArray(RenderPrimitive const* rp, GLuint index) noexcept {
+    assert_invariant(rp);
+    assert_invariant(index < rp->vertexAttribArray.size());
+    bool const force = rp->stateVersion != state.age;
+    if (UTILS_UNLIKELY(force || rp->vertexAttribArray[index])) {
+        rp->vertexAttribArray.unset(index);
        glDisableVertexAttribArray(index);
    }
 }
--- a/filament/backend/src/opengl/OpenGLDriver.cpp
+++ b/filament/backend/src/opengl/OpenGLDriver.cpp
--- a/filament/backend/src/opengl/OpenGLDriver.h
+++ b/filament/backend/src/opengl/OpenGLDriver.h
@@ -18,27 +18,44 @@
 #define TNT_FILAMENT_BACKEND_OPENGL_OPENGLDRIVER_H

 #include "DriverBase.h"
-#include "GLUtils.h"
 #include "OpenGLContext.h"
+#include "OpenGLTimerQuery.h"
+#include "GLBufferObject.h"
+#include "GLTexture.h"
 #include "ShaderCompilerService.h"

-#include "private/backend/Driver.h"
-#include "private/backend/HandleAllocator.h"
-
 #include <backend/platforms/OpenGLPlatform.h>

 #include <backend/AcquiredImage.h>
+#include <backend/DriverEnums.h>
+#include <backend/Handle.h>
+#include <backend/Platform.h>
 #include <backend/Program.h>
 #include <backend/TargetBufferInfo.h>

+#include "private/backend/Driver.h"
+#include "private/backend/HandleAllocator.h"
+
+#include <utils/FixedCapacityVector.h>
 #include <utils/compiler.h>
-#include <utils/Allocator.h>
+#include <utils/debug.h>

 #include <math/vec4.h>

 #include <tsl/robin_map.h>

-#include <set>
+#include <array>
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <stddef.h>
+#include <stdint.h>

 #ifndef FILAMENT_OPENGL_HANDLE_ARENA_SIZE_IN_MB
 #    define FILAMENT_OPENGL_HANDLE_ARENA_SIZE_IN_MB 4
@@ -51,15 +68,17 @@ class PixelBufferDescriptor;
 struct TargetBufferInfo;

 class OpenGLProgram;
-class OpenGLTimerQueryInterface;
+class TimerQueryFactoryInterface;

 class OpenGLDriver final : public DriverBase {
-    inline explicit OpenGLDriver(OpenGLPlatform* platform, const Platform::DriverConfig& driverConfig) noexcept;
+    inline explicit OpenGLDriver(OpenGLPlatform* platform,
+            const Platform::DriverConfig& driverConfig) noexcept;
    ~OpenGLDriver() noexcept final;
    Dispatcher getDispatcher() const noexcept final;

 public:
-    static Driver* create(OpenGLPlatform* platform, void* sharedGLContext, const Platform::DriverConfig& driverConfig) noexcept;
+    static Driver* create(OpenGLPlatform* platform, void* sharedGLContext,
+            const Platform::DriverConfig& driverConfig) noexcept;

    class DebugMarker {
        OpenGLDriver& driver;
@@ -75,27 +94,22 @@ public:
        bool rec709 = false;
    };

-    struct GLBufferObject : public HwBufferObject {
-        using HwBufferObject::HwBufferObject;
-        GLBufferObject(uint32_t size,
-                BufferObjectBinding bindingType, BufferUsage usage) noexcept
-                : HwBufferObject(size), usage(usage), bindingType(bindingType) {
+    struct GLVertexBufferInfo : public HwVertexBufferInfo {
+        GLVertexBufferInfo() noexcept = default;
+        GLVertexBufferInfo(uint8_t bufferCount, uint8_t attributeCount,
+                AttributeArray const& attributes)
+                : HwVertexBufferInfo(bufferCount, attributeCount),
+                  attributes(attributes) {
        }
-
-        struct {
-            GLuint id;
-            union {
-                GLenum binding;
-                void* buffer;
-            };
-        } gl;
-        BufferUsage usage;
-        BufferObjectBinding bindingType;
-        uint16_t age = 0;
+        AttributeArray attributes;
    };

    struct GLVertexBuffer : public HwVertexBuffer {
-        using HwVertexBuffer::HwVertexBuffer;
+        GLVertexBuffer() noexcept = default;
+        GLVertexBuffer(uint32_t vertexCount, Handle<HwVertexBufferInfo> vbih)
+                : HwVertexBuffer(vertexCount), vbih(vbih) {
+        }
+        Handle<HwVertexBufferInfo> vbih;
        struct {
            // 4 * MAX_VERTEX_ATTRIBUTE_COUNT bytes
            std::array<GLuint, MAX_VERTEX_ATTRIBUTE_COUNT> buffers{};
@@ -109,7 +123,6 @@ public:
        } gl;
    };

-    struct GLTexture;
    struct GLSamplerGroup : public HwSamplerGroup {
        using HwSamplerGroup::HwSamplerGroup;
        struct Entry {
@@ -123,39 +136,14 @@ public:
    struct GLRenderPrimitive : public HwRenderPrimitive {
        using HwRenderPrimitive::HwRenderPrimitive;
        OpenGLContext::RenderPrimitive gl;
+        Handle<HwVertexBufferInfo> vbih;
    };

-    struct GLTexture : public HwTexture {
-        using HwTexture::HwTexture;
-        struct GL {
-            GL() noexcept : imported(false), sidecarSamples(1), reserved(0) {}
-            GLuint id = 0;          // texture or renderbuffer id
-            GLenum target = 0;
-            GLenum internalFormat = 0;
-            GLuint sidecarRenderBufferMS = 0;  // multi-sample sidecar renderbuffer
+    using GLBufferObject = filament::backend::GLBufferObject;

-            // texture parameters go here too
-            GLfloat anisotropy = 1.0;
-            int8_t baseLevel = 127;
-            int8_t maxLevel = -1;
-            uint8_t targetIndex = 0;    // optimization: index corresponding to target
-            bool imported           : 1;
-            uint8_t sidecarSamples  : 4;
-            uint8_t reserved        : 3;
-        } gl;
+    using GLTexture = filament::backend::GLTexture;

-        OpenGLPlatform::ExternalTexture* externalTexture = nullptr;
-    };
-
-    struct GLTimerQuery : public HwTimerQuery {
-        struct State {
-            struct {
-                GLuint query;
-            } gl;
-            std::atomic<int64_t> elapsed{};
-        };
-        std::shared_ptr<State> state;
-    };
+    using GLTimerQuery = filament::backend::GLTimerQuery;

    struct GLStream : public HwStream {
        using HwStream::HwStream;
@@ -209,8 +197,8 @@ private:
    OpenGLContext mContext;
    ShaderCompilerService mShaderCompilerService;

-    friend class OpenGLTimerQueryFactory;
-    friend class TimerQueryNative;
+    friend class TimerQueryFactory;
+    friend class TimerQueryNativeFactory;
    OpenGLContext& getContext() noexcept { return mContext; }

    ShaderCompilerService& getShaderCompilerService() noexcept {
@@ -244,13 +232,13 @@ private:
    HandleAllocatorGL mHandleAllocator;

    template<typename D, typename ... ARGS>
-    Handle<D> initHandle(ARGS&& ... args) noexcept {
+    Handle<D> initHandle(ARGS&& ... args) {
        return mHandleAllocator.allocateAndConstruct<D>(std::forward<ARGS>(args) ...);
    }

    template<typename D, typename B, typename ... ARGS>
    typename std::enable_if<std::is_base_of<B, D>::value, D>::type*
-    construct(Handle<B> const& handle, ARGS&& ... args) noexcept {
+    construct(Handle<B> const& handle, ARGS&& ... args) {
        return mHandleAllocator.destroyAndConstruct<D, B>(handle, std::forward<ARGS>(args) ...);
    }

@@ -264,7 +252,7 @@ private:
    typename std::enable_if_t<
            std::is_pointer_v<Dp> &&
            std::is_base_of_v<B, typename std::remove_pointer_t<Dp>>, Dp>
-    handle_cast(Handle<B>& handle) noexcept {
+    handle_cast(Handle<B>& handle) {
        return mHandleAllocator.handle_cast<Dp, B>(handle);
    }

@@ -272,7 +260,7 @@ private:
    inline typename std::enable_if_t<
            std::is_pointer_v<Dp> &&
            std::is_base_of_v<B, typename std::remove_pointer_t<Dp>>, Dp>
-    handle_cast(Handle<B> const& handle) noexcept {
+    handle_cast(Handle<B> const& handle) {
        return mHandleAllocator.handle_cast<Dp, B>(handle);
    }

@@ -290,7 +278,7 @@ private:
    void updateVertexArrayObject(GLRenderPrimitive* rp, GLVertexBuffer const* vb);

    void framebufferTexture(TargetBufferInfo const& binfo,
-            GLRenderTarget const* rt, GLenum attachment) noexcept;
+            GLRenderTarget const* rt, GLenum attachment, uint8_t layerCount) noexcept;

    void setRasterState(RasterState rs) noexcept;

@@ -311,8 +299,8 @@ private:
    void renderBufferStorage(GLuint rbo, GLenum internalformat, uint32_t width,
            uint32_t height, uint8_t samples) const noexcept;

-    void textureStorage(GLTexture* t,
-            uint32_t width, uint32_t height, uint32_t depth) noexcept;
+    void textureStorage(OpenGLDriver::GLTexture* t, uint32_t width, uint32_t height,
+            uint32_t depth, bool useProtectedMemory) noexcept;

    /* State tracking GL wrappers... */

@@ -324,29 +312,13 @@ private:
    void resolvePass(ResolveAction action, GLRenderTarget const* rt,
            TargetBufferFlags discardFlags) noexcept;

-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-    GLuint getSamplerSlow(SamplerParams sp) const noexcept;
-
-    inline GLuint getSampler(SamplerParams sp) const noexcept {
-        assert_invariant(!sp.padding0);
-        assert_invariant(!sp.padding1);
-        assert_invariant(!sp.padding2);
-        auto& samplerMap = mSamplerMap;
-        auto pos = samplerMap.find(sp);
-        if (UTILS_UNLIKELY(pos == samplerMap.end())) {
-            return getSamplerSlow(sp);
-        }
-        return pos->second;
-    }
-#endif
-
    const std::array<GLSamplerGroup*, Program::SAMPLER_BINDING_COUNT>& getSamplerBindings() const {
        return mSamplerBindings;
    }

    using AttachmentArray = std::array<GLenum, MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT + 2>;
-    static GLsizei getAttachments(AttachmentArray& attachments,
-            GLRenderTarget const* rt, TargetBufferFlags buffers) noexcept;
+    static GLsizei getAttachments(AttachmentArray& attachments, TargetBufferFlags buffers,
+            bool isDefaultFramebuffer) noexcept;

    // state required to represent the current render pass
    Handle<HwRenderTarget> mRenderPassTarget;
@@ -355,6 +327,10 @@ private:
    GLboolean mRenderPassDepthWrite{};
    GLboolean mRenderPassStencilWrite{};

+    GLRenderPrimitive const* mBoundRenderPrimitive = nullptr;
+    bool mValidProgram = false;
+
+
    void clearWithRasterPipe(TargetBufferFlags clearFlags,
            math::float4 const& linearColor, GLfloat depth, GLint stencil) noexcept;

@@ -362,14 +338,10 @@ private:

    // ES2 only. Uniform buffer emulation binding points
    GLuint mLastAssignedEmulatedUboId = 0;
-    std::array<std::tuple<GLuint, void const*, uint16_t>, Program::UNIFORM_BINDING_COUNT> mUniformBindings = {};

    // sampler buffer binding points (nullptr if not used)
    std::array<GLSamplerGroup*, Program::SAMPLER_BINDING_COUNT> mSamplerBindings = {};   // 4 pointers

-    mutable tsl::robin_map<SamplerParams, GLuint,
-            SamplerParams::Hasher, SamplerParams::EqualTo> mSamplerMap;
-
    // this must be accessed from the driver thread only
    std::vector<GLTexture*> mTexturesWithStreamsAttached;

@@ -397,9 +369,6 @@ private:
    void executeEveryNowAndThenOps() noexcept;
    std::vector<std::function<bool()>> mEveryNowAndThenOps;

-    // timer query implementation
-    OpenGLTimerQueryInterface* mTimerQueryImpl = nullptr;
-
    const Platform::DriverConfig mDriverConfig;
    Platform::DriverConfig const& getDriverConfig() const noexcept { return mDriverConfig; }

--- a/filament/backend/src/opengl/OpenGLPlatform.cpp
+++ b/filament/backend/src/opengl/OpenGLPlatform.cpp
@@ -18,6 +18,17 @@

 #include "OpenGLDriverFactory.h"

+#include <backend/AcquiredImage.h>
+#include <backend/DriverEnums.h>
+#include <backend/Platform.h>
+
+#include <utils/compiler.h>
+
+#include <utils/Invocable.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
 namespace filament::backend {

 Driver* OpenGLPlatform::createDefaultDriver(OpenGLPlatform* platform,
@@ -27,14 +38,27 @@ Driver* OpenGLPlatform::createDefaultDriver(OpenGLPlatform* platform,

 OpenGLPlatform::~OpenGLPlatform() noexcept = default;

+void OpenGLPlatform::makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain,
+        utils::Invocable<void()>, utils::Invocable<void(size_t)>) noexcept {
+    makeCurrent(getCurrentContextType(), drawSwapChain, readSwapChain);
+}
+
+bool OpenGLPlatform::isProtectedContextSupported() const noexcept {
+    return false;
+}
+
 bool OpenGLPlatform::isSRGBSwapChainSupported() const noexcept {
    return false;
 }

-uint32_t OpenGLPlatform::createDefaultRenderTarget() noexcept {
+uint32_t OpenGLPlatform::getDefaultFramebufferObject() noexcept {
    return 0;
 }

+OpenGLPlatform::ContextType OpenGLPlatform::getCurrentContextType() const noexcept {
+    return ContextType::UNPROTECTED;
+}
+
 void OpenGLPlatform::setPresentationTime(
        UTILS_UNUSED int64_t presentationTimeInNanosecond) noexcept {
 }
@@ -105,10 +129,14 @@ AcquiredImage OpenGLPlatform::transformAcquiredImage(AcquiredImage source) noexc
    return source;
 }

-TargetBufferFlags OpenGLPlatform::getPreservedFlags(UTILS_UNUSED SwapChain* swapChain) noexcept {
+TargetBufferFlags OpenGLPlatform::getPreservedFlags(UTILS_UNUSED SwapChain*) noexcept {
    return TargetBufferFlags::NONE;
 }

+bool OpenGLPlatform::isSwapChainProtected(UTILS_UNUSED SwapChain*) noexcept {
+    return false;
+}
+
 bool OpenGLPlatform::isExtraContextSupported() const noexcept {
    return false;
 }
--- a/filament/backend/src/opengl/OpenGLProgram.cpp
+++ b/filament/backend/src/opengl/OpenGLProgram.cpp
@@ -16,16 +16,25 @@

 #include "OpenGLProgram.h"

-#include "BlobCacheKey.h"
+#include "GLUtils.h"
 #include "OpenGLDriver.h"
 #include "ShaderCompilerService.h"

+#include <backend/Program.h>
+
+#include <private/backend/BackendUtils.h>
+
 #include <utils/debug.h>
 #include <utils/compiler.h>
 #include <utils/Log.h>
 #include <utils/Systrace.h>

-#include <private/backend/BackendUtils.h>
+#include <array>
+#include <string_view>
+#include <utility>
+#include <new>
+
+#include <stddef.h>

 namespace filament::backend {

--- a/filament/backend/src/opengl/OpenGLProgram.h
+++ b/filament/backend/src/opengl/OpenGLProgram.h
@@ -28,6 +28,9 @@
 #include <utils/compiler.h>
 #include <utils/FixedCapacityVector.h>

+#include <array>
+#include <limits>
+
 #include <stddef.h>
 #include <stdint.h>

@@ -67,14 +70,14 @@ public:
        }
    }

-    struct {
-        GLuint program = 0;
-    } gl; // 12 bytes
-
    // For ES2 only
    void updateUniforms(uint32_t index, GLuint id, void const* buffer, uint16_t age) noexcept;
    void setRec709ColorSpace(bool rec709) const noexcept;

+    struct {
+        GLuint program = 0;
+    } gl;                                               // 4 bytes
+
 private:
    // keep these away from of other class attributes
    struct LazyInitializationData;
@@ -86,14 +89,17 @@ private:

    void updateSamplers(OpenGLDriver* gld) const noexcept;

-    ShaderCompilerService::program_token_t mToken{};
-
    // number of bindings actually used by this program
-    uint8_t mUsedBindingsCount = 0u;
-    UTILS_UNUSED uint8_t padding[3] = {};
    std::array<uint8_t, Program::SAMPLER_BINDING_COUNT> mUsedSamplerBindingPoints;   // 4 bytes

+    ShaderCompilerService::program_token_t mToken{};    // 16 bytes
+
+    uint8_t mUsedBindingsCount = 0u;                    // 1 byte
+    UTILS_UNUSED uint8_t padding[3] = {};               // 3 bytes
+
+
    // only needed for ES2
+    GLint mRec709Location = -1; // 4 bytes
    using LocationInfo = utils::FixedCapacityVector<GLint>;
    struct UniformsRecord {
        Program::UniformInfo uniforms;
@@ -102,11 +108,10 @@ private:
        mutable uint16_t age = std::numeric_limits<uint16_t>::max();
    };
    UniformsRecord const* mUniformsRecords = nullptr;
-    GLint mRec709Location = -1;
 };

 // if OpenGLProgram is larger tha 64 bytes, it'll fall in a larger Handle bucket.
-static_assert(sizeof(OpenGLProgram) <= 64);
+static_assert(sizeof(OpenGLProgram) <= 64); // currently 48 bytes

 } // namespace filament::backend

--- a/filament/backend/src/opengl/OpenGLTimerQuery.cpp
+++ b/filament/backend/src/opengl/OpenGLTimerQuery.cpp
@@ -16,110 +16,132 @@

 #include "OpenGLTimerQuery.h"

+#include "GLUtils.h"
+#include "OpenGLDriver.h"
+
+#include <backend/Platform.h>
 #include <backend/platforms/OpenGLPlatform.h>
+#include <backend/DriverEnums.h>

 #include <utils/compiler.h>
+#include <utils/debug.h>
 #include <utils/JobSystem.h>
 #include <utils/Log.h>
+#include <utils/Mutex.h>
 #include <utils/Systrace.h>
-#include <utils/debug.h>
+
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <new>
+#include <utility>
+
+#include <stdint.h>

 namespace filament::backend {

 using namespace backend;
 using namespace GLUtils;

+class OpenGLDriver;
+
 // ------------------------------------------------------------------------------------------------

-bool OpenGLTimerQueryFactory::mGpuTimeSupported = false;
+bool TimerQueryFactory::mGpuTimeSupported = false;

-OpenGLTimerQueryInterface* OpenGLTimerQueryFactory::init(
-        OpenGLPlatform& platform, OpenGLDriver& driver) noexcept {
-    (void)driver;
+TimerQueryFactoryInterface* TimerQueryFactory::init(
+        OpenGLPlatform& platform, OpenGLContext& context) noexcept {
+    (void)context;

-    OpenGLTimerQueryInterface* impl;
+    TimerQueryFactoryInterface* impl = nullptr;

 #if defined(BACKEND_OPENGL_VERSION_GL) || defined(GL_EXT_disjoint_timer_query)
-    auto& context = driver.getContext();
    if (context.ext.EXT_disjoint_timer_query) {
        // timer queries are available
        if (context.bugs.dont_use_timer_query && platform.canCreateFence()) {
            // however, they don't work well, revert to using fences if we can.
-            impl = new(std::nothrow) OpenGLTimerQueryFence(platform);
+            impl = new(std::nothrow) TimerQueryFenceFactory(platform);
        } else {
-            impl = new(std::nothrow) TimerQueryNative(driver);
+            impl = new(std::nothrow) TimerQueryNativeFactory(context);
        }
        mGpuTimeSupported = true;
    } else
 #endif
    if (platform.canCreateFence()) {
        // no timer queries, but we can use fences
-        impl = new(std::nothrow) OpenGLTimerQueryFence(platform);
+        impl = new(std::nothrow) TimerQueryFenceFactory(platform);
        mGpuTimeSupported = true;
    } else {
        // no queries, no fences -- that's a problem
-        impl = new(std::nothrow) TimerQueryFallback();
+        impl = new(std::nothrow) TimerQueryFallbackFactory();
        mGpuTimeSupported = false;
    }
+    assert_invariant(impl);
    return impl;
 }

 // ------------------------------------------------------------------------------------------------

-OpenGLTimerQueryInterface::~OpenGLTimerQueryInterface() = default;
+TimerQueryFactoryInterface::~TimerQueryFactoryInterface() = default;

 // This is a backend synchronous call
-bool OpenGLTimerQueryInterface::getTimerQueryValue(GLTimerQuery* tq, uint64_t* elapsedTime) noexcept {
+TimerQueryResult TimerQueryFactoryInterface::getTimerQueryValue(
+        GLTimerQuery* tq, uint64_t* elapsedTime) noexcept {
    if (UTILS_LIKELY(tq->state)) {
        int64_t const elapsed = tq->state->elapsed.load(std::memory_order_relaxed);
-        bool const available = elapsed > 0;
-        if (available) {
+        if (elapsed > 0) {
            *elapsedTime = elapsed;
+            return TimerQueryResult::AVAILABLE;
        }
-        return available;
+        return TimerQueryResult(elapsed);
    }
-    return false;
+    return TimerQueryResult::ERROR;
 }

 // ------------------------------------------------------------------------------------------------

 #if defined(BACKEND_OPENGL_VERSION_GL) || defined(GL_EXT_disjoint_timer_query)

-TimerQueryNative::TimerQueryNative(OpenGLDriver& driver)
-        : mDriver(driver) {
+TimerQueryNativeFactory::TimerQueryNativeFactory(OpenGLContext& context)
+        : mContext(context) {
 }

-TimerQueryNative::~TimerQueryNative() = default;
+TimerQueryNativeFactory::~TimerQueryNativeFactory() = default;

-void TimerQueryNative::createTimerQuery(GLTimerQuery* tq) {
-    if (UTILS_UNLIKELY(!tq->state)) {
-        tq->state = std::make_shared<GLTimerQuery::State>();
-    }
-    mDriver.getContext().procs.genQueries(1u, &tq->state->gl.query);
+void TimerQueryNativeFactory::createTimerQuery(GLTimerQuery* tq) {
+    assert_invariant(!tq->state);
+
+    tq->state = std::make_shared<GLTimerQuery::State>();
+    mContext.procs.genQueries(1u, &tq->state->gl.query);
    CHECK_GL_ERROR(utils::slog.e)
 }

-void TimerQueryNative::destroyTimerQuery(GLTimerQuery* tq) {
+void TimerQueryNativeFactory::destroyTimerQuery(GLTimerQuery* tq) {
    assert_invariant(tq->state);
-    mDriver.getContext().procs.deleteQueries(1u, &tq->state->gl.query);
+
+    mContext.procs.deleteQueries(1u, &tq->state->gl.query);
+    CHECK_GL_ERROR(utils::slog.e)
+
+    tq->state.reset();
+}
+
+void TimerQueryNativeFactory::beginTimeElapsedQuery(GLTimerQuery* tq) {
+    assert_invariant(tq->state);
+
+    tq->state->elapsed.store(int64_t(TimerQueryResult::NOT_READY), std::memory_order_relaxed);
+    mContext.procs.beginQuery(GL_TIME_ELAPSED, tq->state->gl.query);
    CHECK_GL_ERROR(utils::slog.e)
 }

-void TimerQueryNative::beginTimeElapsedQuery(GLTimerQuery* tq) {
+void TimerQueryNativeFactory::endTimeElapsedQuery(OpenGLDriver& driver, GLTimerQuery* tq) {
    assert_invariant(tq->state);
-    tq->state->elapsed.store(0);
-    mDriver.getContext().procs.beginQuery(GL_TIME_ELAPSED, tq->state->gl.query);
-    CHECK_GL_ERROR(utils::slog.e)
-}

-void TimerQueryNative::endTimeElapsedQuery(GLTimerQuery* tq) {
-    assert_invariant(tq->state);
-    mDriver.getContext().procs.endQuery(GL_TIME_ELAPSED);
+    mContext.procs.endQuery(GL_TIME_ELAPSED);
    CHECK_GL_ERROR(utils::slog.e)

    std::weak_ptr<GLTimerQuery::State> const weak = tq->state;

-    mDriver.runEveryNowAndThen([context = mDriver.getContext(), weak]() -> bool {
+    driver.runEveryNowAndThen([&context = mContext, weak]() -> bool {
        auto state = weak.lock();
        if (state) {
            GLuint available = 0;
@@ -133,6 +155,8 @@ void TimerQueryNative::endTimeElapsedQuery(GLTimerQuery* tq) {
            // we won't end-up here if we're on ES and don't have GL_EXT_disjoint_timer_query
            context.procs.getQueryObjectui64v(state->gl.query, GL_QUERY_RESULT, &elapsedTime);
            state->elapsed.store((int64_t)elapsedTime, std::memory_order_relaxed);
+        } else {
+            // `state` is null here (the query was destroyed first): there is nothing to update.
        }
        return true;
    });
@@ -142,7 +166,7 @@ void TimerQueryNative::endTimeElapsedQuery(GLTimerQuery* tq) {

 // ------------------------------------------------------------------------------------------------

-OpenGLTimerQueryFence::OpenGLTimerQueryFence(OpenGLPlatform& platform)
+TimerQueryFenceFactory::TimerQueryFenceFactory(OpenGLPlatform& platform)
        : mPlatform(platform) {
    mQueue.reserve(2);
    mThread = std::thread([this]() {
@@ -166,7 +190,8 @@ OpenGLTimerQueryFence::OpenGLTimerQueryFence(OpenGLPlatform& platform)
    });
 }

-OpenGLTimerQueryFence::~OpenGLTimerQueryFence() {
+TimerQueryFenceFactory::~TimerQueryFenceFactory() {
+    assert_invariant(mQueue.empty());
    if (mThread.joinable()) {
        std::unique_lock<utils::Mutex> lock(mLock);
        mExitRequested = true;
@@ -178,27 +203,26 @@ OpenGLTimerQueryFence::~OpenGLTimerQueryFence() {
    }
 }

-void OpenGLTimerQueryFence::enqueue(OpenGLTimerQueryFence::Job&& job) {
+void TimerQueryFenceFactory::push(TimerQueryFenceFactory::Job&& job) {
    std::unique_lock<utils::Mutex> const lock(mLock);
-    mQueue.push_back(std::forward<Job>(job));
+    mQueue.push_back(std::move(job));
    mCondition.notify_one();
 }

-void OpenGLTimerQueryFence::createTimerQuery(GLTimerQuery* tq) {
-    if (UTILS_UNLIKELY(!tq->state)) {
-        tq->state = std::make_shared<GLTimerQuery::State>();
-    }
+void TimerQueryFenceFactory::createTimerQuery(GLTimerQuery* tq) {
+    assert_invariant(!tq->state);
+    tq->state = std::make_shared<GLTimerQuery::State>();
 }

-void OpenGLTimerQueryFence::destroyTimerQuery(GLTimerQuery* tq) {
+void TimerQueryFenceFactory::destroyTimerQuery(GLTimerQuery* tq) {
    assert_invariant(tq->state);
+    tq->state.reset();
 }

-void OpenGLTimerQueryFence::beginTimeElapsedQuery(GLTimerQuery* tq) {
+void TimerQueryFenceFactory::beginTimeElapsedQuery(GLTimerQuery* tq) {
    assert_invariant(tq->state);
-    tq->state->elapsed.store(0);
+    tq->state->elapsed.store(int64_t(TimerQueryResult::NOT_READY), std::memory_order_relaxed);

-    Platform::Fence* fence = mPlatform.createFence();
    std::weak_ptr<GLTimerQuery::State> const weak = tq->state;

    // FIXME: this implementation of beginTimeElapsedQuery is usually wrong; it ends up
@@ -207,12 +231,11 @@ void OpenGLTimerQueryFence::beginTimeElapsedQuery(GLTimerQuery* tq) {
    //    on a dummy target for instance, or somehow latch the begin time at the next renderpass
    //    start.

-    push([&platform = mPlatform, fence, weak]() {
+    push([&platform = mPlatform, fence = mPlatform.createFence(), weak]() {
        auto state = weak.lock();
        if (state) {
            platform.waitFence(fence, FENCE_WAIT_FOR_EVER);
-            int64_t const then = clock::now().time_since_epoch().count();
-            state->elapsed.store(-then, std::memory_order_relaxed);
+            state->then = clock::now().time_since_epoch().count();
            SYSTRACE_CONTEXT();
            SYSTRACE_ASYNC_BEGIN("OpenGLTimerQueryFence", intptr_t(state.get()));
        }
@@ -220,19 +243,16 @@ void OpenGLTimerQueryFence::beginTimeElapsedQuery(GLTimerQuery* tq) {
    });
 }

-void OpenGLTimerQueryFence::endTimeElapsedQuery(GLTimerQuery* tq) {
+void TimerQueryFenceFactory::endTimeElapsedQuery(OpenGLDriver&, GLTimerQuery* tq) {
    assert_invariant(tq->state);
-    Platform::Fence* fence = mPlatform.createFence();
    std::weak_ptr<GLTimerQuery::State> const weak = tq->state;

-    push([&platform = mPlatform, fence, weak]() {
+    push([&platform = mPlatform, fence = mPlatform.createFence(), weak]() {
        auto state = weak.lock();
        if (state) {
            platform.waitFence(fence, FENCE_WAIT_FOR_EVER);
            int64_t const now = clock::now().time_since_epoch().count();
-            int64_t const then = state->elapsed.load(std::memory_order_relaxed);
-            assert_invariant(then < 0);
-            state->elapsed.store(now + then, std::memory_order_relaxed);
+            state->elapsed.store(now - state->then, std::memory_order_relaxed);
            SYSTRACE_CONTEXT();
            SYSTRACE_ASYNC_END("OpenGLTimerQueryFence", intptr_t(state.get()));
        }
@@ -242,34 +262,32 @@ void OpenGLTimerQueryFence::endTimeElapsedQuery(GLTimerQuery* tq) {

 // ------------------------------------------------------------------------------------------------

-TimerQueryFallback::TimerQueryFallback() = default;
+TimerQueryFallbackFactory::TimerQueryFallbackFactory() = default;

-TimerQueryFallback::~TimerQueryFallback() = default;
+TimerQueryFallbackFactory::~TimerQueryFallbackFactory() = default;

-void TimerQueryFallback::createTimerQuery(GLTimerQuery* tq) {
-    if (UTILS_UNLIKELY(!tq->state)) {
-        tq->state = std::make_shared<GLTimerQuery::State>();
-    }
+void TimerQueryFallbackFactory::createTimerQuery(GLTimerQuery* tq) {
+    assert_invariant(!tq->state);
+    tq->state = std::make_shared<GLTimerQuery::State>();
 }

-void TimerQueryFallback::destroyTimerQuery(GLTimerQuery* tq) {
+void TimerQueryFallbackFactory::destroyTimerQuery(GLTimerQuery* tq) {
    assert_invariant(tq->state);
+    tq->state.reset();
 }

-void TimerQueryFallback::beginTimeElapsedQuery(OpenGLTimerQueryInterface::GLTimerQuery* tq) {
+void TimerQueryFallbackFactory::beginTimeElapsedQuery(GLTimerQuery* tq) {
    assert_invariant(tq->state);
    // this implementation measures the CPU time, but we have no h/w support
-    int64_t const then = clock::now().time_since_epoch().count();
-    tq->state->elapsed.store(-then, std::memory_order_relaxed);
+    tq->state->then = clock::now().time_since_epoch().count();
+    tq->state->elapsed.store(int64_t(TimerQueryResult::NOT_READY), std::memory_order_relaxed);
 }

-void TimerQueryFallback::endTimeElapsedQuery(OpenGLTimerQueryInterface::GLTimerQuery* tq) {
+void TimerQueryFallbackFactory::endTimeElapsedQuery(OpenGLDriver&, GLTimerQuery* tq) {
    assert_invariant(tq->state);
    // this implementation measures the CPU time, but we have no h/w support
    int64_t const now = clock::now().time_since_epoch().count();
-    int64_t const then = tq->state->elapsed.load(std::memory_order_relaxed);
-    assert_invariant(then < 0);
-    tq->state->elapsed.store(now + then, std::memory_order_relaxed);
+    tq->state->elapsed.store(now - tq->state->then, std::memory_order_relaxed);
 }

 } // namespace filament::backend
--- a/filament/backend/src/opengl/OpenGLTimerQuery.h
+++ b/filament/backend/src/opengl/OpenGLTimerQuery.h
@@ -17,18 +17,41 @@
 #ifndef TNT_FILAMENT_BACKEND_OPENGL_TIMERQUERY_H
 #define TNT_FILAMENT_BACKEND_OPENGL_TIMERQUERY_H

-#include "OpenGLDriver.h"
+#include <backend/DriverEnums.h>
+
+#include "DriverBase.h"

 #include <utils/Condition.h>
 #include <utils/Mutex.h>

+#include "gl_headers.h"
+
+#include <atomic>
+#include <chrono>
+#include <functional>
+#include <memory>
 #include <thread>
 #include <vector>

+#include <stdint.h>
+
 namespace filament::backend {

 class OpenGLPlatform;
-class OpenGLTimerQueryInterface;
+class OpenGLContext;
+class OpenGLDriver;
+class TimerQueryFactoryInterface;
+
+struct GLTimerQuery : public HwTimerQuery {
+    struct State {
+        struct {
+            GLuint query;
+        } gl;
+        int64_t then{};
+        std::atomic<int64_t> elapsed{};
+    };
+    std::shared_ptr<State> state;
+};

 /*
 * We need two implementation of timer queries (only elapsed time), because
@@ -38,83 +61,80 @@ class OpenGLTimerQueryInterface;
 * These classes implement the various strategies...
 */

-
-class OpenGLTimerQueryFactory {
+class TimerQueryFactory {
    static bool mGpuTimeSupported;
 public:
-    static OpenGLTimerQueryInterface* init(
-            OpenGLPlatform& platform, OpenGLDriver& driver) noexcept;
+    static TimerQueryFactoryInterface* init(
+            OpenGLPlatform& platform, OpenGLContext& context) noexcept;

    static bool isGpuTimeSupported() noexcept {
        return mGpuTimeSupported;
    }
 };

-class OpenGLTimerQueryInterface {
+class TimerQueryFactoryInterface {
 protected:
-    using GLTimerQuery = OpenGLDriver::GLTimerQuery;
+    using GLTimerQuery = filament::backend::GLTimerQuery;
    using clock = std::chrono::steady_clock;

 public:
-    virtual ~OpenGLTimerQueryInterface();
+    virtual ~TimerQueryFactoryInterface();
    virtual void createTimerQuery(GLTimerQuery* query) = 0;
    virtual void destroyTimerQuery(GLTimerQuery* query) = 0;
    virtual void beginTimeElapsedQuery(GLTimerQuery* query) = 0;
-    virtual void endTimeElapsedQuery(GLTimerQuery* query) = 0;
+    virtual void endTimeElapsedQuery(OpenGLDriver& driver, GLTimerQuery* query) = 0;

-    static bool getTimerQueryValue(GLTimerQuery* tq, uint64_t* elapsedTime) noexcept;
+    static TimerQueryResult getTimerQueryValue(GLTimerQuery* tq, uint64_t* elapsedTime) noexcept;
 };

 #if defined(BACKEND_OPENGL_VERSION_GL) || defined(GL_EXT_disjoint_timer_query)

-class TimerQueryNative : public OpenGLTimerQueryInterface {
+class TimerQueryNativeFactory final : public TimerQueryFactoryInterface {
 public:
-    explicit TimerQueryNative(OpenGLDriver& driver);
-    ~TimerQueryNative() override;
+    explicit TimerQueryNativeFactory(OpenGLContext& context);
+    ~TimerQueryNativeFactory() override;
 private:
    void createTimerQuery(GLTimerQuery* query) override;
    void destroyTimerQuery(GLTimerQuery* query) override;
    void beginTimeElapsedQuery(GLTimerQuery* query) override;
-    void endTimeElapsedQuery(GLTimerQuery* query) override;
-    OpenGLDriver& mDriver;
+    void endTimeElapsedQuery(OpenGLDriver& driver, GLTimerQuery* query) override;
+    OpenGLContext& mContext;
 };

 #endif

-class OpenGLTimerQueryFence : public OpenGLTimerQueryInterface {
+class TimerQueryFenceFactory final : public TimerQueryFactoryInterface {
 public:
-    explicit OpenGLTimerQueryFence(OpenGLPlatform& platform);
-    ~OpenGLTimerQueryFence() override;
+    explicit TimerQueryFenceFactory(OpenGLPlatform& platform);
+    ~TimerQueryFenceFactory() override;
 private:
    using Job = std::function<void()>;
+    using Container = std::vector<Job>;
+
    void createTimerQuery(GLTimerQuery* query) override;
    void destroyTimerQuery(GLTimerQuery* query) override;
    void beginTimeElapsedQuery(GLTimerQuery* tq) override;
-    void endTimeElapsedQuery(GLTimerQuery* tq) override;
+    void endTimeElapsedQuery(OpenGLDriver& driver, GLTimerQuery* tq) override;

-    void enqueue(Job&& job);
-    template<typename CALLABLE, typename ... ARGS>
-    void push(CALLABLE&& func, ARGS&& ... args) {
-        enqueue(Job(std::bind(std::forward<CALLABLE>(func), std::forward<ARGS>(args)...)));
-    }
+    void push(Job&& job);

    OpenGLPlatform& mPlatform;
    std::thread mThread;
    mutable utils::Mutex mLock;
    mutable utils::Condition mCondition;
-    std::vector<Job> mQueue;
+    Container mQueue;
    bool mExitRequested = false;
 };

-class TimerQueryFallback : public OpenGLTimerQueryInterface {
+class TimerQueryFallbackFactory final : public TimerQueryFactoryInterface {
 public:
-    explicit TimerQueryFallback();
-    ~TimerQueryFallback() override;
+    explicit TimerQueryFallbackFactory();
+    ~TimerQueryFallbackFactory() override;
 private:
    void createTimerQuery(GLTimerQuery* query) override;
    void destroyTimerQuery(GLTimerQuery* query) override;
    void beginTimeElapsedQuery(GLTimerQuery* query) override;
-    void endTimeElapsedQuery(GLTimerQuery* query) override;
+    void endTimeElapsedQuery(OpenGLDriver& driver, GLTimerQuery* query) override;
 };

 } // namespace filament::backend
--- a/filament/backend/src/opengl/ShaderCompilerService.cpp
+++ b/filament/backend/src/opengl/ShaderCompilerService.cpp
@@ -30,6 +30,7 @@
 #include <utils/Log.h>
 #include <utils/Systrace.h>

+#include <cctype>
 #include <chrono>
 #include <string>
 #include <string_view>
@@ -261,12 +262,12 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
                [this, &gl, program = std::move(program), token]() mutable {
                    // compile the shaders
                    std::array<GLuint, Program::SHADER_TYPE_COUNT> shaders{};
-                    std::array<utils::CString, Program::SHADER_TYPE_COUNT> shaderSourceCode;
                    compileShaders(gl,
                            std::move(program.getShadersSource()),
                            program.getSpecializationConstants(),
+                            program.isMultiview(),
                            shaders,
-                            shaderSourceCode);
+                            token->shaderSourceCode);

                    // link the program
                    GLuint const glProgram = linkProgram(gl, shaders, token->attributes);
@@ -300,6 +301,7 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
        compileShaders(gl,
                std::move(program.getShadersSource()),
                program.getSpecializationConstants(),
+                program.isMultiview(),
                token->gl.shaders,
                token->shaderSourceCode);

@@ -502,6 +504,7 @@ GLuint ShaderCompilerService::initialize(program_token_t& token) noexcept {
 void ShaderCompilerService::compileShaders(OpenGLContext& context,
        Program::ShaderSource shadersSource,
        utils::FixedCapacityVector<Program::SpecializationConstant> const& specializationConstants,
+        bool multiview,
        std::array<GLuint, Program::SHADER_TYPE_COUNT>& outShaders,
        UTILS_UNUSED_IN_RELEASE std::array<CString, Program::SHADER_TYPE_COUNT>& outShaderSourceCode) noexcept {

@@ -515,8 +518,16 @@ void ShaderCompilerService::compileShaders(OpenGLContext& context,
    };

    std::string specializationConstantString;
+    int32_t numViews = 2;
    for (auto const& sc : specializationConstants) {
        appendSpecConstantString(specializationConstantString, sc);
+        if (sc.id == 8) {
+            // This constant must match
+            // ReservedSpecializationConstants::CONFIG_STEREO_EYE_COUNT
+            // which we can't use here because it's defined in EngineEnums.h.
+            // (we're breaking layering here, but it's for the good cause).
+            numViews = std::get<int32_t>(sc.value);
+        }
    }
    if (!specializationConstantString.empty()) {
        specializationConstantString += '\n';
@@ -545,17 +556,23 @@ void ShaderCompilerService::compileShaders(OpenGLContext& context,

        if (UTILS_LIKELY(!shadersSource[i].empty())) {
            Program::ShaderBlob& shader = shadersSource[i];
+            char* shader_src = reinterpret_cast<char*>(shader.data());
+            size_t shader_len = shader.size();

            // remove GOOGLE_cpp_style_line_directive
-            std::string_view const source = process_GOOGLE_cpp_style_line_directive(context,
-                    reinterpret_cast<char*>(shader.data()), shader.size());
+            process_GOOGLE_cpp_style_line_directive(context, shader_src, shader_len);
+
+            // replace the value of layout(num_views = X) for multiview extension
+            if (multiview && stage == ShaderStage::VERTEX) {
+                process_OVR_multiview2(context, numViews, shader_src, shader_len);
+            }

            // add support for ARB_shading_language_packing if needed
            auto const packingFunctions = process_ARB_shading_language_packing(context);

            // split shader source, so we can insert the specialization constants and the packing
            // functions
-            auto const [prolog, body] = splitShaderSource(source);
+            auto const [prolog, body] = splitShaderSource({ shader_src, shader_len });

            const std::array<const char*, 4> sources = {
                    prolog.data(),
@@ -578,7 +595,7 @@ void ShaderCompilerService::compileShaders(OpenGLContext& context,
 #ifndef NDEBUG
            // for debugging we return the original shader source (without the modifications we
            // made here), otherwise the line numbers wouldn't match.
-            outShaderSourceCode[i] = { source.data(), source.length() };
+            outShaderSourceCode[i] = { shader_src, shader_len };
 #endif

            outShaders[i] = shaderId;
@@ -587,15 +604,59 @@ void ShaderCompilerService::compileShaders(OpenGLContext& context,
 }

 // If usages of the Google-style line directive are present, remove them, as some
-// drivers don't allow the quotation marks. This happens in-place.
-std::string_view ShaderCompilerService::process_GOOGLE_cpp_style_line_directive(OpenGLContext& context,
+// drivers don't allow the quotation marks. This source modification happens in-place.
+void ShaderCompilerService::process_GOOGLE_cpp_style_line_directive(OpenGLContext& context,
        char* source, size_t len) noexcept {
    if (!context.ext.GOOGLE_cpp_style_line_directive) {
        if (UTILS_UNLIKELY(requestsGoogleLineDirectivesExtension({ source, len }))) {
            removeGoogleLineDirectives(source, len); // length is unaffected
        }
    }
-    return { source, len };
+}
+
+// Patches, in-place, the first `layout(num_views = X)` qualifier found in `source` so that X is
+// `eyeCount`. OpenGL takes the multiview view count from the shader text itself, so it has to be
+// rewritten before compilation. X must be a single digit (the length is unaffected), and only
+// spaces are skipped between tokens. Does nothing if OVR_multiview2 is unavailable.
+void ShaderCompilerService::process_OVR_multiview2(OpenGLContext& context,
+        int32_t eyeCount, char* source, size_t len) noexcept {
+    // We don't use regular expression in favor of performance.
+    if (context.ext.OVR_multiview2) {
+        const std::string_view shader{ source, len };
+        const std::string_view layout = "layout";
+        const std::string_view num_views = "num_views";
+        size_t found = 0;
+        while (true) {
+            found = shader.find(layout, found);
+            if (found == std::string_view::npos) {
+                break;
+            }
+            found = shader.find_first_not_of(' ', found + layout.size());
+            if (found == std::string_view::npos || shader[found] != '(') {
+                continue;
+            }
+            found = shader.find_first_not_of(' ', found + 1);
+            if (found == std::string_view::npos) {
+                continue;
+            }
+            if (shader.compare(found, num_views.size(), num_views) != 0) {
+                continue;
+            }
+            found = shader.find_first_not_of(' ', found + num_views.size());
+            if (found == std::string_view::npos || shader[found] != '=') {
+                continue;
+            }
+            found = shader.find_first_not_of(' ', found + 1);
+            if (found == std::string_view::npos) {
+                continue;
+            }
+            // The value must be a single digit; the look-ahead is guarded so it stays within `len`.
+            assert_invariant(eyeCount >= 0 && eyeCount < 10);
+            assert_invariant(found + 1 >= len || !::isdigit((unsigned char)source[found + 1]));
+            source[found] = char('0' + eyeCount);
+            break;
+        }
+    }
 }

 // Tragically, OpenGL 4.1 doesn't support unpackHalf2x16 (appeared in 4.2) and
--- a/filament/backend/src/opengl/ShaderCompilerService.h
+++ b/filament/backend/src/opengl/ShaderCompilerService.h
@@ -134,10 +134,14 @@ private:
            OpenGLContext& context,
            Program::ShaderSource shadersSource,
            utils::FixedCapacityVector<Program::SpecializationConstant> const& specializationConstants,
+            bool multiview,
            std::array<GLuint, Program::SHADER_TYPE_COUNT>& outShaders,
            std::array<utils::CString, Program::SHADER_TYPE_COUNT>& outShaderSourceCode) noexcept;

-    static std::string_view process_GOOGLE_cpp_style_line_directive(OpenGLContext& context,
+    static void process_GOOGLE_cpp_style_line_directive(OpenGLContext& context,
+            char* source, size_t len) noexcept;
+
+    static void process_OVR_multiview2(OpenGLContext& context, int32_t eyeCount,
            char* source, size_t len) noexcept;

    static std::string_view process_ARB_shading_language_packing(OpenGLContext& context) noexcept;
--- a/filament/backend/src/opengl/gl_headers.cpp
+++ b/filament/backend/src/opengl/gl_headers.cpp
@@ -67,6 +67,9 @@ PFNGLDISCARDFRAMEBUFFEREXTPROC glDiscardFramebufferEXT;
 #ifdef GL_KHR_parallel_shader_compile
 PFNGLMAXSHADERCOMPILERTHREADSKHRPROC glMaxShaderCompilerThreadsKHR;
 #endif
+#ifdef GL_OVR_multiview
+PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glFramebufferTextureMultiviewOVR;
+#endif

 #if defined(__ANDROID__) && !defined(FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2)
 // On Android, If we want to support a build system less than ANDROID_API 21, we need to
@@ -117,6 +120,9 @@ void importGLESExtensionsEntryPoints() {
 #ifdef GL_KHR_parallel_shader_compile
        getProcAddress(glMaxShaderCompilerThreadsKHR, "glMaxShaderCompilerThreadsKHR");
 #endif
+#ifdef GL_OVR_multiview
+        getProcAddress(glFramebufferTextureMultiviewOVR, "glFramebufferTextureMultiviewOVR");
+#endif
 #if defined(__ANDROID__) && !defined(FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2)
        getProcAddress(glDispatchCompute, "glDispatchCompute");
 #endif
--- a/filament/backend/src/opengl/gl_headers.h
+++ b/filament/backend/src/opengl/gl_headers.h
@@ -151,6 +151,9 @@ extern PFNGLDISCARDFRAMEBUFFEREXTPROC glDiscardFramebufferEXT;
 #ifdef GL_KHR_parallel_shader_compile
 extern PFNGLMAXSHADERCOMPILERTHREADSKHRPROC glMaxShaderCompilerThreadsKHR;
 #endif
+#ifdef GL_OVR_multiview
+extern PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glFramebufferTextureMultiviewOVR;
+#endif
 #if defined(__ANDROID__) && !defined(FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2)
 extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
 #endif
--- a/filament/backend/src/opengl/platforms/PlatformCocoaGL.mm
+++ b/filament/backend/src/opengl/platforms/PlatformCocoaGL.mm
@@ -247,8 +247,8 @@ void PlatformCocoaGL::destroySwapChain(Platform::SwapChain* swapChain) noexcept
    delete cocoaSwapChain;
 }

-void PlatformCocoaGL::makeCurrent(Platform::SwapChain* drawSwapChain,
-        Platform::SwapChain* readSwapChain) noexcept {
+bool PlatformCocoaGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
+        SwapChain* readSwapChain) noexcept {
    ASSERT_PRECONDITION_NON_FATAL(drawSwapChain == readSwapChain,
            "ContextManagerCocoa does not support using distinct draw/read swap chains.");
    CocoaGLSwapChain* swapChain = (CocoaGLSwapChain*)drawSwapChain;
@@ -287,6 +287,7 @@ void PlatformCocoaGL::makeCurrent(Platform::SwapChain* drawSwapChain,

    swapChain->previousBounds = currentBounds;
    swapChain->previousWindowFrame = currentWindowFrame;
+    return true;
 }

 void PlatformCocoaGL::commit(Platform::SwapChain* swapChain) noexcept {
--- a/filament/backend/src/opengl/platforms/PlatformCocoaTouchGL.mm
+++ b/filament/backend/src/opengl/platforms/PlatformCocoaTouchGL.mm
@@ -143,11 +143,12 @@ void PlatformCocoaTouchGL::destroySwapChain(Platform::SwapChain* swapChain) noex
    }
 }

-uint32_t PlatformCocoaTouchGL::createDefaultRenderTarget() noexcept {
+uint32_t PlatformCocoaTouchGL::getDefaultFramebufferObject() noexcept {
    return pImpl->mDefaultFramebuffer;
 }

-void PlatformCocoaTouchGL::makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept {
+bool PlatformCocoaTouchGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
+        SwapChain* readSwapChain) noexcept {
    ASSERT_PRECONDITION_NON_FATAL(drawSwapChain == readSwapChain,
            "PlatformCocoaTouchGL does not support using distinct draw/read swap chains.");
    CAEAGLLayer* const glLayer = (__bridge CAEAGLLayer*) drawSwapChain;
@@ -182,6 +183,7 @@ void PlatformCocoaTouchGL::makeCurrent(SwapChain* drawSwapChain, SwapChain* read
        ASSERT_POSTCONDITION(status == GL_FRAMEBUFFER_COMPLETE, "Incomplete framebuffer.");
        glBindFramebuffer(GL_FRAMEBUFFER, oldFramebuffer);
    }
+    return true;
 }

 void PlatformCocoaTouchGL::commit(Platform::SwapChain* swapChain) noexcept {
--- a/filament/backend/src/opengl/platforms/PlatformEGL.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformEGL.cpp
@@ -18,16 +18,33 @@

 #include "opengl/GLUtils.h"

+#include <backend/platforms/OpenGLPlatform.h>
+
+#include <backend/Platform.h>
+#include <backend/DriverEnums.h>
+
 #include <EGL/egl.h>
 #include <EGL/eglext.h>
+#include <EGL/eglplatform.h>

 #if defined(__ANDROID__)
 #include <sys/system_properties.h>
 #endif
-
 #include <utils/compiler.h>
+
+#include <utils/debug.h>
+#include <utils/Invocable.h>
 #include <utils/Log.h>
-#include <utils/Panic.h>
+#include <utils/ostream.h>
+
+#include <algorithm>
+#include <new>
+#include <initializer_list>
+#include <utility>
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>

 #ifndef EGL_CONTEXT_OPENGL_BACKWARDS_COMPATIBLE_ANGLE
 #   define EGL_CONTEXT_OPENGL_BACKWARDS_COMPATIBLE_ANGLE 0x3483
@@ -48,7 +65,6 @@ UTILS_PRIVATE PFNEGLDESTROYIMAGEKHRPROC eglDestroyImageKHR = {};
 }
 using namespace glext;

-
 // ---------------------------------------------------------------------------------------------
 // Utilities
 // ---------------------------------------------------------------------------------------------
@@ -137,7 +153,8 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon
    ext.egl.KHR_gl_colorspace = extensions.has("EGL_KHR_gl_colorspace");
    ext.egl.KHR_create_context = extensions.has("EGL_KHR_create_context");
    ext.egl.KHR_no_config_context = extensions.has("EGL_KHR_no_config_context");
-    ext.egl.KHR_surfaceless_context = extensions.has("KHR_surfaceless_context");
+    ext.egl.KHR_surfaceless_context = extensions.has("EGL_KHR_surfaceless_context");
+    ext.egl.EXT_protected_content = extensions.has("EGL_EXT_protected_content");
    if (ext.egl.KHR_create_context) {
        // KHR_create_context implies KHR_surfaceless_context for ES3.x contexts
        ext.egl.KHR_surfaceless_context = true;
@@ -266,12 +283,14 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon
        }
    }

-    if (UTILS_UNLIKELY(!makeCurrent(mEGLDummySurface, mEGLDummySurface))) {
+    if (UTILS_UNLIKELY(
+            egl.makeCurrent(mEGLContext, mEGLDummySurface, mEGLDummySurface) == EGL_FALSE)) {
        // eglMakeCurrent failed
        logEglError("eglMakeCurrent");
        goto error;
    }

+    mCurrentContextType = ContextType::UNPROTECTED;
    mContextAttribs = std::move(contextAttribs);

    initializeGlExtensions();
@@ -290,9 +309,13 @@ error:
    if (mEGLContext) {
        eglDestroyContext(mEGLDisplay, mEGLContext);
    }
+    if (mEGLContextProtected) {
+        eglDestroyContext(mEGLDisplay, mEGLContextProtected);
+    }

    mEGLDummySurface = EGL_NO_SURFACE;
    mEGLContext = EGL_NO_CONTEXT;
+    mEGLContextProtected = EGL_NO_CONTEXT;

    eglTerminate(mEGLDisplay);
    eglReleaseThread();
@@ -304,6 +327,10 @@ bool PlatformEGL::isExtraContextSupported() const noexcept {
    return ext.egl.KHR_surfaceless_context;
 }

+bool PlatformEGL::isProtectedContextSupported() const noexcept {
+    return ext.egl.EXT_protected_content;
+}
+
 void PlatformEGL::createContext(bool shared) {
    EGLConfig config = ext.egl.KHR_no_config_context ? EGL_NO_CONFIG_KHR : mEGLConfig;

@@ -338,15 +365,6 @@ void PlatformEGL::releaseContext() noexcept {
    eglReleaseThread();
 }

-EGLBoolean PlatformEGL::makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) noexcept {
-    if (UTILS_UNLIKELY((drawSurface != mCurrentDrawSurface || readSurface != mCurrentReadSurface))) {
-        mCurrentDrawSurface = drawSurface;
-        mCurrentReadSurface = readSurface;
-        return eglMakeCurrent(mEGLDisplay, drawSurface, readSurface, mEGLContext);
-    }
-    return EGL_TRUE;
-}
-
 void PlatformEGL::terminate() noexcept {
    // it's always allowed to use EGL_NO_SURFACE, EGL_NO_CONTEXT
    eglMakeCurrent(mEGLDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
@@ -354,6 +372,9 @@ void PlatformEGL::terminate() noexcept {
        eglDestroySurface(mEGLDisplay, mEGLDummySurface);
    }
    eglDestroyContext(mEGLDisplay, mEGLContext);
+    if (mEGLContextProtected != EGL_NO_CONTEXT) {
+        eglDestroyContext(mEGLDisplay, mEGLContextProtected);
+    }
    for (auto context : mAdditionalContexts) {
        eglDestroyContext(mEGLDisplay, context);
    }
@@ -424,6 +445,8 @@ EGLConfig PlatformEGL::findSwapChainConfig(uint64_t flags, bool window, bool pbu
    return config;
 }

+// -----------------------------------------------------------------------------------------------
+
 bool PlatformEGL::isSRGBSwapChainSupported() const noexcept {
    return ext.egl.KHR_gl_colorspace;
 }
@@ -431,6 +454,23 @@ bool PlatformEGL::isSRGBSwapChainSupported() const noexcept {
 Platform::SwapChain* PlatformEGL::createSwapChain(
        void* nativeWindow, uint64_t flags) noexcept {

+    Config attribs;
+    if (ext.egl.KHR_gl_colorspace) {
+        if (flags & SWAP_CHAIN_CONFIG_SRGB_COLORSPACE) {
+            attribs[EGL_GL_COLORSPACE_KHR] = EGL_GL_COLORSPACE_SRGB_KHR;
+        }
+    } else {
+        flags &= ~SWAP_CHAIN_CONFIG_SRGB_COLORSPACE;
+    }
+
+    if (ext.egl.EXT_protected_content) {
+        if (flags & SWAP_CHAIN_CONFIG_PROTECTED_CONTENT) {
+            attribs[EGL_PROTECTED_CONTENT_EXT] = EGL_TRUE;
+        }
+    } else {
+        flags &= ~SWAP_CHAIN_CONFIG_PROTECTED_CONTENT;
+    }
+
    EGLConfig config = EGL_NO_CONFIG_KHR;
    if (UTILS_LIKELY(ext.egl.KHR_no_config_context)) {
        config = findSwapChainConfig(flags, true, false);
@@ -438,46 +478,34 @@ Platform::SwapChain* PlatformEGL::createSwapChain(
        config = mEGLConfig;
    }

-    if (UTILS_UNLIKELY(config == EGL_NO_CONFIG_KHR)) {
-        return nullptr;
-    }
+    EGLSurface sur = EGL_NO_SURFACE;
+    if (UTILS_LIKELY(config != EGL_NO_CONFIG_KHR)) {
+        sur = eglCreateWindowSurface(mEGLDisplay, config,
+                (EGLNativeWindowType)nativeWindow, attribs.data());

-    Config attribs;
-
-    if (ext.egl.KHR_gl_colorspace) {
-        if (flags & SWAP_CHAIN_CONFIG_SRGB_COLORSPACE) {
-            attribs[EGL_GL_COLORSPACE_KHR] = EGL_GL_COLORSPACE_SRGB_KHR;
+        if (UTILS_LIKELY(sur != EGL_NO_SURFACE)) {
+            // this is not fatal
+            eglSurfaceAttrib(mEGLDisplay, sur, EGL_SWAP_BEHAVIOR, EGL_BUFFER_DESTROYED);
+        } else {
+            logEglError("PlatformEGL::createSwapChain: eglCreateWindowSurface");
        }
+    } else {
+        // error already logged
    }

-    EGLSurface sur = eglCreateWindowSurface(mEGLDisplay, config,
-            (EGLNativeWindowType)nativeWindow, attribs.data());
-
-    if (UTILS_UNLIKELY(sur == EGL_NO_SURFACE)) {
-        logEglError("eglCreateWindowSurface");
-        return nullptr;
-    }
-
-    // this is not fatal
-    eglSurfaceAttrib(mEGLDisplay, sur, EGL_SWAP_BEHAVIOR, EGL_BUFFER_DESTROYED);
-
-    return (SwapChain*)sur;
+    SwapChainEGL* const sc = new(std::nothrow) SwapChainEGL({
+        .sur = sur,
+        .attribs = std::move(attribs),
+        .nativeWindow = (EGLNativeWindowType)nativeWindow,
+        .config = config,
+        .flags = flags
+    });
+    return sc;
 }

 Platform::SwapChain* PlatformEGL::createSwapChain(
        uint32_t width, uint32_t height, uint64_t flags) noexcept {

-    EGLConfig config = EGL_NO_CONFIG_KHR;
-    if (UTILS_LIKELY(ext.egl.KHR_no_config_context)) {
-        config = findSwapChainConfig(flags, false, true);
-    } else {
-        config = mEGLConfig;
-    }
-
-    if (UTILS_UNLIKELY(config == EGL_NO_CONFIG_KHR)) {
-        return nullptr;
-    }
-
    Config attribs = {
            { EGL_WIDTH,  EGLint(width) },
            { EGL_HEIGHT, EGLint(height) },
@@ -487,41 +515,149 @@ Platform::SwapChain* PlatformEGL::createSwapChain(
        if (flags & SWAP_CHAIN_CONFIG_SRGB_COLORSPACE) {
            attribs[EGL_GL_COLORSPACE_KHR] = EGL_GL_COLORSPACE_SRGB_KHR;
        }
+    } else {
+        flags &= ~SWAP_CHAIN_CONFIG_SRGB_COLORSPACE;
    }

-    EGLSurface sur = eglCreatePbufferSurface(mEGLDisplay, config, attribs.data());
-
-    if (UTILS_UNLIKELY(sur == EGL_NO_SURFACE)) {
-        logEglError("eglCreatePbufferSurface");
-        return nullptr;
+    if (ext.egl.EXT_protected_content) {
+        if (flags & SWAP_CHAIN_CONFIG_PROTECTED_CONTENT) {
+            attribs[EGL_PROTECTED_CONTENT_EXT] = EGL_TRUE;
+        }
+    } else {
+        flags &= ~SWAP_CHAIN_CONFIG_PROTECTED_CONTENT;
    }
-    return (SwapChain*)sur;
+
+    EGLConfig config = EGL_NO_CONFIG_KHR;
+    if (UTILS_LIKELY(ext.egl.KHR_no_config_context)) {
+        config = findSwapChainConfig(flags, false, true); // headless: window=false, pbuffer=true
+    } else {
+        config = mEGLConfig;
+    }
+
+    EGLSurface sur = EGL_NO_SURFACE;
+    if (UTILS_LIKELY(config != EGL_NO_CONFIG_KHR)) {
+        sur = eglCreatePbufferSurface(mEGLDisplay, config, attribs.data());
+        if (UTILS_UNLIKELY(sur == EGL_NO_SURFACE)) {
+            logEglError("PlatformEGL::createSwapChain: eglCreatePbufferSurface");
+        }
+    } else {
+        // error already logged
+    }
+
+    SwapChainEGL* const sc = new(std::nothrow) SwapChainEGL({
+            .sur = sur,
+            .attribs = std::move(attribs),
+            .config = config,
+            .flags = flags
+    });
+    return sc;
 }

 void PlatformEGL::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
-    EGLSurface sur = (EGLSurface) swapChain;
-    if (sur != EGL_NO_SURFACE) {
-        makeCurrent(mEGLDummySurface, mEGLDummySurface);
-        eglDestroySurface(mEGLDisplay, sur);
+    if (swapChain) {
+        SwapChainEGL const* const sc = static_cast<SwapChainEGL const*>(swapChain);
+        if (sc->sur != EGL_NO_SURFACE) {
+            egl.makeCurrent(mEGLDummySurface, mEGLDummySurface); // switch to the dummy surface
+            eglDestroySurface(mEGLDisplay, sc->sur);
+        }
+        delete sc; // always free the wrapper, even when surface creation had failed
    }
 }

+bool PlatformEGL::isSwapChainProtected(Platform::SwapChain* swapChain) noexcept {
+    if (!swapChain) {
+        return false; // a null swap chain is reported as unprotected
+    }
+    auto const* const sc = static_cast<SwapChainEGL const*>(swapChain);
+    return bool(sc->flags & SWAP_CHAIN_CONFIG_PROTECTED_CONTENT);
+}
+
+OpenGLPlatform::ContextType PlatformEGL::getCurrentContextType() const noexcept {
+    return mCurrentContextType;
+}
+
+bool PlatformEGL::makeCurrent(ContextType type,
+        SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept {
+    // Both swap chains are dereferenced unconditionally, so neither may be null.
+    auto const* const draw = static_cast<SwapChainEGL const*>(drawSwapChain);
+    auto const* const read = static_cast<SwapChainEGL const*>(readSwapChain);
+    EGLBoolean const bound = egl.makeCurrent(getContextForType(type), draw->sur, read->sur);
+    return bound == EGL_TRUE;
+}
+
 void PlatformEGL::makeCurrent(Platform::SwapChain* drawSwapChain,
-                              Platform::SwapChain* readSwapChain) noexcept {
-    EGLSurface drawSur = (EGLSurface) drawSwapChain;
-    EGLSurface readSur = (EGLSurface) readSwapChain;
-    if (drawSur != EGL_NO_SURFACE || readSur != EGL_NO_SURFACE) {
-        makeCurrent(drawSur, readSur);
+        Platform::SwapChain* readSwapChain,
+        utils::Invocable<void()> preContextChange,
+        utils::Invocable<void(size_t index)> postContextChange) noexcept {
+    // Binds the swap chains; a protected draw swap chain requests the protected context.
+    assert_invariant(drawSwapChain);
+    assert_invariant(readSwapChain);
+
+    ContextType type = ContextType::UNPROTECTED;
+    if (ext.egl.EXT_protected_content) {
+        bool const swapChainProtected = isSwapChainProtected(drawSwapChain);
+        if (UTILS_UNLIKELY(swapChainProtected)) {
+            // we need a protected context
+            if (UTILS_UNLIKELY(mEGLContextProtected == EGL_NO_CONTEXT)) {
+                // we don't have one, create it!
+                EGLConfig config = ext.egl.KHR_no_config_context ? EGL_NO_CONFIG_KHR : mEGLConfig;
+                Config protectedContextAttribs{ mContextAttribs };
+                protectedContextAttribs[EGL_PROTECTED_CONTENT_EXT] = EGL_TRUE;
+                mEGLContextProtected = eglCreateContext(mEGLDisplay, config, mEGLContext,
+                        protectedContextAttribs.data());
+                if (UTILS_UNLIKELY(mEGLContextProtected == EGL_NO_CONTEXT)) {
+                    // couldn't create the protected context
+                    logEglError("eglCreateContext[EGL_PROTECTED_CONTENT_EXT]");
+                    ext.egl.EXT_protected_content = false;
+                    goto error;
+                }
+            }
+            type = ContextType::PROTECTED;
+            error: ;
+        }
+        // `type` is PROTECTED only if a protected context already existed or was just created.
+        bool const contextChange = type != mCurrentContextType;
+        mCurrentContextType = type;
+        // Callbacks bracket the switch; postContextChange receives 1 for PROTECTED, 0 otherwise.
+        if (UTILS_UNLIKELY(contextChange)) {
+            preContextChange();
+            bool const success = makeCurrent(mCurrentContextType, drawSwapChain, readSwapChain);
+            if (UTILS_UNLIKELY(!success)) {
+                logEglError("PlatformEGL::makeCurrent");
+                if (mEGLContextProtected != EGL_NO_CONTEXT) {
+                    eglDestroyContext(mEGLDisplay, mEGLContextProtected);
+                    mEGLContextProtected = EGL_NO_CONTEXT;
+                }
+                mCurrentContextType = ContextType::UNPROTECTED;
+            }
+            if (UTILS_LIKELY(!swapChainProtected && mEGLContextProtected != EGL_NO_CONTEXT)) {
+                // We don't need the protected context anymore, unbind and destroy right away.
+                eglDestroyContext(mEGLDisplay, mEGLContextProtected);
+                mEGLContextProtected = EGL_NO_CONTEXT;
+            }
+            size_t const contextIndex = (mCurrentContextType == ContextType::PROTECTED) ? 1 : 0;
+            postContextChange(contextIndex);
+            return;
+        }
+    }
+    // No context-type switch needed (or protected content unsupported): plain makeCurrent.
+    bool const success = makeCurrent(mCurrentContextType, drawSwapChain, readSwapChain);
+    if (UTILS_UNLIKELY(!success)) {
+        logEglError("PlatformEGL::makeCurrent");
    }
 }

 void PlatformEGL::commit(Platform::SwapChain* swapChain) noexcept {
-    EGLSurface sur = (EGLSurface) swapChain;
-    if (sur != EGL_NO_SURFACE) {
-        eglSwapBuffers(mEGLDisplay, sur);
+    if (swapChain) {
+        SwapChainEGL const* const sc = static_cast<SwapChainEGL const*>(swapChain);
+        if (sc->sur != EGL_NO_SURFACE) { // wrapper may exist without a surface (creation failed)
+            eglSwapBuffers(mEGLDisplay, sc->sur);
+        }
    }
 }

+// -----------------------------------------------------------------------------------------------
+
 bool PlatformEGL::canCreateFence() noexcept {
    return true;
 }
@@ -560,8 +696,10 @@ FenceStatus PlatformEGL::waitFence(
    return FenceStatus::ERROR;
 }

+// -----------------------------------------------------------------------------------------------
+
 OpenGLPlatform::ExternalTexture* PlatformEGL::createExternalImageTexture() noexcept {
-    ExternalTexture* outTexture = new ExternalTexture{};
+    ExternalTexture* outTexture = new(std::nothrow) ExternalTexture{};
    glGenTextures(1, &outTexture->id);
    if (UTILS_LIKELY(ext.gl.OES_EGL_image_external_essl3)) {
        outTexture->target = GL_TEXTURE_EXTERNAL_OES;
@@ -590,6 +728,8 @@ bool PlatformEGL::setExternalImage(void* externalImage,
    return true;
 }

+// -----------------------------------------------------------------------------------------------
+
 void PlatformEGL::initializeGlExtensions() noexcept {
    // We're guaranteed to be on an ES platform, since we're using EGL
    GLUtils::unordered_string_set glExtensions;
@@ -598,6 +738,17 @@ void PlatformEGL::initializeGlExtensions() noexcept {
    ext.gl.OES_EGL_image_external_essl3 = glExtensions.has("GL_OES_EGL_image_external_essl3");
 }

+EGLContext PlatformEGL::getContextForType(OpenGLPlatform::ContextType type) const noexcept {
+    switch (type) {
+        case ContextType::UNPROTECTED:
+            return mEGLContext;
+        case ContextType::PROTECTED:
+            return mEGLContextProtected;
+        case ContextType::NONE: break; // handled by the fallback below
+    }
+    return EGL_NO_CONTEXT; // NONE, or a value outside the enum: never fall off the end
+}
+
 // ---------------------------------------------------------------------------------------------

 PlatformEGL::Config::Config() = default;
@@ -634,6 +785,23 @@ void PlatformEGL::Config::erase(EGLint name) noexcept {
    }
 }

-} // namespace filament::backend
+// ------------------------------------------------------------------------------------------------

-// ---------------------------------------------------------------------------------------------
+EGLBoolean PlatformEGL::EGL::makeCurrent(EGLContext context, EGLSurface drawSurface,
+        EGLSurface readSurface) noexcept {
+    bool const alreadyCurrent = context == mCurrentContext &&
+            drawSurface == mCurrentDrawSurface && readSurface == mCurrentReadSurface;
+    if (UTILS_LIKELY(alreadyCurrent)) {
+        return EGL_TRUE; // cached binding matches, skip the EGL call
+    }
+    EGLBoolean const result = eglMakeCurrent(mEGLDisplay, drawSurface, readSurface, context);
+    if (result) {
+        // the cache is only updated once EGL has accepted the new binding
+        mCurrentContext = context;
+        mCurrentDrawSurface = drawSurface;
+        mCurrentReadSurface = readSurface;
+    }
+    return result;
+}
+
+} // namespace filament::backend
--- a/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp
@@ -14,21 +14,34 @@
 * limitations under the License.
 */

+#include <backend/AcquiredImage.h>
+#include <backend/Platform.h>
+#include <backend/platforms/PlatformEGL.h>
 #include <backend/platforms/PlatformEGLAndroid.h>

 #include "opengl/GLUtils.h"
 #include "ExternalStreamManagerAndroid.h"

 #include <android/api-level.h>
+#include <android/hardware_buffer.h>
+
+#include <utils/compiler.h>
+#include <utils/ostream.h>
+#include <utils/Log.h>

 #include <EGL/egl.h>
 #include <EGL/eglext.h>

-#include <utils/compiler.h>
-#include <utils/Log.h>
-
 #include <sys/system_properties.h>

+#include <jni.h>
+
+#include <new>
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
 // We require filament to be built with an API 19 toolchain, before that, OpenGLES 3.0 didn't exist
 // Actually, OpenGL ES 3.0 was added to API 18, but API 19 is the better target and
 // the minimum for Jetpack at the time of this comment.
@@ -129,11 +142,12 @@ Driver* PlatformEGLAndroid::createDriver(void* sharedContext,
 }

 void PlatformEGLAndroid::setPresentationTime(int64_t presentationTimeInNanosecond) noexcept {
-    if (mCurrentDrawSurface != EGL_NO_SURFACE) {
+    EGLSurface currentDrawSurface = eglGetCurrentSurface(EGL_DRAW);
+    if (currentDrawSurface != EGL_NO_SURFACE) {
        if (eglPresentationTimeANDROID) {
            eglPresentationTimeANDROID(
                    mEGLDisplay,
-                    mCurrentDrawSurface,
+                    currentDrawSurface,
                    presentationTimeInNanosecond);
        }
    }
@@ -165,14 +179,28 @@ int PlatformEGLAndroid::getOSVersion() const noexcept {

 AcquiredImage PlatformEGLAndroid::transformAcquiredImage(AcquiredImage source) noexcept {
    // Convert the AHardwareBuffer to EGLImage.
-    EGLClientBuffer clientBuffer = eglGetNativeClientBufferANDROID((const AHardwareBuffer*)source.image);
+    AHardwareBuffer const* const pHardwareBuffer = (const AHardwareBuffer*)source.image;
+
+    EGLClientBuffer clientBuffer = eglGetNativeClientBufferANDROID(pHardwareBuffer);
    if (!clientBuffer) {
        slog.e << "Unable to get EGLClientBuffer from AHardwareBuffer." << io::endl;
        return {};
    }
-    // Note that this cannot be used to stream protected video (for now) because we do not set EGL_PROTECTED_CONTENT_EXT.
-    EGLint attrs[] = { EGL_NONE, EGL_NONE };
-    EGLImageKHR eglImage = eglCreateImageKHR(mEGLDisplay, EGL_NO_CONTEXT, EGL_NATIVE_BUFFER_ANDROID, clientBuffer, attrs);
+
+    PlatformEGL::Config attributes;
+
+    if (__builtin_available(android 26, *)) {
+        AHardwareBuffer_Desc desc;
+        AHardwareBuffer_describe(pHardwareBuffer, &desc);
+        bool const isProtectedContent =
+                desc.usage & AHardwareBuffer_UsageFlags::AHARDWAREBUFFER_USAGE_PROTECTED_CONTENT;
+        if (isProtectedContent) {
+            attributes[EGL_PROTECTED_CONTENT_EXT] = EGL_TRUE;
+        }
+    }
+
+    EGLImageKHR eglImage = eglCreateImageKHR(mEGLDisplay,
+            EGL_NO_CONTEXT, EGL_NATIVE_BUFFER_ANDROID, clientBuffer, attributes.data());
    if (eglImage == EGL_NO_IMAGE_KHR) {
        slog.e << "eglCreateImageKHR returned no image." << io::endl;
        return {};
--- a/filament/backend/src/opengl/platforms/PlatformGLX.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformGLX.cpp
@@ -265,10 +265,11 @@ void PlatformGLX::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
    }
 }

-void PlatformGLX::makeCurrent(
-        Platform::SwapChain* drawSwapChain, Platform::SwapChain* readSwapChain) noexcept {
+bool PlatformGLX::makeCurrent(ContextType type, SwapChain* drawSwapChain,
+        SwapChain* readSwapChain) noexcept {
    g_glx.setCurrentContext(mGLXDisplay,
            (GLXDrawable)drawSwapChain, (GLXDrawable)readSwapChain, mGLXContext);
+    return true;
 }

 void PlatformGLX::commit(Platform::SwapChain* swapChain) noexcept {
--- a/filament/backend/src/opengl/platforms/PlatformWGL.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformWGL.cpp
@@ -255,8 +255,8 @@ void PlatformWGL::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
    wglMakeCurrent(mWhdc, mContext);
 }

-void PlatformWGL::makeCurrent(Platform::SwapChain* drawSwapChain,
-                              Platform::SwapChain* readSwapChain) noexcept {
+bool PlatformWGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
+        SwapChain* readSwapChain) noexcept {
    ASSERT_PRECONDITION_NON_FATAL(drawSwapChain == readSwapChain,
                                  "PlatformWGL does not support distinct draw/read swap chains.");

@@ -269,6 +269,7 @@ void PlatformWGL::makeCurrent(Platform::SwapChain* drawSwapChain,
            wglMakeCurrent(0, NULL);
        }
    }
+    return true;
 }

 void PlatformWGL::commit(Platform::SwapChain* swapChain) noexcept {
--- a/filament/backend/src/opengl/platforms/PlatformWebGL.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformWebGL.cpp
@@ -46,8 +46,9 @@ Platform::SwapChain* PlatformWebGL::createSwapChain(
 void PlatformWebGL::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
 }

-void PlatformWebGL::makeCurrent(Platform::SwapChain* drawSwapChain,
-        Platform::SwapChain* readSwapChain) noexcept {
+bool PlatformWebGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
+        SwapChain* readSwapChain) noexcept {
+    return true;
 }

 void PlatformWebGL::commit(Platform::SwapChain* swapChain) noexcept {
--- a/filament/backend/src/vulkan/VulkanBlitter.cpp
+++ b/filament/backend/src/vulkan/VulkanBlitter.cpp
@@ -154,15 +154,9 @@ struct BlitterUniforms {

 }// anonymous namespace

-VulkanBlitter::VulkanBlitter() noexcept = default;
-
-void VulkanBlitter::initialize(VkPhysicalDevice physicalDevice, VkDevice device,
-        VmaAllocator allocator, VulkanCommands* commands) noexcept {
-    mPhysicalDevice = physicalDevice;
-    mDevice = device;
-    mAllocator = allocator;
-    mCommands = commands;
-}
+VulkanBlitter::VulkanBlitter(VkPhysicalDevice physicalDevice, VulkanCommands* commands) noexcept
+    : mPhysicalDevice(physicalDevice),
+      mCommands(commands) {}

 void VulkanBlitter::resolve(VulkanAttachment dst, VulkanAttachment src) {

--- a/filament/backend/src/vulkan/VulkanBlitter.h
+++ b/filament/backend/src/vulkan/VulkanBlitter.h
@@ -17,6 +17,7 @@
 #ifndef TNT_FILAMENT_BACKEND_VULKANBLITTER_H
 #define TNT_FILAMENT_BACKEND_VULKANBLITTER_H

+#include "VulkanCommands.h"
 #include "VulkanContext.h"

 #include <utils/compiler.h>
@@ -32,10 +33,7 @@ struct VulkanProgram;

 class VulkanBlitter {
 public:
-    VulkanBlitter() noexcept;
-
-    void initialize(VkPhysicalDevice physicalDevice, VkDevice device,
-            VmaAllocator allocator, VulkanCommands* commands) noexcept;
+    VulkanBlitter(VkPhysicalDevice physicalDevice, VulkanCommands* commands) noexcept;

    void blit(VkFilter filter,
            VulkanAttachment dst, const VkOffset3D* dstRectPair,
@@ -47,8 +45,6 @@ public:

 private:
    UTILS_UNUSED VkPhysicalDevice mPhysicalDevice;
-    VkDevice mDevice;
-    VmaAllocator mAllocator;
    VulkanCommands* mCommands;
 };

--- a/filament/backend/src/vulkan/VulkanBuffer.h
+++ b/filament/backend/src/vulkan/VulkanBuffer.h
@@ -19,6 +19,7 @@

 #include "VulkanContext.h"
 #include "VulkanStagePool.h"
+#include "VulkanMemory.h"

 namespace filament::backend {

--- a/filament/backend/src/vulkan/VulkanCommands.cpp
+++ b/filament/backend/src/vulkan/VulkanCommands.cpp
@@ -47,7 +47,8 @@ VulkanCmdFence::VulkanCmdFence(VkFence ifence)

 VulkanCommandBuffer::VulkanCommandBuffer(VulkanResourceAllocator* allocator, VkDevice device,
        VkCommandPool pool)
-    : mResourceManager(allocator) {
+    : mResourceManager(allocator),
+      mPipeline(VK_NULL_HANDLE) {
    // Create the low-level command buffer.
    const VkCommandBufferAllocateInfo allocateInfo{
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
@@ -155,7 +156,7 @@ VulkanCommands::VulkanCommands(VkDevice device, VkQueue queue, uint32_t queueFam
 #endif
 }

-VulkanCommands::~VulkanCommands() {
+void VulkanCommands::terminate() {
    wait();
    gc();
    vkDestroyCommandPool(mDevice, mPool, VKALLOC);
@@ -435,8 +436,8 @@ void VulkanCommands::popGroupMarker() {
        auto const [marker, startTime] = mGroupMarkers->pop();
        auto const endTime = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> diff = endTime - startTime;
-        utils::slog.d << "<---- " << marker << " elapsed: " << (diff.count() * 1000) << " ms\n"
-                      << utils::io::flush;
+        utils::slog.d << "<---- " << marker << " elapsed: " << (diff.count() * 1000) << " ms"
+                      << utils::io::endl;
 #else
        mGroupMarkers->pop();
 #endif
--- a/filament/backend/src/vulkan/VulkanCommands.h
+++ b/filament/backend/src/vulkan/VulkanCommands.h
@@ -89,6 +89,15 @@ struct VulkanCommandBuffer {
    inline void reset() {
        fence.reset();
        mResourceManager.clear();
+        mPipeline = VK_NULL_HANDLE;
+    }
+
+    inline void setPipeline(VkPipeline pipeline) {
+        mPipeline = pipeline;
+    }
+
+    inline VkPipeline pipeline() const {
+        return mPipeline;
    }

    inline VkCommandBuffer buffer() const {
@@ -103,6 +112,7 @@ struct VulkanCommandBuffer {
 private:
    VulkanAcquireOnlyResourceManager mResourceManager;
    VkCommandBuffer mBuffer;
+    VkPipeline mPipeline;
 };

 // Allows classes to be notified after a new command buffer has been activated.
@@ -139,71 +149,72 @@ public:
 //    - We do this because vkGetFenceStatus must be called from the rendering thread.
 //
 class VulkanCommands {
-    public:
-        VulkanCommands(VkDevice device, VkQueue queue, uint32_t queueFamilyIndex,
-                VulkanContext* context, VulkanResourceAllocator* allocator);
-        ~VulkanCommands();
+public:
+    VulkanCommands(VkDevice device, VkQueue queue, uint32_t queueFamilyIndex,
+            VulkanContext* context, VulkanResourceAllocator* allocator);

-        // Creates a "current" command buffer if none exists, otherwise returns the current one.
-        VulkanCommandBuffer& get();
+    void terminate();

-        // Submits the current command buffer if it exists, then sets "current" to null.
-        // If there are no outstanding commands then nothing happens and this returns false.
-        bool flush();
+    // Creates a "current" command buffer if none exists, otherwise returns the current one.
+    VulkanCommandBuffer& get();

-        // Returns the "rendering finished" semaphore for the most recent flush and removes
-        // it from the existing dependency chain. This is especially useful for setting up
-        // vkQueuePresentKHR.
-        VkSemaphore acquireFinishedSignal();
+    // Submits the current command buffer if it exists, then sets "current" to null.
+    // If there are no outstanding commands then nothing happens and this returns false.
+    bool flush();

-        // Takes a semaphore that signals when the next flush can occur. Only one injected
-        // semaphore is allowed per flush. Useful after calling vkAcquireNextImageKHR.
-        void injectDependency(VkSemaphore next);
+    // Returns the "rendering finished" semaphore for the most recent flush and removes
+    // it from the existing dependency chain. This is especially useful for setting up
+    // vkQueuePresentKHR.
+    VkSemaphore acquireFinishedSignal();

-        // Destroys all command buffers that are no longer in use.
-        void gc();
+    // Takes a semaphore that signals when the next flush can occur. Only one injected
+    // semaphore is allowed per flush. Useful after calling vkAcquireNextImageKHR.
+    void injectDependency(VkSemaphore next);

-        // Waits for all outstanding command buffers to finish.
-        void wait();
+    // Destroys all command buffers that are no longer in use.
+    void gc();

-        // Updates the atomic "status" variable in every extant fence.
-        void updateFences();
+    // Waits for all outstanding command buffers to finish.
+    void wait();

-        // Sets an observer who is notified every time a new command buffer has been made "current".
-        // The observer's event handler can only be called during get().
-        void setObserver(CommandBufferObserver* observer) { mObserver = observer; }
+    // Updates the atomic "status" variable in every extant fence.
+    void updateFences();
+
+    // Sets an observer who is notified every time a new command buffer has been made "current".
+    // The observer's event handler can only be called during get().
+    void setObserver(CommandBufferObserver* observer) { mObserver = observer; }

 #if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
-        void pushGroupMarker(char const* str, VulkanGroupMarkers::Timestamp timestamp = {});
+    void pushGroupMarker(char const* str, VulkanGroupMarkers::Timestamp timestamp = {});

-        void popGroupMarker();
+    void popGroupMarker();

-        void insertEventMarker(char const* string, uint32_t len);
+    void insertEventMarker(char const* string, uint32_t len);

-        std::string getTopGroupMarker() const;
+    std::string getTopGroupMarker() const;
 #endif

-    private:
-        static constexpr int CAPACITY = FVK_MAX_COMMAND_BUFFERS;
-        VkDevice const mDevice;
-        VkQueue const mQueue;
-        VkCommandPool const mPool;
-        VulkanContext const* mContext;
+private:
+    static constexpr int CAPACITY = FVK_MAX_COMMAND_BUFFERS;
+    VkDevice const mDevice;
+    VkQueue const mQueue;
+    VkCommandPool const mPool;
+    VulkanContext const* mContext;

-        // int8 only goes up to 127, therefore capacity must be less than that.
-        static_assert(CAPACITY < 128);
-        int8_t mCurrentCommandBufferIndex = -1;
-        VkSemaphore mSubmissionSignal = {};
-        VkSemaphore mInjectedSignal = {};
-        utils::FixedCapacityVector<std::unique_ptr<VulkanCommandBuffer>> mStorage;
-        VkFence mFences[CAPACITY] = {};
-        VkSemaphore mSubmissionSignals[CAPACITY] = {};
-        uint8_t mAvailableBufferCount = CAPACITY;
-        CommandBufferObserver* mObserver = nullptr;
+    // int8 only goes up to 127, therefore capacity must be less than that.
+    static_assert(CAPACITY < 128);
+    int8_t mCurrentCommandBufferIndex = -1;
+    VkSemaphore mSubmissionSignal = {};
+    VkSemaphore mInjectedSignal = {};
+    utils::FixedCapacityVector<std::unique_ptr<VulkanCommandBuffer>> mStorage;
+    VkFence mFences[CAPACITY] = {};
+    VkSemaphore mSubmissionSignals[CAPACITY] = {};
+    uint8_t mAvailableBufferCount = CAPACITY;
+    CommandBufferObserver* mObserver = nullptr;

 #if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
-        std::unique_ptr<VulkanGroupMarkers> mGroupMarkers;
-        std::unique_ptr<VulkanGroupMarkers> mCarriedOverMarkers;
+    std::unique_ptr<VulkanGroupMarkers> mGroupMarkers;
+    std::unique_ptr<VulkanGroupMarkers> mCarriedOverMarkers;
 #endif
 };

--- a/filament/backend/src/vulkan/VulkanConstants.h
+++ b/filament/backend/src/vulkan/VulkanConstants.h
@@ -39,8 +39,16 @@
 // not required for validation.

 // FVK is short for Filament Vulkan
+
+// Enables Android systrace
 #define FVK_DEBUG_SYSTRACE                0x00000001
-#define FVK_DEBUG_GROUP_MARKERS           0x00000002
+
+// Group markers are used to denote collections of GPU commands.  It is typically at the
+// granularity of a renderpass. You can enable this along with FVK_DEBUG_DEBUG_UTILS to take
+// advantage of vkCmdBegin/EndDebugUtilsLabelEXT. You can also just enable this with
+// FVK_DEBUG_PRINT_GROUP_MARKERS to print the current marker to stdout.
+#define FVK_DEBUG_GROUP_MARKERS 0x00000002
+
 #define FVK_DEBUG_TEXTURE                 0x00000004
 #define FVK_DEBUG_LAYOUT_TRANSITION       0x00000008
 #define FVK_DEBUG_COMMAND_BUFFER          0x00000010
@@ -55,11 +63,17 @@
 #define FVK_DEBUG_PIPELINE_CACHE          0x00002000
 #define FVK_DEBUG_ALLOCATION              0x00004000

-// Usefaul default combinations
+// Enable the debug utils extension if it is available.
+#define FVK_DEBUG_DEBUG_UTILS             0x00008000
+
+// Use this to debug potential Handle/Resource leakage. It will print out reference counts for all
+// the currently active resources.
+#define FVK_DEBUG_RESOURCE_LEAK           0x00010000
+
+// Useful default combinations
 #define FVK_DEBUG_EVERYTHING              0xFFFFFFFF
 #define FVK_DEBUG_PERFORMANCE     \
-    FVK_DEBUG_SYSTRACE |          \
-    FVK_DEBUG_GROUP_MARKERS
+    FVK_DEBUG_SYSTRACE

 #define FVK_DEBUG_CORRECTNESS     \
    FVK_DEBUG_VALIDATION |        \
@@ -72,21 +86,40 @@
    FVK_DEBUG_PRINT_GROUP_MARKERS

 #ifndef NDEBUG
-#define FVK_DEBUG_FLAGS FVK_DEBUG_PERFORMANCE
+#define FVK_DEBUG_FLAGS (FVK_DEBUG_PERFORMANCE)
 #else
 #define FVK_DEBUG_FLAGS 0
 #endif

-#define FVK_ENABLED(flags) ((FVK_DEBUG_FLAGS) & (flags))
-#define FVK_ENABLED_BOOL(flags) ((bool) FVK_ENABLED(flags))
+#define FVK_ENABLED(flags) (((FVK_DEBUG_FLAGS) & (flags)) == (flags))
+
+// Group markers work only if validation or debug utils is enabled, since they use
+// vkCmd(Begin/End)DebugUtilsLabelEXT or vkCmdDebugMarker(Begin/End)EXT
+#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
+static_assert(FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) || FVK_ENABLED(FVK_DEBUG_VALIDATION));
+#endif

 // Ensure dependencies are met between debug options
 #if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
 static_assert(FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS));
 #endif

+// Only enable debug utils if validation is enabled.
+#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS)
+static_assert(FVK_ENABLED(FVK_DEBUG_VALIDATION));
+#endif
+
 // end dependcy checks

+// Shorthand for combination of enabled debug flags
+#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS) || FVK_ENABLED(FVK_DEBUG_TEXTURE)
+#define FVK_ENABLED_DEBUG_SAMPLER_NAME 1
+#else
+#define FVK_ENABLED_DEBUG_SAMPLER_NAME 0
+#endif
+
+// end shorthands
+
 #if FVK_ENABLED(FVK_DEBUG_SYSTRACE)

 #include <utils/Systrace.h>
--- a/filament/backend/src/vulkan/VulkanContext.h
+++ b/filament/backend/src/vulkan/VulkanContext.h
@@ -19,7 +19,6 @@

 #include "VulkanConstants.h"
 #include "VulkanImageUtility.h"
-#include "VulkanPipelineCache.h"
 #include "VulkanUtility.h"

 #include <utils/bitset.h>
@@ -42,7 +41,7 @@ struct VulkanTimerQuery;
 struct VulkanCommandBuffer;

 struct VulkanAttachment {
-    VulkanTexture* texture;
+    VulkanTexture* texture = nullptr;
    uint8_t level = 0;
    uint16_t layer = 0;
    VkImage getImage() const;
--- a/filament/backend/src/vulkan/VulkanDriver.cpp
+++ b/filament/backend/src/vulkan/VulkanDriver.cpp
--- a/filament/backend/src/vulkan/VulkanDriver.h
+++ b/filament/backend/src/vulkan/VulkanDriver.h
@@ -28,6 +28,8 @@
 #include "VulkanSamplerCache.h"
 #include "VulkanStagePool.h"
 #include "VulkanUtility.h"
+#include "caching/VulkanDescriptorSetManager.h"
+#include "caching/VulkanPipelineLayoutCache.h"

 #include "DriverBase.h"
 #include "private/backend/Driver.h"
@@ -45,7 +47,33 @@ public:
    static Driver* create(VulkanPlatform* platform, VulkanContext const& context,
            Platform::DriverConfig const& driverConfig) noexcept;

+#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS)
+    // Encapsulates the VK_EXT_debug_utils extension.  In particular, we use
+    // vkSetDebugUtilsObjectNameEXT and vkCreateDebugUtilsMessengerEXT
+    class DebugUtils {
+    public:
+        static void setName(VkObjectType type, uint64_t handle, char const* name);
+
+    private:
+        static DebugUtils* get();
+
+        DebugUtils(VkInstance instance, VkDevice device, VulkanContext const* context);
+        ~DebugUtils();
+
+        VkInstance const mInstance;
+        VkDevice const mDevice;
+        bool const mEnabled;
+        VkDebugUtilsMessengerEXT mDebugMessenger = VK_NULL_HANDLE;
+
+        static DebugUtils* mSingleton;
+
+        friend class VulkanDriver;
+    };
+#endif // FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS)
+
 private:
+    static constexpr uint8_t MAX_SAMPLER_BINDING_COUNT = Program::SAMPLER_BINDING_COUNT;
+
    void debugCommandBegin(CommandStream* cmds, bool synchronous,
            const char* methodName) noexcept override;

@@ -77,25 +105,20 @@ private:
    VulkanDriver& operator=(VulkanDriver const&) = delete;

 private:
-    inline void setRenderPrimitiveBuffer(Handle<HwRenderPrimitive> rph, Handle<HwVertexBuffer> vbh,
-            Handle<HwIndexBuffer> ibh);
-
-    inline void setRenderPrimitiveRange(Handle<HwRenderPrimitive> rph, PrimitiveType pt,
-            uint32_t offset, uint32_t minIndex, uint32_t maxIndex, uint32_t count);
-
    void collectGarbage();

    VulkanPlatform* mPlatform = nullptr;
-    std::unique_ptr<VulkanCommands> mCommands;
    std::unique_ptr<VulkanTimestamps> mTimestamps;
-    std::unique_ptr<VulkanTexture> mEmptyTexture;
+
+    // Placeholder resources
+    VulkanTexture* mEmptyTexture;
+    VulkanBufferObject* mEmptyBufferObject;

    VulkanSwapChain* mCurrentSwapChain = nullptr;
    VulkanRenderTarget* mDefaultRenderTarget = nullptr;
    VulkanRenderPass mCurrentRenderPass = {};
    VmaAllocator mAllocator = VK_NULL_HANDLE;
    VkDebugReportCallbackEXT mDebugCallback = VK_NULL_HANDLE;
-    VkDebugUtilsMessengerEXT mDebugMessenger = VK_NULL_HANDLE;

    VulkanContext mContext = {};
    VulkanResourceAllocator mResourceAllocator;
@@ -105,13 +128,18 @@ private:
    // thread.
    VulkanThreadSafeResourceManager mThreadSafeResourceManager;

+    VulkanCommands mCommands;
+    VulkanPipelineLayoutCache mPipelineLayoutCache;
    VulkanPipelineCache mPipelineCache;
    VulkanStagePool mStagePool;
    VulkanFboCache mFramebufferCache;
    VulkanSamplerCache mSamplerCache;
    VulkanBlitter mBlitter;
-    VulkanSamplerGroup* mSamplerBindings[VulkanPipelineCache::SAMPLER_BINDING_COUNT] = {};
+    VulkanSamplerGroup* mSamplerBindings[MAX_SAMPLER_BINDING_COUNT] = {};
    VulkanReadPixels mReadPixels;
+    VulkanDescriptorSetManager mDescriptorSetManager;
+
+    VulkanDescriptorSetManager::GetPipelineLayoutFunction mGetPipelineFunction;

    bool const mIsSRGBSwapChainSupported;
 };
--- a/filament/backend/src/vulkan/VulkanFboCache.cpp
+++ b/filament/backend/src/vulkan/VulkanFboCache.cpp
@@ -64,7 +64,8 @@ bool VulkanFboCache::FboKeyEqualFn::operator()(const FboKey& k1, const FboKey& k
    return true;
 }

-void VulkanFboCache::initialize(VkDevice device) noexcept { mDevice = device; }
+VulkanFboCache::VulkanFboCache(VkDevice device)
+    : mDevice(device) {}

 VulkanFboCache::~VulkanFboCache() {
    ASSERT_POSTCONDITION(mFramebufferCache.empty() && mRenderPassCache.empty(),
@@ -238,7 +239,7 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept {
            .format = config.colorFormat[i],
            .samples = (VkSampleCountFlagBits) config.samples,
            .loadOp = clear ? kClear : (discard ? kDontCare : kKeep),
-            .storeOp = config.samples == 1 ? kEnableStore : kDisableStore,
+            .storeOp = kEnableStore,
            .stencilLoadOp = kDontCare,
            .stencilStoreOp = kDisableStore,
            .initialLayout = ((!discard && config.initialColorLayoutMask & (1 << i)) || clear)
--- a/filament/backend/src/vulkan/VulkanFboCache.h
+++ b/filament/backend/src/vulkan/VulkanFboCache.h
@@ -104,10 +104,9 @@ public:
        bool operator()(const FboKey& k1, const FboKey& k2) const;
    };

+    explicit VulkanFboCache(VkDevice device);
    ~VulkanFboCache();

-    void initialize(VkDevice device) noexcept;
-
    // Retrieves or creates a VkFramebuffer handle.
    VkFramebuffer getFramebuffer(FboKey config) noexcept;

--- a/filament/backend/src/vulkan/VulkanHandles.cpp
+++ b/filament/backend/src/vulkan/VulkanHandles.cpp
@@ -16,9 +16,13 @@

 #include "VulkanHandles.h"

+#include "VulkanDriver.h"
 #include "VulkanConstants.h"
+#include "VulkanDriver.h"
 #include "VulkanMemory.h"
+#include "VulkanUtility.h"
 #include "spirv/VulkanSpirvUtils.h"
+#include "utils/Log.h"

 #include <backend/platforms/VulkanPlatform.h>

@@ -28,15 +32,17 @@ using namespace bluevk;

 namespace filament::backend {

-static void flipVertically(VkRect2D* rect, uint32_t framebufferHeight) {
+namespace {
+
+void flipVertically(VkRect2D* rect, uint32_t framebufferHeight) {
    rect->offset.y = framebufferHeight - rect->offset.y - rect->extent.height;
 }

-static void flipVertically(VkViewport* rect, uint32_t framebufferHeight) {
+void flipVertically(VkViewport* rect, uint32_t framebufferHeight) {
    rect->y = framebufferHeight - rect->y - rect->height;
 }

-static void clampToFramebuffer(VkRect2D* rect, uint32_t fbWidth, uint32_t fbHeight) {
+void clampToFramebuffer(VkRect2D* rect, uint32_t fbWidth, uint32_t fbHeight) {
    int32_t x = std::max(rect->offset.x, 0);
    int32_t y = std::max(rect->offset.y, 0);
    int32_t right = std::min(rect->offset.x + (int32_t) rect->extent.width, (int32_t) fbWidth);
@@ -47,17 +53,125 @@ static void clampToFramebuffer(VkRect2D* rect, uint32_t fbWidth, uint32_t fbHeig
    rect->extent.height = std::max(top - y, 0);
 }

+template<typename Bitmask>
+static constexpr Bitmask fromStageFlags(ShaderStageFlags2 flags, uint8_t binding) {
+    Bitmask ret = 0;
+    if (flags & ShaderStageFlags2::VERTEX) {
+        ret |= (getVertexStage<Bitmask>() << binding);
+    }
+    if (flags & ShaderStageFlags2::FRAGMENT) {
+        ret |= (getFragmentStage<Bitmask>() << binding);
+    }
+    return ret;
+}
+
+UsageFlags getUsageFlags(uint16_t binding, ShaderStageFlags flags, UsageFlags src) {
+    // NOTE: if you modify this function, you also need to modify getShaderStageFlags.
+    assert_invariant(binding < MAX_SAMPLER_COUNT);
+    if (any(flags & ShaderStageFlags::VERTEX)) {
+        src.set(binding);
+    }
+    if (any(flags & ShaderStageFlags::FRAGMENT)) {
+        src.set(MAX_SAMPLER_COUNT + binding);
+    }
+    // TODO: add support for compute by extending SHADER_MODULE_COUNT and ensuring UsageFlags
+    // has 186 bits (MAX_SAMPLER_COUNT * 3)
+    // assert_invariant(!any(flags & ~(ShaderStageFlags::VERTEX | ShaderStageFlags::FRAGMENT)));
+    return src;
+}
+
+constexpr decltype(VulkanProgram::MAX_SHADER_MODULES) MAX_SHADER_MODULES =
+        VulkanProgram::MAX_SHADER_MODULES;
+
+using LayoutDescriptionList = VulkanProgram::LayoutDescriptionList;
+
+template<typename Bitmask>
+void addDescriptors(Bitmask mask,
+        utils::FixedCapacityVector<DescriptorSetLayoutBinding>& outputList) {
+    constexpr uint8_t MODULE_OFFSET = (sizeof(Bitmask) * 8) / MAX_SHADER_MODULES;
+    for (uint8_t i = 0; i < MODULE_OFFSET; ++i) {
+        bool const hasVertex = (mask & (1ULL << i)) != 0;
+        bool const hasFragment = (mask & (1ULL << (MODULE_OFFSET + i))) != 0;
+        if (!hasVertex && !hasFragment) {
+            continue;
+        }
+
+        DescriptorSetLayoutBinding binding{
+            .binding = i,
+            .flags = DescriptorFlags::NONE,
+            .count = 0,// This is always 0 for now as we pass the size of the UBOs in the Driver API
+                       // instead.
+        };
+        if (hasVertex) {
+            binding.stageFlags = ShaderStageFlags2::VERTEX;
+        }
+        if (hasFragment) {
+            binding.stageFlags = static_cast<ShaderStageFlags2>(
+                    binding.stageFlags | ShaderStageFlags2::FRAGMENT);
+        }
+        if constexpr (std::is_same_v<Bitmask, UniformBufferBitmask>) {
+            binding.type = DescriptorType::UNIFORM_BUFFER;
+        } else if constexpr (std::is_same_v<Bitmask, SamplerBitmask>) {
+            binding.type = DescriptorType::SAMPLER;
+        } else if constexpr (std::is_same_v<Bitmask, InputAttachmentBitmask>) {
+            binding.type = DescriptorType::INPUT_ATTACHMENT;
+        }
+        outputList.push_back(binding);
+    }
+}
+
+inline VkDescriptorSetLayout createDescriptorSetLayout(VkDevice device,
+        VkDescriptorSetLayoutCreateInfo const& info) {
+    VkDescriptorSetLayout layout = VK_NULL_HANDLE; // never hand back an indeterminate handle
+    VkResult const result = vkCreateDescriptorSetLayout(device, &info, VKALLOC, &layout);
+    ASSERT_POSTCONDITION(result == VK_SUCCESS, "Unable to create descriptor set layout.");
+    return layout;
+}
+
+} // anonymous namespace
+
+VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(VkDevice device, VkDescriptorSetLayoutCreateInfo const& info,
+        Bitmask const& bitmask)
+    : VulkanResource(VulkanResourceType::DESCRIPTOR_SET_LAYOUT),
+      mDevice(device),
+      vklayout(createDescriptorSetLayout(device, info)),
+      bitmask(bitmask),
+      bindings(getBindings(bitmask)),
+      count(Count::fromLayoutBitmask(bitmask)) {
+}
+
+VulkanDescriptorSetLayout::~VulkanDescriptorSetLayout() {
+    vkDestroyDescriptorSetLayout(mDevice, vklayout, VKALLOC);
+}
+
 VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept
    : HwProgram(builder.getName()),
      VulkanResource(VulkanResourceType::PROGRAM),
      mInfo(new PipelineInfo()),
      mDevice(device) {
+
+    constexpr uint8_t UBO_MODULE_OFFSET = (sizeof(UniformBufferBitmask) * 8) / MAX_SHADER_MODULES;
+    constexpr uint8_t SAMPLER_MODULE_OFFSET = (sizeof(SamplerBitmask) * 8) / MAX_SHADER_MODULES;
+    constexpr uint8_t INPUT_ATTACHMENT_MODULE_OFFSET =
+            (sizeof(InputAttachmentBitmask) * 8) / MAX_SHADER_MODULES;
+
    Program::ShaderSource const& blobs = builder.getShadersSource();
    auto& modules = mInfo->shaders;

    auto const& specializationConstants = builder.getSpecializationConstants();

    std::vector<uint32_t> shader;
+
+    // TODO: this will be moved out of the shader as the descriptor set layout will be provided by
+    // Filament instead of parsed from the shaders. See [GDSR] in VulkanDescriptorSetManager.h
+    UniformBufferBitmask uboMask = 0;
+    SamplerBitmask samplerMask = 0;
+    InputAttachmentBitmask inputAttachmentMask = 0;
+
+    static_assert(static_cast<ShaderStage>(0) == ShaderStage::VERTEX &&
+            static_cast<ShaderStage>(1) == ShaderStage::FRAGMENT &&
+            MAX_SHADER_MODULES == 2);
+
    for (size_t i = 0; i < MAX_SHADER_MODULES; i++) {
        Program::ShaderBlob const& blob = blobs[i];

@@ -70,6 +184,12 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept
            dataSize = shader.size() * 4;
        }

+        auto const [ubo, sampler, inputAttachment] = getProgramBindings(blob);
+        uboMask |= (static_cast<UniformBufferBitmask>(ubo) << (UBO_MODULE_OFFSET * i));
+        samplerMask |= (static_cast<SamplerBitmask>(sampler) << (SAMPLER_MODULE_OFFSET * i));
+        inputAttachmentMask |= (static_cast<InputAttachmentBitmask>(inputAttachment)
+                                << (INPUT_ATTACHMENT_MODULE_OFFSET * i));
+
        VkShaderModule& module = modules[i];
        VkShaderModuleCreateInfo moduleInfo = {
            .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@@ -78,10 +198,44 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept
        };
        VkResult result = vkCreateShaderModule(mDevice, &moduleInfo, VKALLOC, &module);
        ASSERT_POSTCONDITION(result == VK_SUCCESS, "Unable to create shader module.");
+
+#if FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS)
+        std::string name{ builder.getName().c_str(), builder.getName().size() };
+        switch (static_cast<ShaderStage>(i)) {
+            case ShaderStage::VERTEX:
+                name += "_vs";
+                break;
+            case ShaderStage::FRAGMENT:
+                name += "_fs";
+                break;
+            default:
+                PANIC_POSTCONDITION("Unexpected stage");
+                break;
+        }
+        VulkanDriver::DebugUtils::setName(VK_OBJECT_TYPE_SHADER_MODULE,
+                reinterpret_cast<uint64_t>(module), name.c_str());
+#endif
    }

+    LayoutDescriptionList& layouts = mInfo->layouts;
+    layouts[0].bindings = utils::FixedCapacityVector<DescriptorSetLayoutBinding>::with_capacity(
+            countBits(collapseStages(uboMask)));
+    layouts[1].bindings = utils::FixedCapacityVector<DescriptorSetLayoutBinding>::with_capacity(
+            countBits(collapseStages(samplerMask)));
+    layouts[2].bindings = utils::FixedCapacityVector<DescriptorSetLayoutBinding>::with_capacity(
+            countBits(collapseStages(inputAttachmentMask)));
+
+    addDescriptors(uboMask, layouts[0].bindings);
+    addDescriptors(samplerMask, layouts[1].bindings);
+    addDescriptors(inputAttachmentMask, layouts[2].bindings);
+
+#if FVK_ENABLED_DEBUG_SAMPLER_NAME
+    auto& bindingToName = mInfo->bindingToName;
+#endif
+
    auto& groupInfo = builder.getSamplerGroupInfo();
    auto& bindingToSamplerIndex = mInfo->bindingToSamplerIndex;
+    auto& bindings = mInfo->bindings;
    auto& usage = mInfo->usage;
    for (uint8_t groupInd = 0; groupInd < Program::SAMPLER_BINDING_COUNT; groupInd++) {
        auto const& group = groupInfo[groupInd];
@@ -89,32 +243,20 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept
        for (size_t i = 0; i < samplers.size(); ++i) {
            uint32_t const binding = samplers[i].binding;
            bindingToSamplerIndex[binding] = (groupInd << 8) | (0xff & i);
-            usage = VulkanPipelineCache::getUsageFlags(binding, group.stageFlags, usage);
+            assert_invariant(bindings.find(binding) == bindings.end());
+            bindings.insert(binding);
+            usage = getUsageFlags(binding, group.stageFlags, usage);
+
+#if FVK_ENABLED_DEBUG_SAMPLER_NAME
+            bindingToName[binding] = samplers[i].name.c_str();
+#endif
        }
    }

-    #if FVK_ENABLED(FVK_DEBUG_SHADER_MODULE)
-        utils::slog.d << "Created VulkanProgram " << builder << ", shaders = (" << bundle.vertex
-                      << ", " << bundle.fragment << ")" << utils::io::endl;
-    #endif
-}
-
-VulkanProgram::VulkanProgram(VkDevice device, VkShaderModule vs, VkShaderModule fs,
-        CustomSamplerInfoList const& samplerInfo) noexcept
-    : VulkanResource(VulkanResourceType::PROGRAM),
-      mInfo(new PipelineInfo()),
-      mDevice(device) {
-    mInfo->shaders[0] = vs;
-    mInfo->shaders[1] = fs;
-    auto& bindingToSamplerIndex = mInfo->bindingToSamplerIndex;
-    auto& usage = mInfo->usage;
-    bindingToSamplerIndex.resize(samplerInfo.size());
-    for (uint16_t binding = 0; binding < samplerInfo.size(); ++binding) {
-        auto const& sampler = samplerInfo[binding];
-        bindingToSamplerIndex[binding]
-                = (sampler.groupIndex << 8) | (0xff & sampler.samplerIndex);
-        usage = VulkanPipelineCache::getUsageFlags(binding, sampler.flags, usage);
-    }
+#if FVK_ENABLED(FVK_DEBUG_SHADER_MODULE)
+    utils::slog.d << "Created VulkanProgram " << builder << ", shaders = (" << modules[0]
+                  << ", " << modules[1] << ")" << utils::io::endl;
+#endif
 }

 VulkanProgram::~VulkanProgram() {
@@ -262,22 +404,21 @@ uint8_t VulkanRenderTarget::getColorTargetCount(const VulkanRenderPass& pass) co
    return count;
 }

-VulkanVertexBuffer::VulkanVertexBuffer(VulkanContext& context, VulkanStagePool& stagePool,
-        VulkanResourceAllocator* allocator, uint8_t bufferCount, uint8_t attributeCount,
-        uint32_t elementCount, AttributeArray const& attribs)
-    : HwVertexBuffer(bufferCount, attributeCount, elementCount, attribs),
-      VulkanResource(VulkanResourceType::VERTEX_BUFFER),
-      mInfo(new PipelineInfo(attribs.size())),
-      mResources(allocator) {
-    auto attribDesc = mInfo->mSoa.data<PipelineInfo::ATTRIBUTE_DESCRIPTION>();
-    auto bufferDesc = mInfo->mSoa.data<PipelineInfo::BUFFER_DESCRIPTION>();
-    auto offsets = mInfo->mSoa.data<PipelineInfo::OFFSETS>();
-    auto attribToBufferIndex = mInfo->mSoa.data<PipelineInfo::ATTRIBUTE_TO_BUFFER_INDEX>();
-    std::fill(mInfo->mSoa.begin<PipelineInfo::ATTRIBUTE_TO_BUFFER_INDEX>(),
-            mInfo->mSoa.end<PipelineInfo::ATTRIBUTE_TO_BUFFER_INDEX>(), -1);
+VulkanVertexBufferInfo::VulkanVertexBufferInfo(
+        uint8_t bufferCount, uint8_t attributeCount, AttributeArray const& attributes)
+    : HwVertexBufferInfo(bufferCount, attributeCount),
+      VulkanResource(VulkanResourceType::VERTEX_BUFFER_INFO),
+      mInfo(attributes.size()) {

-    for (uint32_t attribIndex = 0; attribIndex < attribs.size(); attribIndex++) {
-        Attribute attrib = attribs[attribIndex];
+    auto attribDesc = mInfo.mSoa.data<PipelineInfo::ATTRIBUTE_DESCRIPTION>();
+    auto bufferDesc = mInfo.mSoa.data<PipelineInfo::BUFFER_DESCRIPTION>();
+    auto offsets = mInfo.mSoa.data<PipelineInfo::OFFSETS>();
+    auto attribToBufferIndex = mInfo.mSoa.data<PipelineInfo::ATTRIBUTE_TO_BUFFER_INDEX>();
+    std::fill(mInfo.mSoa.begin<PipelineInfo::ATTRIBUTE_TO_BUFFER_INDEX>(),
+            mInfo.mSoa.end<PipelineInfo::ATTRIBUTE_TO_BUFFER_INDEX>(), -1);
+
+    for (uint32_t attribIndex = 0; attribIndex < attributes.size(); attribIndex++) {
+        Attribute attrib = attributes[attribIndex];
        bool const isInteger = attrib.flags & Attribute::FLAG_INTEGER_TARGET;
        bool const isNormalized = attrib.flags & Attribute::FLAG_NORMALIZED;
        VkFormat vkformat = getVkFormat(attrib.type, isNormalized, isInteger);
@@ -289,7 +430,7 @@ VulkanVertexBuffer::VulkanVertexBuffer(VulkanContext& context, VulkanStagePool&
        // expects to receive floats or ints.
        if (attrib.buffer == Attribute::BUFFER_UNUSED) {
            vkformat = isInteger ? VK_FORMAT_R8G8B8A8_UINT : VK_FORMAT_R8G8B8A8_SNORM;
-            attrib = attribs[0];
+            attrib = attributes[0];
        }
        offsets[attribIndex] = attrib.offset;
        attribDesc[attribIndex] = {
@@ -305,14 +446,23 @@ VulkanVertexBuffer::VulkanVertexBuffer(VulkanContext& context, VulkanStagePool&
    }
 }

-VulkanVertexBuffer::~VulkanVertexBuffer() {
-    delete mInfo;
+VulkanVertexBuffer::VulkanVertexBuffer(VulkanContext& context, VulkanStagePool& stagePool,
+        VulkanResourceAllocator* allocator,
+        uint32_t vertexCount, Handle<HwVertexBufferInfo> vbih)
+    : HwVertexBuffer(vertexCount),
+      VulkanResource(VulkanResourceType::VERTEX_BUFFER),
+      vbih(vbih),
+      mBuffers(MAX_VERTEX_BUFFER_COUNT), // TODO: can we do better here?
+      mResources(allocator) {
 }

-void VulkanVertexBuffer::setBuffer(VulkanBufferObject* bufferObject, uint32_t index) {
-    size_t count = attributes.size();
-    auto vkbuffers = mInfo->mSoa.data<PipelineInfo::VK_BUFFER>();
-    auto attribToBuffer = mInfo->mSoa.data<PipelineInfo::ATTRIBUTE_TO_BUFFER_INDEX>();
+void VulkanVertexBuffer::setBuffer(VulkanResourceAllocator const& allocator,
+        VulkanBufferObject* bufferObject, uint32_t index) {
+    VulkanVertexBufferInfo const* const vbi =
+            const_cast<VulkanResourceAllocator&>(allocator).handle_cast<VulkanVertexBufferInfo*>(vbih);
+    size_t const count = vbi->getAttributeCount();
+    VkBuffer* const vkbuffers = getVkBuffers();
+    int8_t const* const attribToBuffer = vbi->getAttributeToBuffer();
    for (uint8_t attribIndex = 0; attribIndex < count; attribIndex++) {
        if (attribToBuffer[attribIndex] == static_cast<int8_t>(index)) {
            vkbuffers[attribIndex] = bufferObject->buffer.getGpuBuffer();
@@ -328,35 +478,6 @@ VulkanBufferObject::VulkanBufferObject(VmaAllocator allocator, VulkanStagePool&
      buffer(allocator, stagePool, getBufferObjectUsage(bindingType), byteCount),
      bindingType(bindingType) {}

-void VulkanRenderPrimitive::setPrimitiveType(PrimitiveType pt) {
-    this->type = pt;
-    switch (pt) {
-        case PrimitiveType::POINTS:
-            primitiveTopology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
-            break;
-        case PrimitiveType::LINES:
-            primitiveTopology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
-            break;
-        case PrimitiveType::LINE_STRIP:
-            primitiveTopology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
-            break;
-        case PrimitiveType::TRIANGLES:
-            primitiveTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
-            break;
-        case PrimitiveType::TRIANGLE_STRIP:
-            primitiveTopology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
-            break;
-    }
-}
-
-void VulkanRenderPrimitive::setBuffers(VulkanVertexBuffer* vertexBuffer,
-        VulkanIndexBuffer* indexBuffer) {
-    this->vertexBuffer = vertexBuffer;
-    this->indexBuffer = indexBuffer;
-    mResources.acquire(vertexBuffer);
-    mResources.acquire(indexBuffer);
-}
-
 VulkanTimerQuery::VulkanTimerQuery(std::tuple<uint32_t, uint32_t> indices)
    : VulkanThreadSafeResource(VulkanResourceType::TIMER_QUERY),
      mStartingQueryIndex(std::get<0>(indices)),
@@ -388,4 +509,46 @@ bool VulkanTimerQuery::isCompleted() noexcept {

 VulkanTimerQuery::~VulkanTimerQuery() = default;

+VulkanRenderPrimitive::VulkanRenderPrimitive(VulkanResourceAllocator* resourceAllocator,
+        PrimitiveType pt, Handle<HwVertexBuffer> vbh, Handle<HwIndexBuffer> ibh)
+        : VulkanResource(VulkanResourceType::RENDER_PRIMITIVE),
+          mResources(resourceAllocator) {
+    type = pt;
+    vertexBuffer = resourceAllocator->handle_cast<VulkanVertexBuffer*>(vbh);
+    indexBuffer = resourceAllocator->handle_cast<VulkanIndexBuffer*>(ibh);
+    mResources.acquire(vertexBuffer);
+    mResources.acquire(indexBuffer);
+}
+
+using Bitmask = VulkanDescriptorSetLayout::Bitmask;
+
+Bitmask Bitmask::fromBackendLayout(descset::DescriptorSetLayout const& layout) {
+    Bitmask mask;
+    for (auto const& binding: layout.bindings) {
+        switch (binding.type) {
+            case descset::DescriptorType::UNIFORM_BUFFER: {
+                if (binding.flags == descset::DescriptorFlags::DYNAMIC_OFFSET) {
+                    mask.dynamicUbo |= fromStageFlags<UniformBufferBitmask>(binding.stageFlags,
+                            binding.binding);
+                } else {
+                    mask.ubo |= fromStageFlags<UniformBufferBitmask>(binding.stageFlags,
+                            binding.binding);
+                }
+                break;
+            }
+            case descset::DescriptorType::SAMPLER: {
+                mask.sampler |= fromStageFlags<SamplerBitmask>(binding.stageFlags, binding.binding);
+                break;
+            }
+            case descset::DescriptorType::INPUT_ATTACHMENT: {
+                mask.inputAttachment |=
+                        fromStageFlags<InputAttachmentBitmask>(binding.stageFlags, binding.binding);
+                break;
+            }
+        }
+    }
+    return mask;
+}
+
+
 } // namespace filament::backend
--- a/Show More
+++ b/Show More