Fixup shader code

Reduce the number of prepared mipmap pipelines; this should really be configurable if we want to preserve this logic in the long run.
Add check for if the format (Or non srgb format) is storage compatible
2025-06-10 18:04:41 -04:00 · 2025-06-10 16:54:28 -04:00 · 2025-06-10 16:54:03 -04:00 · 2025-06-10 16:18:17 -04:00 · 2025-06-10 15:58:02 -04:00 · 2025-06-10 15:57:48 -04:00
247 changed files with 9814 additions and 3710 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -57,7 +57,7 @@ SpaceBeforeInheritanceColon: true
 SpaceBeforeParens: ControlStatements
 SpaceBeforeRangeBasedForLoopColon: false
 SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 0
+SpacesBeforeTrailingComments: 1
 SpacesInAngles: false
 SpacesInCStyleCastParentheses: false
 SpacesInContainerLiterals: false
--- a/.github/actions/get-commit-msg/action.yml
+++ b/.github/actions/get-commit-msg/action.yml
@@ -0,0 +1,51 @@
+name: 'Get commit message'
+outputs:
+  msg:
+    value: ${{ steps.action_output.outputs.msg }}
+runs:
+  using: "composite"
+  steps:
+    - name: Find commit message (on push)
+      if: github.event_name == 'push'
+      shell: bash
+      # Event fields are handed over through env so that contributor-controlled text
+      # (author name, commit message) is never expanded into the script source itself.
+      env:
+        COMMIT_ID: ${{ github.event.head_commit.id }}
+        AUTHOR_NAME: ${{ github.event.head_commit.author.name }}
+        AUTHOR_EMAIL: ${{ github.event.head_commit.author.email }}
+        TSTAMP: ${{ github.event.head_commit.timestamp }}
+        COMMIT_BODY: ${{ github.event.head_commit.message }}
+      run: |
+        echo "commit ${COMMIT_ID}" >> /tmp/commit_msg.txt
+        echo "Author: ${AUTHOR_NAME}<${AUTHOR_EMAIL}>" >> /tmp/commit_msg.txt
+        echo "Date: ${TSTAMP}" >> /tmp/commit_msg.txt
+        echo "" >> /tmp/commit_msg.txt
+        printf '%s\n' "${COMMIT_BODY}" >> /tmp/commit_msg.txt
+    - name: Checkout code
+      shell: bash
+      id: checkout_code
+      if: github.event_name == 'pull_request'
+      env:
+        # The PR branch name is contributor-controlled, so it goes through env and is quoted.
+        HEAD_REF: ${{ github.event.pull_request.head.ref }}
+      run: |
+        # Remember the current commit so the final step can switch back to it.
+        echo "hash=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+        git checkout "${HEAD_REF}"
+        git log -1 >> /tmp/commit_msg.txt
+    - shell: bash
+      id: action_output
+      run: |
+        DELIMITER="EOF_FILE_CONTENT_$(date +%s)" # Using timestamp to make it more unique
+        echo "msg<<$DELIMITER" >> "$GITHUB_OUTPUT"
+        cat /tmp/commit_msg.txt >> "$GITHUB_OUTPUT"
+        echo "$DELIMITER" >> "$GITHUB_OUTPUT"
+        echo "----- got commit message ---"
+        cat /tmp/commit_msg.txt
+        echo "----------------------------"
+    - name: Uncheckout code
+      shell: bash
+      if: github.event_name == 'pull_request'
+      run: |
+        git checkout ${{ steps.checkout_code.outputs.hash }}
--- a/.github/workflows/postsubmit.yml
+++ b/.github/workflows/postsubmit.yml
@@ -0,0 +1,40 @@
+name: 'Post-submit tasks'
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  update-renderdiff-goldens:
+    name: update-renderdiff-goldens
+    runs-on: 'ubuntu-24.04-4core'
+    steps:
+      - uses: actions/checkout@v4.1.6
+        with:
+          fetch-depth: 0
+      - uses: ./.github/actions/linux-prereq
+      - id: get_commit_msg
+        uses: ./.github/actions/get-commit-msg
+      - name: Prerequisites
+        run: pip install tifffile numpy
+      - name: Run update script
+        env:
+          GH_TOKEN: ${{ secrets.FILAMENTBOT_TOKEN }}
+          # Hand the commit message to the script through the environment; expanding it
+          # inline would let quotes or $(...) in the message alter the shell script itself.
+          COMMIT_MSG: ${{ steps.get_commit_msg.outputs.msg }}
+        run: |
+          GOLDEN_BRANCH=$(printf '%s\n' "${COMMIT_MSG}" | python3 test/renderdiff/src/commit_msg.py)
+          # The first line is "commit <hash>". Strip only that literal prefix: `tr -d 'commit '`
+          # deletes every c/o/m/i/t/space character and so corrupts hashes containing "c".
+          COMMIT_HASH=$(printf '%s\n' "${COMMIT_MSG}" | head -n 1 | sed -e 's/^commit //')
+          if [[ "${GOLDEN_BRANCH}" != "main" ]]; then
+              git config --global user.email "filament.bot@gmail.com"
+              git config --global user.name "Filament Bot"
+              git config --global credential.helper cache
+              echo "branch==${GOLDEN_BRANCH}"
+              echo "hash==${COMMIT_HASH}"
+              python3 test/renderdiff/src/update_golden.py --branch="${GOLDEN_BRANCH}" \
+                  --merge-to-main --filament-tag="${COMMIT_HASH}" --golden-repo-token="${GH_TOKEN}"
+          fi
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -3,10 +3,10 @@ name: Presubmit
 on:
  push:
    branches:
-    - main
+      - main
  pull_request:
    branches:
-    - main
+      - main

 jobs:
  build-desktop-mac:
@@ -41,8 +41,7 @@ jobs:

  build-windows:
    name: build-windows
-    runs-on: win-2019-16core
-
+    runs-on: windows-2022-32core
    steps:
      - uses: actions/checkout@v4.1.6
        with:
@@ -110,31 +109,35 @@ jobs:
          fetch-depth: 0
      - name: Check for manual edits to /docs
        run: |
-           echo "${{ github.event.pull_request.head.sha }} -- ${{ github.event.pull_request.head.sha }}"
-           # disable for now
-           # bash docs_src/build/presubmit_check.sh ${{ github.event.pull_request.head.sha }}
+          echo "${{ github.event.pull_request.head.sha }} -- ${{ github.event.pull_request.head.sha }}"
+          # disable for now
+          # bash docs_src/build/presubmit_check.sh ${{ github.event.pull_request.head.sha }}

  test-renderdiff:
    name: test-renderdiff
    runs-on: macos-14-xlarge
    steps:
+      - uses: actions/checkout@v4.1.6
+        with:
+          fetch-depth: 0
+      - id: get_commit_msg
+        uses: ./.github/actions/get-commit-msg
      - uses: actions/checkout@v4.1.6
        with:
          fetch-depth: 0
      - uses: ./.github/actions/mac-prereq
      - name: Cache Mesa and deps
-        id: mesa-cache
        uses: actions/cache@v4
        with:
          path: mesa
          key: ${{ runner.os }}-mesa-deps-2-${{ vars.MESA_VERSION }}
      - name: Prerequisites
-        id: prereqs
        run: |
          bash build/common/get-mesa.sh
          pip install tifffile numpy
      - name: Run Test
-        run: bash test/renderdiff/test.sh
+        run: |
+          echo "${{ steps.get_commit_msg.outputs.msg }}" | bash test/renderdiff/test.sh
      - uses: actions/upload-artifact@v4
        with:
          name: presubmit-renderdiff-result
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -226,7 +226,7 @@ jobs:

  build-windows:
    name: build-windows
-    runs-on: windows-2019-32core
+    runs-on: windows-2022-32core
    if: github.event_name == 'release' || github.event.inputs.platform == 'windows'

    steps:
--- a/.github/workflows/windows-continuous.yml
+++ b/.github/workflows/windows-continuous.yml
@@ -10,7 +10,7 @@ on:
 jobs:
  build-windows:
    name: build-windows
-    runs-on: windows-2019-32core
+    runs-on: windows-2022-32core

    steps:
      - uses: actions/checkout@v4.1.6
--- a/BUILDING.md
+++ b/BUILDING.md
@@ -97,6 +97,10 @@ Make sure you've installed the following dependencies:
 - `libxcomposite-dev` (`libXcomposite-devel` on Fedora)
 - `libxxf86vm-dev` (`libXxf86vm-devel` on Fedora)

+```shell
+sudo apt install clang-14 libglu1-mesa-dev libc++-14-dev libc++abi-14-dev ninja-build libxi-dev libxcomposite-dev libxxf86vm-dev -y
+```
+
 After dependencies have been installed, we highly recommend using the [easy build](#easy-build)
 script.

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,6 +49,10 @@ option(FILAMENT_SUPPORTS_OSMESA "Enable OSMesa (headless GL context) for Filamen

 option(FILAMENT_ENABLE_FGVIEWER "Enable the frame graph viewer" OFF)

+# This is to disable GTAO for the short-term while we investigate a way to better manage size increases.
+# On the regular filament build (where size is of less concern), we enable GTAO by default.
+option(FILAMENT_DISABLE_GTAO "Disable GTAO" OFF)
+
 set(FILAMENT_NDK_VERSION "" CACHE STRING
    "Android NDK version or version prefix to be used when building for Android."
 )
--- a/NEW_RELEASE_NOTES.md
+++ b/NEW_RELEASE_NOTES.md
@@ -1,9 +1,13 @@
 # Filament Release Notes log

 **If you are merging a PR into main**: please add the release note below, under the *Release notes
-for next branch cut* header.
+We are changing the way Vulkan buffers are handled. We need to switch over to a managed (or view-based) model where the data stored inside the object is a proxy to a Vulkan object that can dynamically be swapped around.

 **If you are cherry-picking a commit into an rc/ branch**: add the release note under the
 appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md).

 ## Release notes for next branch cut
+
+- samples: samples now have a CLI to select backend api
+- materials: sampler now export their type in the material binary [⚠️ **New Material Version**]
+- materials: new mutable specialization constants feature. See the [materials documentation](https://google.github.io/filament/Materials.html) for details. [⚠️ **New Material Version**]
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ repositories {
 }

 dependencies {
-    implementation 'com.google.android.filament:filament-android:1.60.1'
+    implementation 'com.google.android.filament:filament-android:1.61.0'
 }
 ```

@@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`:
 iOS projects can use CocoaPods to install the latest release:

 ```shell
-pod 'Filament', '~> 1.60.1'
+pod 'Filament', '~> 1.61.0'
 ```

 ## Documentation
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -7,6 +7,9 @@ A new header is inserted each time a *tag* is created.
 Instead, if you are authoring a PR for the main branch, add your release note to
 [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md).

+## v1.61.1
+
+
 ## v1.61.0

 - materials: sampler now export their type in the material binary [⚠️ **New Material Version**]
--- a/android/filament-android/src/main/cpp/MaterialInstance.cpp
+++ b/android/filament-android/src/main/cpp/MaterialInstance.cpp
@@ -60,6 +60,14 @@ static void setParameter(JNIEnv* env, jlong nativeMaterialInstance, jstring name
    env->ReleaseStringUTFChars(name_, name);
 }

+template<typename T>
+static void setConstant(JNIEnv* env, jlong nativeMaterialInstance, jstring name_, T v) {
+    MaterialInstance* instance = (MaterialInstance*) nativeMaterialInstance;
+    const char *name = env->GetStringUTFChars(name_, 0);
+    instance->setConstant(name, v);
+    env->ReleaseStringUTFChars(name_, name);
+}
+
 extern "C"
 JNIEXPORT void JNICALL
 Java_com_google_android_filament_MaterialInstance_nSetParameterBool(JNIEnv *env, jclass,
@@ -264,6 +272,13 @@ Java_com_google_android_filament_MaterialInstance_nSetParameterTexture(
    env->ReleaseStringUTFChars(name_, name);
 }

+extern "C"
+JNIEXPORT void JNICALL
+Java_com_google_android_filament_MaterialInstance_nSetConstantBool(JNIEnv *env, jclass,
+        jlong nativeMaterialInstance, jstring name_, jboolean x) {
+    setConstant(env, nativeMaterialInstance, name_, bool(x));
+}
+
 extern "C"
 JNIEXPORT void JNICALL
 Java_com_google_android_filament_MaterialInstance_nSetScissor(
--- a/android/filament-android/src/main/java/com/google/android/filament/MaterialInstance.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/MaterialInstance.java
@@ -402,6 +402,16 @@ public class MaterialInstance {
        nSetParameterFloat4(getNativeObject(), name, color[0], color[1], color[2], color[3]);
    }

+    /**
+     * Sets the value of a bool constant.
+     *
+     * @param name the name of the material constant
+     * @param x    the value of the material constant
+     */
+    public void setConstant(@NonNull String name, boolean x) {
+        nSetConstantBool(getNativeObject(), name, x);
+    }
+
    /**
     * Set-up a custom scissor rectangle; by default it is disabled.
     *
@@ -921,6 +931,9 @@ public class MaterialInstance {
            @NonNull String name, int element, @NonNull @Size(min = 1) float[] v,
            @IntRange(from = 0) int offset, @IntRange(from = 1) int count);

+    private static native void nSetConstantBool(long nativeMaterialInstance,
+            @NonNull String name, boolean x);
+
    private static native void nSetParameterTexture(long nativeMaterialInstance,
            @NonNull String name, long nativeTexture, long sampler);

--- a/android/gradle.properties
+++ b/android/gradle.properties
@@ -1,5 +1,5 @@
 GROUP=com.google.android.filament
-VERSION_NAME=1.60.1
+VERSION_NAME=1.61.0

 POM_DESCRIPTION=Real-time physically based rendering engine for Android.

--- a/build/common/get-mesa.sh
+++ b/build/common/get-mesa.sh
@@ -41,6 +41,8 @@ for cmd in "${NEEDED_PYTHON_DEPS[@]}"; do
 done
 deactivate

+LOCAL_PKG_CONFIG_PATH=
+
 # Install system deps
 if [[ "$OS_NAME" == "Linux" ]]; then
    if [[ "$GITHUB_WORKFLOW" ]]; then
@@ -82,6 +84,9 @@ elif [[ "$OS_NAME" == "Darwin" ]]; then
        fi
    fi
    HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=true brew install autoconf automake libx11 libxext libxrandr llvm@${LLVM_VERSION} ninja meson pkg-config libxshmfence
+
+    # For reasons unknown, this is necessary for pkg-config to find homebrew's packages
+    LOCAL_PKG_CONFIG_PATH="/opt/homebrew/lib/pkgconfig:$PKG_CONFIG_PATH"
 fi # [[ "$OS_NAME" == x ]]

 LOCAL_LDFLAGS=${LDFLAGS}
@@ -134,9 +139,11 @@ fi
 # -Dgallium-drivers=swrast  => builds GL software rasterizer
 # -Dvulkan-drivers=swrast   => builds VK software rasterizer
 # -Dgallium-drivers=llvmpipe is needed for GL >= 4.1 pipe-screen (see src/gallium/auxiliary/target-helpers/inline_sw_helper.h)
-CXX=${LOCAL_CXX} CC=${LOCAL_CC} PATH=${LOCAL_PATH} LDFLAGS=${LOCAL_LDFLAGS} CPPFLAGS=${LOCAL_CPPFLAGS} \
+PKG_CONFIG_PATH=${LOCAL_PKG_CONFIG_PATH} PATH=${LOCAL_PATH} \
+CXX=${LOCAL_CXX} CC=${LOCAL_CC} LDFLAGS=${LOCAL_LDFLAGS} CPPFLAGS=${LOCAL_CPPFLAGS} \
   meson setup --wipe builddir/ -Dprefix="${MESA_DIR}/out" -Dglx=xlib -Dosmesa=true -Dgallium-drivers=llvmpipe,swrast -Dvulkan-drivers=swrast
-CXX=${LOCAL_CXX} CC=${LOCAL_CC} PATH=${LOCAL_PATH} LDFLAGS=${LOCAL_LDFLAGS} CPPFLAGS=${LOCAL_CPPFLAGS} \
+PKG_CONFIG_PATH=${LOCAL_PKG_CONFIG_PATH} PATH=${LOCAL_PATH} \
+CXX=${LOCAL_CXX} CC=${LOCAL_CC} LDFLAGS=${LOCAL_LDFLAGS} CPPFLAGS=${LOCAL_CPPFLAGS} \
   meson install -C builddir/

 # Disable python venv
--- a/build/windows/build-github.bat
+++ b/build/windows/build-github.bat
@@ -47,7 +47,11 @@ if "%RUNNING_LOCALLY%" == "1" (
    set "PATH=%PATH%;C:\Program Files\7-Zip"
 )

-call "C:\Program Files (x86)\Microsoft Visual Studio\2019\%VISUAL_STUDIO_VERSION%\VC\Auxiliary\Build\vcvars64.bat"
+:: Outdated windows-2019 pattern
+:: call "C:\Program Files (x86)\Microsoft Visual Studio\2019\%VISUAL_STUDIO_VERSION%\VC\Auxiliary\Build\vcvars64.bat"
+
+call "C:\Program Files\Microsoft Visual Studio\2022\%VISUAL_STUDIO_VERSION%\VC\Auxiliary\Build\vcvars64.bat"
+echo Passed vcvars64.bat
 if errorlevel 1 exit /b %errorlevel%

 msbuild /version
@@ -107,7 +111,7 @@ cd out\cmake-%variant%
 if errorlevel 1 exit /b %errorlevel%

 cmake ..\.. ^
-    -G "Visual Studio 16 2019" ^
+    -G "Visual Studio 17 2022" ^
    -A x64 ^
    %flag% ^
    -DCMAKE_INSTALL_PREFIX=..\%variant% ^
--- a/docs_src/markdeep/Materials.md.html
+++ b/docs_src/markdeep/Materials.md.html
@@ -211,7 +211,7 @@ when using textures.
 This property can dramatically change the appearance of a surface. Non-metallic surfaces have
 chromatic diffuse reflection and achromatic specular reflection (reflected light does not change
 color). Metallic surfaces do not have any diffuse reflection and chromatic specular reflection
-(reflected light takes on the color of the surfaced as defined by `baseColor`). 
+(reflected light takes on the color of the surface as defined by `baseColor`).

 The effect of `metallic` is shown in figure [metallicProperty] (click on the image to see a
 larger version).
@@ -247,7 +247,7 @@ The effect of `roughness` on metallic surfaces is shown in figure [roughnessCond
 When refraction through an object is enabled (using a `refractonType` of `thin` or `solid`), the
 `roughness` property will also affect the refractions, as shown in figure
 [roughnessRefractionProperty] (click on the image to see a larger version).
-                             
+
 ![Figure [roughnessRefractionProperty]: Refractive sphere with `roughness` varying from 0.0
 (left) to 1.0 (right)](images/materials/refraction_roughness.png)

@@ -306,7 +306,7 @@ The sheen color controls the color appearance and strength of an optional sheen
 base layer described by the properties above. The sheen layer always sits below the clear coat layer
 if such a layer is present.

-The sheen layer can be used to represent cloth and fabric materials. Please refer to 
+The sheen layer can be used to represent cloth and fabric materials. Please refer to
 section [Cloth model] for more information about cloth and fabric materials.

 The effect of `sheenColor` is shown in figure [materialSheenColor]
@@ -519,14 +519,14 @@ light to bend further away from the initial path.

 Table [commonMatIOR] describes acceptable refractive indices for various types of materials.

-Material                   |        IOR       
+Material                   |        IOR
 --------------------------:|:-----------------
 Air                        | 1.0
-Water                      | 1.33             
-Common liquids             | 1.33 to 1.5      
-Common gemstones           | 1.58 to 2.33     
-Plastics, glass            | 1.5 to 1.58      
-Other dielectric materials | 1.33 to 1.58     
+Water                      | 1.33
+Common liquids             | 1.33 to 1.5
+Common gemstones           | 1.58 to 2.33
+Plastics, glass            | 1.5 to 1.58
+Other dielectric materials | 1.33 to 1.58
 [Table [commonMatIOR]: Index of refraction of common materials]

 The appearance of a refractive material will greatly depend on the `refractionType` and
@@ -1091,30 +1091,37 @@ Value
    `bool` or `number`, depending on the `type` of the constant. The type must be one of the types
    described in table [materialConstantsTypes].

-         Type          |            Description                   |      Default
-:----------------------|:-----------------------------------------|:------------------
-int                    | A signed, 32 bit GLSL int                |         0
-float                  | A single-precision GLSL float            |         0.0
-bool                   | A GLSL bool                              |         false
+    Constants may also be specified as mutable by setting the `mutable` property to `true`. Only
+    `bool` spec constants may be specified as mutable.
+
+| Type  | Description                   | Default | May be mutable? |
+|:------|:------------------------------|:--------|-----------------|
+| int   | A signed, 32 bit GLSL int     | 0       | no              |
+| float | A single-precision GLSL float | 0.0     | no              |
+| bool  | A GLSL bool                   | false   | yes             |
 [Table [materialConstantsTypes]: Material constants types]

 Description
 :   Lists the constant parameters accepted by your material. These constants can be set, or
    "specialized", at runtime when loading a material package. Multiple materials can be loaded from
-    the same material package with differing constant parameter specializations. Once a material is
-    loaded from a material package, its constant parameters cannot be changed. Compared to regular
-    parameters, constant parameters allow the compiler to generate more efficient code. Access
-    constant parameters from the shader by prefixing the name with `materialConstant_`. For example,
-    a constant parameter named `myConstant` is accessed in the shader as
-    `materialConstant_myConstant`. If a constant parameter is not set at runtime, the default is
-    used.
+    the same material package with differing constant parameter specializations. If a constant
+    parameter is specialized as mutable, it may be changed at any time on a per-instance basis via
+    `MaterialInstance::setConstant(name, value)`. Otherwise, once a material is loaded from a
+    material package, its constant parameters cannot be changed.
+
+    Compared to regular parameters, constant parameters allow the compiler to generate more
+    efficient code. Access constant parameters from the shader by prefixing the name with
+    `materialConstant_`. For example, a constant parameter named `myConstant` is accessed in the
+    shader as `materialConstant_myConstant`. If a constant parameter is not set at runtime, the
+    default is used.

 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ JSON
 material {
    constants : [
        {
           name : overrideAlpha,
-           type : bool
+           type : bool,
+           mutable : true,
        },
        {
           name : customAlpha,
@@ -2553,7 +2560,7 @@ standard skybox material. It produces a list of 2 parameters, named `showSun` an
 respectively a boolean and a cubemap texture.

 ```text
-$ matc --reflect parameters filament/src/materials/skybox.mat 
+$ matc --reflect parameters filament/src/materials/skybox.mat
 {
  "parameters": [
    {
@@ -2569,7 +2576,7 @@ $ matc --reflect parameters filament/src/materials/skybox.mat
    }
  ]
 }
-``` 
+```

 ### --variant-filter

--- a/filament/CMakeLists.txt
+++ b/filament/CMakeLists.txt
@@ -262,11 +262,16 @@ set(MATERIAL_SRCS
        src/materials/ssao/mipmapDepth.mat
        src/materials/ssao/sao.mat
        src/materials/ssao/saoBentNormals.mat
-        src/materials/ssao/gtao.mat
-        src/materials/ssao/gtaoBentNormals.mat
        src/materials/vsmMipmap.mat
 )

+if (NOT FILAMENT_DISABLE_GTAO)
+    list(APPEND MATERIAL_SRCS
+        src/materials/ssao/gtao.mat
+        src/materials/ssao/gtaoBentNormals.mat
+    )
+endif()
+
 set(MATERIAL_FL0_SRCS
        src/materials/defaultMaterial.mat
        src/materials/skybox.mat
@@ -316,6 +321,10 @@ if (FILAMENT_FORCE_PROFILING_MODE)
    add_definitions(-DFILAMENT_FORCE_PROFILING_MODE)
 endif()

+if (FILAMENT_DISABLE_GTAO)
+    add_definitions(-DFILAMENT_DISABLE_GTAO)
+endif()
+
 # ==================================================================================================
 # Definitions
 # ==================================================================================================
@@ -463,27 +472,29 @@ add_custom_command(
        APPEND
 )

-add_custom_command(
-        OUTPUT "${MATERIAL_DIR}/gtao.filamat"
-        DEPENDS src/materials/ssao/ssaoUtils.fs
-        DEPENDS src/materials/ssao/ssct.fs
-        DEPENDS src/materials/utils/depthUtils.fs
-        DEPENDS src/materials/utils/geometry.fs
-        DEPENDS src/materials/ssao/gtaoImpl.fs
-        DEPENDS src/materials/ssao/ssctImpl.fs
-        APPEND
-)
+if (NOT FILAMENT_DISABLE_GTAO)
+    add_custom_command(
+            OUTPUT "${MATERIAL_DIR}/gtao.filamat"
+            DEPENDS src/materials/ssao/ssaoUtils.fs
+            DEPENDS src/materials/ssao/ssct.fs
+            DEPENDS src/materials/utils/depthUtils.fs
+            DEPENDS src/materials/utils/geometry.fs
+            DEPENDS src/materials/ssao/gtaoImpl.fs
+            DEPENDS src/materials/ssao/ssctImpl.fs
+            APPEND
+    )

-add_custom_command(
-        OUTPUT "${MATERIAL_DIR}/gtaoBentNormals.filamat"
-        DEPENDS src/materials/ssao/ssaoUtils.fs
-        DEPENDS src/materials/ssao/ssct.fs
-        DEPENDS src/materials/utils/depthUtils.fs
-        DEPENDS src/materials/utils/geometry.fs
-        DEPENDS src/materials/ssao/gtaoImpl.fs
-        DEPENDS src/materials/ssao/ssctImpl.fs
-        APPEND
-)
+    add_custom_command(
+            OUTPUT "${MATERIAL_DIR}/gtaoBentNormals.filamat"
+            DEPENDS src/materials/ssao/ssaoUtils.fs
+            DEPENDS src/materials/ssao/ssct.fs
+            DEPENDS src/materials/utils/depthUtils.fs
+            DEPENDS src/materials/utils/geometry.fs
+            DEPENDS src/materials/ssao/gtaoImpl.fs
+            DEPENDS src/materials/ssao/ssctImpl.fs
+            APPEND
+    )
+endif()

 add_custom_command(
        OUTPUT "${MATERIAL_DIR}/bilateralBlur.filamat"
--- a/filament/backend/CMakeLists.txt
+++ b/filament/backend/CMakeLists.txt
@@ -180,8 +180,11 @@ if (FILAMENT_SUPPORTS_VULKAN)
            src/vulkan/VulkanAsyncHandles.h
            src/vulkan/VulkanBlitter.cpp
            src/vulkan/VulkanBlitter.h
-            src/vulkan/VulkanBuffer.cpp
            src/vulkan/VulkanBuffer.h
+            src/vulkan/VulkanBufferCache.h
+            src/vulkan/VulkanBufferCache.cpp
+            src/vulkan/VulkanBufferProxy.h
+            src/vulkan/VulkanBufferProxy.cpp
            src/vulkan/VulkanCommands.cpp
            src/vulkan/VulkanCommands.h
            src/vulkan/VulkanConstants.h
@@ -254,16 +257,36 @@ if (FILAMENT_SUPPORTS_WEBGPU)
    list(APPEND SRCS
            include/backend/platforms/WebGPUPlatform.h
            src/webgpu/platform/WebGPUPlatform.cpp
+            src/webgpu/WebGPUBufferBase.cpp
+            src/webgpu/WebGPUBufferBase.h
+            src/webgpu/WebGPUBufferObject.cpp
+            src/webgpu/WebGPUBufferObject.h
            src/webgpu/WebGPUConstants.h
+            src/webgpu/WebGPUDescriptorSet.cpp
+            src/webgpu/WebGPUDescriptorSet.h
+            src/webgpu/WebGPUDescriptorSetLayout.cpp
+            src/webgpu/WebGPUDescriptorSetLayout.h
            src/webgpu/WebGPUDriver.cpp
            src/webgpu/WebGPUDriver.h
-            src/webgpu/WebGPUHandles.cpp
-            src/webgpu/WebGPUHandles.h
+            src/webgpu/WebGPUIndexBuffer.cpp
+            src/webgpu/WebGPUIndexBuffer.h
            src/webgpu/WebGPUPipelineCreation.cpp
            src/webgpu/WebGPUPipelineCreation.h
+            src/webgpu/WebGPUProgram.cpp
+            src/webgpu/WebGPUProgram.h
+            src/webgpu/WebGPURenderPrimitive.h
+            src/webgpu/WebGPURenderTarget.cpp
+            src/webgpu/WebGPURenderTarget.h
+            src/webgpu/WebGPUStrings.h
            src/webgpu/WebGPUSwapChain.cpp
            src/webgpu/WebGPUSwapChain.h
-            src/webgpu/WGPUProgram.cpp
+            src/webgpu/WebGPUTexture.cpp
+            src/webgpu/WebGPUTexture.h
+            src/webgpu/WebGPUVertexBuffer.cpp
+            src/webgpu/WebGPUVertexBuffer.h
+            src/webgpu/WebGPUVertexBufferInfo.cpp
+            src/webgpu/WebGPUVertexBufferInfo.h
+            src/webgpu/SpdMipmapGenerator.cpp
    )
    if (WIN32)
        list(APPEND SRCS src/webgpu/platform/WebGPUPlatformWindows.cpp)
--- a/filament/backend/include/backend/Program.h
+++ b/filament/backend/include/backend/Program.h
@@ -20,6 +20,7 @@
 #include <utils/CString.h>
 #include <utils/FixedCapacityVector.h>
 #include <utils/Invocable.h>
+#include <utils/bitset.h>

 #include <backend/DriverEnums.h>

@@ -66,6 +67,7 @@ public:
    using DescriptorBindingsInfo = utils::FixedCapacityVector<Descriptor>;
    using DescriptorSetInfo = std::array<DescriptorBindingsInfo, MAX_DESCRIPTOR_SET_COUNT>;
    using SpecializationConstantsInfo = utils::FixedCapacityVector<SpecializationConstant>;
+    using MutableSpecConstantsInfo = utils::bitset8;
    using ShaderBlob = utils::FixedCapacityVector<uint8_t>;
    using ShaderSource = std::array<ShaderBlob, SHADER_TYPE_COUNT>;

@@ -102,7 +104,8 @@ public:
    Program& descriptorBindings(backend::descriptor_set_t set,
            DescriptorBindingsInfo descriptorBindings) noexcept;

-    Program& specializationConstants(SpecializationConstantsInfo specConstants) noexcept;
+    Program& specializationConstants(SpecializationConstantsInfo specConstants,
+            uint32_t firstMutableId, MutableSpecConstantsInfo mutableSpecConstants) noexcept;

    struct PushConstant {
        utils::CString name;
--- a/filament/backend/include/backend/platforms/OpenGLPlatform.h
+++ b/filament/backend/include/backend/platforms/OpenGLPlatform.h
@@ -199,7 +199,7 @@ public:
     */
    virtual bool makeCurrent(ContextType type,
            SwapChain* UTILS_NONNULL drawSwapChain,
-            SwapChain* UTILS_NONNULL readSwapChain) noexcept = 0;
+            SwapChain* UTILS_NONNULL readSwapChain) = 0;

    /**
     * Called by the driver to make the OpenGL context active on the calling thread and bind
@@ -219,7 +219,7 @@ public:
            SwapChain* UTILS_NONNULL drawSwapChain,
            SwapChain* UTILS_NONNULL readSwapChain,
            utils::Invocable<void()> preContextChange,
-            utils::Invocable<void(size_t index)> postContextChange) noexcept;
+            utils::Invocable<void(size_t index)> postContextChange);

    /**
     * Called by the backend just before calling commit()
--- a/filament/backend/include/backend/platforms/PlatformCocoaGL.h
+++ b/filament/backend/include/backend/platforms/PlatformCocoaGL.h
@@ -58,7 +58,7 @@ protected:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) override;
    void commit(SwapChain* swapChain) noexcept override;
    ExternalTexture* createExternalImageTexture() noexcept override;
    void destroyExternalImageTexture(ExternalTexture* texture) noexcept override;
--- a/filament/backend/include/backend/platforms/PlatformCocoaTouchGL.h
+++ b/filament/backend/include/backend/platforms/PlatformCocoaTouchGL.h
@@ -55,7 +55,7 @@ public:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) override;
    void commit(SwapChain* swapChain) noexcept override;

    ExternalTexture* createExternalImageTexture() noexcept override;
--- a/filament/backend/include/backend/platforms/PlatformEGL.h
+++ b/filament/backend/include/backend/platforms/PlatformEGL.h
@@ -109,11 +109,11 @@ protected:

    bool makeCurrent(ContextType type,
            SwapChain* drawSwapChain,
-            SwapChain* readSwapChain) noexcept override;
+            SwapChain* readSwapChain) override;

    void makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain,
            utils::Invocable<void()> preContextChange,
-            utils::Invocable<void(size_t index)> postContextChange) noexcept override;
+            utils::Invocable<void(size_t index)> postContextChange) override;

    void commit(SwapChain* swapChain) noexcept override;

@@ -148,12 +148,12 @@ protected:
    EGLContext getContextForType(ContextType type) const noexcept;

    // makes the draw and read surface current without changing the current context
-    EGLBoolean makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) noexcept {
+    EGLBoolean makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) {
        return egl.makeCurrent(drawSurface, readSurface);
    }

    // makes context current and set draw and read surfaces to EGL_NO_SURFACE
-    EGLBoolean makeCurrent(EGLContext context) noexcept {
+    EGLBoolean makeCurrent(EGLContext context) {
        return egl.makeCurrent(context, mEGLDummySurface, mEGLDummySurface);
    }

@@ -211,9 +211,9 @@ private:
    public:
        explicit EGL(EGLDisplay& dpy) : mEGLDisplay(dpy) {}
        EGLBoolean makeCurrent(EGLContext context,
-                EGLSurface drawSurface, EGLSurface readSurface) noexcept;
+                EGLSurface drawSurface, EGLSurface readSurface);

-        EGLBoolean makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) noexcept {
+        EGLBoolean makeCurrent(EGLSurface drawSurface, EGLSurface readSurface) {
            return makeCurrent(mCurrentContext, drawSurface, readSurface);
        }
    } egl{ mEGLDisplay };
--- a/filament/backend/include/backend/platforms/PlatformEGLAndroid.h
+++ b/filament/backend/include/backend/platforms/PlatformEGLAndroid.h
@@ -127,7 +127,7 @@ protected:
 protected:
    bool makeCurrent(ContextType type,
            SwapChain* drawSwapChain,
-            SwapChain* readSwapChain) noexcept override;
+            SwapChain* readSwapChain) override;

 private:
    struct InitializeJvmForPerformanceManagerIfNeeded {
--- a/filament/backend/include/backend/platforms/PlatformGLX.h
+++ b/filament/backend/include/backend/platforms/PlatformGLX.h
@@ -51,7 +51,7 @@ protected:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) override;
    void commit(SwapChain* swapChain) noexcept override;

 private:
--- a/filament/backend/include/backend/platforms/PlatformOSMesa.h
+++ b/filament/backend/include/backend/platforms/PlatformOSMesa.h
@@ -56,7 +56,7 @@ protected:
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
    bool makeCurrent(ContextType type, SwapChain* drawSwapChain,
-            SwapChain* readSwapChain) noexcept override;
+            SwapChain* readSwapChain) override;
    void commit(SwapChain* swapChain) noexcept override;

 private:
--- a/filament/backend/include/backend/platforms/PlatformWGL.h
+++ b/filament/backend/include/backend/platforms/PlatformWGL.h
@@ -53,7 +53,7 @@ protected:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) override;
    void commit(SwapChain* swapChain) noexcept override;

 protected:
--- a/filament/backend/include/backend/platforms/PlatformWebGL.h
+++ b/filament/backend/include/backend/platforms/PlatformWebGL.h
@@ -46,7 +46,7 @@ protected:
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;
    void destroySwapChain(SwapChain* swapChain) noexcept override;
-    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept override;
+    bool makeCurrent(ContextType type, SwapChain* drawSwapChain, SwapChain* readSwapChain) override;
    void commit(SwapChain* swapChain) noexcept override;
 };

--- a/filament/backend/include/backend/platforms/WebGPUPlatform.h
+++ b/filament/backend/include/backend/platforms/WebGPUPlatform.h
@@ -22,6 +22,7 @@
 #include <webgpu/webgpu_cpp.h>

 #include <cstdint>
+#include <vector>

 namespace filament::backend {

@@ -56,6 +57,10 @@ protected:
            const Platform::DriverConfig& driverConfig) noexcept override;

 private:
+    // returns adapter request option variations applicable for the particular
+    // platform
+    [[nodiscard]] static std::vector<wgpu::RequestAdapterOptions> getAdapterOptions();
+
    // we may consider having the driver own this in the future
    wgpu::Instance mInstance;
 };
--- a/filament/backend/include/private/backend/CircularBuffer.h
+++ b/filament/backend/include/private/backend/CircularBuffer.h
@@ -72,7 +72,7 @@ public:
    Range getBuffer() noexcept;

 private:
-    void* alloc(size_t size) noexcept;
+    void* alloc(size_t size);
    void dealloc() noexcept;

    // pointer to the beginning of the circular buffer (constant)
--- a/filament/backend/include/private/backend/CommandBufferQueue.h
+++ b/filament/backend/include/private/backend/CommandBufferQueue.h
@@ -76,7 +76,7 @@ public:

    // all commands buffers (Slices) written to this point are returned by waitForCommand(). This
    // call blocks until the CircularBuffer has at least mRequiredSize bytes available.
-    void flush() noexcept;
+    void flush();

    // returns from waitForCommands() immediately.
    void requestExit();
--- a/filament/backend/include/private/backend/VirtualMachineEnv.h
+++ b/filament/backend/include/private/backend/VirtualMachineEnv.h
@@ -27,13 +27,13 @@ namespace filament {
 class VirtualMachineEnv {
 public:
    // must be called before VirtualMachineEnv::get() from a thread that is attached to the JavaVM
-    static jint JNI_OnLoad(JavaVM* vm) noexcept;
+    static jint JNI_OnLoad(JavaVM* vm);

    // must be called on backend thread
    static VirtualMachineEnv& get() noexcept;

    // can be called from any thread that already has a JniEnv
-    static JNIEnv* getThreadEnvironment() noexcept;
+    static JNIEnv* getThreadEnvironment();

    // must be called from the backend thread
    JNIEnv* getEnvironment() noexcept {
@@ -49,7 +49,7 @@ public:
 private:
    explicit VirtualMachineEnv(JavaVM* vm) noexcept;
    ~VirtualMachineEnv() noexcept;
-    JNIEnv* getEnvironmentSlow() noexcept;
+    JNIEnv* getEnvironmentSlow();

    static utils::Mutex sLock;
    static JavaVM* sVirtualMachine;
--- a/filament/backend/src/CircularBuffer.cpp
+++ b/filament/backend/src/CircularBuffer.cpp
@@ -65,7 +65,7 @@ CircularBuffer::~CircularBuffer() noexcept {
 // to each others and a special case in circularize()

 UTILS_NOINLINE
-void* CircularBuffer::alloc(size_t size) noexcept {
+void* CircularBuffer::alloc(size_t size) {
 #if HAS_MMAP
    void* data = nullptr;
    void* vaddr = MAP_FAILED;
--- a/filament/backend/src/CommandBufferQueue.cpp
+++ b/filament/backend/src/CommandBufferQueue.cpp
@@ -53,18 +53,18 @@ CommandBufferQueue::~CommandBufferQueue() {
 }

 void CommandBufferQueue::requestExit() {
-    std::lock_guard<utils::Mutex> const lock(mLock);
+    std::lock_guard const lock(mLock);
    mExitRequested = EXIT_REQUESTED;
    mCondition.notify_one();
 }

 bool CommandBufferQueue::isPaused() const noexcept {
-    std::lock_guard<utils::Mutex> const lock(mLock);
+    std::lock_guard const lock(mLock);
    return mPaused;
 }

 void CommandBufferQueue::setPaused(bool paused) {
-    std::lock_guard<utils::Mutex> const lock(mLock);
+    std::lock_guard const lock(mLock);
    if (paused) {
        mPaused = true;
    } else {
@@ -74,12 +74,12 @@ void CommandBufferQueue::setPaused(bool paused) {
 }

 bool CommandBufferQueue::isExitRequested() const {
-    std::lock_guard<utils::Mutex> const lock(mLock);
-    return (bool)mExitRequested;
+    std::lock_guard const lock(mLock);
+    return bool(mExitRequested);
 }


-void CommandBufferQueue::flush() noexcept {
+void CommandBufferQueue::flush() {
    FILAMENT_TRACING_CALL(FILAMENT_TRACING_CATEGORY_FILAMENT);

    CircularBuffer& circularBuffer = mCircularBuffer;
@@ -103,7 +103,7 @@ void CommandBufferQueue::flush() noexcept {
            static_cast<char const*>(begin), static_cast<char const*>(end));


-    std::unique_lock<utils::Mutex> lock(mLock);
+    std::unique_lock lock(mLock);

    // circular buffer is too small, we corrupted the stream
    FILAMENT_CHECK_POSTCONDITION(used <= mFreeSpace) <<
@@ -147,7 +147,7 @@ std::vector<CommandBufferQueue::Range> CommandBufferQueue::waitForCommands() con
    if (!UTILS_HAS_THREADING) {
        return std::move(mCommandBuffersToExecute);
    }
-    std::unique_lock<utils::Mutex> lock(mLock);
+    std::unique_lock lock(mLock);
    while ((mCommandBuffersToExecute.empty() || mPaused) && !mExitRequested) {
        mCondition.wait(lock);
    }
@@ -157,7 +157,7 @@ std::vector<CommandBufferQueue::Range> CommandBufferQueue::waitForCommands() con
 void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Range const& buffer) {
    size_t const used = std::distance(
            static_cast<char const*>(buffer.begin), static_cast<char const*>(buffer.end));
-    std::lock_guard<utils::Mutex> const lock(mLock);
+    std::lock_guard const lock(mLock);
    mFreeSpace += used;
    mCondition.notify_one();
 }
--- a/filament/backend/src/Program.cpp
+++ b/filament/backend/src/Program.cpp
@@ -80,8 +80,18 @@ Program& Program::attributes(AttributesInfo attributes) noexcept {
    return *this;
 }

-Program& Program::specializationConstants(SpecializationConstantsInfo specConstants) noexcept {
+Program& Program::specializationConstants(SpecializationConstantsInfo specConstants,
+        uint32_t firstMutableId, MutableSpecConstantsInfo mutableSpecConstants) noexcept {
+    // Concatenate: fixed constants first, then mutable ones with ids from firstMutableId.
    mSpecializationConstants = std::move(specConstants);
+    // specConstants is moved-from now, so sizes must be read from the member instead.
+    mSpecializationConstants.reserve(mSpecializationConstants.size() + mutableSpecConstants.size());
+    for (uint32_t i = 0; i < mutableSpecConstants.size(); i++) {
+        mSpecializationConstants.push_back(SpecializationConstant {
+                .id = i + firstMutableId,
+                .value = mutableSpecConstants[i],
+        });
+    }
    return *this;
 }

--- a/filament/backend/src/VirtualMachineEnv.cpp
+++ b/filament/backend/src/VirtualMachineEnv.cpp
@@ -50,7 +50,7 @@ JavaVM* VirtualMachineEnv::getVirtualMachine() {
 */
 UTILS_PUBLIC
 UTILS_NOINLINE
-jint VirtualMachineEnv::JNI_OnLoad(JavaVM* vm) noexcept {
+jint VirtualMachineEnv::JNI_OnLoad(JavaVM* vm) {
    std::lock_guard const lock(sLock);
    if (sVirtualMachine) {
        // It doesn't make sense for JNI_OnLoad() to be called more than once
@@ -77,7 +77,7 @@ VirtualMachineEnv& VirtualMachineEnv::get() noexcept {
 }

 UTILS_NOINLINE
-JNIEnv* VirtualMachineEnv::getThreadEnvironment() noexcept {
+JNIEnv* VirtualMachineEnv::getThreadEnvironment() {
    JavaVM* const vm = getVirtualMachine();
    JNIEnv* env = nullptr;
    jint const result = vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6);
@@ -101,7 +101,7 @@ VirtualMachineEnv::~VirtualMachineEnv() noexcept {
 }

 UTILS_NOINLINE
-JNIEnv* VirtualMachineEnv::getEnvironmentSlow() noexcept {
+JNIEnv* VirtualMachineEnv::getEnvironmentSlow() {
    FILAMENT_CHECK_PRECONDITION(mVirtualMachine)
            << "JNI_OnLoad() has not been called";

--- a/filament/backend/src/opengl/GLDescriptorSet.cpp
+++ b/filament/backend/src/opengl/GLDescriptorSet.cpp
@@ -162,8 +162,11 @@ void GLDescriptorSet::update(OpenGLContext&,
    }, descriptors[binding].desc);
 }

-void GLDescriptorSet::update(OpenGLContext& gl,
-        descriptor_binding_t binding, GLTexture* t, SamplerParams params) noexcept {
+void GLDescriptorSet::update(OpenGLContext& gl, HandleAllocatorGL& handleAllocator,
+        descriptor_binding_t binding, TextureHandle th, SamplerParams params) noexcept {
+
+    GLTexture* t = th ? handleAllocator.handle_cast<GLTexture*>(th) : nullptr;
+
    assert_invariant(binding < descriptors.size());
    std::visit([=, &gl](auto&& arg) mutable {
        using T = std::decay_t<decltype(arg)>;
@@ -196,20 +199,12 @@ void GLDescriptorSet::update(OpenGLContext& gl,
                }
            }

-            arg.target = t ? t->gl.target : 0;
-            arg.id = t ? t->gl.id : 0;
-            arg.external = t ? t->gl.external :  false;
+            arg.handle = th;
            if constexpr (std::is_same_v<T, Sampler> ||
                          std::is_same_v<T, SamplerWithAnisotropyWorkaround>) {
                if constexpr (std::is_same_v<T, SamplerWithAnisotropyWorkaround>) {
                    arg.anisotropy = float(1u << params.anisotropyLog2);
                }
-                if (t) {
-                    arg.ref = t->ref;
-                    arg.baseLevel = t->gl.baseLevel;
-                    arg.maxLevel = t->gl.maxLevel;
-                    arg.swizzle = t->gl.swizzle;
-                }
 #ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
                arg.sampler = gl.getSampler(params);
 #else
@@ -225,39 +220,39 @@ void GLDescriptorSet::update(OpenGLContext& gl,
    }, descriptors[binding].desc);
 }

-template<typename T>
 void GLDescriptorSet::updateTextureView(OpenGLContext& gl,
-        HandleAllocatorGL& handleAllocator, GLuint unit, T const& desc) noexcept {
+        HandleAllocatorGL& handleAllocator, GLuint unit, GLTexture const* t) noexcept {
    // The common case is that we don't have a ref handle (we only have one if
    // the texture ever had a View on it).
-    assert_invariant(desc.ref);
-    GLTextureRef* const ref = handleAllocator.handle_cast<GLTextureRef*>(desc.ref);
-    if (UTILS_UNLIKELY((desc.baseLevel != ref->baseLevel || desc.maxLevel != ref->maxLevel))) {
+    assert_invariant(t);
+    assert_invariant(t->ref);
+    GLTextureRef* const ref = handleAllocator.handle_cast<GLTextureRef*>(t->ref);
+    if (UTILS_UNLIKELY((t->gl.baseLevel != ref->baseLevel || t->gl.maxLevel != ref->maxLevel))) {
        // If we have views, then it's still uncommon that we'll switch often
        // handle the case where we reset to the original texture
-        GLint baseLevel = GLint(desc.baseLevel); // NOLINT(*-signed-char-misuse)
-        GLint maxLevel = GLint(desc.maxLevel); // NOLINT(*-signed-char-misuse)
+        GLint baseLevel = GLint(t->gl.baseLevel); // NOLINT(*-signed-char-misuse)
+        GLint maxLevel = GLint(t->gl.maxLevel); // NOLINT(*-signed-char-misuse)
        if (baseLevel > maxLevel) {
            baseLevel = 0;
            maxLevel = 1000; // per OpenGL spec
        }
        // that is very unfortunate that we have to call activeTexture here
        gl.activeTexture(unit);
-        glTexParameteri(desc.target, GL_TEXTURE_BASE_LEVEL, baseLevel);
-        glTexParameteri(desc.target, GL_TEXTURE_MAX_LEVEL,  maxLevel);
-        ref->baseLevel = desc.baseLevel;
-        ref->maxLevel = desc.maxLevel;
+        glTexParameteri(t->gl.target, GL_TEXTURE_BASE_LEVEL, baseLevel);
+        glTexParameteri(t->gl.target, GL_TEXTURE_MAX_LEVEL,  maxLevel);
+        ref->baseLevel = t->gl.baseLevel;
+        ref->maxLevel = t->gl.maxLevel;
    }
-    if (UTILS_UNLIKELY(desc.swizzle != ref->swizzle)) {
+    if (UTILS_UNLIKELY(t->gl.swizzle != ref->swizzle)) {
        using namespace GLUtils;
        gl.activeTexture(unit);
 #if !defined(__EMSCRIPTEN__)  && !defined(FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2)
-        glTexParameteri(desc.target, GL_TEXTURE_SWIZZLE_R, (GLint)getSwizzleChannel(desc.swizzle[0]));
-        glTexParameteri(desc.target, GL_TEXTURE_SWIZZLE_G, (GLint)getSwizzleChannel(desc.swizzle[1]));
-        glTexParameteri(desc.target, GL_TEXTURE_SWIZZLE_B, (GLint)getSwizzleChannel(desc.swizzle[2]));
-        glTexParameteri(desc.target, GL_TEXTURE_SWIZZLE_A, (GLint)getSwizzleChannel(desc.swizzle[3]));
+        glTexParameteri(t->gl.target, GL_TEXTURE_SWIZZLE_R, (GLint)getSwizzleChannel(t->gl.swizzle[0]));
+        glTexParameteri(t->gl.target, GL_TEXTURE_SWIZZLE_G, (GLint)getSwizzleChannel(t->gl.swizzle[1]));
+        glTexParameteri(t->gl.target, GL_TEXTURE_SWIZZLE_B, (GLint)getSwizzleChannel(t->gl.swizzle[2]));
+        glTexParameteri(t->gl.target, GL_TEXTURE_SWIZZLE_A, (GLint)getSwizzleChannel(t->gl.swizzle[3]));
 #endif
-        ref->swizzle = desc.swizzle;
+        ref->swizzle = t->gl.swizzle;
    }
 }

@@ -310,27 +305,31 @@ void GLDescriptorSet::bind(
                }
            } else if constexpr (std::is_same_v<T, Sampler>) {
                GLuint const unit = p.getTextureUnit(set, binding);
-                if (arg.target) {
-                    gl.bindTexture(unit, arg.target, arg.id, arg.external);
+
+
+                if (arg.handle) {
+                    GLTexture const* const t = handleAllocator.handle_cast<GLTexture*>(arg.handle);
+                    gl.bindTexture(unit, t->gl.target, t->gl.id, t->gl.external);
                    gl.bindSampler(unit, arg.sampler);
-                    if (UTILS_UNLIKELY(arg.ref)) {
-                        updateTextureView(gl, handleAllocator, unit, arg);
+                    if (UTILS_UNLIKELY(t->ref)) {
+                        updateTextureView(gl, handleAllocator, unit, t);
                    }
                } else {
                    gl.unbindTextureUnit(unit);
                }
            } else if constexpr (std::is_same_v<T, SamplerWithAnisotropyWorkaround>) {
                GLuint const unit = p.getTextureUnit(set, binding);
-                if (arg.target) {
-                    gl.bindTexture(unit, arg.target, arg.id, arg.external);
+                if (arg.handle) {
+                    GLTexture const* const t = handleAllocator.handle_cast<GLTexture*>(arg.handle);
+                    gl.bindTexture(unit, t->gl.target, t->gl.id, t->gl.external);
                    gl.bindSampler(unit, arg.sampler);
-                    if (UTILS_UNLIKELY(arg.ref)) {
-                        updateTextureView(gl, handleAllocator, unit, arg);
+                    if (UTILS_UNLIKELY(t->ref)) {
+                        updateTextureView(gl, handleAllocator, unit, t);
                    }
 #if defined(GL_EXT_texture_filter_anisotropic)
                    // Driver claims to support anisotropic filtering, but it fails when set on
                    // the sampler, we have to set it on the texture instead.
-                    glTexParameterf(arg.target, GL_TEXTURE_MAX_ANISOTROPY_EXT,
+                    glTexParameterf(t->gl.target, GL_TEXTURE_MAX_ANISOTROPY_EXT,
                            std::min(gl.gets.max_anisotropy, float(arg.anisotropy)));
 #endif
                } else {
@@ -339,19 +338,20 @@ void GLDescriptorSet::bind(
            } else if constexpr (std::is_same_v<T, SamplerGLES2>) {
                // in ES2 the sampler parameters need to be set on the texture itself
                GLuint const unit = p.getTextureUnit(set, binding);
-                if (arg.target) {
-                    gl.bindTexture(unit, arg.target, arg.id, arg.external);
+                if (arg.handle) {
+                    GLTexture const* const t = handleAllocator.handle_cast<GLTexture*>(arg.handle);
+                    gl.bindTexture(unit, t->gl.target, t->gl.id, t->gl.external);
                    SamplerParams const params = arg.params;
-                    glTexParameteri(arg.target, GL_TEXTURE_MIN_FILTER,
+                    glTexParameteri(t->gl.target, GL_TEXTURE_MIN_FILTER,
                            (GLint)GLUtils::getTextureFilter(params.filterMin));
-                    glTexParameteri(arg.target, GL_TEXTURE_MAG_FILTER,
+                    glTexParameteri(t->gl.target, GL_TEXTURE_MAG_FILTER,
                            (GLint)GLUtils::getTextureFilter(params.filterMag));
-                    glTexParameteri(arg.target, GL_TEXTURE_WRAP_S,
+                    glTexParameteri(t->gl.target, GL_TEXTURE_WRAP_S,
                            (GLint)GLUtils::getWrapMode(params.wrapS));
-                    glTexParameteri(arg.target, GL_TEXTURE_WRAP_T,
+                    glTexParameteri(t->gl.target, GL_TEXTURE_WRAP_T,
                            (GLint)GLUtils::getWrapMode(params.wrapT));
 #if defined(GL_EXT_texture_filter_anisotropic)
-                    glTexParameterf(arg.target, GL_TEXTURE_MAX_ANISOTROPY_EXT,
+                    glTexParameterf(t->gl.target, GL_TEXTURE_MAX_ANISOTROPY_EXT,
                            std::min(gl.gets.max_anisotropy, arg.anisotropy));
 #endif
                } else {
--- a/filament/backend/src/opengl/GLDescriptorSet.h
+++ b/filament/backend/src/opengl/GLDescriptorSet.h
@@ -59,8 +59,8 @@ struct GLDescriptorSet : public HwDescriptorSet {
            descriptor_binding_t binding, GLBufferObject* bo, size_t offset, size_t size) noexcept;

    // update a sampler descriptor in the set
-    void update(OpenGLContext& gl,
-            descriptor_binding_t binding, GLTexture* t, SamplerParams params) noexcept;
+    void update(OpenGLContext& gl, HandleAllocatorGL& handleAllocator,
+            descriptor_binding_t binding, TextureHandle th, SamplerParams params) noexcept;

    // conceptually bind the set to the command buffer
    void bind(
@@ -111,46 +111,19 @@ private:

    // A sampler descriptor
    struct Sampler {
-        uint16_t target;                        // 2 (GLenum)
-        bool external = false;                  // 1
-        bool reserved = false;                  // 1
-        GLuint id = 0;                          // 4
+        TextureHandle handle;                   // 4
        GLuint sampler = 0;                     // 4
-        Handle<GLTextureRef> ref;               // 4
-        int8_t baseLevel = 0x7f;                // 1
-        int8_t maxLevel = -1;                   // 1
-        std::array<TextureSwizzle, 4> swizzle{  // 4
-                TextureSwizzle::CHANNEL_0,
-                TextureSwizzle::CHANNEL_1,
-                TextureSwizzle::CHANNEL_2,
-                TextureSwizzle::CHANNEL_3
-        };
    };

    struct SamplerWithAnisotropyWorkaround {
-        uint16_t target;                        // 2 (GLenum)
-        bool external = false;                  // 1
-        bool reserved = false;                  // 1
-        GLuint id = 0;                          // 4
+        TextureHandle handle;                   // 4
        GLuint sampler = 0;                     // 4
-        Handle<GLTextureRef> ref;               // 4
        math::half anisotropy = 1.0f;           // 2
-        int8_t baseLevel = 0x7f;                // 1
-        int8_t maxLevel = -1;                   // 1
-        std::array<TextureSwizzle, 4> swizzle{  // 4
-                TextureSwizzle::CHANNEL_0,
-                TextureSwizzle::CHANNEL_1,
-                TextureSwizzle::CHANNEL_2,
-                TextureSwizzle::CHANNEL_3
-        };
    };

    // A sampler descriptor for ES2
    struct SamplerGLES2 {
-        uint16_t target;                        // 2 (GLenum)
-        bool external = false;                  // 1
-        bool reserved = false;                  // 1
-        GLuint id = 0;                          // 4
+        TextureHandle handle;                   // 4
        SamplerParams params{};                 // 4
        float anisotropy = 1.0f;                // 4
    };
@@ -165,9 +138,8 @@ private:
    };
    static_assert(sizeof(Descriptor) <= 32);

-    template<typename T>
    static void updateTextureView(OpenGLContext& gl,
-            HandleAllocatorGL& handleAllocator, GLuint unit, T const& desc) noexcept;
+            HandleAllocatorGL& handleAllocator, GLuint unit, GLTexture const* t) noexcept;

    utils::FixedCapacityVector<Descriptor> descriptors;     // 16
    utils::bitset64 dynamicBuffers;                         // 8
--- a/filament/backend/src/opengl/OpenGLDriver.cpp
+++ b/filament/backend/src/opengl/OpenGLDriver.cpp
@@ -932,7 +932,12 @@ void OpenGLDriver::createTextureR(Handle<HwTexture> th, SamplerType target, uint
 #if defined(BACKEND_OPENGL_LEVEL_GLES31)
                if (gl.features.multisample_texture) {
                    // multi-sample texture on GL 3.2 / GLES 3.1 and above
-                    t->gl.target = GL_TEXTURE_2D_MULTISAMPLE;
+                    if (depth <= 1) {
+                        // We forcibly change the target to 2D-multisample only for flat texture.
+                        // A depth value greater than 1 may indicate multiview usage, which requires
+                        // GL_TEXTURE_2D_ARRAY. Also 2D MSAA won't work with non-flat texture anyway.
+                        t->gl.target = GL_TEXTURE_2D_MULTISAMPLE;
+                    }
                } else {
                    // Turn off multi-sampling for that texture. It's just not supported.
                }
@@ -1208,7 +1213,12 @@ void OpenGLDriver::importTextureR(Handle<HwTexture> th, intptr_t id,
 #if defined(BACKEND_OPENGL_LEVEL_GLES31)
        if (gl.features.multisample_texture) {
            // multi-sample texture on GL 3.2 / GLES 3.1 and above
-            t->gl.target = GL_TEXTURE_2D_MULTISAMPLE;
+            if (depth <= 1) {
+                // We forcibly change the target to 2D-multisample only for flat texture.
+                // A depth value greater than 1 may indicate multiview usage, which requires
+                // GL_TEXTURE_2D_ARRAY. Also 2D MSAA won't work with non-flat texture anyway.
+                t->gl.target = GL_TEXTURE_2D_MULTISAMPLE;
+            }
        } else {
            // Turn off multi-sampling for that texture. It's just not supported.
        }
@@ -1453,8 +1463,15 @@ void OpenGLDriver::framebufferTexture(TargetBufferInfo const& binfo,
 #if !defined(__EMSCRIPTEN__) && !defined(FILAMENT_IOS)
                if (layerCount > 1) {
                    // if layerCount > 1, it means we use the multiview extension.
-                    glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, attachment,
-                        t->gl.id, 0, binfo.layer, layerCount);
+                    if (rt->gl.samples > 1) {
+                        // For MSAA
+                        glFramebufferTextureMultisampleMultiviewOVR(GL_FRAMEBUFFER, attachment,
+                                t->gl.id, 0, rt->gl.samples, binfo.layer, layerCount);
+                    }
+                    else {
+                        glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, attachment, t->gl.id, 0,
+                                binfo.layer, layerCount);
+                    }
                } else
 #endif // !defined(__EMSCRIPTEN__) && !defined(FILAMENT_IOS)
                {
@@ -2514,10 +2531,33 @@ void OpenGLDriver::makeCurrent(Handle<HwSwapChain> schDraw, Handle<HwSwapChain>

    mPlatform.makeCurrent(scDraw->swapChain, scRead->swapChain,
            [this]() {
+                for (auto t: mTexturesWithStreamsAttached) {
+                    if (t->hwStream->streamType == StreamType::NATIVE) {
+                        mPlatform.detach(t->hwStream->stream);
+                    }
+                }
                // OpenGL context is about to change, unbind everything
                mContext.unbindEverything();
            },
            [this](size_t index) {
+                for (auto t: mTexturesWithStreamsAttached) {
+                    if (t->hwStream->streamType == StreamType::NATIVE) {
+                        if (t->externalTexture) {
+                            glGenTextures(1, &t->externalTexture->id);
+                            t->gl.id = t->externalTexture->id;
+                        } else {
+                            glGenTextures(1, &t->gl.id);
+                        }
+                        mPlatform.attach(t->hwStream->stream, t->gl.id);
+                        mContext.updateTexImage(GL_TEXTURE_EXTERNAL_OES, t->gl.id);
+                    }
+                }
+
+                // force invalidation of all bound descriptor sets
+                decltype(mInvalidDescriptorSetBindings) changed;
+                changed.setValue((1 << MAX_DESCRIPTOR_SET_COUNT) - 1);
+                mInvalidDescriptorSetBindings |= changed;
+
                // OpenGL context has changed, resynchronize the state with the cache
                mContext.synchronizeStateAndCache(index);
                slog.d << "*** OpenGL context change : " << (index ? "protected" : "default") << io::endl;
@@ -2992,6 +3032,7 @@ void OpenGLDriver::attachStream(GLTexture* t, GLStream* hwStream) noexcept {
    switch (hwStream->streamType) {
        case StreamType::NATIVE:
            mPlatform.attach(hwStream->stream, t->gl.id);
+            mContext.updateTexImage(GL_TEXTURE_EXTERNAL_OES, t->gl.id);
            break;
        case StreamType::ACQUIRED:
            break;
@@ -3020,7 +3061,12 @@ void OpenGLDriver::detachStream(GLTexture* t) noexcept {
            break;
    }

-    glGenTextures(1, &t->gl.id);
+    if (t->externalTexture) {
+        glGenTextures(1, &t->externalTexture->id);
+        t->gl.id = t->externalTexture->id;
+    } else {
+        glGenTextures(1, &t->gl.id);
+    }

    t->hwStream = nullptr;
 }
@@ -3044,8 +3090,14 @@ void OpenGLDriver::replaceStream(GLTexture* texture, GLStream* newStream) noexce

    switch (newStream->streamType) {
        case StreamType::NATIVE:
-            glGenTextures(1, &texture->gl.id);
+            if (texture->externalTexture) {
+                glGenTextures(1, &texture->externalTexture->id);
+                texture->gl.id = texture->externalTexture->id;
+            } else {
+                glGenTextures(1, &texture->gl.id);
+            }
            mPlatform.attach(newStream->stream, texture->gl.id);
+            mContext.updateTexImage(GL_TEXTURE_EXTERNAL_OES, texture->gl.id);
            break;
        case StreamType::ACQUIRED:
            // Just re-use the old texture id.
@@ -3708,8 +3760,7 @@ void OpenGLDriver::updateDescriptorSetTexture(
        TextureHandle th,
        SamplerParams params) {
    GLDescriptorSet* ds = handle_cast<GLDescriptorSet*>(dsh);
-    GLTexture* t = th ? handle_cast<GLTexture*>(th) : nullptr;
-    ds->update(mContext, binding, t, params);
+    ds->update(mContext, mHandleAllocator, binding, th, params);
 }

 void OpenGLDriver::flush(int) {
--- a/filament/backend/src/opengl/OpenGLPlatform.cpp
+++ b/filament/backend/src/opengl/OpenGLPlatform.cpp
@@ -64,7 +64,7 @@ utils::CString OpenGLPlatform::getRendererString(Driver const* driver) {
 }

 void OpenGLPlatform::makeCurrent(SwapChain* drawSwapChain, SwapChain* readSwapChain,
-        utils::Invocable<void()>, utils::Invocable<void(size_t)>) noexcept {
+        utils::Invocable<void()>, utils::Invocable<void(size_t)>) {
    makeCurrent(getCurrentContextType(), drawSwapChain, readSwapChain);
 }

--- a/filament/backend/src/opengl/ShaderCompilerService.cpp
+++ b/filament/backend/src/opengl/ShaderCompilerService.cpp
@@ -23,6 +23,7 @@
 #include "OpenGLDriver.h"

 #include <iterator>
+#include <optional>
 #include <private/backend/BackendUtils.h>

 #include <backend/DriverEnums.h>
@@ -110,7 +111,17 @@ struct ShaderCompilerService::OpenGLProgramToken : ProgramToken {
        cond.wait(l, [this] { return signaled; });
    }

-    CallbackManager::Handle handle{};
+    // This is invoked upon token completion, which occurs after a successful `gl.program`
+    // population or upon cancellation. In either scenario, the callback handle must be submitted
+    // to notify the caller that resource loading has concluded.
+    void trySubmittingCallback() noexcept {
+        if (handle) {
+            compiler.submitCallbackHandle(*handle);
+            handle = std::nullopt;
+        }
+    }
+
+    std::optional<CallbackManager::Handle> handle{};
    BlobCacheKey key;

    // Used for the `THREAD_POOL` mode.
@@ -120,7 +131,7 @@ struct ShaderCompilerService::OpenGLProgramToken : ProgramToken {
 };

 ShaderCompilerService::OpenGLProgramToken::~OpenGLProgramToken() {
-    compiler.submitCallbackHandle(handle);
+    trySubmittingCallback();
 }

 /* static */ void ShaderCompilerService::setUserData(const program_token_t& token,
@@ -339,7 +350,7 @@ GLuint ShaderCompilerService::getProgram(program_token_t& token) {

    // Cleanup the token.
    token->compiler.cancelTickOp(token);
-    token = nullptr;// This will submit a callback condition (handle) to the callback manager.
+    token = nullptr; // This will try submitting a callback handle to the callback manager.
 }

 void ShaderCompilerService::tick() {
@@ -392,7 +403,7 @@ GLuint ShaderCompilerService::initialize(program_token_t& token) {

    // Cleanup the token.
    token->compiler.cancelTickOp(token);
-    token = nullptr;// This will submit a callback condition (handle) to the callback manager.
+    token = nullptr;

    return program;
 }
@@ -659,6 +670,7 @@ void ShaderCompilerService::executeTickOps() noexcept {
    }
    glLinkProgram(program);
    token->gl.program = program;
+    token->trySubmittingCallback();
 }

 /* static */ bool ShaderCompilerService::isLinkCompleted(program_token_t const& token) noexcept {
--- a/filament/backend/src/opengl/gl_headers.cpp
+++ b/filament/backend/src/opengl/gl_headers.cpp
@@ -70,6 +70,9 @@ PFNGLMAXSHADERCOMPILERTHREADSKHRPROC glMaxShaderCompilerThreadsKHR;
 #ifdef GL_OVR_multiview
 PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glFramebufferTextureMultiviewOVR;
 #endif
+#ifdef GL_OVR_multiview_multisampled_render_to_texture
+PFNGLFRAMEBUFFERTEXTUREMULTISAMPLEMULTIVIEWOVRPROC glFramebufferTextureMultisampleMultiviewOVR;
+#endif

 #if defined(__ANDROID__) && !defined(FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2)
 // On Android, If we want to support a build system less than ANDROID_API 21, we need to
@@ -123,6 +126,9 @@ void importGLESExtensionsEntryPoints() {
 #ifdef GL_OVR_multiview
        getProcAddress(glFramebufferTextureMultiviewOVR, "glFramebufferTextureMultiviewOVR");
 #endif
+#ifdef GL_OVR_multiview_multisampled_render_to_texture
+        getProcAddress(glFramebufferTextureMultisampleMultiviewOVR, "glFramebufferTextureMultisampleMultiviewOVR");
+#endif
 #if defined(__ANDROID__) && !defined(FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2)
        getProcAddress(glDispatchCompute, "glDispatchCompute");
 #endif
--- a/filament/backend/src/opengl/gl_headers.h
+++ b/filament/backend/src/opengl/gl_headers.h
@@ -154,6 +154,9 @@ extern PFNGLMAXSHADERCOMPILERTHREADSKHRPROC glMaxShaderCompilerThreadsKHR;
 #ifdef GL_OVR_multiview
 extern PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glFramebufferTextureMultiviewOVR;
 #endif
+#ifdef GL_OVR_multiview_multisampled_render_to_texture
+extern PFNGLFRAMEBUFFERTEXTUREMULTISAMPLEMULTIVIEWOVRPROC glFramebufferTextureMultisampleMultiviewOVR;
+#endif
 #if defined(__ANDROID__) && !defined(FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2)
 extern PFNGLDISPATCHCOMPUTEPROC glDispatchCompute;
 #endif
--- a/filament/backend/src/opengl/platforms/PlatformCocoaGL.mm
+++ b/filament/backend/src/opengl/platforms/PlatformCocoaGL.mm
@@ -255,7 +255,7 @@ void PlatformCocoaGL::destroySwapChain(Platform::SwapChain* swapChain) noexcept
 }

 bool PlatformCocoaGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
-        SwapChain* readSwapChain) noexcept {
+        SwapChain* readSwapChain) {
    ASSERT_PRECONDITION_NON_FATAL(drawSwapChain == readSwapChain,
            "ContextManagerCocoa does not support using distinct draw/read swap chains.");
    CocoaGLSwapChain* swapChain = (CocoaGLSwapChain*)drawSwapChain;
--- a/filament/backend/src/opengl/platforms/PlatformCocoaTouchGL.mm
+++ b/filament/backend/src/opengl/platforms/PlatformCocoaTouchGL.mm
@@ -155,7 +155,7 @@ uint32_t PlatformCocoaTouchGL::getDefaultFramebufferObject() noexcept {
 }

 bool PlatformCocoaTouchGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
-        SwapChain* readSwapChain) noexcept {
+        SwapChain* readSwapChain) {
    ASSERT_PRECONDITION_NON_FATAL(drawSwapChain == readSwapChain,
            "PlatformCocoaTouchGL does not support using distinct draw/read swap chains.");
    CAEAGLLayer* const glLayer = (__bridge CAEAGLLayer*) drawSwapChain;
--- a/filament/backend/src/opengl/platforms/PlatformEGL.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformEGL.cpp
@@ -577,18 +577,18 @@ OpenGLPlatform::ContextType PlatformEGL::getCurrentContextType() const noexcept
 }

 bool PlatformEGL::makeCurrent(ContextType type,
-        SwapChain* drawSwapChain, SwapChain* readSwapChain) noexcept {
+        SwapChain* drawSwapChain, SwapChain* readSwapChain) {
    SwapChainEGL const* const dsc = static_cast<SwapChainEGL const*>(drawSwapChain);
    SwapChainEGL const* const rsc = static_cast<SwapChainEGL const*>(readSwapChain);
    EGLContext context = getContextForType(type);
    EGLBoolean const success = egl.makeCurrent(context, dsc->sur, rsc->sur);
-    return success == EGL_TRUE ? true : false;
+    return success == EGL_TRUE;
 }

 void PlatformEGL::makeCurrent(SwapChain* drawSwapChain,
        SwapChain* readSwapChain,
        Invocable<void()> preContextChange,
-        Invocable<void(size_t index)> postContextChange) noexcept {
+        Invocable<void(size_t index)> postContextChange) {

    assert_invariant(drawSwapChain);
    assert_invariant(readSwapChain);
@@ -796,7 +796,7 @@ void PlatformEGL::Config::erase(EGLint name) noexcept {
 // ------------------------------------------------------------------------------------------------

 EGLBoolean PlatformEGL::EGL::makeCurrent(EGLContext context, EGLSurface drawSurface,
-        EGLSurface readSurface) noexcept {
+        EGLSurface readSurface) {
    if (UTILS_UNLIKELY((
            mCurrentContext != context ||
            drawSurface != mCurrentDrawSurface || readSurface != mCurrentReadSurface))) {
--- a/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformEGLAndroid.cpp
@@ -135,7 +135,7 @@ static constexpr const std::string_view kNativeWindowInvalidMsg =

 bool PlatformEGLAndroid::makeCurrent(ContextType type,
        SwapChain* drawSwapChain,
-        SwapChain* readSwapChain) noexcept {
+        SwapChain* readSwapChain) {

    // fast & safe path
    if (UTILS_LIKELY(!mAssertNativeWindowIsValid)) {
--- a/filament/backend/src/opengl/platforms/PlatformGLX.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformGLX.cpp
@@ -266,7 +266,7 @@ void PlatformGLX::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
 }

 bool PlatformGLX::makeCurrent(ContextType type, SwapChain* drawSwapChain,
-        SwapChain* readSwapChain) noexcept {
+        SwapChain* readSwapChain) {
    g_glx.setCurrentContext(mGLXDisplay,
            (GLXDrawable)drawSwapChain, (GLXDrawable)readSwapChain, mGLXContext);
    return true;
--- a/filament/backend/src/opengl/platforms/PlatformOSMesa.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformOSMesa.cpp
@@ -169,7 +169,7 @@ void PlatformOSMesa::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
 }

 bool PlatformOSMesa::makeCurrent(ContextType type, SwapChain* drawSwapChain,
-        SwapChain* readSwapChain) noexcept {
+        SwapChain* readSwapChain) {
    OSMesaAPI* api = (OSMesaAPI*) mOsMesaApi;
    OSMesaSwapchain* impl = (OSMesaSwapchain*) drawSwapChain;

--- a/filament/backend/src/opengl/platforms/PlatformWGL.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformWGL.cpp
@@ -262,7 +262,7 @@ void PlatformWGL::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
 }

 bool PlatformWGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
-        SwapChain* readSwapChain) noexcept {
+        SwapChain* readSwapChain) {
    ASSERT_PRECONDITION_NON_FATAL(drawSwapChain == readSwapChain,
                                  "PlatformWGL does not support distinct draw/read swap chains.");

--- a/filament/backend/src/opengl/platforms/PlatformWebGL.cpp
+++ b/filament/backend/src/opengl/platforms/PlatformWebGL.cpp
@@ -47,7 +47,7 @@ void PlatformWebGL::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
 }

 bool PlatformWebGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
-        SwapChain* readSwapChain) noexcept {
+        SwapChain* readSwapChain) {
    return true;
 }

--- a/filament/backend/src/vulkan/VulkanBuffer.h
+++ b/filament/backend/src/vulkan/VulkanBuffer.h
@@ -17,35 +17,37 @@
 #ifndef TNT_FILAMENT_BACKEND_VULKANBUFFER_H
 #define TNT_FILAMENT_BACKEND_VULKANBUFFER_H

-#include "VulkanContext.h"
-#include "VulkanStagePool.h"
 #include "VulkanMemory.h"
+#include "memory/Resource.h"
+
+#include <functional>
+#include <utility>

 namespace filament::backend {

-// Encapsulates a Vulkan buffer, its attached DeviceMemory and a staging area.
-class VulkanBuffer {
+class VulkanBuffer : public fvkmemory::Resource {
 public:
-    VulkanBuffer(VmaAllocator allocator, VulkanStagePool& stagePool, VkBufferUsageFlags usage,
-            uint32_t numBytes);
-    ~VulkanBuffer();
-    void loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint32_t byteOffset,
-            uint32_t numBytes);
-    VkBuffer getGpuBuffer() const {
-        return mGpuBuffer;
+    // Because we need to recycle the unused `VulkanGpuBuffer`, we allow for a callback that the
+    // "Pool" can use to acquire the buffer back.
+    using OnRecycle = std::function<void(VulkanGpuBuffer const*)>;
+
+    VulkanBuffer(VulkanGpuBuffer const* gpuBuffer, OnRecycle&& onRecycleFn)
+        : mGpuBuffer(gpuBuffer),
+          mOnRecycleFn(std::move(onRecycleFn)) {} // Take ownership of the callback; no copy.
+
+    ~VulkanBuffer() {
+        if (mOnRecycleFn) {
+            mOnRecycleFn(mGpuBuffer);
+        }
    }

-private:
-    VmaAllocator mAllocator;
-    VulkanStagePool& mStagePool;
+    VulkanGpuBuffer const* getGpuBuffer() const { return mGpuBuffer; }

-    VmaAllocation mGpuMemory = VK_NULL_HANDLE;
-    VkBuffer mGpuBuffer = VK_NULL_HANDLE;
-    VkBufferUsageFlags mUsage = {};
-	uint32_t mUpdatedOffset = 0;
-    uint32_t mUpdatedBytes = 0;
+private:
+    VulkanGpuBuffer const* mGpuBuffer;
+    OnRecycle mOnRecycleFn;
 };

-} // namespace filament::backend
+}// namespace filament::backend

-#endif // TNT_FILAMENT_BACKEND_VULKANBUFFER_H
+#endif// TNT_FILAMENT_BACKEND_VULKANBUFFER_H
--- a/filament/backend/src/vulkan/VulkanBufferCache.cpp
+++ b/filament/backend/src/vulkan/VulkanBufferCache.cpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VulkanBufferCache.h"
+
+#include "VulkanBuffer.h"
+#include "VulkanConstants.h"
+#include "VulkanMemory.h"
+#include "memory/Resource.h"
+#include "memory/ResourceManager.h"
+
+#include <utility>
+
+namespace filament::backend {
+
+namespace {
+
+VkBufferUsageFlags getVkBufferUsage(VulkanBufferUsage usage) {
+    switch (usage) {
+        case VulkanBufferUsage::VERTEX:
+            return VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+        case VulkanBufferUsage::INDEX:
+            return VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
+        case VulkanBufferUsage::UNIFORM:
+            return VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+        case VulkanBufferUsage::SHADER_STORAGE:
+            return VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+        case VulkanBufferUsage::UNKNOWN:
+            return 0;
+    }
+
+    return 0;
+}
+
+}// namespace
+
+VulkanBufferCache::VulkanBufferCache(VulkanContext const& context,
+        fvkmemory::ResourceManager& resourceManager, VmaAllocator allocator)
+    : mContext(context),
+      mResourceManager(resourceManager),
+      mAllocator(allocator) {}
+
+fvkmemory::resource_ptr<VulkanBuffer> VulkanBufferCache::acquire(VulkanBufferUsage usage,
+        uint32_t numBytes) noexcept {
+    assert_invariant(usage != VulkanBufferUsage::UNKNOWN);
+
+    BufferPool& pool = getPool(usage);
+    VulkanGpuBuffer const* gpuBuffer = nullptr;
+    // Pool entries are keyed by capacity, so `lower_bound` finds the smallest unused buffer
+    // that can hold at least `numBytes`.
+    auto candidate = pool.lower_bound(numBytes);
+    if (candidate != pool.end()) {
+        // Reuse it, and take it out of the pool so that it cannot be handed out twice.
+        gpuBuffer = candidate->second.gpuBuffer;
+        pool.erase(candidate);
+    } else {
+        // Nothing large enough is available: create a new buffer.
+        gpuBuffer = allocate(usage, numBytes);
+    }
+
+    // Either way, the wrapper's recycle callback returns the buffer to this cache.
+    return fvkmemory::resource_ptr<VulkanBuffer>::construct(&mResourceManager, gpuBuffer,
+            [this](VulkanGpuBuffer const* recycled) { this->release(recycled); });
+}
+
+void VulkanBufferCache::gc() noexcept {
+    FVK_SYSTRACE_CONTEXT();
+    FVK_SYSTRACE_START("VulkanBufferCache::gc");
+
+    // If this is one of the first few frames, return early to avoid wrapping unsigned integers.
+    constexpr uint32_t TIME_BEFORE_EVICTION = 3;
+    if (++mCurrentFrame <= TIME_BEFORE_EVICTION) {
+        return;
+    }
+    const uint64_t evictionTime = mCurrentFrame - TIME_BEFORE_EVICTION;
+
+    // Destroy buffers that have not been used for several frames.
+    for (auto& bufferPool: mGpuBufferPools) {
+        for (auto poolIter = bufferPool.begin(); poolIter != bufferPool.end();) {
+            if (poolIter->second.lastAccessed < evictionTime) {
+#if FVK_ENABLED(FVK_DEBUG_VULKAN_BUFFER_CACHE)
+                FVK_LOGD << "VulkanBufferCache - Destroyed vkBuffer "
+                         << poolIter->second.gpuBuffer->vkbuffer << " with usage "
+                         << static_cast<int>(poolIter->second.gpuBuffer->usage) << utils::io::endl;
+#endif// FVK_DEBUG_VULKAN_BUFFER_CACHE
+
+                destroy(poolIter->second.gpuBuffer);
+                poolIter = bufferPool.erase(poolIter);
+            } else {
+                ++poolIter;
+            }
+        }
+    }
+
+    FVK_SYSTRACE_END();
+}
+
+void VulkanBufferCache::terminate() noexcept {
+    for (auto& bufferPool: mGpuBufferPools) {
+        for (auto& poolEntry: bufferPool) {
+            destroy(poolEntry.second.gpuBuffer);
+        }
+        bufferPool.clear();
+    }
+}
+
+void VulkanBufferCache::release(VulkanGpuBuffer const* gpuBuffer) noexcept {
+    assert_invariant(gpuBuffer != nullptr);
+
+    // File the buffer under its capacity in the pool matching its usage, stamped with the
+    // current frame number, which `gc()` compares against when evicting unused buffers.
+    UnusedGpuBuffer const entry{ .lastAccessed = mCurrentFrame, .gpuBuffer = gpuBuffer };
+    BufferPool& pool = getPool(gpuBuffer->usage);
+    pool.emplace(gpuBuffer->numBytes, entry);
+}
+
+VulkanGpuBuffer const* VulkanBufferCache::allocate(VulkanBufferUsage usage,
+        uint32_t numBytes) noexcept {
+    VkBufferCreateInfo const bufferInfo{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .size = numBytes,
+        // `VK_BUFFER_USAGE_TRANSFER_DST_BIT` is needed to allow updating the buffer through
+        // a staging using `vkCmdCopyBuffer`.
+        .usage = getVkBufferUsage(usage) | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+    };
+
+    VmaAllocationCreateFlags vmaFlags = 0;
+    if (usage == VulkanBufferUsage::UNIFORM) {
+        // In the case of UMA, the uniform buffers will always be mappable
+        if (mContext.isUnifiedMemoryArchitecture()) {
+            vmaFlags |= VMA_ALLOCATION_CREATE_MAPPED_BIT |
+                        VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+        }
+    }
+
+    VulkanGpuBuffer* gpuBuffer = new VulkanGpuBuffer{
+        .numBytes = numBytes,
+        .usage = usage,
+    };
+    VmaAllocationCreateInfo const allocInfo{
+        .flags = vmaFlags,
+        .usage = VMA_MEMORY_USAGE_AUTO,
+        .requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+    };
+    UTILS_UNUSED_IN_RELEASE VkResult result = vmaCreateBuffer(mAllocator, &bufferInfo, &allocInfo,
+            &gpuBuffer->vkbuffer, &gpuBuffer->vmaAllocation, &gpuBuffer->allocationInfo);
+
+#if FVK_ENABLED(FVK_DEBUG_VULKAN_BUFFER_CACHE)
+    if (result != VK_SUCCESS) {
+        FVK_LOGE << "VulkanBufferCache - failed to allocate a new vkBuffer of size " << numBytes
+                 << " and usage " << static_cast<int>(usage) << ", error: " << result
+                 << utils::io::endl;
+    } else {
+        FVK_LOGD << "VulkanBufferCache - allocated a vkBuffer " << gpuBuffer->vkbuffer
+                 << " of size " << numBytes << " and usage = " << static_cast<int>(usage)
+                 << "  successfully" << utils::io::endl;
+    }
+#endif// FVK_DEBUG_VULKAN_BUFFER_CACHE
+
+    return gpuBuffer;
+}
+
+void VulkanBufferCache::destroy(VulkanGpuBuffer const* gpuBuffer) noexcept {
+    // Release the VkBuffer and its VMA allocation first, then free the CPU-side record.
+    vmaDestroyBuffer(mAllocator, gpuBuffer->vkbuffer, gpuBuffer->vmaAllocation);
+    delete gpuBuffer;
+}
+
+VulkanBufferCache::BufferPool& VulkanBufferCache::getPool(VulkanBufferUsage usage) noexcept {
+    // Each concrete usage owns one slot of `mGpuBufferPools`; `UNKNOWN` has no pool.
+    int poolIndex = -1;
+    switch (usage) {
+        case VulkanBufferUsage::VERTEX:
+            poolIndex = 0;
+            break;
+        case VulkanBufferUsage::INDEX:
+            poolIndex = 1;
+            break;
+        case VulkanBufferUsage::UNIFORM:
+            poolIndex = 2;
+            break;
+        case VulkanBufferUsage::SHADER_STORAGE:
+            poolIndex = 3;
+            break;
+        case VulkanBufferUsage::UNKNOWN:
+            PANIC_LOG("There's no pool for buffers with unknown usage.");
+            break;
+    }
+    // NOTE(review): for `UNKNOWN`, `poolIndex` is still -1 here; only this assert guards it.
+    assert_invariant(poolIndex >= 0 && poolIndex < MAX_POOL_COUNT);
+    return mGpuBufferPools[poolIndex];
+}
+
+}// namespace filament::backend
--- a/filament/backend/src/vulkan/VulkanBufferCache.h
+++ b/filament/backend/src/vulkan/VulkanBufferCache.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_VULKANBUFFERCACHE_H
+#define TNT_FILAMENT_BACKEND_VULKANBUFFERCACHE_H
+
+#include "VulkanBuffer.h"
+#include "VulkanContext.h"
+#include "VulkanMemory.h"
+#include "memory/Resource.h"
+#include "memory/ResourceManager.h"
+
+#include <map>
+
+namespace filament::backend {
+
+class VulkanBufferCache {
+public:
+    VulkanBufferCache(VulkanContext const& context, fvkmemory::ResourceManager& resourceManager,
+            VmaAllocator allocator);
+
+    // `VulkanBufferCache` is not copyable.
+    VulkanBufferCache(const VulkanBufferCache&) = delete;
+    VulkanBufferCache& operator=(const VulkanBufferCache&) = delete;
+
+    // Allocates a new device-local VkBuffer, or reuses a pooled one.
+    // In the case of Unified memory architecture, uniform buffers are also host visible.
+    fvkmemory::resource_ptr<VulkanBuffer> acquire(VulkanBufferUsage usage,
+            uint32_t numBytes) noexcept;
+
+    // Evicts old unused `VulkanGpuBuffer` and bumps the current frame number
+    void gc() noexcept;
+
+    // Destroys all unused `VulkanGpuBuffer`.
+    // This should be called while the context's VkDevice is still alive.
+    void terminate() noexcept;
+
+private:
+    struct UnusedGpuBuffer {
+        uint64_t lastAccessed;
+        VulkanGpuBuffer const* gpuBuffer;
+    };
+
+    using BufferPool = std::multimap<uint32_t, UnusedGpuBuffer>;
+
+    // Return a `VulkanGpuBuffer` back to its corresponding pool
+    void release(VulkanGpuBuffer const* gpuBuffer) noexcept;
+
+    // Allocate a new VkBuffer from the VMA pool of the corresponding `numBytes` and `usage`.
+    VulkanGpuBuffer const* allocate(VulkanBufferUsage usage, uint32_t numBytes) noexcept;
+
+    // Destroy the corresponding VkBuffer and return the VkDeviceMemory to the VMA pool.
+    void destroy(VulkanGpuBuffer const* gpuBuffer) noexcept;
+
+    BufferPool& getPool(VulkanBufferUsage usage) noexcept;
+
+    VulkanContext const& mContext;
+    fvkmemory::ResourceManager& mResourceManager;
+    VmaAllocator mAllocator;
+
+    // Buffers can be recycled after they are released. Each type of buffer has its own pool.
+    static constexpr int MAX_POOL_COUNT = 4;
+    BufferPool mGpuBufferPools[MAX_POOL_COUNT];
+
+    // Store the current "time" (really just a frame count) and LRU eviction parameters.
+    uint64_t mCurrentFrame = 0;
+};
+
+}// namespace filament::backend
+
+#endif// TNT_FILAMENT_BACKEND_VULKANBUFFERCACHE_H
--- a/filament/backend/src/vulkan/VulkanBufferProxy.cpp
+++ b/filament/backend/src/vulkan/VulkanBufferProxy.cpp
@@ -14,49 +14,35 @@
 * limitations under the License.
 */

-#include "VulkanBuffer.h"
+#include "VulkanBufferProxy.h"
+#include "VulkanCommands.h"
 #include "VulkanMemory.h"

-#include <utils/Panic.h>
+#include "VulkanBufferCache.h"
+#include "VulkanMemory.h"

 using namespace bluevk;

 namespace filament::backend {

-VulkanBuffer::VulkanBuffer(VmaAllocator allocator, VulkanStagePool& stagePool,
-        VkBufferUsageFlags usage, uint32_t numBytes)
+VulkanBufferProxy::VulkanBufferProxy(VmaAllocator allocator, VulkanStagePool& stagePool,
+        VulkanBufferCache& bufferCache, VulkanBufferUsage usage, uint32_t numBytes)
    : mAllocator(allocator),
      mStagePool(stagePool),
-      mUsage(usage),
+      mBufferCache(bufferCache),
+      mBuffer(mBufferCache.acquire(usage, numBytes)),
      mUpdatedOffset(0),
-      mUpdatedBytes(0) {
-    // for now make sure that only 1 bit is set in usage
-    // (because loadFromCpu() assumes that somewhat)
-    assert_invariant(usage && !(usage & (usage - 1)));
+      mUpdatedBytes(0) {}

-    // Create the VkBuffer.
-    VkBufferCreateInfo bufferInfo {
-        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-        .size = numBytes,
-        .usage = usage | VK_BUFFER_USAGE_TRANSFER_DST_BIT
-    };
-
-    VmaAllocationCreateInfo allocInfo { .usage = VMA_MEMORY_USAGE_GPU_ONLY };
-    vmaCreateBuffer(mAllocator, &bufferInfo, &allocInfo, &mGpuBuffer, &mGpuMemory, nullptr);
-}
-
-VulkanBuffer::~VulkanBuffer() {
-    vmaDestroyBuffer(mAllocator, mGpuBuffer, mGpuMemory);
-}
-
-void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint32_t byteOffset,
-        uint32_t numBytes) {
-    VulkanStage const* stage = mStagePool.acquireStage(numBytes);
-    void* mapped;
-    vmaMapMemory(mAllocator, stage->memory, &mapped);
-    memcpy(mapped, cpuData, numBytes);
-    vmaUnmapMemory(mAllocator, stage->memory);
-    vmaFlushAllocation(mAllocator, stage->memory, 0, numBytes);
+void VulkanBufferProxy::loadFromCpu(VulkanCommandBuffer& commands, const void* cpuData,
+        uint32_t byteOffset, uint32_t numBytes) {
+    // Note: this should be stored within the command buffer before going out of
+    // scope, so that the command buffer can manage its lifecycle.
+    fvkmemory::resource_ptr<VulkanStage::Segment> stage = mStagePool.acquireStage(numBytes);
+    assert_invariant(stage->memory());
+    commands.acquire(stage);
+    memcpy(stage->mapping(), cpuData, numBytes);
+    vmaFlushAllocation(mAllocator, stage->memory(), stage->offset(), numBytes);

    // If there was a previous update, then we need to make sure the following write is properly
    // synced with the previous read.
@@ -64,13 +50,13 @@ void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint
            (byteOffset >= mUpdatedOffset && byteOffset <= (mUpdatedOffset + mUpdatedBytes))) {
        VkAccessFlags srcAccess = 0;
        VkPipelineStageFlags srcStage = 0;
-        if (mUsage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
+        if (getUsage() == VulkanBufferUsage::UNIFORM) {
            srcAccess = VK_ACCESS_SHADER_READ_BIT;
            srcStage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
-        } else if (mUsage & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) {
+        } else if (getUsage() == VulkanBufferUsage::VERTEX) {
            srcAccess = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
            srcStage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
-        } else if (mUsage & VK_BUFFER_USAGE_INDEX_BUFFER_BIT) {
+        } else if (getUsage() == VulkanBufferUsage::INDEX) {
            srcAccess = VK_ACCESS_INDEX_READ_BIT;
            srcStage = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
        }
@@ -81,20 +67,20 @@ void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint
            .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = mGpuBuffer,
+            .buffer = getVkBuffer(),
            .offset = byteOffset,
            .size = numBytes,
        };
-        vkCmdPipelineBarrier(cmdbuf, srcStage, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1,
-                &barrier, 0, nullptr);
+        vkCmdPipelineBarrier(commands.buffer(), srcStage, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0,
+                nullptr, 1, &barrier, 0, nullptr);
    }

    VkBufferCopy region = {
-        .srcOffset = 0,
+        .srcOffset = stage->offset(),
        .dstOffset = byteOffset,
        .size = numBytes,
    };
-    vkCmdCopyBuffer(cmdbuf, stage->buffer, mGpuBuffer, 1, &region);
+    vkCmdCopyBuffer(commands.buffer(), stage->buffer(), getVkBuffer(), 1, &region);

    mUpdatedOffset = byteOffset;
    mUpdatedBytes = numBytes;
@@ -106,16 +92,16 @@ void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint
    VkAccessFlags dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    VkPipelineStageFlags dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;

-    if (mUsage & VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) {
+    if (getUsage() == VulkanBufferUsage::VERTEX) {
        dstAccessMask |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
        dstStageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
-    } else if (mUsage & VK_BUFFER_USAGE_INDEX_BUFFER_BIT) {
+    } else if (getUsage() == VulkanBufferUsage::INDEX) {
        dstAccessMask |= VK_ACCESS_INDEX_READ_BIT;
        dstStageMask |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
-    } else if (mUsage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
+    } else if (getUsage() == VulkanBufferUsage::UNIFORM) {
        dstAccessMask |= VK_ACCESS_SHADER_READ_BIT;
        dstStageMask |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
-    } else if (mUsage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) {
+    } else if (getUsage() == VulkanBufferUsage::SHADER_STORAGE) {
        // TODO: implement me
    }

@@ -125,13 +111,21 @@ void VulkanBuffer::loadFromCpu(VkCommandBuffer cmdbuf, const void* cpuData, uint
        .dstAccessMask = dstAccessMask,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-        .buffer = mGpuBuffer,
+        .buffer = getVkBuffer(),
        .offset = byteOffset,
        .size = numBytes,
    };

-    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, dstStageMask, 0, 0, nullptr, 1,
-            &barrier, 0, nullptr);
+    vkCmdPipelineBarrier(commands.buffer(), VK_PIPELINE_STAGE_TRANSFER_BIT, dstStageMask, 0, 0,
+            nullptr, 1, &barrier, 0, nullptr);
 }

-} // namespace filament::backend
+VkBuffer VulkanBufferProxy::getVkBuffer() const noexcept {
+    return mBuffer->getGpuBuffer()->vkbuffer;
+}
+
+VulkanBufferUsage VulkanBufferProxy::getUsage() const noexcept {
+    return mBuffer->getGpuBuffer()->usage;
+}
+
+}// namespace filament::backend
--- a/filament/backend/src/vulkan/VulkanBufferProxy.h
+++ b/filament/backend/src/vulkan/VulkanBufferProxy.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_VULKANBUFFERPROXY_H
+#define TNT_FILAMENT_BACKEND_VULKANBUFFERPROXY_H
+
+#include "VulkanBufferCache.h"
+#include "VulkanCommands.h"
+#include "VulkanContext.h"
+#include "VulkanMemory.h"
+#include "VulkanStagePool.h"
+
+namespace filament::backend {
+
+// This class acts as a dynamic wrapper for a `VulkanBuffer`. It allows you to modify the
+// `VulkanBuffer` it references at runtime, without affecting any external objects.
+class VulkanBufferProxy {
+public:
+    VulkanBufferProxy(VmaAllocator allocator, VulkanStagePool& stagePool,
+            VulkanBufferCache& bufferCache, VulkanBufferUsage usage, uint32_t numBytes);
+
+    void loadFromCpu(VulkanCommandBuffer& commands, const void* cpuData, uint32_t byteOffset,
+            uint32_t numBytes);
+
+    VkBuffer getVkBuffer() const noexcept;
+
+    VulkanBufferUsage getUsage() const noexcept;
+
+private:
+    VmaAllocator mAllocator;
+    VulkanStagePool& mStagePool;
+    VulkanBufferCache& mBufferCache;
+
+    fvkmemory::resource_ptr<VulkanBuffer> mBuffer;
+    uint32_t mUpdatedOffset = 0;
+    uint32_t mUpdatedBytes = 0;
+};
+
+}// namespace filament::backend
+
+#endif// TNT_FILAMENT_BACKEND_VULKANBUFFERPROXY_H
--- a/filament/backend/src/vulkan/VulkanConstants.h
+++ b/filament/backend/src/vulkan/VulkanConstants.h
@@ -81,6 +81,8 @@
 // All other debug features must be disabled.
 #define FVK_DEBUG_PROFILING               0x00040000

+#define FVK_DEBUG_VULKAN_BUFFER_CACHE     0x00080000
+
 // Useful default combinations
 #define FVK_DEBUG_EVERYTHING              (0xFFFFFFFF & ~FVK_DEBUG_PROFILING)
 #define FVK_DEBUG_PERFORMANCE     \
--- a/filament/backend/src/vulkan/VulkanContext.h
+++ b/filament/backend/src/vulkan/VulkanContext.h
@@ -71,23 +71,23 @@ struct VulkanRenderPass {
 struct VulkanContext {
 public:
    static uint32_t selectMemoryType(VkPhysicalDeviceMemoryProperties const& memoryProperties,
-            uint32_t flags, VkFlags reqs) {
+            uint32_t types, VkFlags reqs) {
        for (uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; i++) {
-            if (flags & 1) {
+            if (types & 1) {
                if ((memoryProperties.memoryTypes[i].propertyFlags & reqs) == reqs) {
                    return i;
                }
            }
-            flags >>= 1;
+            types >>= 1;
        }
        return (uint32_t) VK_MAX_MEMORY_TYPES;
    }

-    inline uint32_t selectMemoryType(uint32_t flags, VkFlags reqs) const {
+    inline uint32_t selectMemoryType(uint32_t types, VkFlags reqs) const {
        if ((reqs & VK_MEMORY_PROPERTY_PROTECTED_BIT) != 0) {
            assert_invariant(isProtectedMemorySupported());
        }
-        return selectMemoryType(mMemoryProperties, flags, reqs);
+        return selectMemoryType(mMemoryProperties, types, reqs);
    }

    inline fvkutils::VkFormatList const& getAttachmentDepthStencilFormats() const {
--- a/filament/backend/src/vulkan/VulkanDescriptorSetCache.cpp
+++ b/filament/backend/src/vulkan/VulkanDescriptorSetCache.cpp
@@ -320,7 +320,7 @@ void VulkanDescriptorSetCache::updateBuffer(fvkmemory::resource_ptr<VulkanDescri
        uint8_t binding, fvkmemory::resource_ptr<VulkanBufferObject> bufferObject,
        VkDeviceSize offset, VkDeviceSize size) noexcept {
    VkDescriptorBufferInfo const info = {
-        .buffer = bufferObject->buffer.getGpuBuffer(),
+        .buffer = bufferObject->buffer.getVkBuffer(),
        .offset = offset,
        .range = size,
    };
--- a/filament/backend/src/vulkan/VulkanDriver.cpp
+++ b/filament/backend/src/vulkan/VulkanDriver.cpp
@@ -19,7 +19,8 @@
 #include "CommandStreamDispatcher.h"
 #include "SystraceProfile.h"
 #include "VulkanAsyncHandles.h"
-#include "VulkanBuffer.h"
+#include "VulkanBufferCache.h"
+#include "VulkanBufferProxy.h"
 #include "VulkanCommands.h"
 #include "VulkanDriverFactory.h"
 #include "VulkanHandles.h"
@@ -84,7 +85,11 @@ VmaAllocator createAllocator(VkInstance instance, VkPhysicalDevice physicalDevic
        .vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2KHR
 #endif
    };
-    VmaAllocatorCreateInfo const allocatorInfo {
+    VmaAllocatorCreateInfo const allocatorInfo{
+        // Disable VMA's internal synchronization because the backend is single-threaded.
+        // This improves CPU performance when calling VMA functions. The backend guarantees that
+        // all access to VMA is done in a thread-safe way.
+        .flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT,
        .physicalDevice = physicalDevice,
        .device = device,
        .pVulkanFunctions = &funcs,
@@ -209,7 +214,8 @@ VulkanDriver::VulkanDriver(VulkanPlatform* platform, VulkanContext const& contex
              mPlatform->getProtectedGraphicsQueueFamilyIndex(), &mContext),
      mPipelineLayoutCache(mPlatform->getDevice()),
      mPipelineCache(mPlatform->getDevice()),
-      mStagePool(mAllocator, &mCommands),
+      mStagePool(mAllocator, &mResourceManager, &mCommands, &mContext.getPhysicalDeviceLimits()),
+      mBufferCache(context, mResourceManager, mAllocator),
      mFramebufferCache(mPlatform->getDevice()),
      mYcbcrConversionCache(mPlatform->getDevice()),
      mSamplerCache(mPlatform->getDevice()),
@@ -328,7 +334,6 @@ void VulkanDriver::terminate() {
    // descriptorSetLayoutCache
    mExternalImageManager.terminate();

-    mStagePool.terminate();
    mPipelineCache.terminate();
    mFramebufferCache.terminate();
    mSamplerCache.terminate();
@@ -339,6 +344,15 @@ void VulkanDriver::terminate() {
    // Before terminating ResourceManager, we must make sure all of the resource_ptrs have been unset.
    mResourceManager.terminate();

+    // Must come after `mResourceManager`.
+    // Before terminating the memory pool, we must make sure all the VulkanBufferMemory are yielded
+    // back to the pool.
+    mBufferCache.terminate();
+
+    // Before terminating stagePool, we need all resources to have been
+    // reclaimed, as they perform cleanup within the stage pool.
+    mStagePool.terminate();
+
 #if FVK_ENABLED(FVK_DEBUG_RESOURCE_LEAK)
    mResourceManager.print();
 #endif
@@ -371,6 +385,7 @@ void VulkanDriver::collectGarbage() {
    mCommands.gc();
    mDescriptorSetCache.gc();
    mStagePool.gc();
+    mBufferCache.gc();
    mFramebufferCache.gc();
    mPipelineCache.gc();

@@ -514,7 +529,7 @@ void VulkanDriver::createIndexBufferR(Handle<HwIndexBuffer> ibh, ElementType ele
    FVK_SYSTRACE_SCOPE();
    auto elementSize = (uint8_t) getElementTypeSize(elementType);
    auto ib = resource_ptr<VulkanIndexBuffer>::make(&mResourceManager, ibh, mAllocator, mStagePool,
-            elementSize, indexCount);
+            mBufferCache, elementSize, indexCount);
    ib.inc();
 }

@@ -531,7 +546,7 @@ void VulkanDriver::createBufferObjectR(Handle<HwBufferObject> boh, uint32_t byte
        BufferObjectBinding bindingType, BufferUsage usage) {
    FVK_SYSTRACE_SCOPE();
    auto bo = resource_ptr<VulkanBufferObject>::make(&mResourceManager, boh, mAllocator, mStagePool,
-            byteCount, bindingType);
+            mBufferCache, byteCount, bindingType);
    bo.inc();
 }

@@ -1223,7 +1238,7 @@ void VulkanDriver::updateIndexBuffer(Handle<HwIndexBuffer> ibh, BufferDescriptor
    VulkanCommandBuffer& commands = mCommands.get();
    auto ib = resource_ptr<VulkanIndexBuffer>::cast(&mResourceManager, ibh);
    commands.acquire(ib);
-    ib->buffer.loadFromCpu(commands.buffer(), p.buffer, byteOffset, p.size);
+    ib->buffer.loadFromCpu(commands, p.buffer, byteOffset, p.size);

    scheduleDestroy(std::move(p));
 }
@@ -1238,7 +1253,7 @@ void VulkanDriver::updateBufferObject(Handle<HwBufferObject> boh, BufferDescript

    auto bo = resource_ptr<VulkanBufferObject>::cast(&mResourceManager, boh);
    commands.acquire(bo);
-    bo->buffer.loadFromCpu(commands.buffer(), bd.buffer, byteOffset, bd.size);
+    bo->buffer.loadFromCpu(commands, bd.buffer, byteOffset, bd.size);

    scheduleDestroy(std::move(bd));
 }
@@ -1249,7 +1264,7 @@ void VulkanDriver::updateBufferObjectUnsynchronized(Handle<HwBufferObject> boh,
    auto bo = resource_ptr<VulkanBufferObject>::cast(&mResourceManager, boh);
    commands.acquire(bo);
    // TODO: implement unsynchronized version
-    bo->buffer.loadFromCpu(commands.buffer(), bd.buffer, byteOffset, bd.size);
+    bo->buffer.loadFromCpu(commands, bd.buffer, byteOffset, bd.size);
    scheduleDestroy(std::move(bd));
 }

@@ -1622,8 +1637,8 @@ void VulkanDriver::readPixels(Handle<HwRenderTarget> src, uint32_t x, uint32_t y
    mReadPixels.run(
            srcTarget, x, y, width, height, mPlatform->getGraphicsQueueFamilyIndex(),
            std::move(pbd),
-            [&context = mContext](uint32_t reqs, VkFlags flags) {
-                return context.selectMemoryType(reqs, flags);
+            [&context = mContext](uint32_t types, VkFlags reqs) {
+                return context.selectMemoryType(types, reqs);
            },
            [this](PixelBufferDescriptor&& pbd) {
                scheduleDestroy(std::move(pbd));
@@ -1898,7 +1913,7 @@ void VulkanDriver::bindRenderPrimitive(Handle<HwRenderPrimitive> rph) {
    // avoid rebinding these if they are already bound, but since we do not (yet) support subranges
    // it would be rare for a client to make consecutive draw calls with the same render primitive.
    vkCmdBindVertexBuffers(cmdbuffer, 0, bufferCount, buffers, offsets);
-    vkCmdBindIndexBuffer(cmdbuffer, prim->indexBuffer->buffer.getGpuBuffer(), 0,
+    vkCmdBindIndexBuffer(cmdbuffer, prim->indexBuffer->buffer.getVkBuffer(), 0,
            prim->indexBuffer->indexType);
 }

--- a/filament/backend/src/vulkan/VulkanDriver.h
+++ b/filament/backend/src/vulkan/VulkanDriver.h
@@ -18,15 +18,17 @@
 #define TNT_FILAMENT_BACKEND_VULKANDRIVER_H

 #include "VulkanBlitter.h"
+#include "VulkanBufferCache.h"
 #include "VulkanConstants.h"
 #include "VulkanContext.h"
 #include "VulkanFboCache.h"
 #include "VulkanHandles.h"
+#include "VulkanMemory.h"
 #include "VulkanPipelineCache.h"
+#include "VulkanQueryManager.h"
 #include "VulkanReadPixels.h"
 #include "VulkanSamplerCache.h"
 #include "VulkanStagePool.h"
-#include "VulkanQueryManager.h"
 #include "VulkanYcbcrConversionCache.h"
 #include "vulkan/VulkanDescriptorSetCache.h"
 #include "vulkan/VulkanDescriptorSetLayoutCache.h"
@@ -138,6 +140,7 @@ private:
    VulkanPipelineLayoutCache mPipelineLayoutCache;
    VulkanPipelineCache mPipelineCache;
    VulkanStagePool mStagePool;
+    VulkanBufferCache mBufferCache;
    VulkanFboCache mFramebufferCache;
    VulkanYcbcrConversionCache mYcbcrConversionCache;
    VulkanSamplerCache mSamplerCache;
--- a/filament/backend/src/vulkan/VulkanHandles.cpp
+++ b/filament/backend/src/vulkan/VulkanHandles.cpp
@@ -583,22 +583,22 @@ void VulkanVertexBuffer::setBuffer(fvkmemory::resource_ptr<VulkanBufferObject> b
    int8_t const* const attribToBuffer = vbi->getAttributeToBuffer();
    for (uint8_t attribIndex = 0; attribIndex < count; attribIndex++) {
        if (attribToBuffer[attribIndex] == static_cast<int8_t>(index)) {
-            vkbuffers[attribIndex] = bufferObject->buffer.getGpuBuffer();
+            vkbuffers[attribIndex] = bufferObject->buffer.getVkBuffer();
        }
    }
    mResources.push_back(bufferObject);
 }

+// Forwards `byteCount` to HwBufferObject and constructs `buffer` with the usage that
+// getBufferObjectUsage() derives from `bindingType`; the binding type itself is kept as well.
 VulkanBufferObject::VulkanBufferObject(VmaAllocator allocator, VulkanStagePool& stagePool,
-        uint32_t byteCount, BufferObjectBinding bindingType)
+        VulkanBufferCache& bufferCache, uint32_t byteCount, BufferObjectBinding bindingType)
    : HwBufferObject(byteCount),
-      buffer(allocator, stagePool, getBufferObjectUsage(bindingType), byteCount),
+      buffer(allocator, stagePool, bufferCache, getBufferObjectUsage(bindingType), byteCount),
      bindingType(bindingType) {}

+// Records the primitive type in the HwRenderPrimitive base and stores the resource_ptrs to the
+// vertex and index buffers passed in.
 VulkanRenderPrimitive::VulkanRenderPrimitive(PrimitiveType pt,
        fvkmemory::resource_ptr<VulkanVertexBuffer> vb,
        fvkmemory::resource_ptr<VulkanIndexBuffer> ib)
-    : HwRenderPrimitive{.type = pt},
+    : HwRenderPrimitive{ .type = pt },
      vertexBuffer(vb),
      indexBuffer(ib) {}

--- a/filament/backend/src/vulkan/VulkanHandles.h
+++ b/filament/backend/src/vulkan/VulkanHandles.h
@@ -21,20 +21,21 @@
 #include "DriverBase.h"

 #include "VulkanAsyncHandles.h"
-#include "VulkanBuffer.h"
+#include "VulkanBufferCache.h"
+#include "VulkanBufferProxy.h"
 #include "VulkanFboCache.h"
 #include "VulkanSwapChain.h"
 #include "VulkanTexture.h"
 #include "vulkan/memory/Resource.h"
-#include "vulkan/utils/StaticVector.h"
 #include "vulkan/utils/Definitions.h"
+#include "vulkan/utils/StaticVector.h"

 #include <backend/Program.h>

-#include <utils/bitset.h>
 #include <utils/FixedCapacityVector.h>
 #include <utils/Mutex.h>
 #include <utils/StructureOfArrays.h>
+#include <utils/bitset.h>

 #include <array>

@@ -428,21 +429,22 @@ private:
 };

+// Index buffer resource. `buffer` is created with VulkanBufferUsage::INDEX and a size of
+// elementSize * indexCount bytes.
 struct VulkanIndexBuffer : public HwIndexBuffer, fvkmemory::Resource {
-    VulkanIndexBuffer(VmaAllocator allocator, VulkanStagePool& stagePool, uint8_t elementSize,
-            uint32_t indexCount)
+    VulkanIndexBuffer(VmaAllocator allocator, VulkanStagePool& stagePool,
+            VulkanBufferCache& bufferCache, uint8_t elementSize, uint32_t indexCount)
        : HwIndexBuffer(elementSize, indexCount),
-          buffer(allocator, stagePool, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, elementSize * indexCount),
+          buffer(allocator, stagePool, bufferCache, VulkanBufferUsage::INDEX,
+                  elementSize * indexCount),
+          // A 2-byte element selects 16-bit indices; any other element size selects 32-bit.
          indexType(elementSize == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32) {}

-    VulkanBuffer buffer;
+    VulkanBufferProxy buffer;
    const VkIndexType indexType;
 };

+// Generic buffer-object resource. The constructor is defined out of line; `bindingType` records
+// the binding the object was created for.
 struct VulkanBufferObject : public HwBufferObject, fvkmemory::Resource {
-    VulkanBufferObject(VmaAllocator allocator, VulkanStagePool& stagePool, uint32_t byteCount,
-            BufferObjectBinding bindingType);
+    VulkanBufferObject(VmaAllocator allocator, VulkanStagePool& stagePool,
+            VulkanBufferCache& bufferCache, uint32_t byteCount, BufferObjectBinding bindingType);

-    VulkanBuffer buffer;
+    VulkanBufferProxy buffer;
    const BufferObjectBinding bindingType;
 };

@@ -455,18 +457,19 @@ struct VulkanRenderPrimitive : public HwRenderPrimitive, fvkmemory::Resource {
    fvkmemory::resource_ptr<VulkanIndexBuffer> indexBuffer;
 };

-inline constexpr VkBufferUsageFlagBits getBufferObjectUsage(
-        BufferObjectBinding bindingType) noexcept {
-    switch(bindingType) {
+// Translates a BufferObjectBinding into the backend's VulkanBufferUsage classification.
+// Returns VulkanBufferUsage::UNKNOWN for any binding type the switch does not handle.
+inline constexpr VulkanBufferUsage getBufferObjectUsage(BufferObjectBinding bindingType) noexcept {
+    switch (bindingType) {
        case BufferObjectBinding::VERTEX:
-            return VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+            return VulkanBufferUsage::VERTEX;
        case BufferObjectBinding::UNIFORM:
-            return VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+            return VulkanBufferUsage::UNIFORM;
        case BufferObjectBinding::SHADER_STORAGE:
-            return VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-        // when adding more buffer-types here, make sure to update VulkanBuffer::loadFromCpu()
-        // if necessary.
+            return VulkanBufferUsage::SHADER_STORAGE;
+        // When adding more buffer types here, make sure to update the buffer's loadFromCpu()
+        // if necessary. NOTE(review): the buffers are now VulkanBufferProxy objects, so this
+        // presumably means VulkanBufferProxy::loadFromCpu() -- confirm.
    }
+
+    return VulkanBufferUsage::UNKNOWN;
 }

 } // namespace filament::backend
--- a/filament/backend/src/vulkan/VulkanMemory.h
+++ b/filament/backend/src/vulkan/VulkanMemory.h
@@ -37,4 +37,24 @@ VK_DEFINE_HANDLE(VmaAllocator)
 VK_DEFINE_HANDLE(VmaAllocation)
 VK_DEFINE_HANDLE(VmaPool)

+namespace filament::backend {
+
+// Backend-level classification of what a buffer is used for. UNKNOWN is the value used when no
+// specific usage applies (see the defaults below).
+enum class VulkanBufferUsage : uint8_t {
+    UNKNOWN,
+    VERTEX,
+    INDEX,
+    UNIFORM,
+    SHADER_STORAGE,
+};
+
+// Bundles a VkBuffer with its VMA allocation and related bookkeeping. Every member carries a
+// default initializer, so a default-constructed VulkanGpuBuffer holds no indeterminate data.
+struct VulkanGpuBuffer {
+    VkBuffer vkbuffer = VK_NULL_HANDLE;
+    VmaAllocation vmaAllocation = VK_NULL_HANDLE;
+    // Value-initialized (zeroed) like the other members, so it is safe to read or copy before
+    // VMA has filled it in.
+    VmaAllocationInfo allocationInfo = {};
+    uint32_t numBytes = 0;  // presumably the size of `vkbuffer` in bytes -- TODO confirm
+    VulkanBufferUsage usage = VulkanBufferUsage::UNKNOWN;
+};
+
+} // namespace filament::backend
+
 #endif // TNT_FILAMENT_BACKEND_VULKANMEMORY_H
--- a/filament/backend/src/vulkan/VulkanReadPixels.cpp
+++ b/filament/backend/src/vulkan/VulkanReadPixels.cpp
@@ -143,7 +143,7 @@ void VulkanReadPixels::run(fvkmemory::resource_ptr<VulkanRenderTarget> srcTarget
        mTaskHandler = std::make_unique<TaskHandler>();
    }

-    VkCommandPool& cmdpool = mCommandPool;
+    VkCommandPool const cmdpool = mCommandPool;

    fvkmemory::resource_ptr<VulkanTexture> srcTexture = srcTarget->getColor0().texture;
    assert_invariant(srcTexture);
@@ -152,17 +152,17 @@ void VulkanReadPixels::run(fvkmemory::resource_ptr<VulkanRenderTarget> srcTarget
            = srcFormat == VK_FORMAT_B8G8R8A8_UNORM || srcFormat == VK_FORMAT_B8G8R8A8_SRGB;

    // Create a host visible, linearly tiled image as a staging area.
-    VkImageCreateInfo const imageInfo{
-            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
-            .imageType = VK_IMAGE_TYPE_2D,
-            .format = srcFormat,
-            .extent = {width, height, 1},
-            .mipLevels = 1,
-            .arrayLayers = 1,
-            .samples = VK_SAMPLE_COUNT_1_BIT,
-            .tiling = VK_IMAGE_TILING_LINEAR,
-            .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
-            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+    VkImageCreateInfo const imageInfo = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .format = srcFormat,
+        .extent = { width, height, 1 },
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .tiling = VK_IMAGE_TILING_LINEAR,
+        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    };

    VkImage stagingImage;
@@ -196,20 +196,20 @@ void VulkanReadPixels::run(fvkmemory::resource_ptr<VulkanRenderTarget> srcTarget
            << "VulkanReadPixels: unable to find a memory type that meets requirements.";

    VkMemoryAllocateInfo const allocInfo = {
-            .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-            .allocationSize = memReqs.size,
-            .memoryTypeIndex = memoryTypeIndex,
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .allocationSize = memReqs.size,
+        .memoryTypeIndex = memoryTypeIndex,
    };

    vkAllocateMemory(device, &allocInfo, VKALLOC, &stagingMemory);
    vkBindImageMemory(device, stagingImage, stagingMemory, 0);

    VkCommandBuffer cmdbuffer;
-    VkCommandBufferAllocateInfo const allocateInfo{
-            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
-            .commandPool = cmdpool,
-            .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-            .commandBufferCount = 1,
+    VkCommandBufferAllocateInfo const allocateInfo = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .commandPool = cmdpool,
+        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+        .commandBufferCount = 1,
    };
    vkAllocateCommandBuffers(device, &allocateInfo, &cmdbuffer);

@@ -304,7 +304,6 @@ void VulkanReadPixels::run(fvkmemory::resource_ptr<VulkanRenderTarget> srcTarget
                                 cmdpool, cmdbuffer, pUserBuffer,
                                 fence = readCompleteFence]() mutable {
        VkResult status = vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
-        // Fence hasn't been reached. Try waiting again.
        if (status != VK_SUCCESS) {
            FVK_LOGE << "Failed to wait for readPixels fence" << utils::io::endl;
            return;
--- a/filament/backend/src/vulkan/VulkanStagePool.cpp
+++ b/filament/backend/src/vulkan/VulkanStagePool.cpp
@@ -28,46 +28,111 @@ static constexpr uint32_t TIME_BEFORE_EVICTION = 3;

 namespace filament::backend {

-VulkanStagePool::VulkanStagePool(VmaAllocator allocator, VulkanCommands* commands)
-    : mAllocator(allocator),
-      mCommands(commands) {}
+namespace {

-VulkanStage const* VulkanStagePool::acquireStage(uint32_t numBytes) {
-    // First check if a stage exists whose capacity is greater than or equal to the requested size.
-    auto iter = mFreeStages.lower_bound(numBytes);
-    if (iter != mFreeStages.end()) {
-        auto stage = iter->second;
-        mFreeStages.erase(iter);
-        stage->lastAccessed = mCurrentFrame;
-        mUsedStages.push_back(stage);
-        return stage;
-    }
-    // We were not able to find a sufficiently large stage, so create a new one.
-    VulkanStage* stage = new VulkanStage({
-        .memory = VK_NULL_HANDLE,
-        .buffer = VK_NULL_HANDLE,
-        .capacity = numBytes,
-        .lastAccessed = mCurrentFrame,
+// Tuning knobs for the staging pool. Both values are provisional and are expected to become
+// configurable.
+// Upper bound on the number of segment-free stages that gc() keeps alive.
+constexpr uint32_t MAX_EMPTY_STAGES_TO_RETAIN = 1;
+// Minimum size, in bytes, of a newly allocated stage (1 MiB).
+constexpr uint32_t STAGE_SIZE = 1024 * 1024;
+
+} // namespace
+
+// Hands out the next `numBytes` of this stage, starting at mCurrentOffset, as a Segment and
+// advances mCurrentOffset past it.
+// NOTE(review): there is no check here that `numBytes` fits in the remaining capacity; the
+// visible caller (VulkanStagePool::acquireStage) picks a stage with enough room first.
+fvkmemory::resource_ptr<VulkanStage::Segment> VulkanStage::acquireSegment(
+        fvkmemory::ResourceManager* resManager, uint32_t numBytes) {
+    auto segment = fvkmemory::resource_ptr<Segment>::construct(
+        resManager, this, numBytes, mCurrentOffset, [this](uint32_t offset) {
+            // Invoked from ~Segment(): drop the bookkeeping entry so that isSafeToReset()
+            // turns true once the last segment is gone.
+            mSegments.erase(offset);
    });
+    // Track the live segment by its start offset, then move the allocation cursor forward.
+    mSegments.insert({mCurrentOffset, segment.get()});
+    mCurrentOffset += numBytes;
+    return segment;
+}

-    // Create the VkBuffer.
-    mUsedStages.push_back(stage);
-    VkBufferCreateInfo bufferInfo {
+// Stores the allocator, resource manager, command manager and device limits for later use.
+// Nothing is allocated here.
+VulkanStagePool::VulkanStagePool(VmaAllocator allocator, fvkmemory::ResourceManager* resManager,
+        VulkanCommands* commands, const VkPhysicalDeviceLimits* deviceLimits)
+    : mAllocator{ allocator },
+      mResManager{ resManager },
+      mCommands{ commands },
+      mDeviceLimits{ deviceLimits } {}
+
+// Returns a segment of at least `numBytes` bytes, taken from an existing stage with enough
+// room or from a newly allocated one.
+fvkmemory::resource_ptr<VulkanStage::Segment> VulkanStagePool::acquireStage(uint32_t numBytes) {
+    // Round the request up to the non-coherent atom size so that a later flush of the
+    // host-written data only covers atoms belonging to this segment.
+    uint32_t const alignedBytes = alignToNonCoherentAtomSize(numBytes);
+
+    // mStages is keyed by remaining capacity, so lower_bound() yields the tightest stage
+    // that can still hold the request.
+    VulkanStage* stage = nullptr;
+    auto const candidate = mStages.lower_bound(alignedBytes);
+    if (candidate == mStages.end()) {
+        // Nothing fits: allocate a stage that is at least STAGE_SIZE bytes.
+        stage = allocateNewStage(std::max(alignedBytes, STAGE_SIZE));
+    } else {
+        // Pull the stage out of the map; it is re-keyed below.
+        stage = candidate->second;
+        mStages.erase(candidate);
+    }
+
+    // This advances the stage's current offset and registers the segment with the stage;
+    // the segment unregisters itself when it is destroyed.
+    auto segment = stage->acquireSegment(mResManager, alignedBytes);
+
+    // Put the stage back under its new remaining capacity.
+    uint32_t const remaining = stage->capacity() - stage->currentOffset();
+    mStages.emplace(remaining, stage);
+
+    return segment;
+}
+
+// Rounds `bytes` up to the next multiple of the device's nonCoherentAtomSize. The value is
+// returned unchanged when it is already a multiple or when the reported atom size is zero.
+uint32_t VulkanStagePool::alignToNonCoherentAtomSize(uint32_t bytes) {
+    VkDeviceSize const atomSize = mDeviceLimits->nonCoherentAtomSize;
+    if (atomSize != 0) {
+        VkDeviceSize const excess = bytes % atomSize;
+        if (excess != 0) {
+            return bytes + (atomSize - excess);
+        }
+    }
+    return bytes;
+}
+
+// Creates a TRANSFER_SRC buffer in VMA_MEMORY_USAGE_CPU_ONLY memory, maps it, and wraps the
+// result in a heap-allocated VulkanStage that the caller owns (see destroyStage()).
+// The buffer is created with `capacity` rounded up to the non-coherent atom size, while the
+// VulkanStage records the unrounded `capacity`.
+// On failure the returned stage carries null handles and/or a null mapping; errors are only
+// logged when FVK_DEBUG_STAGING_ALLOCATION is enabled.
+VulkanStage* VulkanStagePool::allocateNewStage(uint32_t capacity) {
+    VkBufferCreateInfo bufferInfo{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-        .size = numBytes,
+        .size = alignToNonCoherentAtomSize(capacity),
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
    };
    VmaAllocationCreateInfo allocInfo { .usage = VMA_MEMORY_USAGE_CPU_ONLY };
-    UTILS_UNUSED_IN_RELEASE VkResult result = vmaCreateBuffer(mAllocator, &bufferInfo,
-            &allocInfo, &stage->buffer, &stage->memory, nullptr);
+    // Start from null handles instead of relying on vmaCreateBuffer() to write its outputs
+    // on failure, so the VulkanStage built below never receives indeterminate values.
+    VkBuffer buffer = VK_NULL_HANDLE;
+    VmaAllocation memory = VK_NULL_HANDLE;
+    VkResult result =
+            vmaCreateBuffer(mAllocator, &bufferInfo, &allocInfo, &buffer, &memory, nullptr);

 #if FVK_ENABLED(FVK_DEBUG_STAGING_ALLOCATION)
    if (result != VK_SUCCESS) {
        FVK_LOGE << "Allocation error: " << result << utils::io::endl;
+    } else {
+        FVK_LOGD << "Allocated stage with hndl " << buffer << utils::io::endl;
    }
 #endif

-    return stage;
+    // Map the whole stage once, and only if the buffer was actually created.
+    void* pMapping = nullptr;
+    if (result == VK_SUCCESS) {
+        result = vmaMapMemory(mAllocator, memory, &pMapping);
+
+#if FVK_ENABLED(FVK_DEBUG_STAGING_ALLOCATION)
+        if (result != VK_SUCCESS) {
+            FVK_LOGE << "Memory mapping error: " << result << utils::io::endl;
+        }
+#endif
+    }
+
+    return new VulkanStage(memory, buffer, capacity, pMapping);
+}
+
+// Releases everything owned by `stage`: the host mapping (if any), the buffer together with
+// its allocation, and finally the VulkanStage object itself. `stage` must have no live
+// segments, and the pointer is dangling once this returns.
+void VulkanStagePool::destroyStage(VulkanStage const*&& stage) {
+    assert(stage->isSafeToReset());  // Ensure all segments have been reset already.
+    // allocateNewStage() leaves the mapping null when buffer creation or mapping failed.
+    // Unmapping memory that was never mapped is invalid, so skip it in that case.
+    if (stage->mapping() != nullptr) {
+        vmaUnmapMemory(mAllocator, stage->memory());
+    }
+    // vmaDestroyBuffer() accepts null handles, so no extra guard is needed here.
+    vmaDestroyBuffer(mAllocator, stage->buffer(), stage->memory());
+    delete stage;
+}

 VulkanStageImage const* VulkanStagePool::acquireImage(PixelDataFormat format, PixelDataType type,
@@ -141,27 +206,34 @@ void VulkanStagePool::gc() noexcept {
    }
    const uint64_t evictionTime = mCurrentFrame - TIME_BEFORE_EVICTION;

-    // Destroy buffers that have not been used for several frames.
-    decltype(mFreeStages) freeStages;
-    freeStages.swap(mFreeStages);
-    for (auto pair : freeStages) {
-        if (pair.second->lastAccessed < evictionTime) {
-            vmaDestroyBuffer(mAllocator, pair.second->buffer, pair.second->memory);
-            delete pair.second;
-        } else {
-            mFreeStages.insert(pair);
-        }
-    }
+    decltype(mStages) freeStages;
+    freeStages.swap(mStages);
+    uint8_t freeStageCount = 0;  // Assuming we'll never have > 255 free stages
+    for (auto& pair : freeStages) {
+        // First, find any stages that have no segments within them.
+        if (pair.second->isSafeToReset()) {
+            if (++freeStageCount > MAX_EMPTY_STAGES_TO_RETAIN) {
+#if FVK_ENABLED(FVK_DEBUG_STAGING_ALLOCATION)
+                FVK_LOGD << "Destroying a staging buffer with hndl " << pair.second->buffer()
+                         << utils::io::endl;
+#endif
+                destroyStage(std::move(pair.second));
+                continue;
+            }

-    // Reclaim buffers that are no longer being used by any command buffer.
-    decltype(mUsedStages) usedStages;
-    usedStages.swap(mUsedStages);
-    for (auto stage : usedStages) {
-        if (stage->lastAccessed < evictionTime) {
-            stage->lastAccessed = mCurrentFrame;
-            mFreeStages.insert(std::make_pair(stage->capacity, stage));
+#if FVK_ENABLED(FVK_DEBUG_STAGING_ALLOCATION)
+            if (pair.first == 0) {
+                FVK_LOGD << "Recycling an unused staging buffer with hndl " << pair.second->buffer()
+                         << utils::io::endl;
+            }
+#endif
+
+            // Note - this stage is free (it has no live segments); rewind its
+            // offset and reinsert it into our stage list with its full capacity.
+            pair.second->reset();
+            mStages.insert({ pair.second->capacity(), pair.second });
        } else {
-            mUsedStages.push_back(stage);
+            mStages.insert(pair);
        }
    }

@@ -192,17 +264,10 @@ void VulkanStagePool::gc() noexcept {
 }

 void VulkanStagePool::terminate() noexcept {
-    for (auto stage : mUsedStages) {
-        vmaDestroyBuffer(mAllocator, stage->buffer, stage->memory);
-        delete stage;
+    for (auto& pair : mStages) {
+        destroyStage(std::move(pair.second));
    }
-    mUsedStages.clear();
-
-    for (auto pair : mFreeStages) {
-        vmaDestroyBuffer(mAllocator, pair.second->buffer, pair.second->memory);
-        delete pair.second;
-    }
-    mFreeStages.clear();
+    mStages.clear();

    for (auto image : mUsedImages) {
        vmaDestroyImage(mAllocator, image->image, image->memory);
--- a/filament/backend/src/vulkan/VulkanStagePool.h
+++ b/filament/backend/src/vulkan/VulkanStagePool.h
@@ -17,8 +17,11 @@
 #ifndef TNT_FILAMENT_BACKEND_VULKANSTAGEPOOL_H
 #define TNT_FILAMENT_BACKEND_VULKANSTAGEPOOL_H

-#include "backend/DriverEnums.h"
 #include "VulkanMemory.h"
+#include "backend/DriverEnums.h"
+#include "vulkan/memory/Resource.h"
+#include "vulkan/memory/ResourceManager.h"
+#include "vulkan/memory/ResourcePointer.h"

+#include <functional>
 #include <map>
+#include <unordered_map>
 #include <unordered_set>
+#include <utility>
@@ -28,12 +31,96 @@ namespace filament::backend {

 class VulkanCommands;

-// Immutable POD representing a shared CPU-GPU staging area.
-struct VulkanStage {
-    VmaAllocation memory;
-    VkBuffer buffer;
-    uint32_t capacity;
-    mutable uint64_t lastAccessed;
+// Object representing a shared CPU-GPU staging area, which can be subdivided
+// into smaller buffers (segments) as needed. Segments are handed out linearly
+// from the start of the stage; the stage can be rewound with reset() once no
+// segment is alive anymore.
+class VulkanStage {
+public:
+    // Wraps an already created buffer/allocation pair. `mapping` is the host
+    // address of the start of the buffer, or nullptr if it is not mapped.
+    VulkanStage(VmaAllocation memory, VkBuffer buffer, uint32_t capacity, void* mapping)
+        : mMemory(memory),
+          mBuffer(buffer),
+          mCapacity(capacity),
+          mMapping(mapping) {}
+
+    ~VulkanStage() = default;
+    VulkanStage(const VulkanStage& other) = delete;
+    VulkanStage(VulkanStage&& other) = delete;
+    VulkanStage& operator=(const VulkanStage& other) = delete;
+    VulkanStage& operator=(VulkanStage&& other) = delete;
+
+    // A sub-range of a VulkanStage, identified by its offset and capacity.
+    // On destruction it invokes the recycle callback with its offset so the
+    // parent stage can drop its bookkeeping entry.
+    class Segment : public fvkmemory::Resource {
+    public:
+        using OnRecycle = std::function<void(uint32_t offset)>;
+
+        Segment(VulkanStage* parentStage, uint32_t capacity, uint32_t offset,
+                OnRecycle&& onRecycleFn)
+            : mParentStage(parentStage),
+              mCapacity(capacity),
+              mOffset(offset),
+              // A named rvalue reference is an lvalue; move it explicitly so
+              // the callback is not copied.
+              mOnRecycleFn(std::move(onRecycleFn)) {}
+
+        ~Segment() {
+            if (mOnRecycleFn) {
+                mOnRecycleFn(offset());
+            }
+        }
+
+        // Should not be copying this around.
+        Segment(const Segment& other) = delete;
+        Segment(Segment&& other) = delete;
+        Segment& operator=(const Segment& other) = delete;
+        Segment& operator=(Segment&& other) = delete;
+
+        inline VulkanStage* parentStage() const { return mParentStage; }
+        inline VkBuffer buffer() const { return parentStage()->buffer(); }
+        inline VmaAllocation memory() const { return parentStage()->memory(); }
+        inline uint32_t capacity() const { return mCapacity; }
+        inline uint32_t offset() const { return mOffset; }
+
+        // Host address of this segment: the parent's mapping plus offset().
+        inline void* mapping() const {
+            return reinterpret_cast<void*>(
+                    reinterpret_cast<char*>(mParentStage->mapping()) + offset());
+        }
+
+    private:
+        // Grants the owning VulkanStage access to Segment's private members.
+        friend class VulkanStage;
+
+        VulkanStage* const mParentStage;
+        const uint32_t mCapacity;
+        const uint32_t mOffset;
+        OnRecycle mOnRecycleFn;
+    };
+
+    inline VmaAllocation memory() const { return mMemory; }
+    inline VkBuffer buffer() const { return mBuffer; }
+    inline uint32_t capacity() const { return mCapacity; }
+    inline void* mapping() const { return mMapping; }
+
+    // Offset at which the next segment will start.
+    inline uint32_t currentOffset() const { return mCurrentOffset; }
+
+    // True when no segment of this stage is alive.
+    inline bool isSafeToReset() const { return mSegments.empty(); }
+
+    // Rewinds the allocation cursor to the start of the stage. Does not touch
+    // mSegments; callers are expected to check isSafeToReset() first.
+    inline void reset() { mCurrentOffset = 0; }
+
+    // Marks a region of the block as "in-use", and provides information about
+    // the allocated region to the caller. Note: this assumes that numBytes
+    // is aligned to the physical device's nonCoherentAtomSize.
+    fvkmemory::resource_ptr<Segment> acquireSegment(fvkmemory::ResourceManager* resManager,
+            uint32_t numBytes);
+
+private:
+    const VmaAllocation mMemory;
+    const VkBuffer mBuffer;
+    const uint32_t mCapacity;
+
+    void* mMapping;
+
+    uint32_t mCurrentOffset = 0;
+
+    // Maps the start offset of each live segment to the segment itself, for
+    // easy deletion later. Entries are added in acquireSegment() and removed
+    // by the segment's recycle callback when the segment is destroyed.
+    std::unordered_map<uint32_t, Segment*> mSegments;
+};

 struct VulkanStageImage {
@@ -49,11 +136,15 @@ struct VulkanStageImage {
 // This class manages two types of host-mappable staging areas: buffer stages and image stages.
 class VulkanStagePool {
 public:
-    VulkanStagePool(VmaAllocator allocator, VulkanCommands* commands);
+    VulkanStagePool(VmaAllocator allocator, fvkmemory::ResourceManager* resManager,
+            VulkanCommands* commands, const VkPhysicalDeviceLimits* deviceLimits);

-    // Finds or creates a stage whose capacity is at least the given number of bytes.
-    // The stage is automatically released back to the pool after TIME_BEFORE_EVICTION frames.
-    VulkanStage const* acquireStage(uint32_t numBytes);
+    // Finds or creates a stage block whose capacity is at least the given
+    // number of bytes. Internally, creates and manages and subdivides large
+    // buffers so that we have less objects around that we have to keep track
+    // of.
+    // This function is NOT thread-safe.
+    fvkmemory::resource_ptr<VulkanStage::Segment> acquireStage(uint32_t numBytes);

    // Images have VK_IMAGE_LAYOUT_GENERAL and must not be transitioned to any other layout
    VulkanStageImage const* acquireImage(PixelDataFormat format, PixelDataType type,
@@ -64,17 +155,37 @@ public:

    // Destroys all unused stages and asserts that there are no stages currently in use.
    // This should be called while the context's VkDevice is still alive.
+    // Note: it is expected that all resources have been reclaimed before this
+    // is called. It is also expected that this stage pool does not hold any
+    // resource_ptrs, as this would lead to undefined behavior.
    void terminate() noexcept;

 private:
    VmaAllocator mAllocator;
+    fvkmemory::ResourceManager* mResManager;
    VulkanCommands* mCommands;
+    const VkPhysicalDeviceLimits* mDeviceLimits;
+
+    // Takes a number of bytes, and aligns it to the non-coherent atom size.
+    // This allows us to ensure that when we flush buffers from the host, we
+    // never flush more atoms than we need to.
+    uint32_t alignToNonCoherentAtomSize(uint32_t numBytes);
+
+    // Allocates a new stage buffer of (at least) `capacity` bytes, maps it,
+    // and wraps it in a VulkanStage. The stage is not subdivided up front;
+    // segments are carved out of it on demand by acquireStage(). The
+    // returned object is owned by the pool and must be released with
+    // destroyStage().
+    VulkanStage* allocateNewStage(uint32_t capacity);
+
+    // Performs any bookkeeping required to delete a VulkanStage object; namely,
+    // unmapping memory, freeing the allocation, and deleting the VulkanStage
+    // object. Note: takes an r-value because after this call, `stage` won't
+    // exist.
+    void destroyStage(VulkanStage const*&& stage);

    // Use an ordered multimap for quick (capacity => stage) lookups using lower_bound().
-    std::multimap<uint32_t, VulkanStage const*> mFreeStages;
-
-    // Simple unordered set for stashing a list of in-use stages that can be reclaimed later.
-    std::vector<VulkanStage const*> mUsedStages;
+    std::multimap<uint32_t, VulkanStage*> mStages;

    std::unordered_set<VulkanStageImage const*> mFreeImages;
    std::vector<VulkanStageImage const*> mUsedImages;
--- a/filament/backend/src/vulkan/VulkanTexture.cpp
+++ b/filament/backend/src/vulkan/VulkanTexture.cpp
@@ -25,6 +25,7 @@
 #include <backend/DriverEnums.h>
 #include <private/backend/BackendUtils.h>

+#include <utils/compiler.h>
 #include <utils/Panic.h>

 using namespace bluevk;
@@ -218,6 +219,40 @@ VkImageUsageFlags getUsage(VulkanContext const& context, uint8_t samples,
    return usage;
 }

+// Copies a width x height x depth pixel block from `p` into `mapped`, tightly packed, honoring
+// p.left, p.top and p.stride. `mapped` must have room for width * height * depth pixels.
+void adjustedMemcpy(void* mapped, PixelBufferDescriptor const& p, size_t width, size_t height,
+        size_t depth) {
+    uint8_t const* const src = (uint8_t const*) p.buffer;
+    size_t const pixelSize = PixelBufferDescriptor::computeDataSize(p.format, p.type, 1, 1, 1);
+    size_t const pbdStride = p.stride ? p.stride : width;
+
+    assert_invariant(pbdStride >= width);
+    if (UTILS_UNLIKELY(p.left > 0 || p.top > 0 || pbdStride > width)) {
+        // Slow path: copy row by row, skipping the source's left/top offsets and row padding.
+        size_t const pbdRowSize =
+                PixelBufferDescriptor::computeDataSize(p.format, p.type, pbdStride, 1, p.alignment);
+        size_t const pbdHeight = p.size / pixelSize / pbdStride / depth;
+        size_t const pbdLayerSize = pbdRowSize * pbdHeight;
+        size_t const rowSize = width * pixelSize;
+        size_t const layerSize = width * height * pixelSize;
+        size_t const writeSize = std::min(pbdStride - p.left, width) * pixelSize;
+        // Never copy more than `height` rows per layer: the destination only has room for that
+        // many, so extra source rows would spill into the next layer or past the mapping.
+        size_t const lastRow = std::min<size_t>(pbdHeight, p.top + height);
+        for (size_t z = 0; z < depth; z++) {
+            for (size_t y = p.top; y < lastRow; y++) {
+                uint8_t const* const row =
+                        src + ((p.left * pixelSize) + (y * pbdRowSize) + (z * pbdLayerSize));
+                uint8_t* curMapped = (uint8_t*) mapped + ((y - p.top) * rowSize + z * layerSize);
+                memcpy(curMapped, row, writeSize);
+            }
+        }
+    } else {
+        memcpy(mapped, src, pixelSize * (width * height * depth));
+    }
+}
+
 } // anonymous namespace

 VulkanTextureState::VulkanTextureState(VulkanStagePool& stagePool, VulkanCommands* commands,
@@ -480,30 +515,40 @@ void VulkanTexture::updateImage(const PixelBufferDescriptor& data, uint32_t widt
    assert_invariant(hostData->size > 0 && "Data is empty");

    // Otherwise, use vkCmdCopyBufferToImage.
-    void* mapped = nullptr;
-    VulkanStage const* stage = mState->mStagePool.acquireStage(hostData->size);
-    assert_invariant(stage->memory);
-    vmaMapMemory(mState->mAllocator, stage->memory, &mapped);
-    memcpy(mapped, hostData->buffer, hostData->size);
-    vmaUnmapMemory(mState->mAllocator, stage->memory);
-    vmaFlushAllocation(mState->mAllocator, stage->memory, 0, hostData->size);
+    size_t const bpp =
+            PixelBufferDescriptor::computeDataSize(hostData->format, hostData->type, 1, 1, 1);
+    size_t const writeSize = width * height * depth * bpp;
+
+    // Note: the following stageSegment must be stored within the command buffer
+    // before going out of scope, to ensure proper bookkeeping within the
+    // staging buffer pool.
+    fvkmemory::resource_ptr<VulkanStage::Segment> stageSegment =
+            mState->mStagePool.acquireStage(writeSize);
+    assert_invariant(stageSegment->memory());
+    adjustedMemcpy(stageSegment->mapping(), *hostData, width, height, depth);
+    vmaFlushAllocation(mState->mAllocator, stageSegment->memory(), stageSegment->offset(),
+            writeSize);

    VulkanCommandBuffer& commands = mState->mCommands->get();
    VkCommandBuffer const cmdbuf = commands.buffer();
+    commands.acquire(stageSegment);
    commands.acquire(fvkmemory::resource_ptr<VulkanTexture>::cast(this));

+    bool const isDepth = getImageAspect() & VK_IMAGE_ASPECT_DEPTH_BIT;
+
    VkBufferImageCopy copyRegion = {
-        .bufferOffset = {},
+        .bufferOffset = stageSegment->offset(),
        .bufferRowLength = {},
        .bufferImageHeight = {},
        .imageSubresource = {
-            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+            .aspectMask = VkImageAspectFlags(
+                    isDepth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT),
            .mipLevel = miplevel,
            .baseArrayLayer = 0,
-            .layerCount = 1
+            .layerCount = 1,
        },
        .imageOffset = { int32_t(xoffset), int32_t(yoffset), int32_t(zoffset) },
-        .imageExtent = { width, height, depth }
+        .imageExtent = { width, height, depth },
    };

    VkImageSubresourceRange transitionRange = {
@@ -511,7 +556,7 @@ void VulkanTexture::updateImage(const PixelBufferDescriptor& data, uint32_t widt
        .baseMipLevel = miplevel,
        .levelCount = 1,
        .baseArrayLayer = 0,
-        .layerCount = 1
+        .layerCount = 1,
    };

    // Vulkan specifies subregions for 3D textures differently than from 2D arrays.
@@ -536,20 +581,25 @@ void VulkanTexture::updateImage(const PixelBufferDescriptor& data, uint32_t widt

    transitionLayout(&commands, transitionRange, newLayout);

-    vkCmdCopyBufferToImage(cmdbuf, stage->buffer, mState->mTextureImage, newVkLayout, 1, &copyRegion);
+    vkCmdCopyBufferToImage(cmdbuf, stageSegment->buffer(), mState->mTextureImage, newVkLayout, 1,
+            &copyRegion);

    transitionLayout(&commands, transitionRange, nextLayout);
 }

-void VulkanTexture::updateImageWithBlit(const PixelBufferDescriptor& hostData, uint32_t width,
+void VulkanTexture::updateImageWithBlit(const PixelBufferDescriptor& data, uint32_t width,
        uint32_t height, uint32_t depth, uint32_t miplevel) {
+    // Byte count of the tightly packed pixels staged below; also the range that gets flushed.
+    size_t const bpp = PixelBufferDescriptor::computeDataSize(data.format, data.type, 1, 1, 1);
+    size_t const writeSize = width * height * depth * bpp;
+
    void* mapped = nullptr;
    VulkanStageImage const* stage
-            = mState->mStagePool.acquireImage(hostData.format, hostData.type, width, height);
+            = mState->mStagePool.acquireImage(data.format, data.type, width, height);
    vmaMapMemory(mState->mAllocator, stage->memory, &mapped);
-    memcpy(mapped, hostData.buffer, hostData.size);
+    adjustedMemcpy(mapped, data, width, height, depth);
    vmaUnmapMemory(mState->mAllocator, stage->memory);
-    vmaFlushAllocation(mState->mAllocator, stage->memory, 0, hostData.size);
+    vmaFlushAllocation(mState->mAllocator, stage->memory, 0, writeSize);

    VulkanCommandBuffer& commands = mState->mCommands->get();
    VkCommandBuffer const cmdbuf = commands.buffer();
--- a/filament/backend/src/vulkan/memory/Resource.cpp
+++ b/filament/backend/src/vulkan/memory/Resource.cpp
@@ -26,6 +26,7 @@ template ResourceType getTypeEnum<VulkanIndexBuffer>() noexcept;
 template ResourceType getTypeEnum<VulkanProgram>() noexcept;
 template ResourceType getTypeEnum<VulkanRenderTarget>() noexcept;
 template ResourceType getTypeEnum<VulkanSwapChain>() noexcept;
+template ResourceType getTypeEnum<VulkanStage::Segment>() noexcept;
 template ResourceType getTypeEnum<VulkanRenderPrimitive>() noexcept;
 template ResourceType getTypeEnum<VulkanTexture>() noexcept;
 template ResourceType getTypeEnum<VulkanTextureState>() noexcept;
@@ -35,6 +36,7 @@ template ResourceType getTypeEnum<VulkanVertexBufferInfo>() noexcept;
 template ResourceType getTypeEnum<VulkanDescriptorSetLayout>() noexcept;
 template ResourceType getTypeEnum<VulkanDescriptorSet>() noexcept;
 template ResourceType getTypeEnum<VulkanFence>() noexcept;
+template ResourceType getTypeEnum<VulkanBuffer>() noexcept;

 template<typename D>
 ResourceType getTypeEnum() noexcept {
@@ -53,6 +55,9 @@ ResourceType getTypeEnum() noexcept {
    if constexpr (std::is_same_v<D, VulkanSwapChain>) {
        return ResourceType::SWAP_CHAIN;
    }
+    if constexpr (std::is_same_v<D, VulkanStage::Segment>) {
+        return ResourceType::STAGE_SEGMENT;
+    }
    if constexpr (std::is_same_v<D, VulkanRenderPrimitive>) {
        return ResourceType::RENDER_PRIMITIVE;
    }
@@ -80,6 +85,9 @@ ResourceType getTypeEnum() noexcept {
    if constexpr (std::is_same_v<D, VulkanFence>) {
        return ResourceType::FENCE;
    }
+    if constexpr (std::is_same_v<D, VulkanBuffer>) {
+        return ResourceType::VULKAN_BUFFER;
+    }
    return ResourceType::UNDEFINED_TYPE;
 }

@@ -95,6 +103,8 @@ std::string getTypeStr(ResourceType type) {
            return "RenderTarget";
        case ResourceType::SWAP_CHAIN:
            return "SwapChain";
+        case ResourceType::STAGE_SEGMENT:
+            return "Stage::Segment";
        case ResourceType::RENDER_PRIMITIVE:
            return "RenderPrimitive";
        case ResourceType::TEXTURE:
@@ -113,6 +123,8 @@ std::string getTypeStr(ResourceType type) {
            return "DescriptorSet";
        case ResourceType::FENCE:
            return "Fence";
+        case ResourceType::VULKAN_BUFFER:
+            return "VulkanBuffer";
        case ResourceType::UNDEFINED_TYPE:
            return "";
    }
--- a/filament/backend/src/vulkan/memory/Resource.h
+++ b/filament/backend/src/vulkan/memory/Resource.h
@@ -49,7 +49,9 @@ enum class ResourceType : uint8_t {
    DESCRIPTOR_SET_LAYOUT = 11,
    DESCRIPTOR_SET = 12,
    FENCE = 13,
-    UNDEFINED_TYPE = 14,    // Must be the last enum because we use it for iterating over the enums.
+    VULKAN_BUFFER = 14,
+    STAGE_SEGMENT = 15,
+    UNDEFINED_TYPE = 16,    // Must be the last enum because we use it for iterating over the enums.
 };

 template<typename D>
--- a/filament/backend/src/vulkan/memory/ResourceManager.cpp
+++ b/filament/backend/src/vulkan/memory/ResourceManager.cpp
@@ -77,6 +77,9 @@ void ResourceManager::destroyWithType(ResourceType type, HandleId id) {
        case ResourceType::SWAP_CHAIN:
            destruct<VulkanSwapChain>(Handle<VulkanSwapChain>(id));
            break;
+        case ResourceType::STAGE_SEGMENT:
+            destruct<VulkanStage::Segment>(Handle<VulkanStage::Segment>(id));
+            break;
        case ResourceType::RENDER_PRIMITIVE:
            destruct<VulkanRenderPrimitive>(Handle<VulkanRenderPrimitive>(id));
            break;
@@ -104,6 +107,9 @@ void ResourceManager::destroyWithType(ResourceType type, HandleId id) {
        case ResourceType::FENCE:
            destruct<VulkanFence>(Handle<VulkanFence>(id));
            break;
+        case ResourceType::VULKAN_BUFFER:
+            destruct<VulkanBuffer>(Handle<VulkanBuffer>(id));
+            break;
        case ResourceType::UNDEFINED_TYPE:
            break;
    }
--- a/filament/backend/src/vulkan/platform/VulkanPlatform.cpp
+++ b/filament/backend/src/vulkan/platform/VulkanPlatform.cpp
@@ -92,6 +92,16 @@ StructA* chainStruct(StructA* structA, StructB* structB) {
    return structA;
 }

+// Reports whether `format` appears in fvkutils::EXT_VK_FORMATS.
+bool shouldSkipFormat(VkFormat format) {
+    // Formats in that list require extensions, which is why they get skipped.
+    bool listed = false;
+    for (VkFormat const candidate: fvkutils::EXT_VK_FORMATS) {
+        listed = listed || (candidate == format);
+    }
+    return listed;
+}
+
 void printDeviceInfo(VkInstance instance, VkPhysicalDevice device) {
    // Print some driver or MoltenVK information if it is available.
    if (vkGetPhysicalDeviceProperties2) {
@@ -151,8 +161,14 @@ void printDepthFormats(VkPhysicalDevice device) {
    constexpr VkFormatFeatureFlags required =
            VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
    FVK_LOGI << "Sampleable depth formats: ";
-    for (VkFormat format : fvkutils::ALL_VK_FORMATS) {
+    for (VkFormat const format : fvkutils::ALL_VK_FORMATS) {
+        // Skip formats that require extensions.
+        if (shouldSkipFormat(format)) {
+            continue;
+        }
+
        VkFormatProperties props;
+
        vkGetPhysicalDeviceFormatProperties(device, format, &props);
        if ((props.optimalTilingFeatures & required) == required) {
            FVK_LOGI << format << " ";
@@ -617,7 +633,13 @@ fvkutils::VkFormatList findBlittableDepthStencilFormats(VkPhysicalDevice device)
    std::vector<VkFormat> selectedFormats;
    constexpr VkFormatFeatureFlags required = VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT |
            VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
-    for (VkFormat format : fvkutils::ALL_VK_FORMATS) {
+
+    for (VkFormat const format : fvkutils::ALL_VK_FORMATS) {
+        // Skip formats that require extensions.
+        if (shouldSkipFormat(format)) {
+            continue;
+        }
+
        if (fvkutils::isVkDepthFormat(format)) {
            VkFormatProperties props;
            vkGetPhysicalDeviceFormatProperties(device, format, &props);
--- a/filament/backend/src/vulkan/utils/Definitions.h
+++ b/filament/backend/src/vulkan/utils/Definitions.h
@@ -32,313 +32,325 @@ using VkFormatList = utils::FixedCapacityVector<VkFormat>;
 // Copied from
 //   https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkFormat.html
 constexpr VkFormat ALL_VK_FORMATS[] = {
-        VK_FORMAT_UNDEFINED,
-        VK_FORMAT_R4G4_UNORM_PACK8,
-        VK_FORMAT_R4G4B4A4_UNORM_PACK16,
-        VK_FORMAT_B4G4R4A4_UNORM_PACK16,
-        VK_FORMAT_R5G6B5_UNORM_PACK16,
-        VK_FORMAT_B5G6R5_UNORM_PACK16,
-        VK_FORMAT_R5G5B5A1_UNORM_PACK16,
-        VK_FORMAT_B5G5R5A1_UNORM_PACK16,
-        VK_FORMAT_A1R5G5B5_UNORM_PACK16,
-        VK_FORMAT_R8_UNORM,
-        VK_FORMAT_R8_SNORM,
-        VK_FORMAT_R8_USCALED,
-        VK_FORMAT_R8_SSCALED,
-        VK_FORMAT_R8_UINT,
-        VK_FORMAT_R8_SINT,
-        VK_FORMAT_R8_SRGB,
-        VK_FORMAT_R8G8_UNORM,
-        VK_FORMAT_R8G8_SNORM,
-        VK_FORMAT_R8G8_USCALED,
-        VK_FORMAT_R8G8_SSCALED,
-        VK_FORMAT_R8G8_UINT,
-        VK_FORMAT_R8G8_SINT,
-        VK_FORMAT_R8G8_SRGB,
-        VK_FORMAT_R8G8B8_UNORM,
-        VK_FORMAT_R8G8B8_SNORM,
-        VK_FORMAT_R8G8B8_USCALED,
-        VK_FORMAT_R8G8B8_SSCALED,
-        VK_FORMAT_R8G8B8_UINT,
-        VK_FORMAT_R8G8B8_SINT,
-        VK_FORMAT_R8G8B8_SRGB,
-        VK_FORMAT_B8G8R8_UNORM,
-        VK_FORMAT_B8G8R8_SNORM,
-        VK_FORMAT_B8G8R8_USCALED,
-        VK_FORMAT_B8G8R8_SSCALED,
-        VK_FORMAT_B8G8R8_UINT,
-        VK_FORMAT_B8G8R8_SINT,
-        VK_FORMAT_B8G8R8_SRGB,
-        VK_FORMAT_R8G8B8A8_UNORM,
-        VK_FORMAT_R8G8B8A8_SNORM,
-        VK_FORMAT_R8G8B8A8_USCALED,
-        VK_FORMAT_R8G8B8A8_SSCALED,
-        VK_FORMAT_R8G8B8A8_UINT,
-        VK_FORMAT_R8G8B8A8_SINT,
-        VK_FORMAT_R8G8B8A8_SRGB,
-        VK_FORMAT_B8G8R8A8_UNORM,
-        VK_FORMAT_B8G8R8A8_SNORM,
-        VK_FORMAT_B8G8R8A8_USCALED,
-        VK_FORMAT_B8G8R8A8_SSCALED,
-        VK_FORMAT_B8G8R8A8_UINT,
-        VK_FORMAT_B8G8R8A8_SINT,
-        VK_FORMAT_B8G8R8A8_SRGB,
-        VK_FORMAT_A8B8G8R8_UNORM_PACK32,
-        VK_FORMAT_A8B8G8R8_SNORM_PACK32,
-        VK_FORMAT_A8B8G8R8_USCALED_PACK32,
-        VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
-        VK_FORMAT_A8B8G8R8_UINT_PACK32,
-        VK_FORMAT_A8B8G8R8_SINT_PACK32,
-        VK_FORMAT_A8B8G8R8_SRGB_PACK32,
-        VK_FORMAT_A2R10G10B10_UNORM_PACK32,
-        VK_FORMAT_A2R10G10B10_SNORM_PACK32,
-        VK_FORMAT_A2R10G10B10_USCALED_PACK32,
-        VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
-        VK_FORMAT_A2R10G10B10_UINT_PACK32,
-        VK_FORMAT_A2R10G10B10_SINT_PACK32,
-        VK_FORMAT_A2B10G10R10_UNORM_PACK32,
-        VK_FORMAT_A2B10G10R10_SNORM_PACK32,
-        VK_FORMAT_A2B10G10R10_USCALED_PACK32,
-        VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
-        VK_FORMAT_A2B10G10R10_UINT_PACK32,
-        VK_FORMAT_A2B10G10R10_SINT_PACK32,
-        VK_FORMAT_R16_UNORM,
-        VK_FORMAT_R16_SNORM,
-        VK_FORMAT_R16_USCALED,
-        VK_FORMAT_R16_SSCALED,
-        VK_FORMAT_R16_UINT,
-        VK_FORMAT_R16_SINT,
-        VK_FORMAT_R16_SFLOAT,
-        VK_FORMAT_R16G16_UNORM,
-        VK_FORMAT_R16G16_SNORM,
-        VK_FORMAT_R16G16_USCALED,
-        VK_FORMAT_R16G16_SSCALED,
-        VK_FORMAT_R16G16_UINT,
-        VK_FORMAT_R16G16_SINT,
-        VK_FORMAT_R16G16_SFLOAT,
-        VK_FORMAT_R16G16B16_UNORM,
-        VK_FORMAT_R16G16B16_SNORM,
-        VK_FORMAT_R16G16B16_USCALED,
-        VK_FORMAT_R16G16B16_SSCALED,
-        VK_FORMAT_R16G16B16_UINT,
-        VK_FORMAT_R16G16B16_SINT,
-        VK_FORMAT_R16G16B16_SFLOAT,
-        VK_FORMAT_R16G16B16A16_UNORM,
-        VK_FORMAT_R16G16B16A16_SNORM,
-        VK_FORMAT_R16G16B16A16_USCALED,
-        VK_FORMAT_R16G16B16A16_SSCALED,
-        VK_FORMAT_R16G16B16A16_UINT,
-        VK_FORMAT_R16G16B16A16_SINT,
-        VK_FORMAT_R16G16B16A16_SFLOAT,
-        VK_FORMAT_R32_UINT,
-        VK_FORMAT_R32_SINT,
-        VK_FORMAT_R32_SFLOAT,
-        VK_FORMAT_R32G32_UINT,
-        VK_FORMAT_R32G32_SINT,
-        VK_FORMAT_R32G32_SFLOAT,
-        VK_FORMAT_R32G32B32_UINT,
-        VK_FORMAT_R32G32B32_SINT,
-        VK_FORMAT_R32G32B32_SFLOAT,
-        VK_FORMAT_R32G32B32A32_UINT,
-        VK_FORMAT_R32G32B32A32_SINT,
-        VK_FORMAT_R32G32B32A32_SFLOAT,
-        VK_FORMAT_R64_UINT,
-        VK_FORMAT_R64_SINT,
-        VK_FORMAT_R64_SFLOAT,
-        VK_FORMAT_R64G64_UINT,
-        VK_FORMAT_R64G64_SINT,
-        VK_FORMAT_R64G64_SFLOAT,
-        VK_FORMAT_R64G64B64_UINT,
-        VK_FORMAT_R64G64B64_SINT,
-        VK_FORMAT_R64G64B64_SFLOAT,
-        VK_FORMAT_R64G64B64A64_UINT,
-        VK_FORMAT_R64G64B64A64_SINT,
-        VK_FORMAT_R64G64B64A64_SFLOAT,
-        VK_FORMAT_B10G11R11_UFLOAT_PACK32,
-        VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
-        VK_FORMAT_D16_UNORM,
-        VK_FORMAT_X8_D24_UNORM_PACK32,
-        VK_FORMAT_D32_SFLOAT,
-        VK_FORMAT_S8_UINT,
-        VK_FORMAT_D16_UNORM_S8_UINT,
-        VK_FORMAT_D24_UNORM_S8_UINT,
-        VK_FORMAT_D32_SFLOAT_S8_UINT,
-        VK_FORMAT_BC1_RGB_UNORM_BLOCK,
-        VK_FORMAT_BC1_RGB_SRGB_BLOCK,
-        VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
-        VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
-        VK_FORMAT_BC2_UNORM_BLOCK,
-        VK_FORMAT_BC2_SRGB_BLOCK,
-        VK_FORMAT_BC3_UNORM_BLOCK,
-        VK_FORMAT_BC3_SRGB_BLOCK,
-        VK_FORMAT_BC4_UNORM_BLOCK,
-        VK_FORMAT_BC4_SNORM_BLOCK,
-        VK_FORMAT_BC5_UNORM_BLOCK,
-        VK_FORMAT_BC5_SNORM_BLOCK,
-        VK_FORMAT_BC6H_UFLOAT_BLOCK,
-        VK_FORMAT_BC6H_SFLOAT_BLOCK,
-        VK_FORMAT_BC7_UNORM_BLOCK,
-        VK_FORMAT_BC7_SRGB_BLOCK,
-        VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
-        VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
-        VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
-        VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
-        VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
-        VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
-        VK_FORMAT_EAC_R11_UNORM_BLOCK,
-        VK_FORMAT_EAC_R11_SNORM_BLOCK,
-        VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
-        VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
-        VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
-        VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
-        VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
-        VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
-        VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
-        VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
-        VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
-        VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
-        VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
-        VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
-        VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
-        VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
-        VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
-        VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
-        VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
-        VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
-        VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
-        VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
-        VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
-        VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
-        VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
-        VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
-        VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
-        VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
-        VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
-        VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
-        VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
-        VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
-        VK_FORMAT_G8B8G8R8_422_UNORM,
-        VK_FORMAT_B8G8R8G8_422_UNORM,
-        VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
-        VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
-        VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
-        VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,
-        VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
-        VK_FORMAT_R10X6_UNORM_PACK16,
-        VK_FORMAT_R10X6G10X6_UNORM_2PACK16,
-        VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16,
-        VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
-        VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
-        VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
-        VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
-        VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
-        VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
-        VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
-        VK_FORMAT_R12X4_UNORM_PACK16,
-        VK_FORMAT_R12X4G12X4_UNORM_2PACK16,
-        VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16,
-        VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
-        VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
-        VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
-        VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
-        VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
-        VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
-        VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
-        VK_FORMAT_G16B16G16R16_422_UNORM,
-        VK_FORMAT_B16G16R16G16_422_UNORM,
-        VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
-        VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
-        VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
-        VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,
-        VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
-        VK_FORMAT_G8_B8R8_2PLANE_444_UNORM,
-        VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16,
-        VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16,
-        VK_FORMAT_G16_B16R16_2PLANE_444_UNORM,
-        VK_FORMAT_A4R4G4B4_UNORM_PACK16,
-        VK_FORMAT_A4B4G4R4_UNORM_PACK16,
-        VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK,
-        VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK,
-        VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG,
-        VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG,
-        VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG,
-        VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG,
-        VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG,
-        VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG,
-        VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG,
-        VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG,
-// Not supported (yet) by bluevk
-//        VK_FORMAT_R16G16_SFIXED5_NV,
-//        VK_FORMAT_A1B5G5R5_UNORM_PACK16_KHR,
-//        VK_FORMAT_A8_UNORM_KHR,
-//        VK_FORMAT_A8_UNORM,
-        VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT,
-        VK_FORMAT_G8B8G8R8_422_UNORM_KHR,
-        VK_FORMAT_B8G8R8G8_422_UNORM_KHR,
-        VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR,
-        VK_FORMAT_G8_B8R8_2PLANE_420_UNORM_KHR,
-        VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM_KHR,
-        VK_FORMAT_G8_B8R8_2PLANE_422_UNORM_KHR,
-        VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM_KHR,
-        VK_FORMAT_R10X6_UNORM_PACK16_KHR,
-        VK_FORMAT_R10X6G10X6_UNORM_2PACK16_KHR,
-        VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16_KHR,
-        VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16_KHR,
-        VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16_KHR,
-        VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16_KHR,
-        VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16_KHR,
-        VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16_KHR,
-        VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16_KHR,
-        VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16_KHR,
-        VK_FORMAT_R12X4_UNORM_PACK16_KHR,
-        VK_FORMAT_R12X4G12X4_UNORM_2PACK16_KHR,
-        VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16_KHR,
-        VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16_KHR,
-        VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16_KHR,
-        VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16_KHR,
-        VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16_KHR,
-        VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16_KHR,
-        VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16_KHR,
-        VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16_KHR,
-        VK_FORMAT_G16B16G16R16_422_UNORM_KHR,
-        VK_FORMAT_B16G16R16G16_422_UNORM_KHR,
-        VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM_KHR,
-        VK_FORMAT_G16_B16R16_2PLANE_420_UNORM_KHR,
-        VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR,
-        VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR,
-        VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR,
-        VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT,
-        VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT,
-        VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT,
-        VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT,
-        VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT,
-        VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT,
-        VK_FORMAT_R16G16_S10_5_NV,
+    VK_FORMAT_UNDEFINED,
+    VK_FORMAT_R4G4_UNORM_PACK8,
+    VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+    VK_FORMAT_B4G4R4A4_UNORM_PACK16,
+    VK_FORMAT_R5G6B5_UNORM_PACK16,
+    VK_FORMAT_B5G6R5_UNORM_PACK16,
+    VK_FORMAT_R5G5B5A1_UNORM_PACK16,
+    VK_FORMAT_B5G5R5A1_UNORM_PACK16,
+    VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+    VK_FORMAT_R8_UNORM,
+    VK_FORMAT_R8_SNORM,
+    VK_FORMAT_R8_USCALED,
+    VK_FORMAT_R8_SSCALED,
+    VK_FORMAT_R8_UINT,
+    VK_FORMAT_R8_SINT,
+    VK_FORMAT_R8_SRGB,
+    VK_FORMAT_R8G8_UNORM,
+    VK_FORMAT_R8G8_SNORM,
+    VK_FORMAT_R8G8_USCALED,
+    VK_FORMAT_R8G8_SSCALED,
+    VK_FORMAT_R8G8_UINT,
+    VK_FORMAT_R8G8_SINT,
+    VK_FORMAT_R8G8_SRGB,
+    VK_FORMAT_R8G8B8_UNORM,
+    VK_FORMAT_R8G8B8_SNORM,
+    VK_FORMAT_R8G8B8_USCALED,
+    VK_FORMAT_R8G8B8_SSCALED,
+    VK_FORMAT_R8G8B8_UINT,
+    VK_FORMAT_R8G8B8_SINT,
+    VK_FORMAT_R8G8B8_SRGB,
+    VK_FORMAT_B8G8R8_UNORM,
+    VK_FORMAT_B8G8R8_SNORM,
+    VK_FORMAT_B8G8R8_USCALED,
+    VK_FORMAT_B8G8R8_SSCALED,
+    VK_FORMAT_B8G8R8_UINT,
+    VK_FORMAT_B8G8R8_SINT,
+    VK_FORMAT_B8G8R8_SRGB,
+    VK_FORMAT_R8G8B8A8_UNORM,
+    VK_FORMAT_R8G8B8A8_SNORM,
+    VK_FORMAT_R8G8B8A8_USCALED,
+    VK_FORMAT_R8G8B8A8_SSCALED,
+    VK_FORMAT_R8G8B8A8_UINT,
+    VK_FORMAT_R8G8B8A8_SINT,
+    VK_FORMAT_R8G8B8A8_SRGB,
+    VK_FORMAT_B8G8R8A8_UNORM,
+    VK_FORMAT_B8G8R8A8_SNORM,
+    VK_FORMAT_B8G8R8A8_USCALED,
+    VK_FORMAT_B8G8R8A8_SSCALED,
+    VK_FORMAT_B8G8R8A8_UINT,
+    VK_FORMAT_B8G8R8A8_SINT,
+    VK_FORMAT_B8G8R8A8_SRGB,
+    VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+    VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+    VK_FORMAT_A8B8G8R8_USCALED_PACK32,
+    VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
+    VK_FORMAT_A8B8G8R8_UINT_PACK32,
+    VK_FORMAT_A8B8G8R8_SINT_PACK32,
+    VK_FORMAT_A8B8G8R8_SRGB_PACK32,
+    VK_FORMAT_A2R10G10B10_UNORM_PACK32,
+    VK_FORMAT_A2R10G10B10_SNORM_PACK32,
+    VK_FORMAT_A2R10G10B10_USCALED_PACK32,
+    VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
+    VK_FORMAT_A2R10G10B10_UINT_PACK32,
+    VK_FORMAT_A2R10G10B10_SINT_PACK32,
+    VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+    VK_FORMAT_A2B10G10R10_SNORM_PACK32,
+    VK_FORMAT_A2B10G10R10_USCALED_PACK32,
+    VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
+    VK_FORMAT_A2B10G10R10_UINT_PACK32,
+    VK_FORMAT_A2B10G10R10_SINT_PACK32,
+    VK_FORMAT_R16_UNORM,
+    VK_FORMAT_R16_SNORM,
+    VK_FORMAT_R16_USCALED,
+    VK_FORMAT_R16_SSCALED,
+    VK_FORMAT_R16_UINT,
+    VK_FORMAT_R16_SINT,
+    VK_FORMAT_R16_SFLOAT,
+    VK_FORMAT_R16G16_UNORM,
+    VK_FORMAT_R16G16_SNORM,
+    VK_FORMAT_R16G16_USCALED,
+    VK_FORMAT_R16G16_SSCALED,
+    VK_FORMAT_R16G16_UINT,
+    VK_FORMAT_R16G16_SINT,
+    VK_FORMAT_R16G16_SFLOAT,
+    VK_FORMAT_R16G16B16_UNORM,
+    VK_FORMAT_R16G16B16_SNORM,
+    VK_FORMAT_R16G16B16_USCALED,
+    VK_FORMAT_R16G16B16_SSCALED,
+    VK_FORMAT_R16G16B16_UINT,
+    VK_FORMAT_R16G16B16_SINT,
+    VK_FORMAT_R16G16B16_SFLOAT,
+    VK_FORMAT_R16G16B16A16_UNORM,
+    VK_FORMAT_R16G16B16A16_SNORM,
+    VK_FORMAT_R16G16B16A16_USCALED,
+    VK_FORMAT_R16G16B16A16_SSCALED,
+    VK_FORMAT_R16G16B16A16_UINT,
+    VK_FORMAT_R16G16B16A16_SINT,
+    VK_FORMAT_R16G16B16A16_SFLOAT,
+    VK_FORMAT_R32_UINT,
+    VK_FORMAT_R32_SINT,
+    VK_FORMAT_R32_SFLOAT,
+    VK_FORMAT_R32G32_UINT,
+    VK_FORMAT_R32G32_SINT,
+    VK_FORMAT_R32G32_SFLOAT,
+    VK_FORMAT_R32G32B32_UINT,
+    VK_FORMAT_R32G32B32_SINT,
+    VK_FORMAT_R32G32B32_SFLOAT,
+    VK_FORMAT_R32G32B32A32_UINT,
+    VK_FORMAT_R32G32B32A32_SINT,
+    VK_FORMAT_R32G32B32A32_SFLOAT,
+    VK_FORMAT_R64_UINT,
+    VK_FORMAT_R64_SINT,
+    VK_FORMAT_R64_SFLOAT,
+    VK_FORMAT_R64G64_UINT,
+    VK_FORMAT_R64G64_SINT,
+    VK_FORMAT_R64G64_SFLOAT,
+    VK_FORMAT_R64G64B64_UINT,
+    VK_FORMAT_R64G64B64_SINT,
+    VK_FORMAT_R64G64B64_SFLOAT,
+    VK_FORMAT_R64G64B64A64_UINT,
+    VK_FORMAT_R64G64B64A64_SINT,
+    VK_FORMAT_R64G64B64A64_SFLOAT,
+    VK_FORMAT_B10G11R11_UFLOAT_PACK32,
+    VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
+    VK_FORMAT_D16_UNORM,
+    VK_FORMAT_X8_D24_UNORM_PACK32,
+    VK_FORMAT_D32_SFLOAT,
+    VK_FORMAT_S8_UINT,
+    VK_FORMAT_D16_UNORM_S8_UINT,
+    VK_FORMAT_D24_UNORM_S8_UINT,
+    VK_FORMAT_D32_SFLOAT_S8_UINT,
+    VK_FORMAT_BC1_RGB_UNORM_BLOCK,
+    VK_FORMAT_BC1_RGB_SRGB_BLOCK,
+    VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
+    VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
+    VK_FORMAT_BC2_UNORM_BLOCK,
+    VK_FORMAT_BC2_SRGB_BLOCK,
+    VK_FORMAT_BC3_UNORM_BLOCK,
+    VK_FORMAT_BC3_SRGB_BLOCK,
+    VK_FORMAT_BC4_UNORM_BLOCK,
+    VK_FORMAT_BC4_SNORM_BLOCK,
+    VK_FORMAT_BC5_UNORM_BLOCK,
+    VK_FORMAT_BC5_SNORM_BLOCK,
+    VK_FORMAT_BC6H_UFLOAT_BLOCK,
+    VK_FORMAT_BC6H_SFLOAT_BLOCK,
+    VK_FORMAT_BC7_UNORM_BLOCK,
+    VK_FORMAT_BC7_SRGB_BLOCK,
+    VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
+    VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
+    VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
+    VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
+    VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
+    VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
+    VK_FORMAT_EAC_R11_UNORM_BLOCK,
+    VK_FORMAT_EAC_R11_SNORM_BLOCK,
+    VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
+    VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
+    VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
+    VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
+    VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
+    VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
+    VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
+    VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
+    VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+    VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
+    VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
+    VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+    VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
+    VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+    VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+    VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+    VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
+    VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+    VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
+    VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
+    VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
+    VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
+    VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+    VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+    VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
+    VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+    VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
+    VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
+    VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
+    VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
+    VK_FORMAT_G8B8G8R8_422_UNORM,
+    VK_FORMAT_B8G8R8G8_422_UNORM,
+    VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
+    VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
+    VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
+    VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,
+    VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
+    VK_FORMAT_R10X6_UNORM_PACK16,
+    VK_FORMAT_R10X6G10X6_UNORM_2PACK16,
+    VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16,
+    VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
+    VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
+    VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
+    VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+    VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
+    VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
+    VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
+    VK_FORMAT_R12X4_UNORM_PACK16,
+    VK_FORMAT_R12X4G12X4_UNORM_2PACK16,
+    VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16,
+    VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
+    VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
+    VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
+    VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
+    VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
+    VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
+    VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
+    VK_FORMAT_G16B16G16R16_422_UNORM,
+    VK_FORMAT_B16G16R16G16_422_UNORM,
+    VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
+    VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
+    VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
+    VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,
+    VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
+    VK_FORMAT_G8_B8R8_2PLANE_444_UNORM,
+    VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16,
+    VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16,
+    VK_FORMAT_G16_B16R16_2PLANE_444_UNORM,
+    VK_FORMAT_A4R4G4B4_UNORM_PACK16,
+    VK_FORMAT_A4B4G4R4_UNORM_PACK16,
+    VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK,
+    VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK,
+    VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG,
+    VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG,
+    VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG,
+    VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG,
+    VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG,
+    VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG,
+    VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG,
+    VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG,
+    // Not supported (yet) by bluevk
+    //        VK_FORMAT_R16G16_SFIXED5_NV,
+    //        VK_FORMAT_A1B5G5R5_UNORM_PACK16_KHR,
+    //        VK_FORMAT_A8_UNORM_KHR,
+    //        VK_FORMAT_A8_UNORM,
+    VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT,
+    VK_FORMAT_G8B8G8R8_422_UNORM_KHR,
+    VK_FORMAT_B8G8R8G8_422_UNORM_KHR,
+    VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM_KHR,
+    VK_FORMAT_G8_B8R8_2PLANE_420_UNORM_KHR,
+    VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM_KHR,
+    VK_FORMAT_G8_B8R8_2PLANE_422_UNORM_KHR,
+    VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM_KHR,
+    VK_FORMAT_R10X6_UNORM_PACK16_KHR,
+    VK_FORMAT_R10X6G10X6_UNORM_2PACK16_KHR,
+    VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16_KHR,
+    VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16_KHR,
+    VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16_KHR,
+    VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16_KHR,
+    VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16_KHR,
+    VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16_KHR,
+    VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16_KHR,
+    VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16_KHR,
+    VK_FORMAT_R12X4_UNORM_PACK16_KHR,
+    VK_FORMAT_R12X4G12X4_UNORM_2PACK16_KHR,
+    VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16_KHR,
+    VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16_KHR,
+    VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16_KHR,
+    VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16_KHR,
+    VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16_KHR,
+    VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16_KHR,
+    VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16_KHR,
+    VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16_KHR,
+    VK_FORMAT_G16B16G16R16_422_UNORM_KHR,
+    VK_FORMAT_B16G16R16G16_422_UNORM_KHR,
+    VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM_KHR,
+    VK_FORMAT_G16_B16R16_2PLANE_420_UNORM_KHR,
+    VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM_KHR,
+    VK_FORMAT_G16_B16R16_2PLANE_422_UNORM_KHR,
+    VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM_KHR,
+    VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT,
+    VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT,
+    VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT,
+    VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT,
+    VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT,
+    VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT,
+    VK_FORMAT_R16G16_S10_5_NV,
+};
+
+// Formats listed per extension; the comment above each group names the extension it belongs to.
+// Every format appears exactly once.
+constexpr VkFormat EXT_VK_FORMATS[] = {
+   // VK_EXT_ycbcr_2plane_444_formats
+   VK_FORMAT_G16_B16R16_2PLANE_444_UNORM,
+   VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16,
+   VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16,
+   VK_FORMAT_G8_B8R8_2PLANE_444_UNORM,
+
+   // VK_NV_optical_flow
+   VK_FORMAT_R16G16_S10_5_NV,
 };

 using UniformBufferBitmask = utils::bitset64;
--- a/filament/backend/src/vulkan/utils/Helper.h
+++ b/filament/backend/src/vulkan/utils/Helper.h
@@ -21,6 +21,7 @@

 #include <utils/bitset.h>
 #include <utils/FixedCapacityVector.h>
+#include <utils/Panic.h>

 #include <bluevk/BlueVK.h>

--- a/filament/backend/src/vulkan/utils/StaticVector.h
+++ b/filament/backend/src/vulkan/utils/StaticVector.h
@@ -20,6 +20,7 @@
 // An Array that will be statically fixed in capacity, but the "size" (as in user added elements) is
 // variable. Note that this class is movable.

+#include <utils/debug.h>
 #include <utils/Panic.h>

 #include <array>
--- a/filament/backend/src/webgpu/SpdMipmapGenerator.cpp
+++ b/filament/backend/src/webgpu/SpdMipmapGenerator.cpp
@@ -0,0 +1,795 @@
+#include "SpdMipmapGenerator.h"
+
+#include <algorithm>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+// C++ port of https://github.com/JolifantoBambla/webgpu-spd for early experiments- do not merge like this
+namespace spd {
+
+    // Maps a texture format to its WGSL texel-format name. The name is spliced into the
+    // storage-texture declarations of the generated shader and is inspected by
+    // SanitizeScalarType(), which looks for "sint"/"uint" in it to pick an integer scalar type.
+    // Integer formats therefore have to be listed here, otherwise they are treated as
+    // float formats. Formats that are not listed fall back to "rgba8unorm".
+    const char* to_string(wgpu::TextureFormat format) {
+        switch (format) {
+            // 8-bit per channel
+            case wgpu::TextureFormat::RGBA8Unorm: return "rgba8unorm";
+            case wgpu::TextureFormat::RGBA8Snorm: return "rgba8snorm";
+            case wgpu::TextureFormat::RGBA8Uint: return "rgba8uint";
+            case wgpu::TextureFormat::RGBA8Sint: return "rgba8sint";
+            case wgpu::TextureFormat::BGRA8Unorm: return "bgra8unorm";
+            // 16-bit per channel
+            case wgpu::TextureFormat::R16Float: return "r16float";
+            case wgpu::TextureFormat::RG16Float: return "rg16float";
+            case wgpu::TextureFormat::RGBA16Float: return "rgba16float";
+            case wgpu::TextureFormat::RGBA16Uint: return "rgba16uint";
+            case wgpu::TextureFormat::RGBA16Sint: return "rgba16sint";
+            // 32-bit per channel
+            case wgpu::TextureFormat::R32Float: return "r32float";
+            case wgpu::TextureFormat::R32Uint: return "r32uint";
+            case wgpu::TextureFormat::R32Sint: return "r32sint";
+            case wgpu::TextureFormat::RG32Float: return "rg32float";
+            case wgpu::TextureFormat::RG32Uint: return "rg32uint";
+            case wgpu::TextureFormat::RG32Sint: return "rg32sint";
+            case wgpu::TextureFormat::RGBA32Float: return "rgba32float";
+            case wgpu::TextureFormat::RGBA32Uint: return "rgba32uint";
+            case wgpu::TextureFormat::RGBA32Sint: return "rgba32sint";
+            // Add other formats as needed
+            default: return "rgba8unorm";
+        }
+    }
+
+
+    // Returns the WGSL source of spd_reduce_4, the function that folds four texels into one,
+    // for the requested filter. Any filter other than Min, Max and MinMax yields the average.
+    std::string MipmapGenerator::GetFilterCode(SPDFilter filter) {
+        if (filter == SPDFilter::Min) {
+            // Component-wise minimum of the four texels.
+            return R"(
+    fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> {
+        return min(min(v0, v1), min(v2, v3));
+    }
+    )";
+        }
+
+        if (filter == SPDFilter::Max) {
+            // Component-wise maximum of the four texels.
+            return R"(
+    fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> {
+        return max(max(v0, v1), max(v2, v3));
+    }
+    )";
+        }
+
+        if (filter == SPDFilter::MinMax) {
+            // x receives the minimum of the x components, y the maximum over the x and y
+            // components; z and w are zero.
+            return R"(
+    fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> {
+        let max4 = max(max(v0.xy, v1.xy), max(v2.xy, v3.xy));
+        return vec4<SPDScalar>(min(min(v0.x, v1.x), min(v2.x, v3.x)), max(max4.x, max4.y), 0.0, 0.0);
+    }
+    )";
+        }
+
+        // SPDFilter::Average and every other value: arithmetic mean of the four texels.
+        return R"(
+    fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> {
+        return (v0 + v1 + v2 + v3) * 0.25;
+    }
+    )";
+    }
+
+
+    // Stores the device and builds the bind group layout for the per-pass metadata: a single
+    // uniform buffer at binding 0, visible to the compute stage, of at least 16 bytes.
+    MipmapGenerator::MipmapGenerator(const wgpu::Device& device) : m_device(device) {
+        wgpu::BindGroupLayoutEntry metaUniformEntry{};
+        metaUniformEntry.buffer.minBindingSize = 16;
+        metaUniformEntry.buffer.type = wgpu::BufferBindingType::Uniform;
+        metaUniformEntry.visibility = wgpu::ShaderStage::Compute;
+        metaUniformEntry.binding = 0;
+
+        wgpu::BindGroupLayoutDescriptor internalLayoutDesc{};
+        internalLayoutDesc.entries = &metaUniformEntry;
+        internalLayoutDesc.entryCount = 1;
+        m_internalResourcesBindGroupLayout = m_device.CreateBindGroupLayout(&internalLayoutDesc);
+    }
+
+    // Warms the pipeline cache for the given format/filter by creating the pipelines that
+    // generate 1, 2, 3 and 4 mip levels.
+    void MipmapGenerator::PreparePipelines(wgpu::TextureFormat format, SPDFilter filter, bool halfPrecision) {
+        const SPDScalarType shaderScalar = SanitizeScalarType(format, halfPrecision);
+
+        // Prepare for a reasonable number of mips
+        uint32_t mipCount = 1;
+        while (mipCount <= 4) {
+            GetOrCreatePipeline(format, filter, mipCount, shaderScalar);
+            ++mipCount;
+        }
+    }
+    
+    // Picks the scalar type the generated shader computes in. The decision is made from the
+    // texel-format name returned by to_string(): names containing "sint" give I32, names
+    // containing "uint" give U32, everything else is a float format. Float formats use F16 when
+    // halfPrecision is requested and F32 otherwise; halfPrecision has no effect on integer formats.
+    SPDScalarType MipmapGenerator::SanitizeScalarType(wgpu::TextureFormat format, bool halfPrecision) {
+        std::string loweredName = to_string(format);
+        std::transform(loweredName.begin(), loweredName.end(), loweredName.begin(), ::tolower);
+
+        // Integer formats are returned as-is.
+        // TODO: log a warning when half precision is requested for a non-float format.
+        if (loweredName.find("sint") != std::string::npos) {
+            return SPDScalarType::I32;
+        }
+        if (loweredName.find("uint") != std::string::npos) {
+            return SPDScalarType::U32;
+        }
+
+        // In a real Dawn app the device's supported features would be checked here; for now f16
+        // is assumed to be available whenever it is requested.
+        // TODO: log a warning when half precision is requested but not supported.
+        const bool hasF16 = true;
+        return (halfPrecision && hasF16) ? SPDScalarType::F16 : SPDScalarType::F32;
+    }
+
+    // Returns the pipeline cached for (format, scalarType, filter, numMips), creating and caching
+    // it on first use. The pipeline's bind group 0 holds the source texture at binding 0 and one
+    // write-only storage texture per generated mip at bindings 1..numMips; bind group 1 is the
+    // internal-resources layout created in the constructor.
+    SPDPipeline& MipmapGenerator::GetOrCreatePipeline(wgpu::TextureFormat format, SPDFilter filter, uint32_t numMips, SPDScalarType scalarType) {
+        auto& pipelinesByMipCount = m_pipelines[format][scalarType][filter];
+        if (pipelinesByMipCount.count(numMips) != 0) {
+            return pipelinesByMipCount[numMips];
+        }
+
+        SPDPipeline created;
+
+        // --- Bind group layout 0: source mip + destination mips ---
+        std::vector<wgpu::BindGroupLayoutEntry> layoutEntries;
+
+        // Binding 0 is the source, read through textureLoad, so its sample type has to match the
+        // scalar type the shader works in.
+        wgpu::BindGroupLayoutEntry sourceEntry{};
+        sourceEntry.binding = 0;
+        sourceEntry.visibility = wgpu::ShaderStage::Compute;
+        if (scalarType == SPDScalarType::I32) {
+            sourceEntry.texture.sampleType = wgpu::TextureSampleType::Sint;
+        } else if (scalarType == SPDScalarType::U32) {
+            sourceEntry.texture.sampleType = wgpu::TextureSampleType::Uint;
+        } else {
+            sourceEntry.texture.sampleType = wgpu::TextureSampleType::UnfilterableFloat;
+        }
+        sourceEntry.texture.viewDimension = wgpu::TextureViewDimension::e2DArray;
+        layoutEntries.push_back(sourceEntry);
+
+        // Bindings 1..numMips are the destination mips, written as storage textures.
+        for (uint32_t binding = 1; binding <= numMips; ++binding) {
+            wgpu::BindGroupLayoutEntry destinationEntry{};
+            destinationEntry.binding = binding;
+            destinationEntry.visibility = wgpu::ShaderStage::Compute;
+            destinationEntry.storageTexture.access = wgpu::StorageTextureAccess::WriteOnly;
+            destinationEntry.storageTexture.format = format;
+            destinationEntry.storageTexture.viewDimension = wgpu::TextureViewDimension::e2DArray;
+            layoutEntries.push_back(destinationEntry);
+        }
+
+        wgpu::BindGroupLayoutDescriptor mipsLayoutDesc{};
+        mipsLayoutDesc.entryCount = layoutEntries.size();
+        mipsLayoutDesc.entries = layoutEntries.data();
+        created.mipsBindGroupLayout = m_device.CreateBindGroupLayout(&mipsLayoutDesc);
+
+        // --- Shader module generated for this exact configuration ---
+        // The string must outlive CreateShaderModule because the descriptor only points at it.
+        const std::string wgslSource = MakeShaderCode(format, GetFilterCode(filter), numMips, scalarType);
+        wgpu::ShaderModuleWGSLDescriptor wgslPart{};
+        wgslPart.code = wgslSource.c_str();
+
+        wgpu::ShaderModuleDescriptor moduleDesc{};
+        moduleDesc.nextInChain = &wgslPart;
+        wgpu::ShaderModule module = m_device.CreateShaderModule(&moduleDesc);
+
+        // --- Pipeline layout: group 0 = mips, group 1 = internal resources ---
+        wgpu::BindGroupLayout groupLayouts[] = { created.mipsBindGroupLayout, m_internalResourcesBindGroupLayout };
+        wgpu::PipelineLayoutDescriptor pipelineLayoutDesc{};
+        pipelineLayoutDesc.bindGroupLayoutCount = 2;
+        pipelineLayoutDesc.bindGroupLayouts = groupLayouts;
+
+        wgpu::ComputePipelineDescriptor computeDesc{};
+        computeDesc.layout = m_device.CreatePipelineLayout(&pipelineLayoutDesc);
+        computeDesc.compute.module = module;
+        computeDesc.compute.entryPoint = "downsample";
+
+        created.pipeline = m_device.CreateComputePipeline(&computeDesc);
+
+        pipelinesByMipCount[numMips] = std::move(created);
+        return pipelinesByMipCount[numMips];
+    }
+    
+    // Records one compute pass on commandEncoder that downsamples mip level
+    // config.sourceMipLevel of srcTexture into the mip levels that follow it in the target
+    // texture (config.targetTexture, or srcTexture itself when no target is set). All array
+    // layers of srcTexture are processed.
+    //
+    // config.numMips == 0 means "every level below the source level". The number of generated
+    // levels is clamped to the levels that actually exist in the target, so no view is created
+    // past the end of the mip chain. Nothing is recorded when the source level does not exist or
+    // when there is no level left to generate.
+    //
+    // NOTE(review): only the metadata uniform is bound in group 1, while the generated shader
+    // also declares storage buffers at @group(1) bindings 1 and 2 - confirm whether passes with
+    // more than 6 mips are expected to work before relying on them.
+    void MipmapGenerator::Generate(
+        wgpu::CommandEncoder& commandEncoder,
+        wgpu::Texture srcTexture,
+        const SPDPassConfig& config)
+    {
+        const uint32_t sourceMip = config.sourceMipLevel;
+        if (sourceMip >= srcTexture.GetMipLevelCount()) return;
+
+        const uint32_t arrayLayerCount = srcTexture.GetDepthOrArrayLayers();
+
+        wgpu::Texture target = config.targetTexture ? config.targetTexture : srcTexture;
+
+        // Destination views start at sourceMip + 1, so only the levels after the source level
+        // can be written.
+        const uint32_t targetMipCount = target.GetMipLevelCount();
+        const uint32_t availableMips =
+            targetMipCount > sourceMip + 1 ? targetMipCount - sourceMip - 1 : 0;
+        uint32_t numMips = config.numMips > 0 ? config.numMips : availableMips;
+        if (numMips > availableMips) {
+            numMips = availableMips;
+        }
+
+        if (numMips == 0) return;
+
+        // The pass reads level sourceMip, so the dispatch is sized from that level's extent
+        // (never smaller than 1 texel) rather than from level 0.
+        const uint32_t shiftedWidth = srcTexture.GetWidth() >> sourceMip;
+        const uint32_t shiftedHeight = srcTexture.GetHeight() >> sourceMip;
+        const uint32_t width = shiftedWidth > 0 ? shiftedWidth : 1;
+        const uint32_t height = shiftedHeight > 0 ? shiftedHeight : 1;
+
+        SPDScalarType scalarType = SanitizeScalarType(srcTexture.GetFormat(), config.halfPrecision);
+        SPDPipeline& spdPipeline = GetOrCreatePipeline(target.GetFormat(), config.filter, numMips, scalarType);
+
+        // --- Create Bind Group 0 (Mips) ---
+        std::vector<wgpu::BindGroupEntry> mipEntries;
+
+        // Binding 0: the single source level, all layers.
+        wgpu::TextureViewDescriptor srcViewDesc{};
+        srcViewDesc.dimension = wgpu::TextureViewDimension::e2DArray;
+        srcViewDesc.baseMipLevel = sourceMip;
+        srcViewDesc.mipLevelCount = 1;
+        srcViewDesc.baseArrayLayer = 0;
+        srcViewDesc.arrayLayerCount = arrayLayerCount;
+
+        wgpu::BindGroupEntry srcEntry{};
+        srcEntry.binding = 0;
+        srcEntry.textureView = srcTexture.CreateView(&srcViewDesc);
+        mipEntries.push_back(srcEntry);
+
+        // Bindings 1..numMips: one single-level view per destination mip.
+        for (uint32_t i = 0; i < numMips; ++i) {
+            wgpu::TextureViewDescriptor dstViewDesc{};
+            dstViewDesc.dimension = wgpu::TextureViewDimension::e2DArray;
+            dstViewDesc.baseMipLevel = sourceMip + i + 1;
+            dstViewDesc.mipLevelCount = 1;
+            dstViewDesc.baseArrayLayer = 0;
+            dstViewDesc.arrayLayerCount = arrayLayerCount;
+
+            wgpu::BindGroupEntry dstEntry{};
+            dstEntry.binding = i + 1;
+            dstEntry.textureView = target.CreateView(&dstViewDesc);
+            mipEntries.push_back(dstEntry);
+        }
+
+        wgpu::BindGroupDescriptor mipBindGroupDesc{};
+        mipBindGroupDesc.layout = spdPipeline.mipsBindGroupLayout;
+        mipBindGroupDesc.entryCount = mipEntries.size();
+        mipBindGroupDesc.entries = mipEntries.data();
+        wgpu::BindGroup mipsBindGroup = m_device.CreateBindGroup(&mipBindGroupDesc);
+
+        // --- Create Bind Group 1 (Internal Resources) ---
+        // One workgroup per 64x64 tile of the source level.
+        uint32_t numWorkGroupsX = (width + 63) / 64;
+        uint32_t numWorkGroupsY = (height + 63) / 64;
+        uint32_t numWorkGroups = numWorkGroupsX * numWorkGroupsY;
+
+        // Mirrors the DownsamplePassMeta struct of the generated shader. Every member has an
+        // initializer so that no indeterminate bytes are uploaded to the GPU.
+        struct DownsamplePassMeta {
+            uint32_t work_group_offset[2] = {0, 0};
+            uint32_t num_work_groups = 0;
+            uint32_t mips = 0;
+            uint32_t padding[12] = {}; // Ensure size is multiple of 16
+        } meta;
+        meta.num_work_groups = numWorkGroups;
+        meta.mips = numMips;
+
+        wgpu::BufferDescriptor metaBufferDesc{};
+        metaBufferDesc.size = sizeof(DownsamplePassMeta);
+        metaBufferDesc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
+        wgpu::Buffer metaBuffer = m_device.CreateBuffer(&metaBufferDesc);
+        m_device.GetQueue().WriteBuffer(metaBuffer, 0, &meta, sizeof(meta));
+
+        wgpu::BindGroupEntry metaEntry{};
+        metaEntry.binding = 0;
+        metaEntry.buffer = metaBuffer;
+
+        wgpu::BindGroupDescriptor internalBindGroupDesc{};
+        internalBindGroupDesc.layout = m_internalResourcesBindGroupLayout;
+        internalBindGroupDesc.entryCount = 1;
+        internalBindGroupDesc.entries = &metaEntry;
+        wgpu::BindGroup internalBindGroup = m_device.CreateBindGroup(&internalBindGroupDesc);
+
+        // --- Dispatch ---
+        // The Z dimension of the dispatch selects the array layer.
+        wgpu::ComputePassEncoder pass = commandEncoder.BeginComputePass();
+        pass.SetPipeline(spdPipeline.pipeline);
+        pass.SetBindGroup(0, mipsBindGroup);
+        pass.SetBindGroup(1, internalBindGroup);
+        pass.DispatchWorkgroups(numWorkGroupsX, numWorkGroupsY, arrayLayerCount);
+        pass.End();
+    }
+
+    // Main shader generation logic
+
+// Returns true when `str` compares equal to at least one element of `vec`; an empty `vec`
+// never matches.
+bool includes(const std::vector<std::string>& vec, const std::string& str) {
+    return std::find(vec.begin(), vec.end(), str) != vec.end();
+}
+
+std::string MakeShaderCode(wgpu::TextureFormat outputFormat,
+                           const std::string& filterOp,
+                           unsigned int numMips, // Assuming a default value for numMips
+                           SPDScalarType scalarType) { // Default scalarType
+    std::stringstream ss;
+
+    std::string texelType;
+    if (scalarType == SPDScalarType::I32) {
+        texelType = "i32";
+    } else if (scalarType == SPDScalarType::U32) {
+        texelType = "u32";
+    } else {
+        texelType = "f32";
+    }
+
+    bool useF16 = (scalarType == SPDScalarType::F16);
+
+    std::string filterCode = filterOp;
+    if (filterOp == SPD_FILTER_AVERAGE && !includes({"f32", "f16"}, texelType)) {
+        // Replace "* 0.25" with "/ 4"
+        size_t pos = filterCode.find("* 0.25");
+        if (pos != std::string::npos) {
+            filterCode.replace(pos, std::string("* 0.25").length(), "/ 4");
+        }
+    }
+
+    // Generate mipsBindings
+    std::string mipsBindings;
+    for (unsigned int i = 0; i < numMips; ++i) {
+        mipsBindings += "@group(0) @binding(" + std::to_string(i + 1) + ") var dst_mip_" + std::to_string(i + 1) + ": texture_storage_2d_array<" + to_string(outputFormat)  + ", write>;\n";
+    }
+
+    // Generate mipsAccessorBody
+    std::string mipsAccessorBody;
+    for (unsigned int i = 0; i < numMips; ++i) {
+        if (i == 5 && numMips > 6) {
+            mipsAccessorBody += " else if mip == 6 {\n";
+            mipsAccessorBody += "                textureStore(dst_mip_6, uv, slice, " + (useF16 ? "vec4<" + texelType + ">(value)" : "value") + ");\n";
+            mipsAccessorBody += "                mip_dst_6_buffer[slice][uv.y][uv.x] = value;\n";
+            mipsAccessorBody += "            }";
+        } else {
+            if (i != 0) {
+                mipsAccessorBody += " else ";
+            }
+            mipsAccessorBody += "if mip == " + std::to_string(i + 1) + " {\n";
+            mipsAccessorBody += "                textureStore(dst_mip_" + std::to_string(i + 1) + ", uv, slice, " + (useF16 ? "vec4<" + texelType + ">(value)" : "value") + ");\n";
+            mipsAccessorBody += "            }";
+        }
+    }
+
+    std::string mipsAccessor = "fn store_dst_mip(value: vec4<SPDScalar>, uv: vec2<u32>, slice: u32, mip: u32) {\n" + mipsAccessorBody + "\n}";
+    std::string midMipAccessor = "return mip_dst_6_buffer[slice][uv.y][uv.x];";
+
+    // Start building the final shader code string
+    ss << R"(
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2023 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the “Software”), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+// Definitions --------------------------------------------------------------------------------------------------------
+
+)";
+    if (useF16) {
+        ss << "enable f16;\n";
+    }
+    ss << "alias SPDScalar = " << texelType << ";\n\n"; // Using texelType here, assuming SPDScalar maps to it.
+
+    ss << R"(
+// Helpers ------------------------------------------------------------------------------------------------------------
+
+/**
+ * A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
+ * * The 64-wide lane indices to 8x8 remapping is performed as follows:
+ * 00 01 08 09 10 11 18 19
+ * 02 03 0a 0b 12 13 1a 1b
+ * 04 05 0c 0d 14 15 1c 1d
+ * 06 07 0e 0f 16 17 1e 1f
+ * 20 21 28 29 30 31 38 39
+ * 22 23 2a 2b 32 33 3a 3b
+ * 24 25 2c 2d 34 35 3c 3d
+ * 26 27 2e 2f 36 37 3e 3f
+ * * @param a: The input 1D coordinate to remap.
+ *
+ * @returns The remapped 2D coordinates.
+ */
+fn remap_for_wave_reduction(a: u32) -> vec2<u32> {
+    return vec2<u32>(
+        insertBits(extractBits(a, 2u, 3u), a, 0u, 1u),
+        insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u)
+    );
+}
+
+fn map_to_xy(local_invocation_index: u32) -> vec2<u32> {
+    let sub_xy: vec2<u32> = remap_for_wave_reduction(local_invocation_index % 64);
+    return vec2<u32>(
+        sub_xy.x + 8 * ((local_invocation_index >> 6) % 2),
+        sub_xy.y + 8 * ((local_invocation_index >> 7))
+    );
+}
+
+/*
+ * Compute a linear value from a SRGB value.
+ * * @param value: The value to convert to linear from SRGB.
+ * * @returns A value in SRGB space.
+ */
+/*
+fn srgb_to_linear(value: SPDScalar) -> SPDScalar {
+    let j = vec3<SPDScalar>(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    let k = vec2<SPDScalar>(1.055, -0.055);
+    return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y);
+}
+*/
+
+// Resources & Accessors -----------------------------------------------------------------------------------------------
+struct DownsamplePassMeta {
+    work_group_offset: vec2<u32>,
+    num_work_groups: u32,
+    mips: u32,
+}
+
+// In the original version dst_mip_i is an image2Darray [SPD_MAX_MIP_LEVELS+1], i.e., 12+1, but WGSL doesn't support arrays of textures yet
+// Also these are read_write because for mips 7-13, the workgroup reads from mip level 6 - since most formats don't support read_write access in WGSL yet, we use a single read_write buffer in such cases instead
+@group(0) @binding(0) var src_mip_0: texture_2d_array<)" << texelType << R"(>;
+)" << mipsBindings << R"(
+
+@group(1) @binding(0) var<uniform> downsample_pass_meta : DownsamplePassMeta;
+@group(1) @binding(1) var<storage, read_write> spd_global_counter: array<atomic<u32>>;
+@group(1) @binding(2) var<storage, read_write> mip_dst_6_buffer: array<array<array<vec4<f32>, 64>, 64>>;
+
+fn get_mips() -> u32 {
+    return downsample_pass_meta.mips;
+}
+
+fn get_num_work_groups() -> u32 {
+    return downsample_pass_meta.num_work_groups;
+}
+
+fn get_work_group_offset() -> vec2<u32> {
+    return downsample_pass_meta.work_group_offset;
+}
+
+fn load_src_image(uv: vec2<u32>, slice: u32) -> vec4<SPDScalar> {
+    return vec4<SPDScalar>(textureLoad(src_mip_0, uv, slice, 0));
+}
+
+fn load_mid_mip_image(uv: vec2<u32>, slice: u32) -> vec4<SPDScalar> {
+    )";
+    if (numMips > 6) {
+        ss << midMipAccessor;
+    } else {
+        ss << "return vec4<SPDScalar>();";
+    }
+    ss << R"(
+}
+
+)" << mipsAccessor << R"(
+
+// Workgroup -----------------------------------------------------------------------------------------------------------
+
+var<workgroup> spd_intermediate: array<array<vec4<SPDScalar>, 16>, 16>;
+var<workgroup> spd_counter: atomic<u32>;
+
+fn spd_increase_atomic_counter(slice: u32) {
+    atomicStore(&spd_counter, atomicAdd(&spd_global_counter[slice], 1));
+}
+
+fn spd_get_atomic_counter() -> u32 {
+    return atomicLoad(&spd_counter);
+}
+
+fn spd_reset_atomic_counter(slice: u32) {
+    atomicStore(&spd_global_counter[slice], 0);
+}
+
+// Cotnrol flow --------------------------------------------------------------------------------------------------------
+
+fn spd_barrier() {
+    // in glsl this does: groupMemoryBarrier(); barrier();
+    workgroupBarrier();
+}
+
+// Only last active workgroup should proceed
+fn spd_exit_workgroup(num_work_groups: u32, local_invocation_index: u32, slice: u32) -> bool {
+    // global atomic counter
+    if (local_invocation_index == 0) {
+        spd_increase_atomic_counter(slice);
+    }
+    spd_barrier();
+    return spd_get_atomic_counter() != (num_work_groups - 1);
+}
+
+// Pixel access --------------------------------------------------------------------------------------------------------
+
+)" << filterCode << R"(
+
+fn spd_store(pix: vec2<u32>, out_value: vec4<SPDScalar>, mip: u32, slice: u32) {
+    store_dst_mip(out_value, pix, slice, mip + 1);
+}
+
+fn spd_load_intermediate(x: u32, y: u32) -> vec4<SPDScalar> {
+    return spd_intermediate[x][y];
+}
+
+fn spd_store_intermediate(x: u32, y: u32, value: vec4<SPDScalar>) {
+    spd_intermediate[x][y] = value;
+}
+
+fn spd_reduce_intermediate(i0: vec2<u32>, i1: vec2<u32>, i2: vec2<u32>, i3: vec2<u32>) -> vec4<SPDScalar> {
+    let v0 = spd_load_intermediate(i0.x, i0.y);
+    let v1 = spd_load_intermediate(i1.x, i1.y);
+    let v2 = spd_load_intermediate(i2.x, i2.y);
+    let v3 = spd_load_intermediate(i3.x, i3.y);
+    return spd_reduce_4(v0, v1, v2, v3);
+}
+
+fn spd_reduce_load_4(base: vec2<u32>, slice: u32) -> vec4<SPDScalar> {
+    let v0 = load_src_image(base + vec2<u32>(0, 0), slice);
+    let v1 = load_src_image(base + vec2<u32>(0, 1), slice);
+    let v2 = load_src_image(base + vec2<u32>(1, 0), slice);
+    let v3 = load_src_image(base + vec2<u32>(1, 1), slice);
+    return spd_reduce_4(v0, v1, v2, v3);
+}
+
+fn spd_reduce_load_mid_mip_4(base: vec2<u32>, slice: u32) -> vec4<SPDScalar> {
+    let v0 = load_mid_mip_image(base + vec2<u32>(0, 0), slice);
+    let v1 = load_mid_mip_image(base + vec2<u32>(0, 1), slice);
+    let v2 = load_mid_mip_image(base + vec2<u32>(1, 0), slice);
+    let v3 = load_mid_mip_image(base + vec2<u32>(1, 1), slice);
+    return spd_reduce_4(v0, v1, v2, v3);
+}
+
+// Main logic ---------------------------------------------------------------------------------------------------------
+
+fn spd_downsample_mips_0_1(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    var v: array<vec4<SPDScalar>, 4>;
+
+    let workgroup64 = workgroup_id.xy * 64;
+    let workgroup32 = workgroup_id.xy * 32;
+    let workgroup16 = workgroup_id.xy * 16;
+
+    var tex = workgroup64 + vec2<u32>(x * 2, y * 2);
+    var pix = workgroup32 + vec2<u32>(x, y);
+    v[0] = spd_reduce_load_4(tex, slice);
+    spd_store(pix, v[0], 0, slice);
+
+    tex = workgroup64 + vec2<u32>(x * 2 + 32, y * 2);
+    pix = workgroup32 + vec2<u32>(x + 16, y);
+    v[1] = spd_reduce_load_4(tex, slice);
+    spd_store(pix, v[1], 0, slice);
+
+    tex = workgroup64 + vec2<u32>(x * 2, y * 2 + 32);
+    pix = workgroup32 + vec2<u32>(x, y + 16);
+    v[2] = spd_reduce_load_4(tex, slice);
+    spd_store(pix, v[2], 0, slice);
+
+    tex = workgroup64 + vec2<u32>(x * 2 + 32, y * 2 + 32);
+    pix = workgroup32 + vec2<u32>(x + 16, y + 16);
+    v[3] = spd_reduce_load_4(tex, slice);
+    spd_store(pix, v[3], 0, slice);
+
+    if mip <= 1 {
+        return;
+    }
+
+    for (var i = 0u; i < 4u; i++) {
+        spd_store_intermediate(x, y, v[i]);
+        spd_barrier();
+        if local_invocation_index < 64 {
+            v[i] = spd_reduce_intermediate(
+                vec2<u32>(x * 2 + 0, y * 2 + 0),
+                vec2<u32>(x * 2 + 1, y * 2 + 0),
+                vec2<u32>(x * 2 + 0, y * 2 + 1),
+                vec2<u32>(x * 2 + 1, y * 2 + 1)
+            );
+            spd_store(workgroup16 + vec2<u32>(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
+        }
+        spd_barrier();
+    }
+
+    if local_invocation_index < 64 {
+        spd_store_intermediate(x + 0, y + 0, v[0]);
+        spd_store_intermediate(x + 8, y + 0, v[1]);
+        spd_store_intermediate(x + 0, y + 8, v[2]);
+        spd_store_intermediate(x + 8, y + 8, v[3]);
+    }
+}
+
+fn spd_downsample_mip_2(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    if local_invocation_index < 64u {
+        let v = spd_reduce_intermediate(
+            vec2<u32>(x * 2 + 0, y * 2 + 0),
+            vec2<u32>(x * 2 + 1, y * 2 + 0),
+            vec2<u32>(x * 2 + 0, y * 2 + 1),
+            vec2<u32>(x * 2 + 1, y * 2 + 1)
+        );
+        spd_store(workgroup_id.xy * 8 + vec2<u32>(x, y), v, mip, slice);
+        // store to LDS, try to reduce bank conflicts
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // ...
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        spd_store_intermediate(x * 2 + y % 2, y * 2, v);
+    }
+}
+
+fn spd_downsample_mip_3(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    if local_invocation_index < 16u {
+        // x 0 x 0
+        // 0 0 0 0
+        // 0 x 0 x
+        // 0 0 0 0
+        let v = spd_reduce_intermediate(
+            vec2<u32>(x * 4 + 0 + 0, y * 4 + 0),
+            vec2<u32>(x * 4 + 2 + 0, y * 4 + 0),
+            vec2<u32>(x * 4 + 0 + 1, y * 4 + 2),
+            vec2<u32>(x * 4 + 2 + 1, y * 4 + 2)
+        );
+        spd_store(workgroup_id.xy * 4 + vec2<u32>(x, y), v, mip, slice);
+        // store to LDS
+        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
+        // ...
+        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
+        // ...
+        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
+        // ...
+        spd_store_intermediate(x * 4 + y, y * 4, v);
+    }
+}
+
+fn spd_downsample_mip_4(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    if local_invocation_index < 4u {
+        // x 0 0 0 x 0 0 0
+        // ...
+        // 0 x 0 0 0 x 0 0
+        let v = spd_reduce_intermediate(
+            vec2<u32>(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
+            vec2<u32>(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
+            vec2<u32>(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
+            vec2<u32>(x * 8 + 4 + 1 + y * 2, y * 8 + 4)
+        );
+        spd_store(workgroup_id.xy * 2 + vec2<u32>(x, y), v, mip, slice);
+        // store to LDS
+        // x x x x 0 ...
+        // 0 ...
+        spd_store_intermediate(x + y * 2, 0, v);
+    }
+}
+
+fn spd_downsample_mip_5(workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    if local_invocation_index < 1u {
+        // x x x x 0 ...
+        // 0 ...
+        let v = spd_reduce_intermediate(vec2<u32>(0, 0), vec2<u32>(1, 0), vec2<u32>(2, 0), vec2<u32>(3, 0));
+        spd_store(workgroup_id.xy, v, mip, slice);
+    }
+}
+
+fn spd_downsample_next_four(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) {
+    if mips <= base_mip {
+        return;
+    }
+    spd_barrier();
+    spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice);
+
+    if mips <= base_mip + 1 {
+        return;
+    }
+    spd_barrier();
+    spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice);
+
+    if mips <= base_mip + 2 {
+        return;
+    }
+    spd_barrier();
+    spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice);
+
+    if mips <= base_mip + 3 {
+        return;
+    }
+    spd_barrier();
+    spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice);
+}
+
+fn spd_downsample_last_four(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32, exit: bool) {
+    if mips <= base_mip {
+        return;
+    }
+    spd_barrier();
+    if !exit {
+        spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice);
+    }
+
+    if mips <= base_mip + 1 {
+        return;
+    }
+    spd_barrier();
+    if !exit {
+        spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice);
+    }
+
+    if mips <= base_mip + 2 {
+        return;
+    }
+    spd_barrier();
+    if !exit {
+        spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice);
+    }
+
+    if mips <= base_mip + 3 {
+        return;
+    }
+    spd_barrier();
+    if !exit {
+        spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice);
+    }
+}
+
+fn spd_downsample_mips_6_7(x: u32, y: u32, mips: u32, slice: u32) {
+    var tex = vec2<u32>(x * 4 + 0, y * 4 + 0);
+    var pix = vec2<u32>(x * 2 + 0, y * 2 + 0);
+    let v0 = spd_reduce_load_mid_mip_4(tex, slice);
+    spd_store(pix, v0, 6, slice);
+
+    tex = vec2<u32>(x * 4 + 2, y * 4 + 0);
+    pix = vec2<u32>(x * 2 + 1, y * 2 + 0);
+    let v1 = spd_reduce_load_mid_mip_4(tex, slice);
+    spd_store(pix, v1, 6, slice);
+
+    tex = vec2<u32>(x * 4 + 0, y * 4 + 2);
+    pix = vec2<u32>(x * 2 + 0, y * 2 + 1);
+    let v2 = spd_reduce_load_mid_mip_4(tex, slice);
+    spd_store(pix, v2, 6, slice);
+
+    tex = vec2<u32>(x * 4 + 2, y * 4 + 2);
+    pix = vec2<u32>(x * 2 + 1, y * 2 + 1);
+    let v3 = spd_reduce_load_mid_mip_4(tex, slice);
+    spd_store(pix, v3, 6, slice);
+
+    if mips <= 7 {
+        return;
+    }
+    // no barrier needed, working on values only from the same thread
+
+    let v = spd_reduce_4(v0, v1, v2, v3);
+    spd_store(vec2<u32>(x, y), v, 7, slice);
+    spd_store_intermediate(x, y, v);
+}
+
+fn spd_downsample_last_6(x: u32, y: u32, local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) {
+    if mips <= 6 {
+        return;
+    }
+
+    // increase the global atomic counter for the given slice and check if it's the last remaining thread group:
+    // terminate if not, continue if yes.
+    let exit = spd_exit_workgroup(num_work_groups, local_invocation_index, slice);
+
+    // can't exit directly because subsequent barrier calls break uniform control flow...
+    if !exit {
+        // reset the global atomic counter back to 0 for the next spd dispatch
+        spd_reset_atomic_counter(slice);
+
+        // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
+        // compute MIP level 6 and 7
+        spd_downsample_mips_6_7(x, y, mips, slice);
+    }
+
+    // compute MIP level 8, 9, 10, 11
+    spd_downsample_last_four(x, y, vec2<u32>(0, 0), local_invocation_index, 8, mips, slice, exit);
+}
+
+/// Downsamples a 64x64 tile based on the work group id.
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels.
+///
+/// @param [in] workGroupID        index of the work group / thread group
+/// @param [in] localInvocationIndex   index of the thread within the thread group in 1D
+/// @param [in] mips             the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups        the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice             the slice of the input texture
+fn spd_downsample(workgroup_id: vec2<u32>, local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) {
+    let xy = map_to_xy(local_invocation_index);
+    spd_downsample_mips_0_1(xy.x, xy.y, workgroup_id, local_invocation_index, mips, slice);
+    spd_downsample_next_four(xy.x, xy.y, workgroup_id, local_invocation_index, 2, mips, slice);
+)";
+    if (numMips > 6) {
+        ss << "    spd_downsample_last_6(xy.x, xy.y, local_invocation_index, mips, num_work_groups, slice);\n";
+    }
+    ss << R"(}
+
+// Entry points -------------------------------------------------------------------------------------------------------
+
+@compute
+@workgroup_size(256, 1, 1)
+fn downsample(@builtin(local_invocation_index) local_invocation_index: u32, @builtin(workgroup_id) workgroup_id: vec3<u32>) {
+    spd_downsample(
+        workgroup_id.xy + get_work_group_offset(),
+        local_invocation_index,
+        get_mips(),
+        get_num_work_groups(),
+        workgroup_id.z
+    );
+}
+)";
+    return ss.str();
+}
+
+
+} // namespace spd
--- a/filament/backend/src/webgpu/SpdMipmapGenerator.h
+++ b/filament/backend/src/webgpu/SpdMipmapGenerator.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include <webgpu/webgpu_cpp.h>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <optional>
+// C++ port of https://github.com/JolifantoBambla/webgpu-spd for early experiments- do not merge like this
+namespace spd {
+
+    // Downsampling filter; set per pass in SPDPassConfig and used as a pipeline cache key.
+    enum class SPDFilter {
+        Average, // presumably pairs with SPD_FILTER_AVERAGE ("value * 0.25") - TODO confirm
+        Min,
+        Max,
+        MinMax
+    };
+
+    // Shader scalar type; passed to MakeShaderCode and used as a pipeline cache key.
+    enum class SPDScalarType {
+        F32,
+        F16,
+        I32,
+        U32
+    };
+
+    // Per-pass settings consumed by MipmapGenerator::Generate.
+    struct SPDPassConfig {
+        SPDFilter filter = SPDFilter::Average;
+        wgpu::Texture targetTexture = nullptr;
+        uint32_t numMips = 0;
+        bool halfPrecision = false;
+        uint32_t sourceMipLevel = 0;
+    };
+    
+    // Compute pipeline plus its bind group layout; the value type of the pipeline cache.
+    struct SPDPipeline {
+        wgpu::BindGroupLayout mipsBindGroupLayout = nullptr;
+        wgpu::ComputePipeline pipeline = nullptr;
+    };
+
+    // Creates, caches and runs the compute pipelines used for mipmap generation.
+    class MipmapGenerator {
+    public:
+        explicit MipmapGenerator(const wgpu::Device& device); // no implicit Device -> generator
+
+        // Pre-creates pipelines for the given format/filter so Generate() can reuse them.
+        void PreparePipelines(wgpu::TextureFormat format, SPDFilter filter, bool halfPrecision = false);
+
+        // Records a mipmap-generation compute pass for srcTexture into commandEncoder.
+        void Generate(
+            wgpu::CommandEncoder& commandEncoder,
+            wgpu::Texture srcTexture,
+            const SPDPassConfig& config
+        );
+
+    private:
+        wgpu::Device m_device;
+        wgpu::BindGroupLayout m_internalResourcesBindGroupLayout;
+
+        // Cached pipelines: Map<TextureFormat, Map<SPDScalarType, Map<Filter, Map<NumMips, Pipeline>>>>
+        std::unordered_map<wgpu::TextureFormat,
+            std::unordered_map<SPDScalarType,
+                std::unordered_map<SPDFilter,
+                    std::unordered_map<uint32_t, SPDPipeline>>>> m_pipelines;
+
+        // Helper methods
+        SPDPipeline& GetOrCreatePipeline(wgpu::TextureFormat format, SPDFilter filter, uint32_t numMips, SPDScalarType scalarType);
+        SPDScalarType SanitizeScalarType(wgpu::TextureFormat format, bool halfPrecision);
+        std::string GetFilterCode(SPDFilter filter);
+    };
+    // Default filter expression for MakeShaderCode; inline so all TUs share one instance.
+    inline const std::string SPD_FILTER_AVERAGE = "value * 0.25"; // Original filter operation
+
+    // Builds the WGSL shader source as a string; every argument but outputFormat has a default.
+    std::string MakeShaderCode(
+        wgpu::TextureFormat outputFormat,
+        const std::string& filterOp = SPD_FILTER_AVERAGE,
+        uint32_t numMips = 0,
+        SPDScalarType scalarType = SPDScalarType::F32
+    );
+
+} // namespace spd
--- a/filament/backend/src/webgpu/WGPUProgram.cpp
+++ b/filament/backend/src/webgpu/WGPUProgram.cpp
@@ -1,203 +0,0 @@
-/*
- * Copyright (C) 2025 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WebGPUHandles.h"
-
-#include "WebGPUConstants.h"
-
-#include "DriverBase.h"
-#include <backend/DriverEnums.h>
-#include <backend/Program.h>
-
-#include <utils/Panic.h>
-#include <utils/ostream.h>
-
-#include <webgpu/webgpu_cpp.h>
-
-#include <algorithm>
-#include <sstream>
-#include <string_view>
-#include <vector>
-
-namespace filament::backend {
-
-namespace {
-
-[[nodiscard]] constexpr std::string_view toString(ShaderStage stage) {
-    switch (stage) {
-        case ShaderStage::VERTEX:
-            return "vertex";
-        case ShaderStage::FRAGMENT:
-            return "fragment";
-        case ShaderStage::COMPUTE:
-            return "compute";
-    }
-}
-
-[[nodiscard]] wgpu::ShaderModule createShaderModule(wgpu::Device& device, const char* programName,
-        std::array<utils::FixedCapacityVector<uint8_t>, Program::SHADER_TYPE_COUNT> const&
-                shaderSource,
-        ShaderStage stage) {
-    utils::FixedCapacityVector<uint8_t> const& sourceBytes =
-            shaderSource[static_cast<size_t>(stage)];
-    if (sourceBytes.empty()) {
-        return nullptr;// nothing to compile, the shader was not provided
-    }
-    wgpu::ShaderModuleWGSLDescriptor wgslDescriptor{};
-    wgslDescriptor.code = wgpu::StringView(reinterpret_cast<const char*>(sourceBytes.data()));
-    std::stringstream labelStream;
-    labelStream << programName << " " << toString(stage) << " shader";
-    auto label = labelStream.str();
-    wgpu::ShaderModuleDescriptor descriptor{
-        .nextInChain = &wgslDescriptor,
-        .label = label.data()
-    };
-    wgpu::ShaderModule module = device.CreateShaderModule(&descriptor);
-    FILAMENT_CHECK_POSTCONDITION(module != nullptr) << "Failed to create " << descriptor.label;
-
-    wgpu::Instance instance = device.GetAdapter().GetInstance();
-    instance.WaitAny(
-            module.GetCompilationInfo(wgpu::CallbackMode::WaitAnyOnly,
-                    [&descriptor](auto const& status,
-                            wgpu::CompilationInfo const* info) {
-                        switch (status) {
-                            case wgpu::CompilationInfoRequestStatus::CallbackCancelled:
-                                FWGPU_LOGW << "Shader compilation info callback cancelled for "
-                                           << descriptor.label << "?" << utils::io::endl;
-                                return;
-                            case wgpu::CompilationInfoRequestStatus::Success:
-                                break;
-                        }
-                        if (info != nullptr) {
-                            std::stringstream errorStream;
-                            int errorCount = 0;
-                            for (size_t msgIndex = 0; msgIndex < info->messageCount; msgIndex++) {
-                                wgpu::CompilationMessage const& message = info->messages[msgIndex];
-                                switch (message.type) {
-                                    case wgpu::CompilationMessageType::Info:
-                                        FWGPU_LOGI << descriptor.label << ": " << message.message
-                                                   << " line#:" << message.lineNum
-                                                   << " linePos:" << message.linePos
-                                                   << " offset:" << message.offset
-                                                   << " length:" << message.length
-                                                   << utils::io::endl;
-                                        break;
-                                    case wgpu::CompilationMessageType::Warning:
-                                        FWGPU_LOGW
-                                                << "Warning compiling " << descriptor.label << ": "
-                                                << message.message << " line#:" << message.lineNum
-                                                << " linePos:" << message.linePos
-                                                << " offset:" << message.offset
-                                                << " length:" << message.length << utils::io::endl;
-                                        break;
-                                    case wgpu::CompilationMessageType::Error:
-                                        errorCount++;
-                                        errorStream << "Error " << errorCount << " : "
-                                                    << std::string_view(message.message)
-                                                    << " line#:" << message.lineNum
-                                                    << " linePos:" << message.linePos
-                                                    << " offset:" << message.offset
-                                                    << " length:" << message.length << "\n";
-                                        break;
-                                }
-                            }
-                            FILAMENT_CHECK_POSTCONDITION(errorCount < 1)
-                                    << errorCount << " error(s) compiling " << descriptor.label
-                                    << ":\n"
-                                    << errorStream.str();
-                        }
-                        FWGPU_LOGD << descriptor.label << " compiled successfully"
-                                   << utils::io::endl;
-                    }),
-            UINT16_MAX);
-    return module;
-}
-
-// This is a 1 to 1 mapping of the ReservedSpecializationConstants enum in EngineEnums.h
-// The _hack is a workaround until https://issues.chromium.org/issues/42250586 is resolved
-// This workaround is the same one being used on the generateSpecializationConstant() function
-wgpu::StringView getSpecConstantStringId(uint32_t id) {
-    switch (id) {
-        case 0:
-            return "0";// BACKEND_FEATURE_LEVEL_hack
-        case 1:
-            return "1";// CONFIG_MAX_INSTANCES_hack
-        case 2:
-            return "2";// ONFIG_STATIC_TEXTURE_TARGET_WORKAROUND_hack
-        case 3:
-            return "3";// CONFIG_SRGB_SWAPCHAIN_EMULATION_hack
-        case 4:
-            return "4";// CONFIG_FROXEL_BUFFER_HEIGHT_hack
-        case 5:
-            return "5";// CONFIG_POWER_VR_SHADER_WORKAROUNDS_hack
-        case 6:
-            return "6";// CONFIG_DEBUG_DIRECTIONAL_SHADOWMAP_hack
-        case 7:
-            return "7";// CONFIG_DEBUG_FROXEL_VISUALIZATION_hack
-        case 8:
-            return "8";// CONFIG_STEREO_EYE_COUNT_hack
-        case 9:
-            return "9";// CONFIG_SH_BANDS_COUNT_hack
-        case 10:
-            return "10";// CONFIG_SHADOW_SAMPLING_METHOD_hack
-        default:
-            PANIC_POSTCONDITION("Unknown/unhandled spec constant key/id: %d", id);
-    }
-}
-
-std::vector<wgpu::ConstantEntry> convertConstants(
-        utils::FixedCapacityVector<filament::backend::Program::SpecializationConstant> const&
-                constantsInfo) {
-    std::vector<wgpu::ConstantEntry> constants;
-    constants.reserve(constantsInfo.size());
-    for (filament::backend::Program::SpecializationConstant const& constant: constantsInfo) {
-        // CONFIG_MAX_INSTANCES (1) and CONFIG_FROXEL_BUFFER_HEIGHT (4) will not be present
-        // as constant overrides in the generated WGSL, because WGSL doesn't support specialization
-        // constants as an array length
-        // More information at https://github.com/gpuweb/gpuweb/issues/572#issuecomment-649760005
-        // CONFIG_SRGB_SWAPCHAIN_EMULATION (3) is being skipped all together since it's only
-        // included for the case of mFeatureLevel == FeatureLevel::FEATURE_LEVEL_0, which should
-        // not be possible for WebGPU
-        if (constant.id == 1 || constant.id == 3 || constant.id == 4) {
-            continue;
-        }
-        double value = 0.0;
-        if (auto* v = std::get_if<int32_t>(&constant.value)) {
-            value = static_cast<double>(*v);
-        } else if (auto* f = std::get_if<float>(&constant.value)) {
-            value = static_cast<double>(*f);
-        } else if (auto* b = std::get_if<bool>(&constant.value)) {
-            value = *b ? 0.0 : 1.0;
-        }
-        constants.push_back(
-                wgpu::ConstantEntry{ .key = getSpecConstantStringId(constant.id), .value = value });
-    }
-    return constants;
-}
-
-}// namespace
-
-WGPUProgram::WGPUProgram(wgpu::Device& device, Program& program)
-    : HwProgram(program.getName()),
-      vertexShaderModule(createShaderModule(device, name.c_str_safe(), program.getShadersSource(),
-              ShaderStage::VERTEX)),
-      fragmentShaderModule(createShaderModule(device, name.c_str_safe(), program.getShadersSource(),
-              ShaderStage::FRAGMENT)),
-      computeShaderModule(createShaderModule(device, name.c_str_safe(), program.getShadersSource(),
-              ShaderStage::COMPUTE)),
-      constants(convertConstants(program.getSpecializationConstants())) {}
-
-}// namespace filament::backend
--- a/filament/backend/src/webgpu/WebGPUBufferBase.cpp
+++ b/filament/backend/src/webgpu/WebGPUBufferBase.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WebGPUBufferBase.h"
+
+#include "WebGPUConstants.h"
+
+#include "DriverBase.h"
+#include <backend/BufferDescriptor.h>
+
+#include <utils/Panic.h>
+#include <utils/debug.h>
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <cstdint>
+#include <cstring>
+
+namespace filament::backend {
+
+namespace {
+
+[[nodiscard]] wgpu::Buffer createBuffer(wgpu::Device const& device, const wgpu::BufferUsage usage,
+        uint32_t size, const char* const label) {
+    // Writes must be sized in multiples of WEBGPU_BUFFER_SIZE_MODULUS (e.g. 4). Buffers are
+    // commonly written in full, so round the allocation up to the next such multiple.
+    const size_t excess = size % WEBGPU_BUFFER_SIZE_MODULUS;
+    size += (excess == 0) ? 0 : WEBGPU_BUFFER_SIZE_MODULUS - excess;
+    wgpu::BufferDescriptor bufferInfo{};
+    bufferInfo.label = label;
+    bufferInfo.usage = usage;
+    bufferInfo.size = size;
+    bufferInfo.mappedAtCreation = false;
+    wgpu::Buffer created = device.CreateBuffer(&bufferInfo);
+    FILAMENT_CHECK_POSTCONDITION(created) << "Failed to create buffer for " << label;
+    return created;
+}
+
+} // namespace
+
+WebGPUBufferBase::WebGPUBufferBase(wgpu::Device const& device, const wgpu::BufferUsage usage,
+        const uint32_t size, char const* const label)
+    : mBuffer{ createBuffer(device, usage, size, label) } {} // createBuffer pads size as needed
+
+// Copies the descriptor's bytes into mBuffer at byteOffset, zero-padding a partial final chunk.
+void WebGPUBufferBase::updateGPUBuffer(BufferDescriptor const& bufferDescriptor,
+        const uint32_t byteOffset, wgpu::Queue const& queue) {
+    FILAMENT_CHECK_PRECONDITION(bufferDescriptor.buffer)
+            << "updateGPUBuffer called with a null buffer";
+    FILAMENT_CHECK_PRECONDITION(bufferDescriptor.size + byteOffset <= mBuffer.GetSize())
+            << "Attempting to copy " << bufferDescriptor.size << " bytes into a buffer of size "
+            << mBuffer.GetSize() << " at offset " << byteOffset;
+    FILAMENT_CHECK_PRECONDITION(byteOffset % WEBGPU_BUFFER_SIZE_MODULUS == 0)
+            << "Byte offset must be a multiple of " << WEBGPU_BUFFER_SIZE_MODULUS << " but is "
+            << byteOffset;
+
+    // TODO: All buffer objects are created with CopyDst usage.
+    // This may have some performance implications. That should be investigated later.
+    assert_invariant(mBuffer.GetUsage() & wgpu::BufferUsage::CopyDst);
+
+    const size_t remainder = bufferDescriptor.size % WEBGPU_BUFFER_SIZE_MODULUS;
+    // WriteBuffer is an async call. But cpu buffer data is already written to the staging
+    // buffer on return from the WriteBuffer.
+    const size_t legalSize = bufferDescriptor.size - remainder;
+    queue.WriteBuffer(mBuffer, byteOffset, bufferDescriptor.buffer, legalSize);
+    if (remainder != 0) {
+        const uint8_t* remainderStart =
+                static_cast<const uint8_t*>(bufferDescriptor.buffer) + legalSize;
+        std::memcpy(mRemainderChunk.data(), remainderStart, remainder);
+        // Pad the remainder with zeros to ensure deterministic behavior, though GPU shouldn't
+        // access this
+        std::memset(mRemainderChunk.data() + remainder, 0, WEBGPU_BUFFER_SIZE_MODULUS - remainder);
+        // Pass the byte storage itself (data()), matching the memcpy/memset calls above.
+        queue.WriteBuffer(mBuffer, byteOffset + legalSize, mRemainderChunk.data(),
+                WEBGPU_BUFFER_SIZE_MODULUS);
+    }
+}
+
+}// namespace filament::backend
--- a/filament/backend/src/webgpu/WebGPUBufferBase.h
+++ b/filament/backend/src/webgpu/WebGPUBufferBase.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_WEBGPUBUFFERBASE_H
+#define TNT_FILAMENT_BACKEND_WEBGPUBUFFERBASE_H
+
+#include "WebGPUConstants.h"
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <array>
+#include <cstdint>
+
+namespace filament::backend {
+
+class BufferDescriptor;
+
+class WebGPUBufferBase /* intended to be extended */ {
+public:
+    void updateGPUBuffer(BufferDescriptor const&, uint32_t byteOffset, wgpu::Queue const&);
+    // The underlying GPU buffer, created once at construction.
+    [[nodiscard]] wgpu::Buffer const& getBuffer() const { return mBuffer; }
+
+protected:
+    WebGPUBufferBase(wgpu::Device const&, wgpu::BufferUsage, uint32_t size, char const* label);
+
+private:
+    const wgpu::Buffer mBuffer;
+    // Scratch of WEBGPU_BUFFER_SIZE_MODULUS (e.g. 4) bytes for the zero-padded tail of a write.
+    std::array<uint8_t, WEBGPU_BUFFER_SIZE_MODULUS> mRemainderChunk{};
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_WEBGPUBUFFERBASE_H
--- a/filament/backend/src/webgpu/WebGPUBufferObject.cpp
+++ b/filament/backend/src/webgpu/WebGPUBufferObject.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WebGPUBufferObject.h"
+
+#include "WebGPUBufferBase.h"
+
+#include "DriverBase.h"
+#include <backend/DriverEnums.h>
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <cstdint>
+
+namespace filament::backend {
+
+namespace {
+
+[[nodiscard]] constexpr wgpu::BufferUsage getBufferObjectUsage(
+        const BufferObjectBinding bindingType) noexcept {
+    switch (bindingType) { // no default case or fallback return; assumes a valid enumerator
+        case BufferObjectBinding::VERTEX:         return wgpu::BufferUsage::Vertex;
+        case BufferObjectBinding::UNIFORM:        return wgpu::BufferUsage::Uniform;
+        case BufferObjectBinding::SHADER_STORAGE: return wgpu::BufferUsage::Storage;
+    }
+}
+
+} // namespace
+
+WebGPUBufferObject::WebGPUBufferObject(wgpu::Device const& device,
+        const BufferObjectBinding bindingType, const uint32_t byteCount)
+    : HwBufferObject{ byteCount },
+      WebGPUBufferBase{ device, wgpu::BufferUsage::CopyDst | getBufferObjectUsage(bindingType),
+          byteCount, "buffer_object" } {} // CopyDst: asserted by updateGPUBuffer
+
+} // namespace filament::backend
--- a/filament/backend/src/webgpu/WebGPUBufferObject.h
+++ b/filament/backend/src/webgpu/WebGPUBufferObject.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_WEBGPUBUFFEROBJECT_H
+#define TNT_FILAMENT_BACKEND_WEBGPUBUFFEROBJECT_H
+
+#include "WebGPUBufferBase.h"
+
+#include "DriverBase.h"
+
+#include <cstdint>
+
+namespace wgpu {
+class Device;
+}
+
+namespace filament::backend {
+
+enum class BufferObjectBinding : uint8_t;
+// Filament buffer object (HwBufferObject) whose storage is provided by WebGPUBufferBase.
+class WebGPUBufferObject final : public HwBufferObject, public WebGPUBufferBase {
+public:
+    WebGPUBufferObject(wgpu::Device const&, BufferObjectBinding, uint32_t byteCount);
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_WEBGPUBUFFEROBJECT_H
--- a/filament/backend/src/webgpu/WebGPUConstants.h
+++ b/filament/backend/src/webgpu/WebGPUConstants.h
@@ -21,6 +21,8 @@

 #include <cstdint>

+constexpr size_t WEBGPU_BUFFER_SIZE_MODULUS = 4; // buffer write sizes must be multiples of this
+
 // FWGPU is short for Filament WebGPU

 // turn on runtime validation, namely for debugging, that would normally not run (for release)
@@ -63,4 +65,13 @@
    #define FWGPU_LOGI (utils::slog.i)
 #endif

+constexpr uint64_t REQUEST_ADAPTER_TIMEOUT_NANOSECONDS =
+        /* milliseconds */ 1000u * /* converted to ns */ 1000000u; // i.e. 1 second
+
+constexpr uint64_t REQUEST_DEVICE_TIMEOUT_NANOSECONDS =
+        /* milliseconds */ 1000u * /* converted to ns */ 1000000u; // i.e. 1 second
+
+constexpr uint64_t SHADER_COMPILATION_TIMEOUT_NANOSECONDS =
+        /* milliseconds */ 1000u * /* converted to ns */ 1000000u; // i.e. 1 second
+
 #endif// TNT_FILAMENT_BACKEND_WEBGPUCONSTANTS_H
--- a/filament/backend/src/webgpu/WebGPUDescriptorSet.cpp
+++ b/filament/backend/src/webgpu/WebGPUDescriptorSet.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WebGPUDescriptorSet.h"
+
+#include "WebGPUDescriptorSetLayout.h"
+
+#include <backend/DriverEnums.h>
+
+#include <utils/Panic.h>
+#include <utils/debug.h>
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+namespace filament::backend {
+
+namespace {
+
+constexpr uint8_t INVALID_INDEX = MAX_DESCRIPTOR_COUNT + 1; // marks bindings that have no entry
+
+} // namespace
+
+WebGPUDescriptorSet::WebGPUDescriptorSet(wgpu::BindGroupLayout const& layout,
+        std::vector<WebGPUDescriptorSetLayout::BindGroupEntryInfo> const& bindGroupEntries)
+    : mLayout{ layout },
+      mEntriesWithDynamicOffsetsCount{ static_cast<size_t>(std::count_if(bindGroupEntries.begin(),
+              bindGroupEntries.end(), [](auto const& info) { return info.hasDynamicOffset; })) } {
+    // The sentinel must not collide with any valid position in the lookup table.
+    assert_invariant(INVALID_INDEX > mEntryIndexByBinding.size());
+    // Every binding starts out unmapped...
+    mEntryIndexByBinding.fill(INVALID_INDEX);
+
+    // One entry per layout entry, in layout order; only the binding number is known for now.
+    const size_t entryCount = bindGroupEntries.size();
+    mEntries.resize(entryCount);
+    for (size_t slot = 0; slot < entryCount; ++slot) {
+        const auto binding = bindGroupEntries[slot].binding;
+        assert_invariant(binding < mEntryIndexByBinding.size());
+        mEntries[slot].binding = binding;
+        // ...and is then pointed at the slot holding its entry.
+        mEntryIndexByBinding[binding] = static_cast<uint8_t>(slot);
+    }
+}
+
+// Records `entry` for binding `index`, to be used when lockAndReturn() builds the bind group.
+void WebGPUDescriptorSet::addEntry(const unsigned int index, wgpu::BindGroupEntry&& entry) {
+    if (mBindGroup) {
+        // Locked: Filament guarantees no changes after this point, so later updates are ignored.
+        return;
+    }
+    // TODO: Putting some level of trust that Filament is not going to reuse indexes or go past the
+    // layout index for efficiency. Add guards if wrong.
+    FILAMENT_CHECK_POSTCONDITION(index < mEntryIndexByBinding.size())
+            << "impossible/invalid index for a descriptor/binding (out of range or >= "
+               "MAX_DESCRIPTOR_COUNT) "
+            << index;
+    uint8_t entryIndex = mEntryIndexByBinding[index];
+    FILAMENT_CHECK_POSTCONDITION(entryIndex != INVALID_INDEX && entryIndex < mEntries.size())
+            << "Invalid binding " << index;
+    entry.binding = index;
+    mEntries[entryIndex] = std::move(entry);
+}
+
+wgpu::BindGroup WebGPUDescriptorSet::lockAndReturn(wgpu::Device const& device) {
+    // Build the bind group on first use; afterwards just hand back the cached one.
+    if (!mBindGroup) {
+        // TODO label? Should we just copy layout label?
+        wgpu::BindGroupDescriptor bindGroupInfo{};
+        bindGroupInfo.layout = mLayout;
+        bindGroupInfo.entryCount = mEntries.size();
+        bindGroupInfo.entries = mEntries.data();
+        mBindGroup = device.CreateBindGroup(&bindGroupInfo);
+        FILAMENT_CHECK_POSTCONDITION(mBindGroup) << "Failed to create bind group?";
+        // With the bind group created, none of the construction state should be needed any
+        // more, so let go of the layout and the staged entries.
+        mLayout = nullptr;
+        mEntries.clear();
+        mEntries.shrink_to_fit();
+    }
+    return mBindGroup;
+}
+
+WebGPUDescriptorSet::~WebGPUDescriptorSet() {
+    mBindGroup = nullptr; // clear the bind group handle explicitly
+    mLayout = nullptr; // already null if lockAndReturn created the bind group
+}
+
+} // namespace filament::backend
--- a/filament/backend/src/webgpu/WebGPUDescriptorSet.h
+++ b/filament/backend/src/webgpu/WebGPUDescriptorSet.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_WEBGPUDESCRIPTORSET_H
+#define TNT_FILAMENT_BACKEND_WEBGPUDESCRIPTORSET_H
+
+#include "WebGPUDescriptorSetLayout.h"
+
+#include "DriverBase.h"
+#include <backend/DriverEnums.h>
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <array>
+#include <cstdint>
+#include <vector>
+
+namespace filament::backend {
+
+// WebGPU backend descriptor set. Entries are collected with addEntry() and turned into a
+// wgpu::BindGroup by lockAndReturn(); after that point the set is "locked" and further
+// addEntry() calls are ignored.
+class WebGPUDescriptorSet final : public HwDescriptorSet {
+public:
+    WebGPUDescriptorSet(wgpu::BindGroupLayout const& layout,
+            std::vector<WebGPUDescriptorSetLayout::BindGroupEntryInfo> const& bindGroupEntries);
+    ~WebGPUDescriptorSet();
+
+    // Stores the entry for the given binding number. No-op once the bind group exists.
+    void addEntry(unsigned int index, wgpu::BindGroupEntry&& entry);
+
+    // Creates the bind group on first use and returns it; later calls return the same one.
+    [[nodiscard]] wgpu::BindGroup lockAndReturn(wgpu::Device const& device);
+
+    // May be nullptr. Use lockAndReturn to create the bind group when appropriate
+    [[nodiscard]] wgpu::BindGroup const& getBindGroup() const {
+        return mBindGroup;
+    }
+
+    // True once the bind group has been created.
+    [[nodiscard]] bool getIsLocked() const {
+        return mBindGroup != nullptr;
+    }
+
+    // Number of entries flagged with a dynamic offset, as counted at construction.
+    [[nodiscard]] size_t getEntitiesWithDynamicOffsetsCount() const {
+        return mEntriesWithDynamicOffsetsCount;
+    }
+
+private:
+    // Layout used to create the bind group; reset once the bind group exists.
+    wgpu::BindGroupLayout mLayout = nullptr;
+    // Lookup from binding number to the position of its entry in mEntries.
+    std::array<uint8_t, MAX_DESCRIPTOR_COUNT> mEntryIndexByBinding{};
+    // Entries handed to the bind group descriptor; emptied once the bind group exists.
+    std::vector<wgpu::BindGroupEntry> mEntries;
+    const size_t mEntriesWithDynamicOffsetsCount;
+    // Null until lockAndReturn() succeeds.
+    wgpu::BindGroup mBindGroup = nullptr;
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_WEBGPUDESCRIPTORSET_H
--- a/filament/backend/src/webgpu/WebGPUDescriptorSetLayout.cpp
+++ b/filament/backend/src/webgpu/WebGPUDescriptorSetLayout.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WebGPUDescriptorSetLayout.h"
+
+#include <backend/DriverEnums.h>
+
+#include <utils/BitmaskEnum.h>
+#include <utils/CString.h>
+#include <utils/Panic.h>
+#include <utils/StaticString.h>
+#include <utils/debug.h>
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <algorithm>
+#include <string>
+#include <variant>
+#include <vector>
+
+namespace filament::backend {
+
+namespace {
+
+// Convert Filament Shader Stage Flags bitmask to webgpu equivalent
+// Translates a Filament shader-stage bitmask into the equivalent wgpu::ShaderStage bitmask.
+// Each of VERTEX, FRAGMENT and COMPUTE present in fFlags sets the matching WebGPU bit; when
+// none of them is present the result is wgpu::ShaderStage::None.
+[[nodiscard]] wgpu::ShaderStage filamentStageToWGPUStage(const ShaderStageFlags fFlags) {
+    const bool vertexUsed = any(ShaderStageFlags::VERTEX & fFlags);
+    const bool fragmentUsed = any(ShaderStageFlags::FRAGMENT & fFlags);
+    const bool computeUsed = any(ShaderStageFlags::COMPUTE & fFlags);
+
+    wgpu::ShaderStage stages = wgpu::ShaderStage::None;
+    stages |= vertexUsed ? wgpu::ShaderStage::Vertex : wgpu::ShaderStage::None;
+    stages |= fragmentUsed ? wgpu::ShaderStage::Fragment : wgpu::ShaderStage::None;
+    stages |= computeUsed ? wgpu::ShaderStage::Compute : wgpu::ShaderStage::None;
+    return stages;
+}
+
+} // namespace
+
+// Builds the wgpu::BindGroupLayout that corresponds to a Filament descriptor set layout.
+//
+// layout: the Filament layout description (label and bindings) to translate.
+// device: the WebGPU device used to create the bind group layout; must be valid.
+//
+// Every Filament binding N produces a WebGPU entry at binding 2*N. Sampler-typed bindings
+// additionally produce a sampler entry at 2*N + 1, with the texture half living at 2*N.
+// A BindGroupEntryInfo is recorded in mBindGroupEntries for every WebGPU entry created.
+// Panics for descriptor types that are not supported (input attachment, shader storage buffer,
+// external sampler) and when the device fails to create the layout.
+WebGPUDescriptorSetLayout::WebGPUDescriptorSetLayout(DescriptorSetLayout const& layout,
+        wgpu::Device const& device) {
+    assert_invariant(device);
+
+    // Pull the label text out of whichever alternative the variant currently holds.
+    const char* labelText = nullptr;
+    if (const auto* staticLabel = std::get_if<utils::StaticString>(&layout.label)) {
+        labelText = staticLabel->c_str();
+    } else if (const auto* dynamicLabel = std::get_if<utils::CString>(&layout.label)) {
+        labelText = dynamicLabel->c_str();
+    }
+    // Building a std::string from a null pointer is undefined behavior, so a missing label
+    // simply yields an empty base label.
+    const std::string baseLabel = labelText ? labelText : "";
+
+    // TODO: layoutDescriptor has a "Label". Ideally we can get info on what this layout is for
+    // debugging. For now, hack an incrementing value.
+    static int layoutNum = 0;
+
+    const unsigned int samplerCount =
+            std::count_if(layout.bindings.begin(), layout.bindings.end(), [](auto const& binding) {
+                return DescriptorSetLayoutBinding::isSampler(binding.type);
+            });
+
+    // Reserve room for every entry up front (one per binding plus one extra per sampler): the
+    // loop below keeps references into both vectors across a second emplace_back, which is only
+    // safe as long as no reallocation happens.
+    std::vector<wgpu::BindGroupLayoutEntry> wEntries;
+    wEntries.reserve(layout.bindings.size() + samplerCount);
+    mBindGroupEntries.reserve(wEntries.capacity());
+
+    for (auto const& fEntry: layout.bindings) {
+        auto& wEntry = wEntries.emplace_back();
+        auto& entryInfo = mBindGroupEntries.emplace_back();
+        wEntry.visibility = filamentStageToWGPUStage(fEntry.stageFlags);
+        wEntry.binding = fEntry.binding * 2;
+        entryInfo.binding = wEntry.binding;
+
+        switch (fEntry.type) {
+            case DescriptorType::SAMPLER_2D_FLOAT:
+            case DescriptorType::SAMPLER_2D_INT:
+            case DescriptorType::SAMPLER_2D_UINT:
+            case DescriptorType::SAMPLER_2D_DEPTH:
+            case DescriptorType::SAMPLER_2D_ARRAY_FLOAT:
+            case DescriptorType::SAMPLER_2D_ARRAY_INT:
+            case DescriptorType::SAMPLER_2D_ARRAY_UINT:
+            case DescriptorType::SAMPLER_2D_ARRAY_DEPTH:
+            case DescriptorType::SAMPLER_CUBE_FLOAT:
+            case DescriptorType::SAMPLER_CUBE_INT:
+            case DescriptorType::SAMPLER_CUBE_UINT:
+            case DescriptorType::SAMPLER_CUBE_DEPTH:
+            case DescriptorType::SAMPLER_CUBE_ARRAY_FLOAT:
+            case DescriptorType::SAMPLER_CUBE_ARRAY_INT:
+            case DescriptorType::SAMPLER_CUBE_ARRAY_UINT:
+            case DescriptorType::SAMPLER_CUBE_ARRAY_DEPTH:
+            case DescriptorType::SAMPLER_3D_FLOAT:
+            case DescriptorType::SAMPLER_3D_INT:
+            case DescriptorType::SAMPLER_3D_UINT:
+            case DescriptorType::SAMPLER_2D_MS_FLOAT:
+            case DescriptorType::SAMPLER_2D_MS_INT:
+            case DescriptorType::SAMPLER_2D_MS_UINT:
+            case DescriptorType::SAMPLER_2D_MS_ARRAY_FLOAT:
+            case DescriptorType::SAMPLER_2D_MS_ARRAY_INT:
+            case DescriptorType::SAMPLER_2D_MS_ARRAY_UINT: {
+                // wEntry and entryInfo are still in use below, so these two insertions must not
+                // reallocate; the reserve above is sized to guarantee that.
+                assert_invariant(wEntries.size() < wEntries.capacity());
+                assert_invariant(mBindGroupEntries.size() < mBindGroupEntries.capacity());
+                auto& samplerEntry = wEntries.emplace_back();
+                auto& samplerEntryInfo = mBindGroupEntries.emplace_back();
+                samplerEntry.binding = fEntry.binding * 2 + 1;
+                samplerEntryInfo.binding = samplerEntry.binding;
+                samplerEntry.visibility = wEntry.visibility;
+                wEntry.texture.multisampled = isMultiSampledTypeDescriptor(fEntry.type);
+                // TODO: Set once we have the filtering values
+                if (isDepthDescriptor(fEntry.type)) {
+                    samplerEntry.sampler.type = wgpu::SamplerBindingType::Comparison;
+                } else if (isIntDescriptor(fEntry.type)) {
+                    samplerEntry.sampler.type = wgpu::SamplerBindingType::NonFiltering;
+                } else {
+                    samplerEntry.sampler.type = wgpu::SamplerBindingType::Filtering;
+                }
+                break;
+            }
+            case DescriptorType::UNIFORM_BUFFER: {
+                wEntry.buffer.hasDynamicOffset =
+                        any(fEntry.flags & DescriptorFlags::DYNAMIC_OFFSET);
+                entryInfo.hasDynamicOffset = wEntry.buffer.hasDynamicOffset;
+                wEntry.buffer.type = wgpu::BufferBindingType::Uniform;
+                // TODO: Ideally we fill minBindingSize
+                break;
+            }
+            case DescriptorType::INPUT_ATTACHMENT: {
+                PANIC_POSTCONDITION("Input Attachment is not supported");
+                break;
+            }
+            case DescriptorType::SHADER_STORAGE_BUFFER: {
+                PANIC_POSTCONDITION("Shader storage is not supported");
+                break;
+            }
+            case DescriptorType::SAMPLER_EXTERNAL: {
+                PANIC_POSTCONDITION("External Sampler is not supported");
+                break;
+            }
+        }
+
+        // Texture sample type, derived from the descriptor's component type. Nothing is set
+        // when none of the predicates match.
+        if (isDepthDescriptor(fEntry.type)) {
+            wEntry.texture.sampleType = wgpu::TextureSampleType::Depth;
+        } else if (isFloatDescriptor(fEntry.type)) {
+            // TODO: Set once we have the filtering values
+            wEntry.texture.sampleType = wgpu::TextureSampleType::Float;
+        } else if (isIntDescriptor(fEntry.type)) {
+            wEntry.texture.sampleType = wgpu::TextureSampleType::Sint;
+        } else if (isUnsignedIntDescriptor(fEntry.type)) {
+            wEntry.texture.sampleType = wgpu::TextureSampleType::Uint;
+        }
+
+        // Texture view dimension, derived from the descriptor's shape.
+        if (is3dTypeDescriptor(fEntry.type)) {
+            wEntry.texture.viewDimension = wgpu::TextureViewDimension::e3D;
+        } else if (is2dTypeDescriptor(fEntry.type)) {
+            wEntry.texture.viewDimension = wgpu::TextureViewDimension::e2D;
+        } else if (is2dArrayTypeDescriptor(fEntry.type)) {
+            wEntry.texture.viewDimension = wgpu::TextureViewDimension::e2DArray;
+        } else if (isCubeTypeDescriptor(fEntry.type)) {
+            wEntry.texture.viewDimension = wgpu::TextureViewDimension::Cube;
+        } else if (isCubeArrayTypeDescriptor(fEntry.type)) {
+            wEntry.texture.viewDimension = wgpu::TextureViewDimension::CubeArray;
+        }
+        // fEntry.count is unused currently
+    }
+    const std::string label = "layout_" + baseLabel + std::to_string(++layoutNum);
+    const wgpu::BindGroupLayoutDescriptor layoutDescriptor{
+        .label{label.c_str()}, // Use .c_str() if label needs to be const char*
+        .entryCount = wEntries.size(),
+        .entries = wEntries.data()
+    };
+    mLayout = device.CreateBindGroupLayout(&layoutDescriptor);
+    FILAMENT_CHECK_POSTCONDITION(mLayout)
+            << "Failed to create bind group layout with label " << label;
+}
+
+} // namespace filament::backend
--- a/filament/backend/src/webgpu/WebGPUDescriptorSetLayout.h
+++ b/filament/backend/src/webgpu/WebGPUDescriptorSetLayout.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_WEBGPUDESCRIPTORSETLAYOUT_H
+#define TNT_FILAMENT_BACKEND_WEBGPUDESCRIPTORSETLAYOUT_H
+
+#include "DriverBase.h"
+#include <backend/DriverEnums.h>
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <cstdint>
+#include <vector>
+
+namespace filament::backend {
+
+// WebGPU backend descriptor set layout: owns the wgpu::BindGroupLayout created from a Filament
+// DescriptorSetLayout, together with a short summary of every bind group entry it declares.
+class WebGPUDescriptorSetLayout final : public HwDescriptorSetLayout {
+public:
+    // Summary of one entry of the bind group layout.
+    struct BindGroupEntryInfo final {
+        // WebGPU binding number of the entry.
+        uint8_t binding = 0;
+        // Whether the entry was declared with a dynamic offset.
+        bool hasDynamicOffset = false;
+    };
+
+    WebGPUDescriptorSetLayout(DescriptorSetLayout const& layout, wgpu::Device const& device);
+    ~WebGPUDescriptorSetLayout() = default;
+
+    // The underlying WebGPU bind group layout.
+    [[nodiscard]] wgpu::BindGroupLayout const& getLayout() const {
+        return mLayout;
+    }
+
+    // One BindGroupEntryInfo per entry of the bind group layout.
+    [[nodiscard]] std::vector<BindGroupEntryInfo> const& getBindGroupEntries() const {
+        return mBindGroupEntries;
+    }
+
+private:
+    std::vector<BindGroupEntryInfo> mBindGroupEntries;
+    wgpu::BindGroupLayout mLayout = nullptr;
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_WEBGPUDESCRIPTORSETLAYOUT_H
--- a/filament/backend/src/webgpu/WebGPUDriver.cpp
+++ b/filament/backend/src/webgpu/WebGPUDriver.cpp
--- a/filament/backend/src/webgpu/WebGPUDriver.h
+++ b/filament/backend/src/webgpu/WebGPUDriver.h
@@ -17,7 +17,7 @@
 #ifndef TNT_FILAMENT_BACKEND_WEBGPUDRIVER_H
 #define TNT_FILAMENT_BACKEND_WEBGPUDRIVER_H

-#include "WebGPUHandles.h"
+#include "WebGPURenderTarget.h"
 #include "webgpu/WebGPUConstants.h"
 #include <backend/platforms/WebGPUPlatform.h>

@@ -64,18 +64,27 @@ private:
    WebGPUPlatform& mPlatform;
    wgpu::Adapter mAdapter = nullptr;
    wgpu::Device mDevice = nullptr;
-    uint32_t mMinUniformBufferOffsetAlignment;
+    wgpu::Limits mDeviceLimits = {};
    wgpu::Queue mQueue = nullptr;
    void* mNativeWindow = nullptr;
    WebGPUSwapChain* mSwapChain = nullptr;
    uint64_t mNextFakeHandle = 1;
    wgpu::CommandEncoder mCommandEncoder = nullptr;
+    std::vector<Handle<HwTexture>> mMipQueue;
    wgpu::TextureView mTextureView = nullptr;
    wgpu::RenderPassEncoder mRenderPassEncoder = nullptr;
    wgpu::CommandBuffer mCommandBuffer = nullptr;
-    WGPURenderTarget* mDefaultRenderTarget = nullptr;
+    WebGPURenderTarget* mDefaultRenderTarget = nullptr;
+    WebGPURenderTarget* mCurrentRenderTarget = nullptr;

    tsl::robin_map<uint32_t, wgpu::RenderPipeline> mPipelineMap;
+
+    // Per-slot record stored in mCurrentDescriptorSets.
+    // NOTE(review): presumably holds the bind group currently bound to a descriptor-set slot
+    // together with its dynamic offsets -- confirm against the uses in WebGPUDriver.cpp.
+    // The members carry default initializers so that a default-constructed slot never exposes
+    // an indeterminate offsetCount.
+    struct DescriptorSetBindingInfo {
+        wgpu::BindGroup bindGroup = nullptr;
+        size_t offsetCount = 0;
+        backend::DescriptorSetOffsetArray offsets;
+    };
+    std::array<DescriptorSetBindingInfo,MAX_DESCRIPTOR_SET_COUNT> mCurrentDescriptorSets;
    /*
     * Driver interface
     */
--- a/filament/backend/src/webgpu/WebGPUHandles.cpp
+++ b/filament/backend/src/webgpu/WebGPUHandles.cpp
@@ -1,980 +0,0 @@
-/*
- * Copyright (C) 2025 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WebGPUHandles.h"
-
-#include <backend/DriverEnums.h>
-
-#include <utils/BitmaskEnum.h>
-#include <utils/Panic.h>
-#include <private/backend/BackendUtils.h>
-#include <webgpu/webgpu_cpp.h>
-
-#include <algorithm>
-#include <cstdint>
-#include <utility>
-#include <vector>
-
-namespace {
-constexpr wgpu::BufferUsage getBufferObjectUsage(
-        filament::backend::BufferObjectBinding bindingType) noexcept {
-    switch (bindingType) {
-        case filament::backend::BufferObjectBinding::VERTEX:
-            return wgpu::BufferUsage::Vertex;
-        case filament::backend::BufferObjectBinding::UNIFORM:
-            return wgpu::BufferUsage::Uniform;
-        case filament::backend::BufferObjectBinding::SHADER_STORAGE:
-            return wgpu::BufferUsage::Storage;
-    }
-}
-
-wgpu::VertexFormat getVertexFormat(filament::backend::ElementType type, bool normalized, bool integer) {
-    using ElementType = filament::backend::ElementType;
-    using VertexFormat = wgpu::VertexFormat;
-    if (normalized) {
-        switch (type) {
-            // Single Component Types
-            case ElementType::BYTE: return VertexFormat::Snorm8;
-            case ElementType::UBYTE: return VertexFormat::Unorm8;
-            case ElementType::SHORT: return VertexFormat::Snorm16;
-            case ElementType::USHORT: return VertexFormat::Unorm16;
-            // Two Component Types
-            case ElementType::BYTE2: return VertexFormat::Snorm8x2;
-            case ElementType::UBYTE2: return VertexFormat::Unorm8x2;
-            case ElementType::SHORT2: return VertexFormat::Snorm16x2;
-            case ElementType::USHORT2: return VertexFormat::Unorm16x2;
-            // Three Component Types
-            // There is no vertex format type for 3 byte data in webgpu. Use
-            // 4 byte signed normalized type and ignore the last byte.
-            // TODO: This is to be verified.
-            case ElementType::BYTE3: return VertexFormat::Snorm8x4;    // NOT MINSPEC
-            case ElementType::UBYTE3: return VertexFormat::Unorm8x4;   // NOT MINSPEC
-            case ElementType::SHORT3: return VertexFormat::Snorm16x4;  // NOT MINSPEC
-            case ElementType::USHORT3: return VertexFormat::Unorm16x4; // NOT MINSPEC
-            // Four Component Types
-            case ElementType::BYTE4: return VertexFormat::Snorm8x4;
-            case ElementType::UBYTE4: return VertexFormat::Unorm8x4;
-            case ElementType::SHORT4: return VertexFormat::Snorm16x4;
-            case ElementType::USHORT4: return VertexFormat::Unorm16x4;
-            default:
-                FILAMENT_CHECK_POSTCONDITION(false) << "Normalized format does not exist.";
-                return VertexFormat::Float32x3;
-        }
-    }
-    switch (type) {
-        // Single Component Types
-        // There is no direct alternative for SSCALED in webgpu. Convert them to Float32 directly.
-        // This will result in increased memory on the cpu side.
-        // TODO: Is Float16 acceptable instead with some potential accuracy errors?
-        case ElementType::BYTE: return integer ? VertexFormat::Sint8 : VertexFormat::Float32;
-        case ElementType::UBYTE: return integer ? VertexFormat::Uint8 : VertexFormat::Float32;
-        case ElementType::SHORT: return integer ? VertexFormat::Sint16 : VertexFormat::Float32;
-        case ElementType::USHORT: return integer ? VertexFormat::Uint16 : VertexFormat::Float32;
-        case ElementType::HALF: return VertexFormat::Float16;
-        case ElementType::INT: return VertexFormat::Sint32;
-        case ElementType::UINT: return VertexFormat::Uint32;
-        case ElementType::FLOAT: return VertexFormat::Float32;
-        // Two Component Types
-        case ElementType::BYTE2: return integer ? VertexFormat::Sint8x2 : VertexFormat::Float32x2;
-        case ElementType::UBYTE2: return integer ? VertexFormat::Uint8x2 : VertexFormat::Float32x2;
-        case ElementType::SHORT2: return integer ? VertexFormat::Sint16x2 : VertexFormat::Float32x2;
-        case ElementType::USHORT2: return integer ? VertexFormat::Uint16x2 : VertexFormat::Float32x2;
-        case ElementType::HALF2: return VertexFormat::Float16x2;
-        case ElementType::FLOAT2: return VertexFormat::Float32x2;
-        // Three Component Types
-        case ElementType::BYTE3: return VertexFormat::Sint8x4;    // NOT MINSPEC
-        case ElementType::UBYTE3: return VertexFormat::Uint8x4;   // NOT MINSPEC
-        case ElementType::SHORT3: return VertexFormat::Sint16x4;  // NOT MINSPEC
-        case ElementType::USHORT3: return VertexFormat::Uint16x4; // NOT MINSPEC
-        case ElementType::HALF3: return VertexFormat::Float16x4;  // NOT MINSPEC
-        case ElementType::FLOAT3: return VertexFormat::Float32x3;
-        // Four Component Types
-        case ElementType::BYTE4: return integer ? VertexFormat::Sint8x4 : VertexFormat::Float32x4;
-        case ElementType::UBYTE4: return integer ? VertexFormat::Uint8x4 : VertexFormat::Float32x4;
-        case ElementType::SHORT4: return integer ? VertexFormat::Sint16x4 : VertexFormat::Float32x4;
-        case ElementType::USHORT4: return integer ? VertexFormat::Uint16x4 : VertexFormat::Float32x4;
-        case ElementType::HALF4: return VertexFormat::Float16x4;
-        case ElementType::FLOAT4: return VertexFormat::Float32x4;
-    }
-}
-
-wgpu::StringView getUserTextureLabel(filament::backend::SamplerType target) {
-    // TODO will be helpful to get more useful info than this
-    using filament::backend::SamplerType;
-    switch (target) {
-        case SamplerType::SAMPLER_2D:
-            return "a_2D_user_texture";
-        case SamplerType::SAMPLER_2D_ARRAY:
-            return "a_2D_array_user_texture";
-        case SamplerType::SAMPLER_CUBEMAP:
-            return "a_cube_map_user_texture";
-        case SamplerType::SAMPLER_EXTERNAL:
-            return "an_external_user_texture";
-        case SamplerType::SAMPLER_3D:
-            return "a_3D_user_texture";
-        case SamplerType::SAMPLER_CUBEMAP_ARRAY:
-            return "a_cube_map_array_user_texture";
-    }
-}
-
-wgpu::StringView getUserTextureViewLabel(filament::backend::SamplerType target) {
-    // TODO will be helpful to get more useful info than this
-    using filament::backend::SamplerType;
-    switch (target) {
-        case SamplerType::SAMPLER_2D:
-            return "a_2D_user_texture_view";
-        case SamplerType::SAMPLER_2D_ARRAY:
-            return "a_2D_array_user_texture_view";
-        case SamplerType::SAMPLER_CUBEMAP:
-            return "a_cube_map_user_texture_view";
-        case SamplerType::SAMPLER_EXTERNAL:
-            return "an_external_user_texture_view";
-        case SamplerType::SAMPLER_3D:
-            return "a_3D_user_texture_view";
-        case SamplerType::SAMPLER_CUBEMAP_ARRAY:
-            return "a_cube_map_array_user_texture_view";
-    }
-}
-
-}// namespace
-
-namespace filament::backend {
-
-void WGPUBufferBase::createBuffer(const wgpu::Device& device, wgpu::BufferUsage usage,
-        uint32_t size, const char* label) {
-    // Write size must be divisible by 4. If the whole buffer is written to as is common, so must
-    // the buffer size.
-    size += (4 - (size % 4)) % 4;
-    wgpu::BufferDescriptor descriptor{ .label = label,
-        .usage = usage,
-        .size = size,
-        .mappedAtCreation = false };
-    buffer = device.CreateBuffer(&descriptor);
-}
-
-void WGPUBufferBase::updateGPUBuffer(BufferDescriptor& bufferDescriptor, uint32_t byteOffset,
-        wgpu::Queue queue) {
-    FILAMENT_CHECK_PRECONDITION(bufferDescriptor.buffer)
-            << "copyIntoBuffer called with a null buffer";
-    FILAMENT_CHECK_PRECONDITION(bufferDescriptor.size + byteOffset <= buffer.GetSize())
-            << "Attempting to copy " << bufferDescriptor.size << " bytes into a buffer of size "
-            << buffer.GetSize() << " at offset " << byteOffset;
-    FILAMENT_CHECK_PRECONDITION(byteOffset % 4 == 0)
-            << "Byte offset must be a multiple of 4 but is " << byteOffset;
-
-    // TODO: All buffer objects are created with CopyDst usage.
-    // This may have some performance implications. That should be investigated later.
-    assert_invariant(buffer.GetUsage() & wgpu::BufferUsage::CopyDst);
-
-    size_t remainder = bufferDescriptor.size % 4;
-
-    // WriteBuffer is an async call. But cpu buffer data is already written to the staging
-    // buffer on return from the WriteBuffer.
-    auto legalSize = bufferDescriptor.size - remainder;
-    queue.WriteBuffer(buffer, byteOffset, bufferDescriptor.buffer, legalSize);
-    if (remainder != 0) {
-        const uint8_t* remainderStart =
-                static_cast<const uint8_t*>(bufferDescriptor.buffer) + legalSize;
-        memcpy(mRemainderChunk.data(), remainderStart, remainder);
-        // Pad the remainder with zeros to ensure deterministic behavior, though GPU shouldn't
-        // access this
-        std::memset(mRemainderChunk.data() + remainder, 0, 4 - remainder);
-
-        queue.WriteBuffer(buffer, byteOffset + legalSize, &mRemainderChunk, 4);
-    }
-}
-WGPUVertexBufferInfo::WGPUVertexBufferInfo(uint8_t bufferCount, uint8_t attributeCount,
-        AttributeArray const& attributes)
-    : HwVertexBufferInfo(bufferCount, attributeCount),
-      mVertexBufferLayout(bufferCount),
-      mAttributes(bufferCount) {
-    assert_invariant(attributeCount > 0);
-    assert_invariant(bufferCount > 0);
-    for (uint32_t attribIndex = 0; attribIndex < attributes.size(); attribIndex++) {
-        Attribute const& attrib = attributes[attribIndex];
-        // Ignore the attributes which are not bind to vertex buffers.
-        if (attrib.buffer == Attribute::BUFFER_UNUSED) {
-            continue;
-        }
-
-        assert_invariant(attrib.buffer < bufferCount);
-        bool const isInteger = attrib.flags & Attribute::FLAG_INTEGER_TARGET;
-        bool const isNormalized = attrib.flags & Attribute::FLAG_NORMALIZED;
-        wgpu::VertexFormat vertexFormat = getVertexFormat(attrib.type, isNormalized, isInteger);
-
-        // Attributes are sequential per buffer
-        mAttributes[attrib.buffer].push_back({
-            .format = vertexFormat,
-            .offset = attrib.offset,
-            .shaderLocation = attribIndex,
-        });
-
-        mVertexBufferLayout[attrib.buffer].stepMode = wgpu::VertexStepMode::Vertex;
-        if (mVertexBufferLayout[attrib.buffer].arrayStride == 0) {
-            mVertexBufferLayout[attrib.buffer].arrayStride = attrib.stride;
-        } else {
-            assert_invariant(mVertexBufferLayout[attrib.buffer].arrayStride == attrib.stride);
-        }
-    }
-
-    for (uint32_t bufferIndex = 0; bufferIndex < bufferCount; bufferIndex++) {
-        mVertexBufferLayout[bufferIndex].attributeCount = mAttributes[bufferIndex].size();
-        mVertexBufferLayout[bufferIndex].attributes = mAttributes[bufferIndex].data();
-    }
-}
-
-WGPUIndexBuffer::WGPUIndexBuffer(wgpu::Device const& device, uint8_t elementSize,
-        uint32_t indexCount)
-    : indexFormat(elementSize == 2 ? wgpu::IndexFormat::Uint16 : wgpu::IndexFormat::Uint32) {
-    createBuffer(device, wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::Index,
-            elementSize * indexCount, "index_buffer");
-}
-
-
-WGPUVertexBuffer::WGPUVertexBuffer(wgpu::Device const& device, uint32_t vertexCount,
-        uint32_t bufferCount, Handle<HwVertexBufferInfo> vbih)
-    : HwVertexBuffer(vertexCount),
-      vbih(vbih),
-      buffers(bufferCount) {}
-
-WGPUBufferObject::WGPUBufferObject(wgpu::Device const& device, BufferObjectBinding bindingType,
-        uint32_t byteCount)
-    : HwBufferObject(byteCount) {
-    createBuffer(device, wgpu::BufferUsage::CopyDst | getBufferObjectUsage(bindingType), byteCount,
-            "buffer_object");
-}
-
-wgpu::ShaderStage WebGPUDescriptorSetLayout::filamentStageToWGPUStage(ShaderStageFlags fFlags) {
-    wgpu::ShaderStage retStages = wgpu::ShaderStage::None;
-    if (any(ShaderStageFlags::VERTEX & fFlags)) {
-        retStages |= wgpu::ShaderStage::Vertex;
-    }
-    if (any(ShaderStageFlags::FRAGMENT & fFlags)) {
-        retStages |= wgpu::ShaderStage::Fragment;
-    }
-    if (any(ShaderStageFlags::COMPUTE & fFlags)) {
-        retStages |= wgpu::ShaderStage::Compute;
-    }
-    return retStages;
-}
-
-WebGPUDescriptorSetLayout::WebGPUDescriptorSetLayout(DescriptorSetLayout const& layout,
-        wgpu::Device const& device) {
-    assert_invariant(device);
-
-    std::string baseLabel;
-    if (std::holds_alternative<utils::StaticString>(layout.label)) {
-        const auto& temp = std::get_if<utils::StaticString>(&layout.label);
-        baseLabel = temp->c_str();
-    } else if (std::holds_alternative<utils::CString>(layout.label)) {
-        const auto& temp = std::get_if<utils::CString>(&layout.label);
-        baseLabel = temp->c_str();
-    }
-
-    // TODO: layoutDescriptor has a "Label". Ideally we can get info on what this layout is for
-    // debugging. For now, hack an incrementing value.
-    static int layoutNum = 0;
-
-    unsigned int samplerCount =
-            std::count_if(layout.bindings.begin(), layout.bindings.end(), [](auto& fEntry) {
-                return DescriptorSetLayoutBinding::isSampler(fEntry.type);
-            });
-
-
-    std::vector<wgpu::BindGroupLayoutEntry> wEntries;
-    wEntries.reserve(layout.bindings.size() + samplerCount);
-    mBindGroupEntries.reserve(wEntries.capacity());
-
-    for (auto fEntry: layout.bindings) {
-        auto& wEntry = wEntries.emplace_back();
-        auto& entryInfo = mBindGroupEntries.emplace_back();
-        wEntry.visibility = filamentStageToWGPUStage(fEntry.stageFlags);
-        wEntry.binding = fEntry.binding * 2;
-        entryInfo.binding = wEntry.binding;
-
-        switch (fEntry.type) {
-            case DescriptorType::SAMPLER_2D_FLOAT:
-            case DescriptorType::SAMPLER_2D_INT:
-            case DescriptorType::SAMPLER_2D_UINT:
-            case DescriptorType::SAMPLER_2D_DEPTH:
-            case DescriptorType::SAMPLER_2D_ARRAY_FLOAT:
-            case DescriptorType::SAMPLER_2D_ARRAY_INT:
-            case DescriptorType::SAMPLER_2D_ARRAY_UINT:
-            case DescriptorType::SAMPLER_2D_ARRAY_DEPTH:
-            case DescriptorType::SAMPLER_CUBE_FLOAT:
-            case DescriptorType::SAMPLER_CUBE_INT:
-            case DescriptorType::SAMPLER_CUBE_UINT:
-            case DescriptorType::SAMPLER_CUBE_DEPTH:
-            case DescriptorType::SAMPLER_CUBE_ARRAY_FLOAT:
-            case DescriptorType::SAMPLER_CUBE_ARRAY_INT:
-            case DescriptorType::SAMPLER_CUBE_ARRAY_UINT:
-            case DescriptorType::SAMPLER_CUBE_ARRAY_DEPTH:
-            case DescriptorType::SAMPLER_3D_FLOAT:
-            case DescriptorType::SAMPLER_3D_INT:
-            case DescriptorType::SAMPLER_3D_UINT:
-            case DescriptorType::SAMPLER_2D_MS_FLOAT:
-            case DescriptorType::SAMPLER_2D_MS_INT:
-            case DescriptorType::SAMPLER_2D_MS_UINT:
-            case DescriptorType::SAMPLER_2D_MS_ARRAY_FLOAT:
-            case DescriptorType::SAMPLER_2D_MS_ARRAY_INT:
-            case DescriptorType::SAMPLER_2D_MS_ARRAY_UINT: {
-                auto& samplerEntry = wEntries.emplace_back();
-                auto& samplerEntryInfo = mBindGroupEntries.emplace_back();
-                samplerEntry.binding = fEntry.binding * 2 + 1;
-                samplerEntryInfo.binding = samplerEntry.binding;
-                samplerEntry.visibility = wEntry.visibility;
-                wEntry.texture.multisampled = isMultiSampledTypeDescriptor(fEntry.type);
-                // TODO: Set once we have the filtering values
-                if (isDepthDescriptor(fEntry.type)) {
-                    samplerEntry.sampler.type = wgpu::SamplerBindingType::Comparison;
-                } else {
-                    samplerEntry.sampler.type =
-                            wgpu::SamplerBindingType::NonFiltering;
-                }
-                break;
-            }
-            case DescriptorType::UNIFORM_BUFFER: {
-                wEntry.buffer.hasDynamicOffset =
-                        any(fEntry.flags & DescriptorFlags::DYNAMIC_OFFSET);
-                entryInfo.hasDynamicOffset = wEntry.buffer.hasDynamicOffset;
-                wEntry.buffer.type = wgpu::BufferBindingType::Uniform;
-                // TODO: Ideally we fill minBindingSize
-                break;
-            }
-            case DescriptorType::INPUT_ATTACHMENT: {
-                PANIC_POSTCONDITION("Input Attachment is not supported");
-                break;
-            }
-            case DescriptorType::SHADER_STORAGE_BUFFER: {
-                PANIC_POSTCONDITION("Shader storage is not supported");
-                break;
-            }
-            case DescriptorType::SAMPLER_EXTERNAL: {
-                PANIC_POSTCONDITION("External Sampler is not supported");
-                break;
-            }
-        }
-        if (isDepthDescriptor(fEntry.type))
-        {
-            wEntry.texture.sampleType = wgpu::TextureSampleType::Depth;
-        }
-        else if (isFloatDescriptor(fEntry.type))
-        {
-            // TODO: Set once we have the filtering values
-            wEntry.texture.sampleType = wgpu::TextureSampleType::UnfilterableFloat;
-        }
-        else if (isIntDescriptor(fEntry.type))
-        {
-            wEntry.texture.sampleType = wgpu::TextureSampleType::Sint;
-        }
-        else if (isUnsignedIntDescriptor(fEntry.type))
-        {
-            wEntry.texture.sampleType = wgpu::TextureSampleType::Uint;
-        }
-
-        if (is3dTypeDescriptor(fEntry.type))
-        {
-            wEntry.texture.viewDimension = wgpu::TextureViewDimension::e3D;
-        }
-        else if (is2dTypeDescriptor(fEntry.type))
-        {
-            wEntry.texture.viewDimension = wgpu::TextureViewDimension::e2D;
-        }
-        else if (is2dArrayTypeDescriptor(fEntry.type))
-        {
-            wEntry.texture.viewDimension = wgpu::TextureViewDimension::e2DArray;
-        }
-        else if (isCubeTypeDescriptor(fEntry.type))
-        {
-            wEntry.texture.viewDimension = wgpu::TextureViewDimension::Cube;
-        }
-        else if (isCubeArrayTypeDescriptor(fEntry.type))
-        {
-            wEntry.texture.viewDimension = wgpu::TextureViewDimension::CubeArray;
-        }
-        // fEntry.count is unused currently
-    }
-    std::string label =  "layout_" + baseLabel + std::to_string(++layoutNum) ;
-    wgpu::BindGroupLayoutDescriptor layoutDescriptor{
-        .label{label.c_str()}, // Use .c_str() if label needs to be const char*
-        .entryCount = wEntries.size(),
-        .entries = wEntries.data()
-    };
-    mLayout = device.CreateBindGroupLayout(&layoutDescriptor);
-}
-
-WebGPUDescriptorSetLayout::~WebGPUDescriptorSetLayout() {}
-
-WebGPUDescriptorSet::WebGPUDescriptorSet(wgpu::BindGroupLayout const& layout,
-        std::vector<WebGPUDescriptorSetLayout::BindGroupEntryInfo> const& bindGroupEntries)
-    : mLayout(layout),
-      mEntriesWithDynamicOffsetsCount(std::count_if(bindGroupEntries.begin(),
-              bindGroupEntries.end(), [](auto const& entry) { return entry.hasDynamicOffset; })) {
-
-    mEntries.resize(bindGroupEntries.size());
-    for (size_t i = 0; i < bindGroupEntries.size(); ++i) {
-        mEntries[i].binding = bindGroupEntries[i].binding;
-    }
-    // Establish the size of entries based on the layout. This should be reliable and efficient.
-    assert_invariant(INVALID_INDEX > mEntryIndexByBinding.size());
-    for (size_t i = 0; i < mEntryIndexByBinding.size(); i++) {
-        mEntryIndexByBinding[i] = INVALID_INDEX;
-    }
-    for (size_t index = 0; index < mEntries.size(); index++) {
-        wgpu::BindGroupEntry const& entry = mEntries[index];
-        assert_invariant(entry.binding < mEntryIndexByBinding.size());
-        mEntryIndexByBinding[entry.binding] = static_cast<uint8_t>(index);
-    }
-}
-
-WebGPUDescriptorSet::~WebGPUDescriptorSet() {
-    mBindGroup = nullptr;
-    mLayout = nullptr;
-}
-
-wgpu::BindGroup WebGPUDescriptorSet::lockAndReturn(const wgpu::Device& device) {
-    if (mBindGroup) {
-        return mBindGroup;
-    }
-    // TODO label? Should we just copy layout label?
-    wgpu::BindGroupDescriptor desc{
-        .layout = mLayout,
-        .entryCount = mEntries.size(),
-        .entries = mEntries.data()
-    };
-    mBindGroup = device.CreateBindGroup(&desc);
-    FILAMENT_CHECK_POSTCONDITION(mBindGroup) << "Failed to create bind group?";
-    // once we have created the bind group itself we should no longer need any other state
-    mLayout = nullptr;
-    mEntries.clear();
-    mEntries.shrink_to_fit();
-    return mBindGroup;
-}
-
-void WebGPUDescriptorSet::addEntry(unsigned int index, wgpu::BindGroupEntry&& entry) {
-    if (mBindGroup) {
-        // We will keep getting hits from future updates, but shouldn't do anything
-        // Filament guarantees this won't change after things have locked.
-        return;
-    }
-    // TODO: Putting some level of trust that Filament is not going to reuse indexes or go past the
-    // layout index for efficiency. Add guards if wrong.
-    FILAMENT_CHECK_POSTCONDITION(index < mEntryIndexByBinding.size())
-            << "impossible/invalid index for a descriptor/binding (our of range or >= "
-               "MAX_DESCRIPTOR_COUNT) "
-            << index;
-    uint8_t entryIndex = mEntryIndexByBinding[index];
-    FILAMENT_CHECK_POSTCONDITION(
-            entryIndex != INVALID_INDEX && entryIndex < mEntries.size())
-            << "Invalid binding " << index;
-    entry.binding = index;
-    mEntries[entryIndex] = std::move(entry);
-}
-
-size_t WebGPUDescriptorSet::countEntitiesWithDynamicOffsets() const {
-    return mEntriesWithDynamicOffsetsCount;
-}
-
-WGPUTexture::WGPUTexture(SamplerType target, uint8_t levels, TextureFormat format, uint8_t samples,
-        uint32_t width, uint32_t height, uint32_t depth, TextureUsage usage,
-        wgpu::Device const& device) noexcept {
-    assert_invariant(
-            samples == 1 ||
-            samples == 4 &&
-                    "An invalid number of samples were requested, as WGPU requires the sample "
-                    "count to either be 1 (no multisampling) or 4, at least as of April 2025 of "
-                    "the spec. See https://www.w3.org/TR/webgpu/#texture-creation or "
-                    "https://gpuweb.github.io/gpuweb/#multisample-state");
-    // First, the texture aspect, starting with the defaults/basic configuration
-    mUsage = fToWGPUTextureUsage(usage);
-    mFormat = fToWGPUTextureFormat(format);
-    mAspect = fToWGPUTextureViewAspect(usage, format);
-    mBlockWidth = filament::backend::getBlockWidth(format);
-    mBlockHeight = filament::backend::getBlockHeight(format);
-    wgpu::TextureDescriptor textureDescriptor{
-        .label = getUserTextureLabel(target),
-        .usage = mUsage,
-        .dimension = target == SamplerType::SAMPLER_3D ? wgpu::TextureDimension::e3D
-                                                       : wgpu::TextureDimension::e2D,
-        .size = { .width = width, .height = height, .depthOrArrayLayers = depth },
-        .format = mFormat,
-        .mipLevelCount = levels,
-        .sampleCount = samples,
-        // TODO Is this fine? Could do all-the-things, a naive mapping or get something from
-        // Filament
-        .viewFormatCount = 0,
-        .viewFormats = nullptr,
-    };
-    // adjust for specific cases
-    switch (target) {
-        case SamplerType::SAMPLER_2D:
-            mArrayLayerCount = 1;
-            break;
-        case SamplerType::SAMPLER_2D_ARRAY:
-            mArrayLayerCount = textureDescriptor.size.depthOrArrayLayers;
-            break;
-        case SamplerType::SAMPLER_CUBEMAP:
-            textureDescriptor.size.depthOrArrayLayers = 6;
-            mArrayLayerCount = textureDescriptor.size.depthOrArrayLayers;
-            break;
-        case SamplerType::SAMPLER_EXTERNAL:
-        case SamplerType::SAMPLER_3D:
-            mArrayLayerCount = 1;
-            break;
-        case SamplerType::SAMPLER_CUBEMAP_ARRAY:
-            textureDescriptor.size.depthOrArrayLayers = depth * 6;
-            mArrayLayerCount = textureDescriptor.size.depthOrArrayLayers;
-            break;
-    }
-    assert_invariant(textureDescriptor.format != wgpu::TextureFormat::Undefined &&
-                     "Could not find appropriate WebGPU format");
-    mTexture = device.CreateTexture(&textureDescriptor);
-    FILAMENT_CHECK_POSTCONDITION(mTexture)
-            << "Failed to create texture for " << textureDescriptor.label;
-    // Second, the texture view aspect
-    mTexView = makeTextureView(0, levels, target);
-}
-
-WGPUTexture::WGPUTexture(WGPUTexture* src, uint8_t baseLevel, uint8_t levelCount) noexcept {
-    mTexture = src->mTexture;
-    mAspect = src->mAspect;
-    mBlockWidth = src->mBlockWidth;
-    mBlockHeight = src->mBlockHeight;
-
-    mTexView = makeTextureView(baseLevel, levelCount, target);
-}
-
-wgpu::TextureUsage WGPUTexture::fToWGPUTextureUsage(TextureUsage const& fUsage) {
-    wgpu::TextureUsage retUsage = wgpu::TextureUsage::None;
-
-    // Basing this mapping off of VulkanTexture.cpp's getUsage func and suggestions from Gemini
-    // TODO Validate assumptions, revisit if issues.
-    if (any(TextureUsage::BLIT_SRC & fUsage)) {
-        retUsage |= wgpu::TextureUsage::CopySrc;
-    }
-    if (any((TextureUsage::BLIT_DST | TextureUsage::UPLOADABLE) & fUsage)) {
-        retUsage |= wgpu::TextureUsage::CopyDst;
-    }
-    if (any(TextureUsage::SAMPLEABLE & fUsage)) {
-        retUsage |= wgpu::TextureUsage::TextureBinding;
-    }
-    // WGPU Render attachment covers either color or stencil situation dependant
-    // NOTE: Depth attachment isn't used this way in Vulkan but logically maps to WGPU docs. If
-    // issues, investigate here.
-    if (any((TextureUsage::COLOR_ATTACHMENT | TextureUsage::STENCIL_ATTACHMENT |
-                    TextureUsage::DEPTH_ATTACHMENT) &
-                fUsage)) {
-        retUsage |= wgpu::TextureUsage::RenderAttachment;
-    }
-
-    // This is from Vulkan logic- if there are any issues try disabling this first, allows perf
-    // benefit though
-    const bool useTransientAttachment =
-            // Usage consists of attachment flags only.
-            none(fUsage & ~TextureUsage::ALL_ATTACHMENTS) &&
-            // Usage contains at least one attachment flag.
-            any(fUsage & TextureUsage::ALL_ATTACHMENTS) &&
-            // Depth resolve cannot use transient attachment because it uses a custom shader.
-            // TODO: see VulkanDriver::isDepthStencilResolveSupported() to know when to remove this
-            // restriction.
-            // Note that the custom shader does not resolve stencil. We do need to move to vk 1.2
-            // and above to be able to support stencil resolve (along with depth).
-            !(any(fUsage & TextureUsage::DEPTH_ATTACHMENT) && samples > 1);
-    if (useTransientAttachment) {
-        retUsage |= wgpu::TextureUsage::TransientAttachment;
-    }
-    // NOTE: Unused wgpu flags:
-    //  StorageBinding
-    //  StorageAttachment
-
-    // NOTE: Unused Filament flags:
-    //  SUBPASS_INPUT VK goes to input attachment which we don't support right now
-    //  PROTECTED
-    return retUsage;
-}
-
-wgpu::TextureFormat WGPUTexture::fToWGPUTextureFormat(TextureFormat const& fFormat) {
-    switch (fFormat) {
-        case filament::backend::TextureFormat::R8:
-            return wgpu::TextureFormat::R8Unorm;
-        case filament::backend::TextureFormat::R8_SNORM:
-            return wgpu::TextureFormat::R8Snorm;
-        case filament::backend::TextureFormat::R8UI:
-            return wgpu::TextureFormat::R8Uint;
-        case filament::backend::TextureFormat::R8I:
-            return wgpu::TextureFormat::R8Sint;
-        case filament::backend::TextureFormat::STENCIL8:
-            return wgpu::TextureFormat::Stencil8;
-        case filament::backend::TextureFormat::R16F:
-            return wgpu::TextureFormat::R16Float;
-        case filament::backend::TextureFormat::R16UI:
-            return wgpu::TextureFormat::R16Uint;
-        case filament::backend::TextureFormat::R16I:
-            return wgpu::TextureFormat::R16Sint;
-        case filament::backend::TextureFormat::RG8:
-            return wgpu::TextureFormat::RG8Unorm;
-        case filament::backend::TextureFormat::RG8_SNORM:
-            return wgpu::TextureFormat::RG8Snorm;
-        case filament::backend::TextureFormat::RG8UI:
-            return wgpu::TextureFormat::RG8Uint;
-        case filament::backend::TextureFormat::RG8I:
-            return wgpu::TextureFormat::RG8Sint;
-        case filament::backend::TextureFormat::R32F:
-            return wgpu::TextureFormat::R32Float;
-        case filament::backend::TextureFormat::R32UI:
-            return wgpu::TextureFormat::R32Uint;
-        case filament::backend::TextureFormat::R32I:
-            return wgpu::TextureFormat::R32Sint;
-        case filament::backend::TextureFormat::RG16F:
-            return wgpu::TextureFormat::RG16Float;
-        case filament::backend::TextureFormat::RG16UI:
-            return wgpu::TextureFormat::RG16Uint;
-        case filament::backend::TextureFormat::RG16I:
-            return wgpu::TextureFormat::RG16Sint;
-        case filament::backend::TextureFormat::RGBA8:
-            return wgpu::TextureFormat::RGBA8Unorm;
-        case filament::backend::TextureFormat::SRGB8_A8:
-            return wgpu::TextureFormat::RGBA8UnormSrgb;
-        case filament::backend::TextureFormat::RGBA8_SNORM:
-            return wgpu::TextureFormat::RGBA8Snorm;
-        case filament::backend::TextureFormat::RGBA8UI:
-            return wgpu::TextureFormat::RGBA8Uint;
-        case filament::backend::TextureFormat::RGBA8I:
-            return wgpu::TextureFormat::RGBA8Sint;
-        case filament::backend::TextureFormat::DEPTH16:
-            return wgpu::TextureFormat::Depth16Unorm;
-        case filament::backend::TextureFormat::DEPTH24:
-            return wgpu::TextureFormat::Depth24Plus;
-        case filament::backend::TextureFormat::DEPTH32F:
-            return wgpu::TextureFormat::Depth32Float;
-        case filament::backend::TextureFormat::DEPTH24_STENCIL8:
-            return wgpu::TextureFormat::Depth24PlusStencil8;
-        case filament::backend::TextureFormat::DEPTH32F_STENCIL8:
-            return wgpu::TextureFormat::Depth32FloatStencil8;
-        case filament::backend::TextureFormat::RG32F:
-            return wgpu::TextureFormat::RG32Float;
-        case filament::backend::TextureFormat::RG32UI:
-            return wgpu::TextureFormat::RG32Uint;
-        case filament::backend::TextureFormat::RG32I:
-            return wgpu::TextureFormat::RG32Sint;
-        case filament::backend::TextureFormat::RGBA16F:
-            return wgpu::TextureFormat::RGBA16Float;
-        case filament::backend::TextureFormat::RGBA16UI:
-            return wgpu::TextureFormat::RGBA16Uint;
-        case filament::backend::TextureFormat::RGBA16I:
-            return wgpu::TextureFormat::RGBA16Sint;
-        case filament::backend::TextureFormat::RGBA32F:
-            return wgpu::TextureFormat::RGBA32Float;
-        case filament::backend::TextureFormat::RGBA32UI:
-            return wgpu::TextureFormat::RGBA32Uint;
-        case filament::backend::TextureFormat::RGBA32I:
-            return wgpu::TextureFormat::RGBA32Sint;
-        case filament::backend::TextureFormat::EAC_R11:
-            return wgpu::TextureFormat::EACR11Unorm;
-        case filament::backend::TextureFormat::EAC_R11_SIGNED:
-            return wgpu::TextureFormat::EACR11Snorm;
-        case filament::backend::TextureFormat::EAC_RG11:
-            return wgpu::TextureFormat::EACRG11Unorm;
-        case filament::backend::TextureFormat::EAC_RG11_SIGNED:
-            return wgpu::TextureFormat::EACRG11Snorm;
-        case filament::backend::TextureFormat::ETC2_RGB8:
-            return wgpu::TextureFormat::ETC2RGB8Unorm;
-        case filament::backend::TextureFormat::ETC2_SRGB8:
-            return wgpu::TextureFormat::ETC2RGB8UnormSrgb;
-        case filament::backend::TextureFormat::ETC2_RGB8_A1:
-            return wgpu::TextureFormat::ETC2RGB8A1Unorm;
-        case filament::backend::TextureFormat::ETC2_SRGB8_A1:
-            return wgpu::TextureFormat::ETC2RGB8A1UnormSrgb;
-        case filament::backend::TextureFormat::ETC2_EAC_RGBA8:
-            return wgpu::TextureFormat::ETC2RGBA8Unorm;
-        case filament::backend::TextureFormat::ETC2_EAC_SRGBA8:
-            return wgpu::TextureFormat::ETC2RGBA8UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_4x4:
-            return wgpu::TextureFormat::ASTC4x4Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_4x4:
-            return wgpu::TextureFormat::ASTC4x4UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_5x4:
-            return wgpu::TextureFormat::ASTC5x4Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_5x4:
-            return wgpu::TextureFormat::ASTC5x4UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_5x5:
-            return wgpu::TextureFormat::ASTC5x5Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_5x5:
-            return wgpu::TextureFormat::ASTC5x5UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_6x5:
-            return wgpu::TextureFormat::ASTC6x5Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_6x5:
-            return wgpu::TextureFormat::ASTC6x5UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_6x6:
-            return wgpu::TextureFormat::ASTC6x6Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_6x6:
-            return wgpu::TextureFormat::ASTC6x6UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_8x5:
-            return wgpu::TextureFormat::ASTC8x5Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_8x5:
-            return wgpu::TextureFormat::ASTC8x5UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_8x6:
-            return wgpu::TextureFormat::ASTC8x6Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_8x6:
-            return wgpu::TextureFormat::ASTC8x6UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_8x8:
-            return wgpu::TextureFormat::ASTC8x8Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_8x8:
-            return wgpu::TextureFormat::ASTC8x8UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_10x5:
-            return wgpu::TextureFormat::ASTC10x5Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_10x5:
-            return wgpu::TextureFormat::ASTC10x5UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_10x6:
-            return wgpu::TextureFormat::ASTC10x6Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_10x6:
-            return wgpu::TextureFormat::ASTC10x6UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_10x8:
-            return wgpu::TextureFormat::ASTC10x8Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_10x8:
-            return wgpu::TextureFormat::ASTC10x8UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_10x10:
-            return wgpu::TextureFormat::ASTC10x10Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_10x10:
-            return wgpu::TextureFormat::ASTC10x10UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_12x10:
-            return wgpu::TextureFormat::ASTC12x10Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_12x10:
-            return wgpu::TextureFormat::ASTC12x10UnormSrgb;
-        case filament::backend::TextureFormat::RGBA_ASTC_12x12:
-            return wgpu::TextureFormat::ASTC12x12Unorm;
-        case filament::backend::TextureFormat::SRGB8_ALPHA8_ASTC_12x12:
-            return wgpu::TextureFormat::ASTC12x12UnormSrgb;
-        case filament::backend::TextureFormat::RED_RGTC1:
-            return wgpu::TextureFormat::BC4RUnorm;
-        case filament::backend::TextureFormat::SIGNED_RED_RGTC1:
-            return wgpu::TextureFormat::BC4RSnorm;
-        case filament::backend::TextureFormat::RED_GREEN_RGTC2:
-            return wgpu::TextureFormat::BC5RGUnorm;
-        case filament::backend::TextureFormat::SIGNED_RED_GREEN_RGTC2:
-            return wgpu::TextureFormat::BC5RGSnorm;
-        case filament::backend::TextureFormat::RGB_BPTC_UNSIGNED_FLOAT:
-            return wgpu::TextureFormat::BC6HRGBUfloat;
-        case filament::backend::TextureFormat::RGB_BPTC_SIGNED_FLOAT:
-            return wgpu::TextureFormat::BC6HRGBFloat;
-        case filament::backend::TextureFormat::RGBA_BPTC_UNORM:
-            return wgpu::TextureFormat::BC7RGBAUnorm;
-        case filament::backend::TextureFormat::SRGB_ALPHA_BPTC_UNORM:
-            return wgpu::TextureFormat::BC7RGBAUnormSrgb;
-        case filament::backend::TextureFormat::RGB565:
-            // No direct mapping in wgpu. Could potentially map to RGBA8Unorm
-            // and discard the alpha and lower precision.
-            return wgpu::TextureFormat::Undefined;
-        case filament::backend::TextureFormat::RGB9_E5:
-            return wgpu::TextureFormat::RGB9E5Ufloat;
-        case filament::backend::TextureFormat::RGB5_A1:
-            // No direct mapping in wgpu. Could potentially map to RGBA8Unorm
-            // and handle the packing/unpacking in shaders.
-            return wgpu::TextureFormat::Undefined;
-        case filament::backend::TextureFormat::RGBA4:
-            // No direct mapping in wgpu. Could potentially map to RGBA8Unorm
-            // and handle the packing/unpacking in shaders.
-            return wgpu::TextureFormat::Undefined;
-        case filament::backend::TextureFormat::RGB8:
-            // No direct sRGB equivalent in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA8Unorm;
-        case filament::backend::TextureFormat::SRGB8:
-            // No direct sRGB equivalent in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA8UnormSrgb;
-        case filament::backend::TextureFormat::RGB8_SNORM:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA8Snorm;
-        case filament::backend::TextureFormat::RGB8UI:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA8Uint;
-        case filament::backend::TextureFormat::RGB8I:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA8Sint;
-        case filament::backend::TextureFormat::R11F_G11F_B10F:
-            return wgpu::TextureFormat::RG11B10Ufloat;
-        case filament::backend::TextureFormat::UNUSED:
-            return wgpu::TextureFormat::Undefined;
-        case filament::backend::TextureFormat::RGB10_A2:
-            return wgpu::TextureFormat::RGB10A2Unorm;
-        case filament::backend::TextureFormat::RGB16F:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA16Float;
-        case filament::backend::TextureFormat::RGB16UI:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA16Uint;
-        case filament::backend::TextureFormat::RGB16I:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA16Sint;
-        case filament::backend::TextureFormat::RGB32F:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA32Float;
-        case filament::backend::TextureFormat::RGB32UI:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA32Uint;
-        case filament::backend::TextureFormat::RGB32I:
-            // No direct mapping in wgpu without alpha.
-            return wgpu::TextureFormat::RGBA32Sint;
-        case filament::backend::TextureFormat::DXT1_RGB:
-            return wgpu::TextureFormat::BC1RGBAUnorm;
-        case filament::backend::TextureFormat::DXT1_RGBA:
-            return wgpu::TextureFormat::BC1RGBAUnorm;
-        case filament::backend::TextureFormat::DXT3_RGBA:
-            return wgpu::TextureFormat::BC2RGBAUnorm;
-        case filament::backend::TextureFormat::DXT5_RGBA:
-            return wgpu::TextureFormat::BC3RGBAUnorm;
-        case filament::backend::TextureFormat::DXT1_SRGB:
-            return wgpu::TextureFormat::BC1RGBAUnormSrgb;
-        case filament::backend::TextureFormat::DXT1_SRGBA:
-            return wgpu::TextureFormat::BC1RGBAUnormSrgb;
-        case filament::backend::TextureFormat::DXT3_SRGBA:
-            return wgpu::TextureFormat::BC2RGBAUnormSrgb;
-        case filament::backend::TextureFormat::DXT5_SRGBA:
-            return wgpu::TextureFormat::BC3RGBAUnormSrgb;
-    }
-}
-
-wgpu::TextureAspect WGPUTexture::fToWGPUTextureViewAspect(TextureUsage const& fUsage,
-        TextureFormat const& fFormat) {
-
-    const bool isDepth = any(fUsage & TextureUsage::DEPTH_ATTACHMENT);
-    const bool isStencil = any(fUsage & TextureUsage::STENCIL_ATTACHMENT);
-    const bool isColor = any(fUsage & TextureUsage::COLOR_ATTACHMENT);
-    const bool isSample = (fUsage == TextureUsage::SAMPLEABLE);
-
-    if (isDepth && !isColor && !isStencil) {
-        return wgpu::TextureAspect::DepthOnly;
-    }
-
-    if (isStencil && !isColor && !isDepth) {
-        return wgpu::TextureAspect::StencilOnly;
-    }
-
-    if (fFormat == filament::backend::TextureFormat::DEPTH32F ||
-            fFormat == filament::backend::TextureFormat::DEPTH24 ||
-            fFormat == filament::backend::TextureFormat::DEPTH16) {
-        return wgpu::TextureAspect::DepthOnly;
-    }
-
-    if (fFormat == filament::backend::TextureFormat::STENCIL8) {
-        return wgpu::TextureAspect::StencilOnly;
-    }
-
-    if (fFormat == filament::backend::TextureFormat::DEPTH24_STENCIL8 ||
-            fFormat == filament::backend::TextureFormat::DEPTH32F_STENCIL8) {
-        if (isSample) {
-            return wgpu::TextureAspect::DepthOnly;
-        }
-    }
-
-    return wgpu::TextureAspect::All;
-}
-
-wgpu::TextureView WGPUTexture::makeTextureView(const uint8_t& baseLevel, const uint8_t& levelCount,
-        SamplerType target) {
-
-    wgpu::TextureViewDescriptor textureViewDescriptor{
-        .label = getUserTextureViewLabel(target),
-        .format = mFormat,
-        .baseMipLevel = baseLevel,
-        .mipLevelCount = levelCount,
-        // TODO: check if this baseArrayLayer assumption is correct
-        .baseArrayLayer = 0,
-        .arrayLayerCount = mArrayLayerCount,
-        .aspect = mAspect,
-        .usage = mUsage
-    };
-
-    switch (target) {
-        case SamplerType::SAMPLER_2D:
-            textureViewDescriptor.dimension = wgpu::TextureViewDimension::e2D;
-            break;
-        case SamplerType::SAMPLER_2D_ARRAY:
-            textureViewDescriptor.dimension = wgpu::TextureViewDimension::e2DArray;
-            break;
-        case SamplerType::SAMPLER_CUBEMAP:
-            textureViewDescriptor.dimension = wgpu::TextureViewDimension::Cube;
-            break;
-        case SamplerType::SAMPLER_EXTERNAL:
-            textureViewDescriptor.dimension = wgpu::TextureViewDimension::e2D;
-            break;
-        case SamplerType::SAMPLER_3D:
-            textureViewDescriptor.dimension = wgpu::TextureViewDimension::e3D;
-            break;
-        case SamplerType::SAMPLER_CUBEMAP_ARRAY:
-            textureViewDescriptor.dimension = wgpu::TextureViewDimension::CubeArray;
-            break;
-    }
-    wgpu::TextureView textureView = mTexture.CreateView(&textureViewDescriptor);
-    FILAMENT_CHECK_POSTCONDITION(textureView)
-            << "Failed to create texture view " << textureViewDescriptor.label;
-    return textureView;
-}
-
-WGPURenderTarget::Attachment WGPURenderTarget::getDrawColorAttachment(size_t index) {
-    assert_invariant( index < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT);
-    auto result = color[index];
-    if (index == 0 && defaultRenderTarget) {
-
-    }
-
-    return result;
-}
-
-wgpu::LoadOp WGPURenderTarget::getLoadOperation(RenderPassParams const& params,
-                                             TargetBufferFlags buffer) {
-    auto clearFlags = params.flags.clear;
-    auto discardStartFlags = params.flags.discardStart;
-    if (any(clearFlags & buffer)) {
-        return wgpu::LoadOp::Clear;
-    } else if (any(discardStartFlags & buffer)) {
-        return wgpu::LoadOp::Clear;
-    }
-    return wgpu::LoadOp::Load;
-}
-
-wgpu::StoreOp WGPURenderTarget::getStoreOperation(RenderPassParams const& params,
-                                               TargetBufferFlags buffer) {
-    const auto discardEndFlags = params.flags.discardEnd;
-    if (any(discardEndFlags & buffer)) {
-        return wgpu::StoreOp::Discard;
-    }
-    return wgpu::StoreOp::Store;
-}
-void WGPURenderTarget::setUpRenderPassAttachments(wgpu::RenderPassDescriptor& descriptor,
-        wgpu::TextureView const& textureView, RenderPassParams const& params) {
-    // auto discardFlags = params.flags.discardEnd;
-    // (void) discardFlags;
-    // std::vector<wgpu::RenderPassColorAttachment> colorAttachments;
-    colorAttachments.clear();
-    for (size_t i = 0; i < 1/*MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT*/; i++) {
-        // auto attachment = getDrawColorAttachment(i);
-        // if (attachment) {
-            wgpu::RenderPassColorAttachment colorAttachment;
-            colorAttachment.view = textureView;
-            colorAttachment.loadOp  = getLoadOperation(params, getTargetBufferFlagsAt(i));
-            colorAttachment.storeOp = getStoreOperation(params, getTargetBufferFlagsAt(i));
-            colorAttachment.clearValue = { params.clearColor.r, params.clearColor.g, params.clearColor.b, params.clearColor.a };
-            colorAttachments.emplace_back(colorAttachment);
-        // }
-    }
-    descriptor.colorAttachments = colorAttachments.data();
-    descriptor.colorAttachmentCount = colorAttachments.size();
-    descriptor.depthStencilAttachment = nullptr;
-    descriptor.timestampWrites = nullptr;
-}
-
-
-}// namespace filament::backend
--- a/filament/backend/src/webgpu/WebGPUHandles.h
+++ b/filament/backend/src/webgpu/WebGPUHandles.h
@@ -1,253 +0,0 @@
-/*
-* Copyright (C) 2025 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#ifndef TNT_FILAMENT_BACKEND_WEBGPUHANDLES_H
-#define TNT_FILAMENT_BACKEND_WEBGPUHANDLES_H
-
-#include "DriverBase.h"
-
-#include <backend/DriverEnums.h>
-#include <backend/Handle.h>
-
-#include <utils/FixedCapacityVector.h>
-
-#include <webgpu/webgpu_cpp.h>
-
-#include <array>
-#include <cstdint>
-#include <vector>
-
-namespace filament::backend {
-
-class WGPUProgram final : public HwProgram {
-public:
-    WGPUProgram(wgpu::Device&, Program&);
-
-    wgpu::ShaderModule vertexShaderModule = nullptr;
-    wgpu::ShaderModule fragmentShaderModule = nullptr;
-    wgpu::ShaderModule computeShaderModule = nullptr;
-    std::vector<wgpu::ConstantEntry> constants;
-};
-
-
-// VertexBufferInfo contains layout info for Vertex Buffer based on WebGPU structs. In WebGPU each
-// VertexBufferLayout is associated with a single vertex buffer. So number of mVertexBufferLayout
-// is equal to bufferCount. Each VertexBufferLayout can contain multiple VertexAttribute. Bind index
-// of vertex buffer is implicitly calculated by the position of VertexBufferLayout in an array.
-class WGPUVertexBufferInfo : public HwVertexBufferInfo {
-public:
-    WGPUVertexBufferInfo(uint8_t bufferCount, uint8_t attributeCount,
-            AttributeArray const& attributes);
-    inline  wgpu::VertexBufferLayout const* getVertexBufferLayout() const {
-        return mVertexBufferLayout.data();
-    }
-
-    inline uint32_t getVertexBufferLayoutSize() const {
-        return mVertexBufferLayout.size();
-    }
-
-    inline wgpu::VertexAttribute const* getVertexAttributeForIndex(uint32_t index) const {
-        assert_invariant(index < mAttributes.size());
-        return mAttributes[index].data();
-    }
-
-    inline uint32_t getVertexAttributeSize(uint32_t index) const {
-        assert_invariant(index < mAttributes.size());
-        return mAttributes[index].size();
-    }
-
-private:
-    // TODO: can we do better in terms on heap management.
-    std::vector<wgpu::VertexBufferLayout> mVertexBufferLayout{};
-    std::vector<std::vector<wgpu::VertexAttribute>> mAttributes{};
-};
-
-struct WGPUVertexBuffer : public HwVertexBuffer {
-    WGPUVertexBuffer(wgpu::Device const &device, uint32_t vertexCount, uint32_t bufferCount,
-                     Handle<HwVertexBufferInfo> vbih);
-
-    Handle<HwVertexBufferInfo> vbih;
-    utils::FixedCapacityVector<wgpu::Buffer> buffers;
-};
-
-class WGPUBufferBase {
-public:
-    void createBuffer(wgpu::Device const& device, wgpu::BufferUsage usage, uint32_t size,
-            char const* label);
-    void updateGPUBuffer(BufferDescriptor& bufferDescriptor, uint32_t byteOffset,
-            wgpu::Queue queue);
-    const wgpu::Buffer& getBuffer() const { return buffer; }
-protected:
-    wgpu::Buffer buffer;
-private:
-    // 4 bytes to hold any extra chunk we need.
-    std::array<uint8_t,4> mRemainderChunk;
-};
-
-class WGPUIndexBuffer : public HwIndexBuffer, public WGPUBufferBase {
-public:
-    WGPUIndexBuffer(wgpu::Device const &device, uint8_t elementSize,
-                    uint32_t indexCount);
-    wgpu::IndexFormat indexFormat;
-};
-
-class WGPUBufferObject : public HwBufferObject, public WGPUBufferBase {
-public:
-    WGPUBufferObject(wgpu::Device const &device, BufferObjectBinding bindingType, uint32_t byteCount);
-};
-
-class WebGPUDescriptorSetLayout final : public HwDescriptorSetLayout {
-public:
-
-    struct BindGroupEntryInfo final {
-        uint8_t binding = 0;
-        bool hasDynamicOffset = false;
-    };
-
-    WebGPUDescriptorSetLayout(DescriptorSetLayout const& layout, wgpu::Device const& device);
-    ~WebGPUDescriptorSetLayout();
-    [[nodiscard]] const wgpu::BindGroupLayout& getLayout() const { return mLayout; }
-    [[nodiscard]] std::vector<BindGroupEntryInfo> const& getBindGroupEntries() const {
-        return mBindGroupEntries;
-    }
-
-private:
-    // TODO: If this is useful elsewhere, remove it from this class
-    // Convert Filament Shader Stage Flags bitmask to webgpu equivalent
-    static wgpu::ShaderStage filamentStageToWGPUStage(ShaderStageFlags fFlags);
-    std::vector<BindGroupEntryInfo> mBindGroupEntries;
-    wgpu::BindGroupLayout mLayout;
-};
-
-class WebGPUDescriptorSet final : public HwDescriptorSet {
-public:
-
-    WebGPUDescriptorSet(wgpu::BindGroupLayout const& layout,
-            std::vector<WebGPUDescriptorSetLayout::BindGroupEntryInfo> const& bindGroupEntries);
-    ~WebGPUDescriptorSet();
-
-    wgpu::BindGroup lockAndReturn(wgpu::Device const&);
-    void addEntry(unsigned int index, wgpu::BindGroupEntry&& entry);
-    [[nodiscard]] bool getIsLocked() const { return mBindGroup != nullptr; }
-    [[nodiscard]] size_t countEntitiesWithDynamicOffsets() const;
-
-private:
-    wgpu::BindGroupLayout mLayout = nullptr;
-    static constexpr uint8_t INVALID_INDEX = MAX_DESCRIPTOR_COUNT + 1;
-    std::array<uint8_t, MAX_DESCRIPTOR_COUNT> mEntryIndexByBinding{};
-    std::vector<wgpu::BindGroupEntry> mEntries;
-    const size_t mEntriesWithDynamicOffsetsCount;
-    wgpu::BindGroup mBindGroup = nullptr;
-};
-
-class WGPUTexture : public HwTexture {
-public:
-    WGPUTexture(SamplerType target, uint8_t levels, TextureFormat format, uint8_t samples,
-            uint32_t width, uint32_t height, uint32_t depth, TextureUsage usage,
-            wgpu::Device const& device) noexcept;
-
-    WGPUTexture(WGPUTexture* src, uint8_t baseLevel, uint8_t levelCount) noexcept;
-    wgpu::TextureAspect getAspect() const { return mAspect; }
-    size_t getBlockWidth() const { return mBlockWidth; }
-    size_t getBlockHeight() const { return mBlockHeight; }
-
-    [[nodiscard]] const wgpu::Texture& getTexture() const { return mTexture; }
-    [[nodiscard]] const wgpu::TextureView& getTexView() const { return mTexView; }
-
-    static wgpu::TextureFormat fToWGPUTextureFormat(
-            filament::backend::TextureFormat const& fFormat);
-    static wgpu::TextureAspect fToWGPUTextureViewAspect(
-            filament::backend::TextureUsage const& fUsage,
-            filament::backend::TextureFormat const& fFormat);
-
-private:
-    wgpu::TextureView makeTextureView(const uint8_t& baseLevel, const uint8_t& levelCount,
-            SamplerType target);
-    // CreateTextureR has info for a texture and sampler. Texture Views are needed for binding,
-    // along with a sampler Current plan: Inherit the sampler and Texture to always exist (It is a
-    // ref counted pointer) when making views. View is optional
-    wgpu::Texture mTexture = nullptr;
-    wgpu::TextureUsage mUsage = wgpu::TextureUsage::None;
-    wgpu::TextureFormat mFormat = wgpu::TextureFormat::Undefined;
-    wgpu::TextureAspect mAspect = wgpu::TextureAspect::Undefined;
-    uint32_t mArrayLayerCount = 1;
-    wgpu::TextureView mTexView = nullptr;
-    wgpu::TextureUsage fToWGPUTextureUsage(filament::backend::TextureUsage const& fUsage);
-    size_t mBlockWidth;
-    size_t mBlockHeight;
-};
-
-struct WGPURenderPrimitive : public HwRenderPrimitive {
-
-    WGPUVertexBuffer* vertexBuffer = nullptr;
-    WGPUIndexBuffer* indexBuffer = nullptr;
-};
-
-class WGPURenderTarget : public HwRenderTarget {
-public:
-    class Attachment {
-    public:
-        friend class WGPURenderTarget;
-
-        Attachment() = default;
-        Attachment(WGPUTexture* gpuTexture, uint8_t level = 0, uint16_t layer = 0)
-            : level(level),
-              layer(layer),
-              texture(gpuTexture->getTexture()),
-              mWGPUTexture(gpuTexture) {}
-        operator bool() const {
-            return mWGPUTexture != nullptr;
-        }
-
-        uint8_t level = 0;
-        uint16_t layer = 0;
-
-    private:
-        wgpu::Texture texture = nullptr;
-        WGPUTexture* mWGPUTexture = nullptr;
-    };
-
-    WGPURenderTarget(uint32_t width, uint32_t height, uint8_t samples,
-            Attachment colorAttachments[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT]);
-    WGPURenderTarget()
-        : HwRenderTarget(0, 0),
-          defaultRenderTarget(true) {}
-
-    void setUpRenderPassAttachments(wgpu::RenderPassDescriptor& descriptor,
-            wgpu::TextureView const& textureView, RenderPassParams const& params);
-
-    math::uint2 getAttachmentSize() noexcept;
-
-    bool isDefaultRenderTarget() const { return defaultRenderTarget; }
-    uint8_t getSamples() const { return samples; }
-
-    Attachment getDrawColorAttachment(size_t index);
-    Attachment getReadColorAttachment(size_t index);
-
-    static wgpu::LoadOp getLoadOperation(const RenderPassParams& params, TargetBufferFlags buffer);
-    static wgpu::StoreOp getStoreOperation(const RenderPassParams& params, TargetBufferFlags buffer);
-private:
-    bool defaultRenderTarget = false;
-    uint8_t samples = 1;
-
-    Attachment color[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT] = {};
-    math::uint2 attachmentSize = {};
-    std::vector<wgpu::RenderPassColorAttachment> colorAttachments{};
-};
-
-}// namespace filament::backend
-#endif// TNT_FILAMENT_BACKEND_WEBGPUHANDLES_H
--- a/filament/backend/src/webgpu/WebGPUIndexBuffer.cpp
+++ b/filament/backend/src/webgpu/WebGPUIndexBuffer.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WebGPUIndexBuffer.h"
+
+#include "WebGPUBufferBase.h"
+
+#include "DriverBase.h"
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <cstdint>
+
+namespace filament::backend {
+
+WebGPUIndexBuffer::WebGPUIndexBuffer(wgpu::Device const& device, const uint8_t elementSize,
+        const uint32_t indexCount)
+    : HwIndexBuffer{ elementSize, indexCount }, // elementSize in bytes: 2 => Uint16, else Uint32
+      WebGPUBufferBase{ device, wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::Index,
+          elementSize * indexCount, "index_buffer" }, // presumably (byte size, label); confirm
+      mIndexFormat{ elementSize == 2 ? wgpu::IndexFormat::Uint16 : wgpu::IndexFormat::Uint32 } {}
+
+} // namespace filament::backend
--- a/filament/backend/src/webgpu/WebGPUIndexBuffer.h
+++ b/filament/backend/src/webgpu/WebGPUIndexBuffer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_WEBGPUINDEXBUFFER_H
+#define TNT_FILAMENT_BACKEND_WEBGPUINDEXBUFFER_H
+
+#include "WebGPUBufferBase.h"
+
+#include "DriverBase.h"
+
+#include <cstdint>
+
+namespace wgpu {
+class Device;
+enum class IndexFormat : uint32_t;
+} // namespace wgpu
+
+namespace filament::backend {
+
+class WebGPUIndexBuffer final : public HwIndexBuffer, public WebGPUBufferBase {
+public:
+    WebGPUIndexBuffer(wgpu::Device const&, uint8_t elementSize, uint32_t indexCount);
+
+    [[nodiscard]] wgpu::IndexFormat getIndexFormat() const { return mIndexFormat; }
+
+private:
+    const wgpu::IndexFormat mIndexFormat; // set once by the constructor; never changes afterwards
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_WEBGPUINDEXBUFFER_H
--- a/filament/backend/src/webgpu/WebGPUPipelineCreation.cpp
+++ b/filament/backend/src/webgpu/WebGPUPipelineCreation.cpp
@@ -16,7 +16,9 @@

 #include "WebGPUPipelineCreation.h"

-#include "WebGPUHandles.h"
+#include "WebGPUProgram.h"
+#include "WebGPURenderTarget.h"
+#include "WebGPUVertexBufferInfo.h"

 #include <backend/DriverEnums.h>
 #include <backend/TargetBufferInfo.h>
@@ -65,6 +67,17 @@ constexpr wgpu::CullMode toWebGPU(CullingMode cullMode) {
    }
 }

+bool hasStencilAspect(wgpu::TextureFormat format) {
+    // only these three formats carry a stencil aspect; every other format reports false
+    const bool stencilOnly = format == wgpu::TextureFormat::Stencil8;
+    const bool depth24WithStencil = format == wgpu::TextureFormat::Depth24PlusStencil8;
+    const bool depth32WithStencil = format == wgpu::TextureFormat::Depth32FloatStencil8;
+    if (stencilOnly || depth24WithStencil || depth32WithStencil) {
+        return true;
+    }
+    return false;
+}
+
 constexpr wgpu::CompareFunction toWebGPU(SamplerCompareFunc compareFunction) {
    switch (compareFunction) {
        case SamplerCompareFunc::LE:
@@ -152,47 +165,72 @@ constexpr wgpu::BlendFactor toWebGPU(BlendFunction blendFunction) {
 }// namespace

 wgpu::RenderPipeline createWebGPURenderPipeline(wgpu::Device const& device,
-        WGPUProgram const& program, WGPUVertexBufferInfo const& vertexBufferInfo,
+        WebGPUProgram const& program, WebGPUVertexBufferInfo const& vertexBufferInfo,
        wgpu::PipelineLayout const& layout, RasterState const& rasterState,
        StencilState const& stencilState, PolygonOffset const& polygonOffset,
-        PrimitiveType primitiveType, wgpu::TextureFormat colorFormat,
-        wgpu::TextureFormat depthFormat) {
+        const PrimitiveType primitiveType, std::vector<wgpu::TextureFormat> const& colorFormats,
+        const wgpu::TextureFormat depthFormat, const uint8_t samplesCount) {
    assert_invariant(program.vertexShaderModule);
-    const wgpu::DepthStencilState depthStencilState {
-        .format = depthFormat,
-        .depthWriteEnabled = rasterState.depthWrite,
-        .depthCompare = toWebGPU(rasterState.depthFunc),
-        .stencilFront = {
+    wgpu::DepthStencilState depthStencilState{};
+    if (depthFormat != wgpu::TextureFormat::Undefined) {
+        depthStencilState.format = depthFormat;
+        depthStencilState.depthWriteEnabled = rasterState.depthWrite;
+        depthStencilState.depthCompare = toWebGPU(rasterState.depthFunc);
+        depthStencilState.stencilFront = {
            .compare = toWebGPU(stencilState.front.stencilFunc),
            .failOp = toWebGPU(stencilState.front.stencilOpStencilFail),
            .depthFailOp = toWebGPU(stencilState.front.stencilOpDepthFail),
            .passOp = toWebGPU(stencilState.front.stencilOpDepthStencilPass),
-        },
-        .stencilBack = {
+        };
+        depthStencilState.stencilBack = {
            .compare = toWebGPU(stencilState.back.stencilFunc),
            .failOp = toWebGPU(stencilState.back.stencilOpStencilFail),
            .depthFailOp = toWebGPU(stencilState.back.stencilOpDepthFail),
            .passOp = toWebGPU(stencilState.back.stencilOpDepthStencilPass),
-        },
-        .stencilReadMask = 0,
-        .stencilWriteMask = stencilState.stencilWrite ? 0xFFFFFFFF : 0,
-        .depthBias = static_cast<int32_t>(polygonOffset.constant),
-        .depthBiasSlopeScale = polygonOffset.slope,
-        .depthBiasClamp = 0.0f
-    };
+        };
+        depthStencilState.stencilReadMask =
+                stencilState.front.readMask; // Use front face's comparison mask for read mask
+        depthStencilState.stencilWriteMask = stencilState.stencilWrite ? 0xFFFFFFFF : 0u;
+        depthStencilState.depthBias = static_cast<int32_t>(polygonOffset.constant);
+        depthStencilState.depthBiasSlopeScale = polygonOffset.slope;
+        depthStencilState.depthBiasClamp = 0.0f;
+
+        if (!hasStencilAspect(depthFormat)) {
+            depthStencilState.stencilFront.compare = wgpu::CompareFunction::Always;
+            depthStencilState.stencilFront.failOp = wgpu::StencilOperation::Keep;
+            depthStencilState.stencilFront.depthFailOp = wgpu::StencilOperation::Keep;
+            depthStencilState.stencilFront.passOp = wgpu::StencilOperation::Keep;
+            depthStencilState.stencilBack =
+                    depthStencilState.stencilFront; // Keep back and front consistent
+            depthStencilState.stencilReadMask = 0;
+            depthStencilState.stencilWriteMask = 0;
+        }
+    }
+
    std::stringstream pipelineLabelStream;
    pipelineLabelStream << program.name.c_str() << " pipeline";
    const auto pipelineLabel = pipelineLabelStream.str();
    wgpu::RenderPipelineDescriptor pipelineDescriptor{
        .label = wgpu::StringView(pipelineLabel),
        .layout = layout,
-        .vertex = {
-            .module = program.vertexShaderModule,
+        .vertex = { .module = program.vertexShaderModule,
            .entryPoint = "main",
-            .constantCount = program.constants.size(),
-            .constants = program.constants.data(),
-            .bufferCount = vertexBufferInfo.getVertexBufferLayoutSize(),
-            .buffers = vertexBufferInfo.getVertexBufferLayout()
+            // we do not use WebGPU's override constants due to 2 limitations
+            // (at least at the time of writing this):
+            // 1. they cannot be used for the size of an array, which is needed
+            // 2. if we pass the WebGPU API (CPU-side) constants not referenced in the
+            //    shader WebGPU fails. This is a problem with how Filament is designed,
+            //    where certain constants may be optimized out of the shader based
+            //    on build configuration, etc.
+            //
+            // to bypass these problems, we do not use override constants in the
+            // WebGPU backend, instead replacing placeholder constants in the shader
+            // text before creating the shader module (essentially implementing
+            // override constants ourselves)
+            .constantCount = 0,
+            .constants = nullptr,
+            .bufferCount = vertexBufferInfo.getVertexBufferLayoutCount(),
+            .buffers = vertexBufferInfo.getVertexBufferLayouts()
        },
        .primitive = {
            .topology = toWebGPU(primitiveType),
@@ -207,9 +245,9 @@ wgpu::RenderPipeline createWebGPURenderPipeline(wgpu::Device const& device,
            .unclippedDepth = !rasterState.depthClamp &&
                              device.HasFeature(wgpu::FeatureName::DepthClipControl)
        },
-        .depthStencil = &depthStencilState,
+        .depthStencil = depthFormat != wgpu::TextureFormat::Undefined ? &depthStencilState: nullptr,
        .multisample = {
-            .count = 1, // TODO need to get this from the render target
+            .count = samplesCount,
            .mask = 0xFFFFFFFF,
            .alphaToCoverageEnabled = rasterState.alphaToCoverage
        },
@@ -232,14 +270,20 @@ wgpu::RenderPipeline createWebGPURenderPipeline(wgpu::Device const& device,
    if (program.fragmentShaderModule != nullptr) {
        fragmentState.module = program.fragmentShaderModule;
        fragmentState.entryPoint = "main";
-        fragmentState.constantCount = program.constants.size(),
-        fragmentState.constants = program.constants.data(),
-        fragmentState.targetCount = 1; // TODO need to get this from the render target
+        // see the comment about constants for the vertex state, as the same reasoning applies
+        // here
+        fragmentState.constantCount = 0,
+        fragmentState.constants = nullptr,
+        fragmentState.targetCount = colorFormats.size();
        fragmentState.targets = colorTargets.data();
        assert_invariant(fragmentState.targetCount <= MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT);
+        // We expect a fragment shader implies at least one color target if it outputs color.
+        // This should be guaranteed by the caller ensuring colorFormats is not empty.
+        // However, this fails on shadowtest.cpp, TODO investigate why
+        // assert_invariant(fragmentState.targetCount > 0);
        for (size_t targetIndex = 0; targetIndex < fragmentState.targetCount; targetIndex++) {
            auto& colorTarget = colorTargets[targetIndex];
-            colorTarget.format = colorFormat;
+            colorTarget.format = colorFormats[targetIndex];
            colorTarget.blend = rasterState.hasBlending() ? &blendState : nullptr;
            colorTarget.writeMask =
                    rasterState.colorWrite ? wgpu::ColorWriteMask::All : wgpu::ColorWriteMask::None;
--- a/filament/backend/src/webgpu/WebGPUPipelineCreation.h
+++ b/filament/backend/src/webgpu/WebGPUPipelineCreation.h
@@ -18,6 +18,7 @@
 #define TNT_FILAMENT_BACKEND_WEBGPUPIPELINECREATION_H

 #include <cstdint>
+#include <vector>

 namespace wgpu {
 class Device;
@@ -33,13 +34,14 @@ enum class PrimitiveType : uint8_t;
 struct RasterState;
 struct StencilState;

-class WGPUVertexBufferInfo;
-class WGPUProgram;
+class WebGPUVertexBufferInfo;
+class WebGPUProgram;

 [[nodiscard]] wgpu::RenderPipeline createWebGPURenderPipeline(wgpu::Device const&,
-        WGPUProgram const&, WGPUVertexBufferInfo const&, wgpu::PipelineLayout const&,
-        RasterState const&, StencilState const&, PolygonOffset const&, PrimitiveType,
-        wgpu::TextureFormat colorFormat, wgpu::TextureFormat depthFormat);
+        WebGPUProgram const&, WebGPUVertexBufferInfo const&, wgpu::PipelineLayout const&,
+        RasterState const&, StencilState const&, PolygonOffset const&, PrimitiveType primitiveType,
+        std::vector<wgpu::TextureFormat> const& colorFormats, wgpu::TextureFormat depthFormat,
+        uint8_t samplesCount);

 }// namespace filament::backend

--- a/filament/backend/src/webgpu/WebGPUProgram.cpp
+++ b/filament/backend/src/webgpu/WebGPUProgram.cpp
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WebGPUProgram.h"
+
+#include "WebGPUConstants.h"
+#include "WebGPUStrings.h"
+
+#include "DriverBase.h"
+#include <backend/DriverEnums.h>
+#include <backend/Program.h>
+
+#include <utils/FixedCapacityVector.h>
+#include <utils/Panic.h>
+#include <utils/debug.h>
+#include <utils/ostream.h>
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <array>
+#include <cstdint>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <variant>
+
+namespace filament::backend {
+
+namespace {
+
+/**
+ * Given the source code of a [WGSL] WebGPU shader in text (string view to it) and the constants
+ * to be overwritten now (at runtime), returns updated [WGSL] shader code text, where the constants
+ * have been replaced.
+ * @param shaderLabel Something to call this shader for troubleshooting, error messaging, etc.
+ * @param shaderSource The original WGSL WebGPU shader code as text to be processed. This is a
+ *                     view to that text and this function does not change it; it is immutable.
+ *                     Instead, the processed version of this source is returned by the function.
+ * @param specConstants The constants to replace in the shader code, indexed by constant id.
+ * @return Processed version of the WGSL WebGPU shader code provided, where the constants have
+ * been replaced with the values provided by the specConstants parameter.
+ */
+[[nodiscard]] std::string replaceSpecConstants(std::string_view shaderLabel,
+        std::string_view shaderSource,
+        std::unordered_map<uint32_t, std::variant<int32_t, float, bool>> const&
+                specConstants) {
+    // this function is not expected to be called at all if no spec constants are to be replaced
+    assert_invariant(!specConstants.empty());
+    static constexpr std::string_view specConstantPrefix = "FILAMENT_SPEC_CONST_";
+    static constexpr size_t specConstantPrefixSize = specConstantPrefix.size();
+    const char* const sourceData = shaderSource.data();
+    std::stringstream processedShaderSource{};
+    size_t pos = 0;
+    while (pos < shaderSource.size()) {
+        const size_t posOfNextSpecConstant = shaderSource.find(specConstantPrefix, pos);
+        if (posOfNextSpecConstant == std::string::npos) {
+            // no more spec constants, so just stream the rest of the source code string
+            processedShaderSource << std::string_view(sourceData + pos, shaderSource.size() - pos);
+            break;
+        }
+        const size_t posOfId = posOfNextSpecConstant + specConstantPrefixSize;
+        const size_t posAfterId = shaderSource.find('_', posOfId);
+        FILAMENT_CHECK_POSTCONDITION(posAfterId != std::string::npos)
+                << "malformed " << shaderLabel << ". Found spec constant prefix '"
+                << specConstantPrefix << "' without an id or '_' after it.";
+        const std::string_view idStr =
+                std::string_view(sourceData + posOfId, posAfterId - posOfId);
+        const size_t posEndOfStatement = shaderSource.find(';', posAfterId);
+        FILAMENT_CHECK_POSTCONDITION(posEndOfStatement != std::string::npos)
+                << "malformed " << shaderLabel << ". Found spec constant assignment with id "
+                << idStr << " without a terminating ';' character?";
+        // this is a view into part of the statement, from after the id to the ';'
+        const std::string_view statementSegment =
+                std::string_view(sourceData + posAfterId, posEndOfStatement - posAfterId);
+        size_t posOfEqual = statementSegment.find('=');
+        if (posOfEqual == std::string::npos) {
+            // not an assignment statement, so stream to the end of the statement and continue...
+            processedShaderSource << std::string_view(sourceData + pos,
+                    posEndOfStatement + 1 - pos);
+            pos = posEndOfStatement + 1;
+            continue;
+        }
+        posOfEqual += posAfterId; // position in original source overall, not just the segment
+        int constantId = 0;
+        try {
+            constantId = std::stoi(std::string(idStr)); // copy: the view is not null-terminated
+        } catch (const std::invalid_argument& e) {
+            PANIC_POSTCONDITION("Invalid spec constant id '%s' in %s (not a valid integer?): %s",
+                    std::string(idStr).c_str(), std::string(shaderLabel).c_str(), e.what());
+        } catch (const std::out_of_range& e) {
+            PANIC_POSTCONDITION(
+                    "Invalid spec constant id '%s' in %s (not an integer? out of range?): %s",
+                    std::string(idStr).c_str(), std::string(shaderLabel).c_str(), e.what());
+        }
+        const auto newValueItr = specConstants.find(static_cast<uint32_t>(constantId));
+        if (newValueItr == specConstants.end()) {
+            // not going to override the constant,
+            // as the specConstants parameter doesn't specify it. So, we will keep the default
+            // already in the source text
+            // (stream to the end of the statement)...
+            processedShaderSource << std::string_view(sourceData + pos,
+                    posEndOfStatement + 1 - pos);
+            pos = posEndOfStatement + 1;
+            continue;
+        }
+        // need to override the constant...
+        const std::variant<int32_t, float, bool> newValue = newValueItr->second;
+        // stream up to the equal sign...
+        processedShaderSource << std::string_view(sourceData + pos, posOfEqual + 1 - pos);
+        // stream the new value...
+        if (auto* v = std::get_if<int32_t>(&newValue)) {
+            processedShaderSource << " " << *v << "i";
+        } else if (auto* f = std::get_if<float>(&newValue)) {
+            processedShaderSource << " " << *f << "f";
+        } else if (auto* b = std::get_if<bool>(&newValue)) {
+            processedShaderSource << " " << ((*b) ? "true" : "false");
+        }
+        // end the statement...
+        processedShaderSource << ";";
+        // and skip to after the end of the statement in the original source and continue...
+        pos = posEndOfStatement + 1;
+    }
+    return processedShaderSource.str();
+}
+
+/**
+ * Creates a WebGPU shader module for a given "program" "stage", accounting for override constants.
+ * Effectively, this function is responsible for preprocessing the shader source and compiling it.
+ * @param device The WebGPU device, which is the WebGPU API entry point for creating/registering
+ * a shader module
+ * @param program The "program" to compile/create the shader, which includes the shader source
+ * @param stage The stage (e.g. vertex, fragment, etc.) to create the shader module
+ * @param specConstants Override constants to apply when creating/compiling the shader module.
+ * The expectation is that this is consistent with the program's spec constants, just in a map
+ * format for quick access
+ * @return the proper WebGPU shader module compiled/created from the input parameters. This might
+ * wrap a null handle if the shader is not present (if the shader source is empty), such as
+ * a missing fragment or compute shader.
+ */
+[[nodiscard]] wgpu::ShaderModule createShaderModule(wgpu::Device const& device,
+        Program const& program, const ShaderStage stage,
+        std::unordered_map<uint32_t, std::variant<int32_t, float, bool>> const& specConstants) {
+    const char* const programName = program.getName().c_str_safe();
+    std::array<utils::FixedCapacityVector<uint8_t>, Program::SHADER_TYPE_COUNT> const&
+            shaderSource = program.getShadersSource();
+    utils::FixedCapacityVector<uint8_t> const& sourceBytes =
+            shaderSource[static_cast<size_t>(stage)]; // only the source of the requested stage
+    if (sourceBytes.empty()) {
+        return nullptr;// nothing to compile/create, the shader was not provided
+    }
+    std::stringstream labelStream;
+    labelStream << programName << " " << filamentShaderStageToString(stage) << " shader";
+    const auto label = labelStream.str(); // "<program name> <stage> shader", used in logs/panics
+    const std::string processedShaderSource =
+            specConstants.empty()
+                    ? reinterpret_cast<const char*>(sourceBytes.data()) // assumes NUL-terminated source -- TODO confirm
+                    : replaceSpecConstants(label, reinterpret_cast<const char*>(sourceBytes.data()),
+                              specConstants);
+    wgpu::ShaderModuleWGSLDescriptor wgslDescriptor{};
+    wgslDescriptor.code = wgpu::StringView(processedShaderSource);
+    const wgpu::ShaderModuleDescriptor descriptor{
+        .nextInChain = &wgslDescriptor,
+        .label = label.data()
+    };
+    const wgpu::ShaderModule shaderModule = device.CreateShaderModule(&descriptor);
+    const wgpu::Instance instance = device.GetAdapter().GetInstance();
+    // waits (bounded by the timeout below) for the compilation info, so messages are handled before returning
+    const wgpu::WaitStatus waitResult = instance.WaitAny(
+            shaderModule.GetCompilationInfo(wgpu::CallbackMode::WaitAnyOnly,
+                    [&descriptor](auto const& status, // descriptor is only read for its label
+                            wgpu::CompilationInfo const* info) {
+                        switch (status) {
+                            case wgpu::CompilationInfoRequestStatus::CallbackCancelled:
+                                FWGPU_LOGW << "Shader compilation info callback cancelled for "
+                                           << descriptor.label << "?" << utils::io::endl;
+                                return; // cancelled: no messages to report
+                            case wgpu::CompilationInfoRequestStatus::Success:
+                                break; // continue to the message reporting below
+                        }
+                        if (info != nullptr) {
+                            std::stringstream errorStream;
+                            int errorCount = 0;
+                            for (size_t msgIndex = 0; msgIndex < info->messageCount; msgIndex++) {
+                                wgpu::CompilationMessage const& message = info->messages[msgIndex];
+                                switch (message.type) {
+                                    case wgpu::CompilationMessageType::Info:
+                                        FWGPU_LOGI << descriptor.label << ": " << message.message
+                                                   << " line#:" << message.lineNum
+                                                   << " linePos:" << message.linePos
+                                                   << " offset:" << message.offset
+                                                   << " length:" << message.length
+                                                   << utils::io::endl;
+                                        break;
+                                    case wgpu::CompilationMessageType::Warning:
+                                        FWGPU_LOGW
+                                                << "Warning compiling " << descriptor.label << ": "
+                                                << message.message << " line#:" << message.lineNum
+                                                << " linePos:" << message.linePos
+                                                << " offset:" << message.offset
+                                                << " length:" << message.length << utils::io::endl;
+                                        break;
+                                    case wgpu::CompilationMessageType::Error:
+                                        errorCount++; // errors are collected and reported together after the loop
+                                        errorStream << "Error " << errorCount << " : "
+                                                    << std::string_view(message.message)
+                                                    << " line#:" << message.lineNum
+                                                    << " linePos:" << message.linePos
+                                                    << " offset:" << message.offset
+                                                    << " length:" << message.length << "\n";
+                                        break;
+                                }
+                            }
+                            FILAMENT_CHECK_POSTCONDITION(errorCount < 1) // fails if any error message was reported
+                                    << errorCount << " error(s) compiling " << descriptor.label
+                                    << ":\n"
+                                    << errorStream.str();
+                        }
+#if FWGPU_ENABLED(FWGPU_DEBUG_VALIDATION)
+                        FWGPU_LOGD << descriptor.label << " compiled successfully"
+                                   << utils::io::endl;
+#endif
+                    }),
+            SHADER_COMPILATION_TIMEOUT_NANOSECONDS);
+    switch (waitResult) { // outcome of the wait itself, separate from the compiler messages above
+        case wgpu::WaitStatus::Success:
+            break;
+        case wgpu::WaitStatus::Error:
+            PANIC_POSTCONDITION("Error creating/compiling shader %s (detected after wait).",
+                    descriptor.label.data);
+            break;
+        case wgpu::WaitStatus::TimedOut:
+            PANIC_POSTCONDITION("Timed out creating/compiling shader %s", descriptor.label.data);
+            break;
+    }
+    FILAMENT_CHECK_POSTCONDITION(shaderModule) << "Failed to create " << descriptor.label;
+    return shaderModule;
+}
+
+/**
+ * Builds a lookup table of specialization constants keyed by constant id.
+ * Entries already present in the output map are kept: emplace never overwrites, so if an id is
+ * already stored (or occurs more than once in the input) the first stored value wins.
+ * @param specConstants Specialization constants as provided by the program (read only)
+ * @param outConstantById Map receiving one entry per constant id
+ */
+void toMap(utils::FixedCapacityVector<Program::SpecializationConstant> const& specConstants,
+        std::unordered_map<uint32_t, std::variant<int32_t, float, bool>>& outConstantById) {
+    // pre-size the table for the incoming constants
+    outConstantById.reserve(specConstants.size());
+    for (auto it = specConstants.begin(); it != specConstants.end(); ++it) {
+        outConstantById.emplace(it->id, it->value);
+    }
+}
+
+}// namespace
+
+// Creates the vertex, fragment and compute shader modules of the given program, in that order.
+// A stage without source ends up with a null module (see createShaderModule).
+WebGPUProgram::WebGPUProgram(wgpu::Device const& device, Program const& program)
+    : HwProgram{ program.getName() } {
+    // index the specialization constants by id once; every stage receives the same table
+    std::unordered_map<uint32_t, std::variant<int32_t, float, bool>> constantsById;
+    toMap(program.getSpecializationConstants(), constantsById);
+    // local helper so that each stage is created with identical arguments
+    const auto createStage = [&](const ShaderStage stage) {
+        return createShaderModule(device, program, stage, constantsById);
+    };
+    // TODO consider creating/compiling these shaders in parallel
+    vertexShaderModule = createStage(ShaderStage::VERTEX);
+    fragmentShaderModule = createStage(ShaderStage::FRAGMENT);
+    computeShaderModule = createStage(ShaderStage::COMPUTE);
+}
+
+}// namespace filament::backend
--- a/filament/backend/src/webgpu/WebGPUProgram.h
+++ b/filament/backend/src/webgpu/WebGPUProgram.h
@@ -0,0 +1,39 @@
+/*
+* Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_WEBGPUPROGRAM_H
+#define TNT_FILAMENT_BACKEND_WEBGPUPROGRAM_H
+
+#include "DriverBase.h"
+
+#include <webgpu/webgpu_cpp.h>
+
+namespace filament::backend {
+
+class Program;
+
+/**
+ * WebGPU backend program handle: one shader module per stage, created by the constructor from
+ * a Program. A module stays null when the program provides no source for that stage.
+ */
+class WebGPUProgram final : public HwProgram {
+public:
+    // creates the shader modules for every stage the program provides
+    WebGPUProgram(wgpu::Device const&, Program const&);
+
+    wgpu::ShaderModule vertexShaderModule{ nullptr };   // vertex stage, or null
+    wgpu::ShaderModule fragmentShaderModule{ nullptr }; // fragment stage, or null
+    wgpu::ShaderModule computeShaderModule{ nullptr };  // compute stage, or null
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_WEBGPUPROGRAM_H
--- a/filament/backend/src/webgpu/WebGPURenderPrimitive.h
+++ b/filament/backend/src/webgpu/WebGPURenderPrimitive.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_FILAMENT_BACKEND_WEBGPURENDERPRIMITIVE_H
+#define TNT_FILAMENT_BACKEND_WEBGPURENDERPRIMITIVE_H
+
+#include "DriverBase.h"
+
+namespace filament::backend {
+
+class WebGPUVertexBuffer;
+class WebGPUIndexBuffer;
+
+/**
+ * WebGPU backend render primitive: refers to the vertex and index buffers to draw with.
+ * Both pointers start out null; nothing in this struct establishes ownership of the buffers.
+ */
+struct WebGPURenderPrimitive : public HwRenderPrimitive {
+    WebGPUVertexBuffer* vertexBuffer{ nullptr }; // null until a vertex buffer is assigned
+    WebGPUIndexBuffer* indexBuffer{ nullptr };   // null until an index buffer is assigned
+};
+
+} // namespace filament::backend
+
+#endif // TNT_FILAMENT_BACKEND_WEBGPURENDERPRIMITIVE_H
--- a/filament/backend/src/webgpu/WebGPURenderTarget.cpp
+++ b/filament/backend/src/webgpu/WebGPURenderTarget.cpp
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) 2025 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "WebGPURenderTarget.h"
+
+#include <backend/DriverEnums.h>
+#include <backend/TargetBufferInfo.h>
+
+#include <private/backend/BackendUtils.h>
+#include <utils/BitmaskEnum.h>
+#include <utils/Panic.h>
+#include <utils/debug.h>
+
+#include <webgpu/webgpu_cpp.h>
+
+#include <cstdint>
+
+namespace filament::backend {
+
+WebGPURenderTarget::WebGPURenderTarget(const uint32_t width, const uint32_t height,
+        const uint8_t samples, const uint8_t layerCount, MRT const& colorAttachmentsMRT,
+        Attachment const& depthAttachmentInfo, Attachment const& stencilAttachmentInfo)
+    : HwRenderTarget{ width, height },
+      mDefaultRenderTarget{ false }, // this overload always describes a custom render target
+      mSamples{ samples },
+      mLayerCount{ layerCount },
+      mColorAttachments{ colorAttachmentsMRT },
+      mDepthAttachment{ depthAttachmentInfo },
+      mStencilAttachment{ stencilAttachmentInfo } {
+    // TODO consider making this an array; until then, reserve room for the maximum attachment count
+    mColorAttachmentDescriptors.reserve(MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT);
+}
+
+// Default constructor for the default render target: 0x0 size, 1 sample, 1 layer, no attachment infos given
+WebGPURenderTarget::WebGPURenderTarget()
+    : HwRenderTarget{ 0, 0 },
+      mDefaultRenderTarget{ true }, // makes setUpRenderPassAttachments use the default texture views
+      mSamples{ 1 },
+      mLayerCount{ 1 } {}
+
+// Chooses the load operation for the given buffer(s): Clear when the pass either clears them or
+// discards their content at the start, Load otherwise.
+wgpu::LoadOp WebGPURenderTarget::getLoadOperation(RenderPassParams const& params,
+        const TargetBufferFlags bufferToOperateOn) {
+    // A discard at the start is also mapped to Clear (wgpu::LoadOp::Undefined would be the
+    // alternative if clearing on discard is not desired).
+    const bool clearOnLoad = any(params.flags.clear & bufferToOperateOn) ||
+                             any(params.flags.discardStart & bufferToOperateOn);
+    return clearOnLoad ? wgpu::LoadOp::Clear : wgpu::LoadOp::Load;
+}
+
+// Chooses the store operation for the given buffer(s): Discard when the pass discards their
+// content at the end, Store otherwise.
+wgpu::StoreOp WebGPURenderTarget::getStoreOperation(RenderPassParams const& params,
+        const TargetBufferFlags bufferToOperateOn) {
+    const bool discardedAtEnd = any(params.flags.discardEnd & bufferToOperateOn);
+    return discardedAtEnd ? wgpu::StoreOp::Discard : wgpu::StoreOp::Store;
+}
+
+/**
+ * Fills in the color and depth/stencil attachments of a render pass descriptor for this render
+ * target.
+ *
+ * The default render target uses the default views; a custom render target uses the custom
+ * views. The attachment descriptors are stored in members of this object and outDescriptor only
+ * points at them, so this object must stay alive (and this function must not be called again)
+ * while outDescriptor is in use.
+ *
+ * @param outDescriptor descriptor whose colorAttachmentCount, colorAttachments and
+ *                      depthStencilAttachment fields are written
+ * @param params provides the clear/discard flags, clear values and read-only depth/stencil flags
+ * @param defaultColorTextureView color view for the default render target (must be non-null then)
+ * @param defaultDepthStencilTextureView optional depth/stencil view for the default render target
+ * @param customColorTextureViews color views of a custom render target; null entries are skipped
+ * @param customColorTextureViewCount number of entries in customColorTextureViews
+ * @param customDepthTextureView depth view of a custom render target, may be null
+ * @param customStencilTextureView stencil view of a custom render target, may be null; must not
+ *                                 be set together with customDepthTextureView
+ * @param customDepthFormat texture format behind customDepthTextureView
+ * @param customStencilFormat texture format behind customStencilTextureView
+ */
+void WebGPURenderTarget::setUpRenderPassAttachments(wgpu::RenderPassDescriptor& outDescriptor,
+        RenderPassParams const& params, wgpu::TextureView const& defaultColorTextureView,
+        wgpu::TextureView const& defaultDepthStencilTextureView,
+        wgpu::TextureView const* customColorTextureViews, uint32_t customColorTextureViewCount,
+        wgpu::TextureView const& customDepthTextureView,
+        wgpu::TextureView const& customStencilTextureView,
+        const wgpu::TextureFormat customDepthFormat,
+        const wgpu::TextureFormat customStencilFormat) {
+    // start from a clean state; the descriptors are rebuilt on every call
+    mColorAttachmentDescriptors.clear();
+    mHasDepthStencilAttachment = false;
+
+    if (mDefaultRenderTarget) {
+        assert_invariant(defaultColorTextureView);
+        mColorAttachmentDescriptors.push_back({ .view = defaultColorTextureView,
+            .resolveTarget = nullptr,
+            .loadOp = WebGPURenderTarget::getLoadOperation(params, TargetBufferFlags::COLOR0),
+            .storeOp = WebGPURenderTarget::getStoreOperation(params, TargetBufferFlags::COLOR0),
+            .clearValue = { params.clearColor.r, params.clearColor.g, params.clearColor.b,
+                params.clearColor.a } });
+
+        if (defaultDepthStencilTextureView) {
+            mDepthStencilAttachmentDescriptor = {
+                .view = defaultDepthStencilTextureView,
+                .depthLoadOp = WebGPURenderTarget::getLoadOperation(params, TargetBufferFlags::DEPTH),
+                .depthStoreOp =
+                        WebGPURenderTarget::getStoreOperation(params, TargetBufferFlags::DEPTH),
+                .depthClearValue = static_cast<float>(params.clearDepth),
+                .depthReadOnly =
+                        (params.readOnlyDepthStencil & RenderPassParams::READONLY_DEPTH) > 0,
+                .stencilLoadOp =
+                        WebGPURenderTarget::getLoadOperation(params, TargetBufferFlags::STENCIL),
+                .stencilStoreOp =
+                        WebGPURenderTarget::getStoreOperation(params, TargetBufferFlags::STENCIL),
+                .stencilClearValue = params.clearStencil,
+                .stencilReadOnly =
+                        (params.readOnlyDepthStencil & RenderPassParams::READONLY_STENCIL) > 0,
+            };
+            mHasDepthStencilAttachment = true;
+        }
+    } else {// Custom Render Target
+        for (uint32_t i = 0; i < customColorTextureViewCount; ++i) {
+            if (customColorTextureViews[i]) {
+                mColorAttachmentDescriptors.push_back({ .view = customColorTextureViews[i],
+                    // .resolveTarget = nullptr; // TODO: MSAA resolve for custom RT
+                    .loadOp = WebGPURenderTarget::getLoadOperation(params, getTargetBufferFlagsAt(i)),
+                    .storeOp =
+                            WebGPURenderTarget::getStoreOperation(params, getTargetBufferFlagsAt(i)),
+                    .clearValue = { .r = params.clearColor.r,
+                        .g = params.clearColor.g,
+                        .b = params.clearColor.b,
+                        .a = params.clearColor.a } });
+            }
+        }
+
+        FILAMENT_CHECK_POSTCONDITION(!(customDepthTextureView && customStencilTextureView))
+                << "WebGPU CANNOT support separate texture views for depth + stencil. depth + "
+                   "stencil needs to be in one texture view";
+
+        // A combined depth+stencil format carries both aspects, so a view provided for one
+        // aspect implies the other aspect is present as well.
+        const auto isCombinedDepthStencil = [](const wgpu::TextureFormat format) {
+            return format == wgpu::TextureFormat::Depth24PlusStencil8 ||
+                   format == wgpu::TextureFormat::Depth32FloatStencil8;
+        };
+
+        // stencil is present if given directly, or if the depth view's format also has stencil
+        const bool hasStencil =
+                customStencilTextureView || isCombinedDepthStencil(customDepthFormat);
+
+        // depth is present if given directly, or if the stencil view's format also has depth
+        // (both comparisons must look at the stencil view's format)
+        const bool hasDepth =
+                customDepthTextureView || isCombinedDepthStencil(customStencilFormat);
+
+        if (customDepthTextureView || customStencilTextureView) {
+            assert_invariant((hasDepth || hasStencil) &&
+                             "Depth or Stencil view without a valid texture format");
+            mDepthStencilAttachmentDescriptor = {};
+            // at most one of the two views is set (checked above); use whichever it is
+            mDepthStencilAttachmentDescriptor.view =
+                    customDepthTextureView ? customDepthTextureView : customStencilTextureView;
+
+            if (hasDepth) {
+                mDepthStencilAttachmentDescriptor.depthLoadOp =
+                        WebGPURenderTarget::getLoadOperation(params, TargetBufferFlags::DEPTH);
+                mDepthStencilAttachmentDescriptor.depthStoreOp =
+                        WebGPURenderTarget::getStoreOperation(params, TargetBufferFlags::DEPTH);
+                mDepthStencilAttachmentDescriptor.depthClearValue =
+                        static_cast<float>(params.clearDepth);
+                mDepthStencilAttachmentDescriptor.depthReadOnly =
+                        (params.readOnlyDepthStencil & RenderPassParams::READONLY_DEPTH) > 0;
+            } else {
+                // no depth aspect: leave the operations undefined and mark it read-only
+                mDepthStencilAttachmentDescriptor.depthLoadOp = wgpu::LoadOp::Undefined;
+                mDepthStencilAttachmentDescriptor.depthStoreOp = wgpu::StoreOp::Undefined;
+                mDepthStencilAttachmentDescriptor.depthReadOnly = true;
+            }
+
+            if (hasStencil) {
+                mDepthStencilAttachmentDescriptor.stencilLoadOp =
+                        WebGPURenderTarget::getLoadOperation(params, TargetBufferFlags::STENCIL);
+                mDepthStencilAttachmentDescriptor.stencilStoreOp =
+                        WebGPURenderTarget::getStoreOperation(params, TargetBufferFlags::STENCIL);
+                mDepthStencilAttachmentDescriptor.stencilClearValue = params.clearStencil;
+                mDepthStencilAttachmentDescriptor.stencilReadOnly =
+                        (params.readOnlyDepthStencil & RenderPassParams::READONLY_STENCIL) > 0;
+            } else {
+                // no stencil aspect: leave the operations undefined and mark it read-only
+                mDepthStencilAttachmentDescriptor.stencilLoadOp = wgpu::LoadOp::Undefined;
+                mDepthStencilAttachmentDescriptor.stencilStoreOp = wgpu::StoreOp::Undefined;
+                mDepthStencilAttachmentDescriptor.stencilReadOnly = true;
+            }
+            mHasDepthStencilAttachment = true;
+        }
+    }
+
+    // outDescriptor points into member storage; see the lifetime note in the function comment
+    outDescriptor.colorAttachmentCount = mColorAttachmentDescriptors.size();
+    outDescriptor.colorAttachments = mColorAttachmentDescriptors.data();
+    outDescriptor.depthStencilAttachment =
+            mHasDepthStencilAttachment ? &mDepthStencilAttachmentDescriptor : nullptr;
+
+    // descriptor.sampleCount was removed from the core spec. If your webgpu.h still has it,
+    // and your Dawn version expects it, you might need to set it here based on this->samples.
+    // e.g., descriptor.sampleCount = this->samples;
+}
+
+}// namespace filament::backend
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
bridgewaterrobbie	c611b4df6f	Fixup shader code	2025-06-10 18:04:41 -04:00
bridgewaterrobbie	6b91d7ca2d	Reduce the number of prepared mipmap pipelines- this should be configurable really if we want to preserve this logic in the long run.	2025-06-10 16:54:28 -04:00
bridgewaterrobbie	df264372d9	Add check for if the format (Or non srgb format) is storage compatible	2025-06-10 16:54:03 -04:00
bridgewaterrobbie	bf9435d0fd	Never merge: Arbitrarily raise max storage textures per stage for easier tinkering, though this shouldn't be relied upon, spec limit is 4	2025-06-10 16:18:17 -04:00
bridgewaterrobbie	726177fead	Hacky version of playing with formats and usages.	2025-06-10 15:58:02 -04:00
bridgewaterrobbie	2ef01ae1e8	Try to go in multiple passes to avoid hitting limits	2025-06-10 15:57:48 -04:00
bridgewaterrobbie	1374e8acef	tinker2	2025-06-10 14:20:27 -04:00
bridgewaterrobbie	622beb211a	WIP: Use converted JS library for compute pass mipmap generation	2025-06-10 14:16:57 -04:00
Juan Caldas	b310e3d24a	webgpu: Add missing WEBGPU cases (#8839 )	2025-06-10 13:56:48 -04:00
Juan Caldas	b359d77669	webgpu: Adjust the wrap modes (#8844 ) * Change the WebGPU wrap modes	2025-06-10 11:09:37 -04:00
Sungun Park	e57a4061cc	Add missing header (#8842 )	2025-06-09 21:01:29 +00:00
Powei Feng	53f82e6b71	vk: rename selectMemory params (#8835 ) Use more accurate parameter names. 'types' indicate the types of memory we are considering. 'reqs' indicate the requirements each type must meet. BUGS=401579988	2025-06-09 20:33:08 +00:00
rafadevai	e88bba5940	VK: Disable VMA internal synchronization (#8836 ) The vulkan backend is currently single threads, so there's no need for VMA to use synchronization internally. This will improve the CPU performance when going through VMA. Co-authored-by: Serge Metral <sergemetral@google.com>	2025-06-09 20:05:02 +00:00
Matthew Hoffman	0b83454d08	Fix backend tests on macos vulkan. (#8815 )	2025-06-09 19:05:03 +00:00
Matthew Hoffman	1ef0d36f79	Improve and document the backend test helper script. (#8816 )	2025-06-09 18:19:30 +00:00
Powei Feng	e350737d1e	Remove texture pbd check for compressed textures (#8837 ) This will also fix the failing ktx test.	2025-06-09 17:47:55 +00:00
Andy Hovingh	cb203b13de	webgpu: refactor: sort CMake source files	2025-06-09 11:05:15 -05:00
Andy Hovingh	92a0d7bfc3	webgpu: refactor: move WebGPU render target class to its own source+header files.	2025-06-09 11:05:15 -05:00
Andy Hovingh	c1a7ed2799	webgpu: refactor: move WebGPU texture class to its own source+header files.	2025-06-09 11:05:15 -05:00
Andy Hovingh	692301bdf1	webgpu: refactor: move WebGPU descriptor set related classes to their own source+header files.	2025-06-09 11:05:15 -05:00
Andy Hovingh	05b87b3c42	webgpu: refactor: move WebGPU buffer classes to their own source+header files.	2025-06-09 11:05:15 -05:00
Andy Hovingh	9825297e70	webgpu: refactor: move WebGPUProgram to its own source+header files.	2025-06-09 11:05:15 -05:00
Powei Feng	6a59c887de	vk: implement proper offset when uploading to texture (#8830 ) We didn't take into account the top/left/stride parameters of the PixelBufferDescriptor.	2025-06-06 20:46:45 +00:00
bridgewaterrobbie	33b4b46220	Create simple gltf viewer based on gltf_instances, primarily to facilitate webgpu development	2025-06-06 14:51:17 -04:00
Powei Feng	06170a25f7	Fix PBD size check in Texture (#8826 ) Bug found when running ./samples/heightfield and press add pixel buffer padding.	2025-06-06 18:20:05 +00:00
Konrad Piascik	418dad883d	webgpu: Fix Android material compilation Instead of requiring GL_EXT_shader_non_constant_global_initializers we can just make the new variables const BUGS=[421457710]	2025-06-06 13:46:37 -04:00
Jeremy Nelson	447661efed	Use colorInfos.layer	2025-06-05 22:18:34 -07:00
Jeremy Nelson	bd61ab691c	add Default miplevel and array layer	2025-06-05 22:18:34 -07:00
Jeremy Nelson	2a04cee97f	rename member variables	2025-06-05 22:18:34 -07:00
Jeremy Nelson	5ae7760752	fix samplerType check	2025-06-05 22:18:34 -07:00
Jeremy Nelson	56b8bf5b4b	noexcept to makeTextureView	2025-06-05 22:18:34 -07:00
Jeremy Nelson	6af38fa47f	renaming param to samplerType	2025-06-05 22:18:34 -07:00
Jeremy Nelson	0c01799c50	overload getTextureView	2025-06-05 22:18:34 -07:00
Jeremy Nelson	e2519e0eed	Update WebGPUHandles.cpp	2025-06-05 22:18:34 -07:00
Jeremy Nelson	6178ae0459	Use colorInfos.layer # Conflicts: # filament/backend/src/webgpu/WebGPUHandles.cpp	2025-06-05 22:18:34 -07:00
Eliza	8a1a0b0fd2	materials: introduce mutable spec constants (#8795 ) * materials: introduce mutable spec constants Rationale & design of this feature has been discussed internally. The current implementation uses a `FixedCapacityVector` to store the new program handles, but I wouldn't object to replacing it with a hasmap as discussed offline. I have compiled but not tested this yet on Android, so I'm not certain that the API bindings are correctly wired up. * materials: mutable spec constant feedback * materials: address mutable spec constant comments	2025-06-06 03:52:20 +00:00
Powei Feng	a7c3cf4173	Ensure maximum number of vertex buffer does not exceed 8 (#8824 )	2025-06-05 21:16:38 +00:00
bridgewaterrobbie	57b68eeba3	webgpu: Add required feature RG11B10UfloatRenderable for gltf_viewer, with comment on why	2025-06-05 15:08:03 -04:00
Juan Caldas	ad8c9ce4e0	Add common CLI Args parser for the samples (#8819 )	2025-06-05 18:18:41 +00:00
Powei Feng	7dc0f2cc86	vk: fix two validation error (#8825 ) On starting gltf_viewer on mac, there were validation errors on 1) copying to a depth texture using the aspect COLOR. 2) querying formats that required extensions without enabling those extensions.	2025-06-05 17:50:40 +00:00
bridgewaterrobbie	37110799ef	webgpu: Correct stencil state handling for depth-only formats	2025-06-05 13:22:26 -04:00
Anish Goyal	88a06ec8e7	Switch to block-based stage-pool for Vulkan (#8742 ) * Switch to block-based stage-pool for Vulkan Instead of allocating a staging buffer every time one is needed, allocate a large (8mb) block of memory, and divvy it up as needed. We will make this configurable in the future, to allow for tuning for different apps as needed. * Address PR comments: use fvkmemory::Resource Instead of having the child block be a unique_ptr that we create a separate container for within the command buffers, just have the stage block segments be fvkmemory::Resource instances. * Address PR comments for staging buff change - As per discussion with @poweifeng, change the name of a variable called "stage" to "stageSegment" for clarity - As per discussion with @rafadevai, change the order of terminate calls in VulkanDriver to better reflect cleanup order of some objects. * Align stage pool to nonCoherentAtomSize In order to prevent flushing more atoms than were modified when writing data to host-mapped memory in a staging buffer, ensure that all segments allocated are aligned to nonCoherentAtomSize. Also - fix merge conflict compile errors. --------- Co-authored-by: Serge Metral <sergemetral@google.com>	2025-06-05 09:15:26 -07:00
bridgewaterrobbie	38705d6226	Add todos for unimplemented funcs that we haven't yet started on	2025-06-05 10:48:59 -04:00
bridgewaterrobbie	d325bb43cf	Add destroy texture implementation	2025-06-05 10:48:59 -04:00
Sungun Park	0c52d3c9bf	Fix: Submit callback handle on completion (#8818 ) This reverts a behavioral regression introduced in commit `c3542b135e`, which deferred callback submission until the program was first used. This commit restores the correct behavior by submitting the callback handle as soon as the token's work is complete. This occurs either upon successful `gl.program` population or via cancellation, ensuring the caller is properly notified that the resource loading operation has concluded.	2025-06-04 16:44:48 +00:00
Konrad Piascik	770176a1e3	Fix Windows build error C2512	2025-06-04 11:17:05 -04:00
bridgewaterrobbie	672603f9b4	Generate mipmaps when requested, deferring as needed	2025-06-03 17:45:35 -04:00
Powei Feng	0ecf6c46e2	github: Fix commit message parsing (#8817 )	2025-06-03 14:24:29 -07:00
Andy Hovingh	8b87a54c1a	webgpu: fix vertex buffer info for hellopbr	2025-06-03 16:08:12 -05:00
Powei Feng	cd1d3e8749	github: update windows runner due to 2019 being "stuck" (#8814 )	2025-06-03 13:27:31 -07:00
rafadevai	e88072cec0	VK: Introduce VulkanBufferCache (#8757 ) This class will allow better tracking of memory allocations and recycling of buffers. Currently only the uniform buffers are recycled. It will eventually allow us to dynamically change the underlying GPU buffer of a VulkanBuffer when updating an UBO for uniforms and also keep track which of those buffers are still inflight and which ones are ready to be reuse for an UBO. Its the first step on moving towards by passing the staging buffer in UMA.	2025-06-03 12:36:50 -07:00
Powei Feng	1d2e165d99	Release Filament 1.61.0	2025-06-03 12:02:10 -07:00
Powei Feng	cfc4ac5511	Add missing include in JobSystem.cpp (#8812 )	2025-06-03 11:44:42 -07:00
Powei Feng	73a03d7af0	Flip conditional for fixing missing samplers (#8811 )	2025-06-03 11:19:29 -07:00
Powei Feng	56dc348cc8	utils: add additional guards for Tracing (#8810 ) The addition JobSystem.cpp allows for defining FILAMENT_TRACING_ENABLED across targets. Adding FILAMENT_TRACING_ENABLED to the #if in Tracing.h prevents perfetto from being included.	2025-06-03 11:18:11 -07:00
Juan Caldas	1b46ddd8b6	Webgpu: Check for Shadows (#8807 ) BUGS=[397432947]	2025-06-03 11:57:34 +00:00
Powei Feng	a68aaa114e	renderdiff: fix breakage in parsing commit message (#8808 )	2025-06-02 15:55:35 -07:00
Powei Feng	3da7dabb2a	renderdiff: enable update goldens on commit merge (#8771 )	2025-06-02 14:12:26 -07:00
bridgewaterrobbie	1e2311da3d	Fix the fact that isSample might be combined with the other flags	2025-06-02 16:18:08 -04:00
bridgewaterrobbie	a38c55c82d	Fix accidental use of uninitialized HwTexture::target	2025-06-02 16:18:08 -04:00
Powei Feng	cd22478e4f	renderdiff: add viewer for image differences (#8768 ) - Modify the compare script to output more details of a comparison. This will include the source/golden directory, the comparison directory (the new renderings), and a file path to difference images if the golden does not match the rendered image. - The image_diff script can now output a TIFF that is the difference of two input TIFFs. - Add a viewer for examining the differences between rendered output and golden images. - The viewer consists of a simple server of web API endpoints for querying difference results (along with rendered images in TIFF). - And a web-based (html + lit-element) UI for looking at the rendered images and differences.	2025-06-02 19:37:46 +00:00
bridgewaterrobbie	60036c75fe	Remove asserts that throw on shadowtest. Investigate them later	2025-06-02 14:17:39 -04:00
Powei Feng	044f2aeb7c	gl: keep external texture id in sync (#8803 ) (Attributed to @dsternfeld7)	2025-06-02 17:18:00 +00:00
Juan Caldas	c73d11858e	Webgpu: Move Filter mode check (#8804 ) BUGS=[397432947]	2025-06-02 14:10:33 +00:00
Powei Feng	23b67be41a	Fix always bind uniform logic in MaterialInstance (#8801 ) The logic for duplicating UBO was omitted after #8739	2025-06-01 23:27:55 -07:00
bridgewaterrobbie	7d53baad5c	Add float32 filtering feature, so that we can safely assume non-int non-depth types are filtering.	2025-05-30 17:53:04 -04:00
doriswu	1ae33a23fe	Use highp for square distance	2025-05-30 13:52:44 -07:00
Rafael Dominguez	351d9287af	Update the clang-format rule for comments Make sure theres a space before the comment So now instead of getting #endif// comment it will be #endif // comment	2025-05-30 13:52:01 -07:00
Mathias Agopian	e7e5004946	attempt to fix external streams with protected context There was several issues: 1) when we're switching contexts (e.g. between protect and regular) we needed up reattach all SurfaceView (i.e. streams), because they need to be attached on currently active context. 2) reattaching, because it's implemented as detach + attach, would destroy the current gl texture id and create a new one. However, because of the way descriptor-sets were implemented, that GL texture id was kept inside the descriptor, later leading to using a destroyed texture id. The fix here is to store texture handles in descriptors, so that we can update the id independently. 3) we also needed to invalidate all bound descriptor sets because it's now possible for descriptor sets to have outdated descriptors	2025-05-30 13:15:40 -07:00
Juan Caldas	b9be9b4f2d	Set Render Pipeline	2025-05-30 15:06:50 -05:00
Powei Feng	ce88a56579	Fix leaking dummy depth array texture (#8796 )	2025-05-30 17:19:55 +00:00
Powei Feng	689e769f9a	utils: Fix FixedCircularBufferTest.Exceptions (#8794 ) Switching from assert_invariant to assert produced a different error message.	2025-05-30 10:02:31 -07:00
Jeremy Nelson	2f0a8b19b5	use params in draw call	2025-05-30 11:49:05 -05:00
Andy Hovingh	1bed68afb7	webgpu: support user-defined spec constants in addition to native Filament ones	2025-05-30 11:33:14 -05:00
Konrad Piascik	130e02da5c	Add helper script to linux documentation (#8783 )	2025-05-30 14:16:36 +00:00
bridgewaterrobbie	8e06a68446	Relocate calling mRenderPassEncoder.SetBindGroup to draw call, deferring it to handle non-default render targets being used.	2025-05-29 20:23:06 -04:00
bridgewaterrobbie	656b50b304	Manually remove Tint annotations that Dawn complains about when consuming. Dawn complains when consuming these lines with WebGPU device error: ErrorType::Validation Error while parsing WGSL: :24:4 error: expected attribute Did you mean 'size'? Possible values: 'align', 'binding', 'blend_src', 'builtin', 'color', 'compute', 'diagnostic', 'fragment', 'group', 'id', 'input_attachment_index', 'interpolate', 'invariant', 'location', 'must_use', 'size', 'vertex', 'workgroup_size' @stride(16) @internal(disable_validation__ignore_stride) And we don't see a better workaround at this time.	2025-05-29 17:59:12 -04:00
Mathias Agopian	880db4ec43	fix a buffer overflow during init the default cubemap has RGBA pixels (4 bytes per pixel).	2025-05-29 14:27:38 -07:00
Syed Idris Shah	2ce294720d	vertex buffer info hardening. Implement vertexbufferinfo taking both interleaved and block attributes into account. Handle the unused buffers by dedicating a slot for that. Update the usage of vertexbufferinfo for setVertexBuffer call	2025-05-29 15:51:55 -05:00
Juan Caldas	eb07decb4b	wgpu: Ignore spec constants greater than 10 (#8789 )	2025-05-29 15:58:35 -04:00
Mathias Agopian	47930edf70	don't use assert_invariant in public headers this is to eventually suppress the dependency on utils/debug.h from public headers	2025-05-29 09:56:52 -07:00
Mathias Agopian	05b89905d8	fix a use after free of texture data during init (#8786 )	2025-05-28 22:32:24 -07:00
Powei Feng	3e0df36c1c	Add option to disable GTAO (#8785 )	2025-05-28 22:35:14 +00:00
Powei Feng	c9ce384f14	vk: add missing header (#8781 )	2025-05-28 22:17:55 +00:00
Sungun Park	6a6bdd5be7	Fix broken android samples (#8784 ) that use DescriptorType::SAMPLER_EXTERNAL.	2025-05-28 20:56:20 +00:00
Sungun Park	d56ade924e	Add checks for MSAA (#8782 )	2025-05-28 19:48:12 +00:00
Andy Hovingh	fe561e3e27	webgpu: support selection from multiple adapters/graphics devices	2025-05-28 12:52:04 -05:00
Jeremy Nelson	1d98e350e4	Set stencil ops to undefined if nonexistent	2025-05-27 16:28:01 -07:00
Matthew Hoffman	d34e018acb	Replace 512 magic number in backend tests. (#8775 )	2025-05-27 21:39:50 +00:00
Mathias Agopian	169c8d57ff	assert vertex attribute alignment offset and size must now be a multiple of 4	2025-05-27 10:19:53 -07:00
Mathias Agopian	a1e0cfa33c	fix a few noexcept through out the code base	2025-05-27 10:18:12 -07:00
Sungun Park	6d44db3ca0	Fix a compile error This is a fix for the compile error caused by `86a500c846`	2025-05-23 10:33:14 -07:00
Sungun Park	a261429d06	Add multiview MSAA support for custom swapchain (#8767 ) BUGS=[417311684]	2025-05-23 00:42:04 +00:00
Matthew Hoffman	69d0de3c6d	In backend tests replace calls to draw with draw2. (#8765 ) BUGS=[398199600]	2025-05-22 13:14:52 -05:00