Fixup shader code

Reduce the number of prepared mipmap pipelines- this should be configurable really if we want to preserve this logic in the long run.
Add check for if the format (Or non srgb format) is storage compatible
2025-06-10 18:04:41 -04:00 · 2025-06-10 16:54:28 -04:00 · 2025-06-10 16:54:03 -04:00 · 2025-06-10 16:18:17 -04:00 · 2025-06-10 15:58:02 -04:00 · 2025-06-10 15:57:48 -04:00
7 changed files with 1021 additions and 42 deletions
--- a/filament/backend/CMakeLists.txt
+++ b/filament/backend/CMakeLists.txt
@@ -286,6 +286,7 @@ if (FILAMENT_SUPPORTS_WEBGPU)
            src/webgpu/WebGPUVertexBuffer.h
            src/webgpu/WebGPUVertexBufferInfo.cpp
            src/webgpu/WebGPUVertexBufferInfo.h
+            src/webgpu/SpdMipmapGenerator.cpp
    )
    if (WIN32)
        list(APPEND SRCS src/webgpu/platform/WebGPUPlatformWindows.cpp)
--- a/filament/backend/src/webgpu/SpdMipmapGenerator.cpp
+++ b/filament/backend/src/webgpu/SpdMipmapGenerator.cpp
@@ -0,0 +1,795 @@
+#include "SpdMipmapGenerator.h"
+#include <sstream>
+#include <stdexcept>
+// C++ port of https://github.com/JolifantoBambla/webgpu-spd for early experiments- do not merge like this
+namespace spd {
+
+    // Helper to convert enums to strings for map keys or shader code
+    const char* to_string(wgpu::TextureFormat format) {
+        switch (format) {
+            case wgpu::TextureFormat::RGBA8Unorm: return "rgba8unorm";
+            case wgpu::TextureFormat::BGRA8Unorm: return "bgra8unorm";
+            case wgpu::TextureFormat::R32Float: return "r32float";
+            case wgpu::TextureFormat::RG32Float: return "rg32float";
+            case wgpu::TextureFormat::RGBA32Float: return "rgba32float";
+            case wgpu::TextureFormat::R16Float: return "r16float";
+            case wgpu::TextureFormat::RG16Float: return "rg16float";
+            case wgpu::TextureFormat::RGBA16Float: return "rgba16float";
+            // Add other formats as needed
+            default: return "rgba8unorm";
+        }
+    }
+
+
+    std::string MipmapGenerator::GetFilterCode(SPDFilter filter) {
+        switch (filter) {
+            case SPDFilter::Min:
+                return R"(
+    fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> {
+        return min(min(v0, v1), min(v2, v3));
+    }
+    )";
+            case SPDFilter::Max:
+                return R"(
+    fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> {
+        return max(max(v0, v1), max(v2, v3));
+    }
+    )";
+            case SPDFilter::MinMax:
+                 return R"(
+    fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> {
+        let max4 = max(max(v0.xy, v1.xy), max(v2.xy, v3.xy));
+        return vec4<SPDScalar>(min(min(v0.x, v1.x), min(v2.x, v3.x)), max(max4.x, max4.y), 0.0, 0.0);
+    }
+    )";
+            case SPDFilter::Average:
+            default:
+                return R"(
+    fn spd_reduce_4(v0: vec4<SPDScalar>, v1: vec4<SPDScalar>, v2: vec4<SPDScalar>, v3: vec4<SPDScalar>) -> vec4<SPDScalar> {
+        return (v0 + v1 + v2 + v3) * 0.25;
+    }
+    )";
+        }
+    }
+
+
+    MipmapGenerator::MipmapGenerator(const wgpu::Device& device) : m_device(device) {
+        wgpu::BindGroupLayoutEntry bglEntry{};
+        bglEntry.binding = 0;
+        bglEntry.visibility = wgpu::ShaderStage::Compute;
+        bglEntry.buffer.type = wgpu::BufferBindingType::Uniform;
+        bglEntry.buffer.minBindingSize = 16;
+
+        wgpu::BindGroupLayoutDescriptor bglDesc{};
+        bglDesc.entryCount = 1;
+        bglDesc.entries = &bglEntry;
+        m_internalResourcesBindGroupLayout = m_device.CreateBindGroupLayout(&bglDesc);
+    }
+
+    void MipmapGenerator::PreparePipelines(wgpu::TextureFormat format, SPDFilter filter, bool halfPrecision) {
+        SPDScalarType scalarType = SanitizeScalarType(format, halfPrecision);
+        // Prepare for a reasonable number of mips
+        for (uint32_t i = 1; i <= 4; ++i) {
+            GetOrCreatePipeline(format, filter, i, scalarType);
+        }
+    }
+    
+    SPDScalarType MipmapGenerator::SanitizeScalarType(wgpu::TextureFormat format, bool halfPrecision) {
+        std::string formatStr = to_string(format);
+        std::transform(formatStr.begin(), formatStr.end(), formatStr.begin(), ::tolower);
+
+        SPDScalarType texelType = SPDScalarType::F32;
+        if (formatStr.find("sint") != std::string::npos) {
+            texelType = SPDScalarType::I32;
+        } else if (formatStr.find("uint") != std::string::npos) {
+            texelType = SPDScalarType::U32;
+        }
+
+        if (halfPrecision) {
+            bool hasF16 = false;
+            // In a real Dawn app, you would check device.GetSupportedFeatures()
+            // For now, let's assume it's available if requested.
+             hasF16 = true; 
+
+            if (!hasF16) {
+                // Log warning: half precision requested but not supported
+            }
+            if (texelType != SPDScalarType::F32) {
+                // Log warning: half precision for non-float format
+            }
+            if (hasF16 && texelType == SPDScalarType::F32) {
+                return SPDScalarType::F16;
+            }
+        }
+        return texelType;
+    }
+
+    SPDPipeline& MipmapGenerator::GetOrCreatePipeline(wgpu::TextureFormat format, SPDFilter filter, uint32_t numMips, SPDScalarType scalarType) {
+        if (m_pipelines[format][scalarType][filter].count(numMips) == 0) {
+            // Create the pipeline
+            SPDPipeline spdPipeline;
+
+            std::vector<wgpu::BindGroupLayoutEntry> mipsBglEntries;
+            for (uint32_t i = 0; i <= numMips; ++i) {
+                wgpu::BindGroupLayoutEntry entry{};
+                entry.binding = i;
+                entry.visibility = wgpu::ShaderStage::Compute;
+                if (i == 0) {
+                    entry.texture.sampleType = (scalarType == SPDScalarType::I32) ? wgpu::TextureSampleType::Sint :
+                                               (scalarType == SPDScalarType::U32) ? wgpu::TextureSampleType::Uint :
+                                               wgpu::TextureSampleType::UnfilterableFloat;
+                    entry.texture.viewDimension = wgpu::TextureViewDimension::e2DArray;
+                } else {
+                    entry.storageTexture.access = wgpu::StorageTextureAccess::WriteOnly;
+                    entry.storageTexture.format = format;
+                    entry.storageTexture.viewDimension = wgpu::TextureViewDimension::e2DArray;
+                }
+                mipsBglEntries.push_back(entry);
+            }
+            
+            wgpu::BindGroupLayoutDescriptor mipsBglDesc{};
+            mipsBglDesc.entryCount = mipsBglEntries.size();
+            mipsBglDesc.entries = mipsBglEntries.data();
+            spdPipeline.mipsBindGroupLayout = m_device.CreateBindGroupLayout(&mipsBglDesc);
+
+            std::string shaderCode = MakeShaderCode(format, GetFilterCode(filter), numMips, scalarType);
+            wgpu::ShaderModuleWGSLDescriptor wgslDesc{};
+            wgslDesc.code = shaderCode.c_str();
+
+            wgpu::ShaderModuleDescriptor shaderModuleDesc{};
+            shaderModuleDesc.nextInChain = &wgslDesc;
+            wgpu::ShaderModule shaderModule = m_device.CreateShaderModule(&shaderModuleDesc);
+            
+            wgpu::BindGroupLayout bgls[] = { spdPipeline.mipsBindGroupLayout, m_internalResourcesBindGroupLayout };
+            wgpu::PipelineLayoutDescriptor layoutDesc{};
+            layoutDesc.bindGroupLayoutCount = 2;
+            layoutDesc.bindGroupLayouts = bgls;
+
+            wgpu::ComputePipelineDescriptor pipelineDesc{};
+            pipelineDesc.layout = m_device.CreatePipelineLayout(&layoutDesc);
+            pipelineDesc.compute.module = shaderModule;
+            pipelineDesc.compute.entryPoint = "downsample";
+
+            spdPipeline.pipeline = m_device.CreateComputePipeline(&pipelineDesc);
+            m_pipelines[format][scalarType][filter][numMips] = std::move(spdPipeline);
+        }
+        return m_pipelines[format][scalarType][filter][numMips];
+    }
+    
+    void MipmapGenerator::Generate(
+        wgpu::CommandEncoder& commandEncoder,
+        wgpu::Texture srcTexture,
+        const SPDPassConfig& config)
+    {
+        uint32_t width = srcTexture.GetWidth();
+        uint32_t height = srcTexture.GetHeight();
+        uint32_t arrayLayerCount = srcTexture.GetDepthOrArrayLayers();
+
+        wgpu::Texture target = config.targetTexture ? config.targetTexture : srcTexture;
+        uint32_t numMips = config.numMips > 0 ? config.numMips : target.GetMipLevelCount() - 1;
+
+        if (numMips == 0) return;
+
+        SPDScalarType scalarType = SanitizeScalarType(srcTexture.GetFormat(), config.halfPrecision);
+        SPDPipeline& spdPipeline = GetOrCreatePipeline(target.GetFormat(), config.filter, numMips, scalarType);
+        
+        // --- Create Bind Group 0 (Mips) ---
+        std::vector<wgpu::BindGroupEntry> mipEntries;
+
+        wgpu::TextureViewDescriptor srcViewDesc{};
+        srcViewDesc.dimension = wgpu::TextureViewDimension::e2DArray;
+        srcViewDesc.baseMipLevel = config.sourceMipLevel;
+        srcViewDesc.mipLevelCount = 1;
+        srcViewDesc.baseArrayLayer = 0;
+        srcViewDesc.arrayLayerCount = arrayLayerCount;
+        
+        wgpu::BindGroupEntry srcEntry{};
+        srcEntry.binding = 0;
+        srcEntry.textureView = srcTexture.CreateView(&srcViewDesc);
+        mipEntries.push_back(srcEntry);
+
+        for (uint32_t i = 0; i < numMips; ++i) {
+            wgpu::TextureViewDescriptor dstViewDesc{};
+            dstViewDesc.dimension = wgpu::TextureViewDimension::e2DArray;
+            dstViewDesc.baseMipLevel = config.sourceMipLevel + i + 1;
+            dstViewDesc.mipLevelCount = 1;
+            dstViewDesc.baseArrayLayer = 0;
+            dstViewDesc.arrayLayerCount = arrayLayerCount;
+           
+            wgpu::BindGroupEntry dstEntry{};
+            dstEntry.binding = i + 1;
+            dstEntry.textureView = target.CreateView(&dstViewDesc);
+            mipEntries.push_back(dstEntry);
+        }
+
+        wgpu::BindGroupDescriptor mipBindGroupDesc{};
+        mipBindGroupDesc.layout = spdPipeline.mipsBindGroupLayout;
+        mipBindGroupDesc.entryCount = mipEntries.size();
+        mipBindGroupDesc.entries = mipEntries.data();
+        wgpu::BindGroup mipsBindGroup = m_device.CreateBindGroup(&mipBindGroupDesc);
+
+        // --- Create Bind Group 1 (Internal Resources) ---
+        uint32_t numWorkGroupsX = (width + 63) / 64;
+        uint32_t numWorkGroupsY = (height + 63) / 64;
+        uint32_t numWorkGroups = numWorkGroupsX * numWorkGroupsY;
+
+        struct DownsamplePassMeta {
+            uint32_t work_group_offset[2] = {0, 0};
+            uint32_t num_work_groups;
+            uint32_t mips;
+            uint32_t padding[12]; // Ensure size is multiple of 16
+        } meta;
+        meta.num_work_groups = numWorkGroups;
+        meta.mips = numMips;
+        
+        wgpu::BufferDescriptor metaBufferDesc{};
+        metaBufferDesc.size = sizeof(DownsamplePassMeta);
+        metaBufferDesc.usage = wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst;
+        wgpu::Buffer metaBuffer = m_device.CreateBuffer(&metaBufferDesc);
+        m_device.GetQueue().WriteBuffer(metaBuffer, 0, &meta, sizeof(meta));
+
+        wgpu::BindGroupEntry metaEntry{};
+        metaEntry.binding = 0;
+        metaEntry.buffer = metaBuffer;
+        
+        wgpu::BindGroupDescriptor internalBindGroupDesc{};
+        internalBindGroupDesc.layout = m_internalResourcesBindGroupLayout;
+        internalBindGroupDesc.entryCount = 1;
+        internalBindGroupDesc.entries = &metaEntry;
+        wgpu::BindGroup internalBindGroup = m_device.CreateBindGroup(&internalBindGroupDesc);
+        
+        // --- Dispatch ---
+        wgpu::ComputePassEncoder pass = commandEncoder.BeginComputePass();
+        pass.SetPipeline(spdPipeline.pipeline);
+        pass.SetBindGroup(0, mipsBindGroup);
+        pass.SetBindGroup(1, internalBindGroup);
+        pass.DispatchWorkgroups(numWorkGroupsX, numWorkGroupsY, arrayLayerCount);
+        pass.End();
+    }
+
+    // Main shader generation logic
+
+// Helper function to check if a string is in a vector of strings
+bool includes(const std::vector<std::string>& vec, const std::string& str) {
+    for (const auto& s : vec) {
+        if (s == str) {
+            return true;
+        }
+    }
+    return false;
+}
+
+std::string MakeShaderCode(wgpu::TextureFormat outputFormat,
+                           const std::string& filterOp,
+                           unsigned int numMips, // Assuming a default value for numMips
+                           SPDScalarType scalarType) { // Default scalarType
+    std::stringstream ss;
+
+    std::string texelType;
+    if (scalarType == SPDScalarType::I32) {
+        texelType = "i32";
+    } else if (scalarType == SPDScalarType::U32) {
+        texelType = "u32";
+    } else {
+        texelType = "f32";
+    }
+
+    bool useF16 = (scalarType == SPDScalarType::F16);
+
+    std::string filterCode = filterOp;
+    if (filterOp == SPD_FILTER_AVERAGE && !includes({"f32", "f16"}, texelType)) {
+        // Replace "* 0.25" with "/ 4"
+        size_t pos = filterCode.find("* 0.25");
+        if (pos != std::string::npos) {
+            filterCode.replace(pos, std::string("* 0.25").length(), "/ 4");
+        }
+    }
+
+    // Generate mipsBindings
+    std::string mipsBindings;
+    for (unsigned int i = 0; i < numMips; ++i) {
+        mipsBindings += "@group(0) @binding(" + std::to_string(i + 1) + ") var dst_mip_" + std::to_string(i + 1) + ": texture_storage_2d_array<" + to_string(outputFormat)  + ", write>;\n";
+    }
+
+    // Generate mipsAccessorBody
+    std::string mipsAccessorBody;
+    for (unsigned int i = 0; i < numMips; ++i) {
+        if (i == 5 && numMips > 6) {
+            mipsAccessorBody += " else if mip == 6 {\n";
+            mipsAccessorBody += "                textureStore(dst_mip_6, uv, slice, " + (useF16 ? "vec4<" + texelType + ">(value)" : "value") + ");\n";
+            mipsAccessorBody += "                mip_dst_6_buffer[slice][uv.y][uv.x] = value;\n";
+            mipsAccessorBody += "            }";
+        } else {
+            if (i != 0) {
+                mipsAccessorBody += " else ";
+            }
+            mipsAccessorBody += "if mip == " + std::to_string(i + 1) + " {\n";
+            mipsAccessorBody += "                textureStore(dst_mip_" + std::to_string(i + 1) + ", uv, slice, " + (useF16 ? "vec4<" + texelType + ">(value)" : "value") + ");\n";
+            mipsAccessorBody += "            }";
+        }
+    }
+
+    std::string mipsAccessor = "fn store_dst_mip(value: vec4<SPDScalar>, uv: vec2<u32>, slice: u32, mip: u32) {\n" + mipsAccessorBody + "\n}";
+    std::string midMipAccessor = "return mip_dst_6_buffer[slice][uv.y][uv.x];";
+
+    // Start building the final shader code string
+    ss << R"(
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2023 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the “Software”), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+// Definitions --------------------------------------------------------------------------------------------------------
+
+)";
+    if (useF16) {
+        ss << "enable f16;\n";
+    }
+    ss << "alias SPDScalar = " << texelType << ";\n\n"; // Using texelType here, assuming SPDScalar maps to it.
+
+    ss << R"(
+// Helpers ------------------------------------------------------------------------------------------------------------
+
+/**
+ * A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
+ * * The 64-wide lane indices to 8x8 remapping is performed as follows:
+ * 00 01 08 09 10 11 18 19
+ * 02 03 0a 0b 12 13 1a 1b
+ * 04 05 0c 0d 14 15 1c 1d
+ * 06 07 0e 0f 16 17 1e 1f
+ * 20 21 28 29 30 31 38 39
+ * 22 23 2a 2b 32 33 3a 3b
+ * 24 25 2c 2d 34 35 3c 3d
+ * 26 27 2e 2f 36 37 3e 3f
+ * * @param a: The input 1D coordinate to remap.
+ *
+ * @returns The remapped 2D coordinates.
+ */
+fn remap_for_wave_reduction(a: u32) -> vec2<u32> {
+    return vec2<u32>(
+        insertBits(extractBits(a, 2u, 3u), a, 0u, 1u),
+        insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u)
+    );
+}
+
+fn map_to_xy(local_invocation_index: u32) -> vec2<u32> {
+    let sub_xy: vec2<u32> = remap_for_wave_reduction(local_invocation_index % 64);
+    return vec2<u32>(
+        sub_xy.x + 8 * ((local_invocation_index >> 6) % 2),
+        sub_xy.y + 8 * ((local_invocation_index >> 7))
+    );
+}
+
+/*
+ * Compute a linear value from a SRGB value.
+ * * @param value: The value to convert to linear from SRGB.
+ * * @returns A value in SRGB space.
+ */
+/*
+fn srgb_to_linear(value: SPDScalar) -> SPDScalar {
+    let j = vec3<SPDScalar>(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    let k = vec2<SPDScalar>(1.055, -0.055);
+    return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y);
+}
+*/
+
+// Resources & Accessors -----------------------------------------------------------------------------------------------
+struct DownsamplePassMeta {
+    work_group_offset: vec2<u32>,
+    num_work_groups: u32,
+    mips: u32,
+}
+
+// In the original version dst_mip_i is an image2Darray [SPD_MAX_MIP_LEVELS+1], i.e., 12+1, but WGSL doesn't support arrays of textures yet
+// Also these are read_write because for mips 7-13, the workgroup reads from mip level 6 - since most formats don't support read_write access in WGSL yet, we use a single read_write buffer in such cases instead
+@group(0) @binding(0) var src_mip_0: texture_2d_array<)" << texelType << R"(>;
+)" << mipsBindings << R"(
+
+@group(1) @binding(0) var<uniform> downsample_pass_meta : DownsamplePassMeta;
+@group(1) @binding(1) var<storage, read_write> spd_global_counter: array<atomic<u32>>;
+@group(1) @binding(2) var<storage, read_write> mip_dst_6_buffer: array<array<array<vec4<f32>, 64>, 64>>;
+
+fn get_mips() -> u32 {
+    return downsample_pass_meta.mips;
+}
+
+fn get_num_work_groups() -> u32 {
+    return downsample_pass_meta.num_work_groups;
+}
+
+fn get_work_group_offset() -> vec2<u32> {
+    return downsample_pass_meta.work_group_offset;
+}
+
+fn load_src_image(uv: vec2<u32>, slice: u32) -> vec4<SPDScalar> {
+    return vec4<SPDScalar>(textureLoad(src_mip_0, uv, slice, 0));
+}
+
+fn load_mid_mip_image(uv: vec2<u32>, slice: u32) -> vec4<SPDScalar> {
+    )";
+    if (numMips > 6) {
+        ss << midMipAccessor;
+    } else {
+        ss << "return vec4<SPDScalar>();";
+    }
+    ss << R"(
+}
+
+)" << mipsAccessor << R"(
+
+// Workgroup -----------------------------------------------------------------------------------------------------------
+
+var<workgroup> spd_intermediate: array<array<vec4<SPDScalar>, 16>, 16>;
+var<workgroup> spd_counter: atomic<u32>;
+
+fn spd_increase_atomic_counter(slice: u32) {
+    atomicStore(&spd_counter, atomicAdd(&spd_global_counter[slice], 1));
+}
+
+fn spd_get_atomic_counter() -> u32 {
+    return atomicLoad(&spd_counter);
+}
+
+fn spd_reset_atomic_counter(slice: u32) {
+    atomicStore(&spd_global_counter[slice], 0);
+}
+
+// Cotnrol flow --------------------------------------------------------------------------------------------------------
+
+fn spd_barrier() {
+    // in glsl this does: groupMemoryBarrier(); barrier();
+    workgroupBarrier();
+}
+
+// Only last active workgroup should proceed
+fn spd_exit_workgroup(num_work_groups: u32, local_invocation_index: u32, slice: u32) -> bool {
+    // global atomic counter
+    if (local_invocation_index == 0) {
+        spd_increase_atomic_counter(slice);
+    }
+    spd_barrier();
+    return spd_get_atomic_counter() != (num_work_groups - 1);
+}
+
+// Pixel access --------------------------------------------------------------------------------------------------------
+
+)" << filterCode << R"(
+
+fn spd_store(pix: vec2<u32>, out_value: vec4<SPDScalar>, mip: u32, slice: u32) {
+    store_dst_mip(out_value, pix, slice, mip + 1);
+}
+
+fn spd_load_intermediate(x: u32, y: u32) -> vec4<SPDScalar> {
+    return spd_intermediate[x][y];
+}
+
+fn spd_store_intermediate(x: u32, y: u32, value: vec4<SPDScalar>) {
+    spd_intermediate[x][y] = value;
+}
+
+fn spd_reduce_intermediate(i0: vec2<u32>, i1: vec2<u32>, i2: vec2<u32>, i3: vec2<u32>) -> vec4<SPDScalar> {
+    let v0 = spd_load_intermediate(i0.x, i0.y);
+    let v1 = spd_load_intermediate(i1.x, i1.y);
+    let v2 = spd_load_intermediate(i2.x, i2.y);
+    let v3 = spd_load_intermediate(i3.x, i3.y);
+    return spd_reduce_4(v0, v1, v2, v3);
+}
+
+fn spd_reduce_load_4(base: vec2<u32>, slice: u32) -> vec4<SPDScalar> {
+    let v0 = load_src_image(base + vec2<u32>(0, 0), slice);
+    let v1 = load_src_image(base + vec2<u32>(0, 1), slice);
+    let v2 = load_src_image(base + vec2<u32>(1, 0), slice);
+    let v3 = load_src_image(base + vec2<u32>(1, 1), slice);
+    return spd_reduce_4(v0, v1, v2, v3);
+}
+
+fn spd_reduce_load_mid_mip_4(base: vec2<u32>, slice: u32) -> vec4<SPDScalar> {
+    let v0 = load_mid_mip_image(base + vec2<u32>(0, 0), slice);
+    let v1 = load_mid_mip_image(base + vec2<u32>(0, 1), slice);
+    let v2 = load_mid_mip_image(base + vec2<u32>(1, 0), slice);
+    let v3 = load_mid_mip_image(base + vec2<u32>(1, 1), slice);
+    return spd_reduce_4(v0, v1, v2, v3);
+}
+
+// Main logic ---------------------------------------------------------------------------------------------------------
+
+fn spd_downsample_mips_0_1(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    var v: array<vec4<SPDScalar>, 4>;
+
+    let workgroup64 = workgroup_id.xy * 64;
+    let workgroup32 = workgroup_id.xy * 32;
+    let workgroup16 = workgroup_id.xy * 16;
+
+    var tex = workgroup64 + vec2<u32>(x * 2, y * 2);
+    var pix = workgroup32 + vec2<u32>(x, y);
+    v[0] = spd_reduce_load_4(tex, slice);
+    spd_store(pix, v[0], 0, slice);
+
+    tex = workgroup64 + vec2<u32>(x * 2 + 32, y * 2);
+    pix = workgroup32 + vec2<u32>(x + 16, y);
+    v[1] = spd_reduce_load_4(tex, slice);
+    spd_store(pix, v[1], 0, slice);
+
+    tex = workgroup64 + vec2<u32>(x * 2, y * 2 + 32);
+    pix = workgroup32 + vec2<u32>(x, y + 16);
+    v[2] = spd_reduce_load_4(tex, slice);
+    spd_store(pix, v[2], 0, slice);
+
+    tex = workgroup64 + vec2<u32>(x * 2 + 32, y * 2 + 32);
+    pix = workgroup32 + vec2<u32>(x + 16, y + 16);
+    v[3] = spd_reduce_load_4(tex, slice);
+    spd_store(pix, v[3], 0, slice);
+
+    if mip <= 1 {
+        return;
+    }
+
+    for (var i = 0u; i < 4u; i++) {
+        spd_store_intermediate(x, y, v[i]);
+        spd_barrier();
+        if local_invocation_index < 64 {
+            v[i] = spd_reduce_intermediate(
+                vec2<u32>(x * 2 + 0, y * 2 + 0),
+                vec2<u32>(x * 2 + 1, y * 2 + 0),
+                vec2<u32>(x * 2 + 0, y * 2 + 1),
+                vec2<u32>(x * 2 + 1, y * 2 + 1)
+            );
+            spd_store(workgroup16 + vec2<u32>(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
+        }
+        spd_barrier();
+    }
+
+    if local_invocation_index < 64 {
+        spd_store_intermediate(x + 0, y + 0, v[0]);
+        spd_store_intermediate(x + 8, y + 0, v[1]);
+        spd_store_intermediate(x + 0, y + 8, v[2]);
+        spd_store_intermediate(x + 8, y + 8, v[3]);
+    }
+}
+
+fn spd_downsample_mip_2(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    if local_invocation_index < 64u {
+        let v = spd_reduce_intermediate(
+            vec2<u32>(x * 2 + 0, y * 2 + 0),
+            vec2<u32>(x * 2 + 1, y * 2 + 0),
+            vec2<u32>(x * 2 + 0, y * 2 + 1),
+            vec2<u32>(x * 2 + 1, y * 2 + 1)
+        );
+        spd_store(workgroup_id.xy * 8 + vec2<u32>(x, y), v, mip, slice);
+        // store to LDS, try to reduce bank conflicts
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // ...
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        spd_store_intermediate(x * 2 + y % 2, y * 2, v);
+    }
+}
+
+fn spd_downsample_mip_3(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    if local_invocation_index < 16u {
+        // x 0 x 0
+        // 0 0 0 0
+        // 0 x 0 x
+        // 0 0 0 0
+        let v = spd_reduce_intermediate(
+            vec2<u32>(x * 4 + 0 + 0, y * 4 + 0),
+            vec2<u32>(x * 4 + 2 + 0, y * 4 + 0),
+            vec2<u32>(x * 4 + 0 + 1, y * 4 + 2),
+            vec2<u32>(x * 4 + 2 + 1, y * 4 + 2)
+        );
+        spd_store(workgroup_id.xy * 4 + vec2<u32>(x, y), v, mip, slice);
+        // store to LDS
+        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
+        // ...
+        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
+        // ...
+        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
+        // ...
+        spd_store_intermediate(x * 4 + y, y * 4, v);
+    }
+}
+
+fn spd_downsample_mip_4(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    if local_invocation_index < 4u {
+        // x 0 0 0 x 0 0 0
+        // ...
+        // 0 x 0 0 0 x 0 0
+        let v = spd_reduce_intermediate(
+            vec2<u32>(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
+            vec2<u32>(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
+            vec2<u32>(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
+            vec2<u32>(x * 8 + 4 + 1 + y * 2, y * 8 + 4)
+        );
+        spd_store(workgroup_id.xy * 2 + vec2<u32>(x, y), v, mip, slice);
+        // store to LDS
+        // x x x x 0 ...
+        // 0 ...
+        spd_store_intermediate(x + y * 2, 0, v);
+    }
+}
+
+fn spd_downsample_mip_5(workgroup_id: vec2<u32>, local_invocation_index: u32, mip: u32, slice: u32) {
+    if local_invocation_index < 1u {
+        // x x x x 0 ...
+        // 0 ...
+        let v = spd_reduce_intermediate(vec2<u32>(0, 0), vec2<u32>(1, 0), vec2<u32>(2, 0), vec2<u32>(3, 0));
+        spd_store(workgroup_id.xy, v, mip, slice);
+    }
+}
+
+fn spd_downsample_next_four(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32) {
+    if mips <= base_mip {
+        return;
+    }
+    spd_barrier();
+    spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice);
+
+    if mips <= base_mip + 1 {
+        return;
+    }
+    spd_barrier();
+    spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice);
+
+    if mips <= base_mip + 2 {
+        return;
+    }
+    spd_barrier();
+    spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice);
+
+    if mips <= base_mip + 3 {
+        return;
+    }
+    spd_barrier();
+    spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice);
+}
+
+fn spd_downsample_last_four(x: u32, y: u32, workgroup_id: vec2<u32>, local_invocation_index: u32, base_mip: u32, mips: u32, slice: u32, exit: bool) {
+    if mips <= base_mip {
+        return;
+    }
+    spd_barrier();
+    if !exit {
+        spd_downsample_mip_2(x, y, workgroup_id, local_invocation_index, base_mip, slice);
+    }
+
+    if mips <= base_mip + 1 {
+        return;
+    }
+    spd_barrier();
+    if !exit {
+        spd_downsample_mip_3(x, y, workgroup_id, local_invocation_index, base_mip + 1, slice);
+    }
+
+    if mips <= base_mip + 2 {
+        return;
+    }
+    spd_barrier();
+    if !exit {
+        spd_downsample_mip_4(x, y, workgroup_id, local_invocation_index, base_mip + 2, slice);
+    }
+
+    if mips <= base_mip + 3 {
+        return;
+    }
+    spd_barrier();
+    if !exit {
+        spd_downsample_mip_5(workgroup_id, local_invocation_index, base_mip + 3, slice);
+    }
+}
+
+fn spd_downsample_mips_6_7(x: u32, y: u32, mips: u32, slice: u32) {
+    var tex = vec2<u32>(x * 4 + 0, y * 4 + 0);
+    var pix = vec2<u32>(x * 2 + 0, y * 2 + 0);
+    let v0 = spd_reduce_load_mid_mip_4(tex, slice);
+    spd_store(pix, v0, 6, slice);
+
+    tex = vec2<u32>(x * 4 + 2, y * 4 + 0);
+    pix = vec2<u32>(x * 2 + 1, y * 2 + 0);
+    let v1 = spd_reduce_load_mid_mip_4(tex, slice);
+    spd_store(pix, v1, 6, slice);
+
+    tex = vec2<u32>(x * 4 + 0, y * 4 + 2);
+    pix = vec2<u32>(x * 2 + 0, y * 2 + 1);
+    let v2 = spd_reduce_load_mid_mip_4(tex, slice);
+    spd_store(pix, v2, 6, slice);
+
+    tex = vec2<u32>(x * 4 + 2, y * 4 + 2);
+    pix = vec2<u32>(x * 2 + 1, y * 2 + 1);
+    let v3 = spd_reduce_load_mid_mip_4(tex, slice);
+    spd_store(pix, v3, 6, slice);
+
+    if mips <= 7 {
+        return;
+    }
+    // no barrier needed, working on values only from the same thread
+
+    let v = spd_reduce_4(v0, v1, v2, v3);
+    spd_store(vec2<u32>(x, y), v, 7, slice);
+    spd_store_intermediate(x, y, v);
+}
+
+fn spd_downsample_last_6(x: u32, y: u32, local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) {
+    if mips <= 6 {
+        return;
+    }
+
+    // increase the global atomic counter for the given slice and check if it's the last remaining thread group:
+    // terminate if not, continue if yes.
+    let exit = spd_exit_workgroup(num_work_groups, local_invocation_index, slice);
+
+    // can't exit directly because subsequent barrier calls break uniform control flow...
+    if !exit {
+        // reset the global atomic counter back to 0 for the next spd dispatch
+        spd_reset_atomic_counter(slice);
+
+        // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
+        // compute MIP level 6 and 7
+        spd_downsample_mips_6_7(x, y, mips, slice);
+    }
+
+    // compute MIP level 8, 9, 10, 11
+    spd_downsample_last_four(x, y, vec2<u32>(0, 0), local_invocation_index, 8, mips, slice, exit);
+}
+
+/// Downsamples a 64x64 tile based on the work group id.
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels.
+///
+/// @param [in] workGroupID        index of the work group / thread group
+/// @param [in] localInvocationIndex   index of the thread within the thread group in 1D
+/// @param [in] mips             the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups        the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice             the slice of the input texture
+fn spd_downsample(workgroup_id: vec2<u32>, local_invocation_index: u32, mips: u32, num_work_groups: u32, slice: u32) {
+    let xy = map_to_xy(local_invocation_index);
+    spd_downsample_mips_0_1(xy.x, xy.y, workgroup_id, local_invocation_index, mips, slice);
+    spd_downsample_next_four(xy.x, xy.y, workgroup_id, local_invocation_index, 2, mips, slice);
+)";
+    if (numMips > 6) {
+        ss << "    spd_downsample_last_6(xy.x, xy.y, local_invocation_index, mips, num_work_groups, slice);\n";
+    }
+    ss << R"(}
+
+// Entry points -------------------------------------------------------------------------------------------------------
+
+@compute
+@workgroup_size(256, 1, 1)
+fn downsample(@builtin(local_invocation_index) local_invocation_index: u32, @builtin(workgroup_id) workgroup_id: vec3<u32>) {
+    spd_downsample(
+        workgroup_id.xy + get_work_group_offset(),
+        local_invocation_index,
+        get_mips(),
+        get_num_work_groups(),
+        workgroup_id.z
+    );
+}
+)";
+    return ss.str();
+}
+
+
+} // namespace spd
--- a/filament/backend/src/webgpu/SpdMipmapGenerator.h
+++ b/filament/backend/src/webgpu/SpdMipmapGenerator.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include <webgpu/webgpu_cpp.h>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <optional>
+// C++ port of https://github.com/JolifantoBambla/webgpu-spd for early experiments- do not merge like this
+namespace spd {
+
+    // Enum for selecting the downsampling filter.
+    enum class SPDFilter {
+        Average,
+        Min,
+        Max,
+        MinMax
+    };
+
+    // Enum for shader scalar types.
+    enum class SPDScalarType {
+        F32,
+        F16,
+        I32,
+        U32
+    };
+
+    // Configuration for a single mipmap generation pass.
+struct SPDPassConfig {
+    SPDFilter filter = SPDFilter::Average;
+    wgpu::Texture targetTexture = nullptr;
+    uint32_t numMips = 0;
+    bool halfPrecision = false;
+    uint32_t sourceMipLevel = 0;
+};
+    
+    // Holds a pipeline and its corresponding bind group layout.
+    struct SPDPipeline {
+        wgpu::BindGroupLayout mipsBindGroupLayout = nullptr;
+        wgpu::ComputePipeline pipeline = nullptr;
+    };
+
+    // Manages pipeline creation, caching, and execution for mipmap generation.
+    class MipmapGenerator {
+    public:
+        MipmapGenerator(const wgpu::Device& device);
+
+        // Pre-creates pipelines for specified formats and filters.
+        void PreparePipelines(wgpu::TextureFormat format, SPDFilter filter, bool halfPrecision = false);
+
+        // Generates a compute pass for creating mipmaps.
+        void Generate(
+            wgpu::CommandEncoder& commandEncoder,
+            wgpu::Texture srcTexture,
+            const SPDPassConfig& config
+        );
+
+    private:
+        wgpu::Device m_device;
+        wgpu::BindGroupLayout m_internalResourcesBindGroupLayout;
+
+        // Cached pipelines: Map<TextureFormat, Map<SPDScalarType, Map<Filter, Map<NumMips, Pipeline>>>>
+        std::unordered_map<wgpu::TextureFormat,
+            std::unordered_map<SPDScalarType,
+                std::unordered_map<SPDFilter,
+                    std::unordered_map<uint32_t, SPDPipeline>>>> m_pipelines;
+        
+        // Helper methods
+        SPDPipeline& GetOrCreatePipeline(wgpu::TextureFormat format, SPDFilter filter, uint32_t numMips, SPDScalarType scalarType);
+        SPDScalarType SanitizeScalarType(wgpu::TextureFormat format, bool halfPrecision);
+        std::string GetFilterCode(SPDFilter filter);
+    };
+    // Assuming SPD_FILTER_AVERAGE is a string constant
+    const std::string SPD_FILTER_AVERAGE = "value * 0.25"; // Original filter operation
+
+    // Generates the WGSL shader code dynamically.
+    std::string MakeShaderCode(
+        wgpu::TextureFormat outputFormat,
+        const std::string& filterOp = SPD_FILTER_AVERAGE,
+        uint32_t numMips = 0,
+        SPDScalarType scalarType = SPDScalarType::F32
+    );
+
+} // namespace spd
--- a/filament/backend/src/webgpu/WebGPUDriver.cpp
+++ b/filament/backend/src/webgpu/WebGPUDriver.cpp
@@ -28,7 +28,7 @@
 #include "WebGPUVertexBuffer.h"
 #include "WebGPUVertexBufferInfo.h"
 #include <backend/platforms/WebGPUPlatform.h>
-
+#include "SpdMipmapGenerator.h"
 #include "CommandStreamDispatcher.h"
 #include "DriverBase.h"
 #include "private/backend/Dispatcher.h"
@@ -738,59 +738,63 @@ void WebGPUDriver::setExternalStream(Handle<HwTexture> textureHandle,
 }

 void WebGPUDriver::generateMipmaps(Handle<HwTexture> textureHandle) {
-    if (!mCommandEncoder) {
-        mMipQueue.push_back(textureHandle);
-        return;
-    }
    auto texture = handleCast<WebGPUTexture>(textureHandle);
    assert_invariant(texture);
    wgpu::Texture wgpuTexture = texture->getTexture();
    assert_invariant(wgpuTexture);

-    FILAMENT_CHECK_PRECONDITION(wgpuTexture.GetUsage() & wgpu::TextureUsage::CopySrc)
-            << "Texture intended for mipmap generation (as source) must have CopySrc usage.";
-    FILAMENT_CHECK_PRECONDITION(wgpuTexture.GetUsage() & wgpu::TextureUsage::CopyDst)
-            << "Texture intended for mipmap generation (as destination) must have CopyDst usage.";
+//    const auto usage = wgpuTexture.GetUsage();
+//    FILAMENT_CHECK_PRECONDITION(usage & wgpu::TextureUsage::TextureBinding)
+//            << "Texture for mipmap generation must have TextureBinding usage.";
+//    FILAMENT_CHECK_PRECONDITION(usage & wgpu::TextureUsage::StorageBinding)
+//            << "Texture for mipmap generation must have StorageBinding usage.";

-    uint32_t mipLevelCount = wgpuTexture.GetMipLevelCount();
-    if (mipLevelCount <= 1) {
+    const uint32_t totalMipLevels = wgpuTexture.GetMipLevelCount();
+    if (totalMipLevels <= 1) {
        return;
    }

-    uint32_t width = wgpuTexture.GetWidth();
-    uint32_t height = wgpuTexture.GetHeight();
-    // For 3D textures, depth is > 1. For 2D/Cube/Array, effectively 1 for mip-level copies.
-    uint32_t depth =
-            (texture->target == SamplerType::SAMPLER_3D) ? wgpuTexture.GetDepthOrArrayLayers() : 1;
+    // Determine the maximum number of mips we can generate in a single pass.
+    // The limit is on *storage* textures, and we need 1 binding for the source texture.
+    const uint32_t maxMipsPerPass = 2;//mDeviceLimits.maxStorageTexturesPerShaderStage - 1;
+    FILAMENT_CHECK_POSTCONDITION(maxMipsPerPass > 0)
+            << "Device does not support enough storage textures for mipmapping.";

-    for (uint32_t mipLevel = 0; mipLevel < mipLevelCount - 1; ++mipLevel) {
-        wgpu::TexelCopyTextureInfo sourceCopyInfo{
-            .texture = wgpuTexture,
-            .mipLevel = mipLevel,
-            .aspect = texture->getAspect(),
-        };
+    // The generator can be created once.
+    spd::MipmapGenerator mipmapGenerator(mDevice);

-        wgpu::TexelCopyTextureInfo destinationCopyInfo{
-            .texture = wgpuTexture,
-            .mipLevel = mipLevel + 1,
-            .aspect = texture->getAspect(),
-        };
+    // We will record all passes into a single command encoder.
+    wgpu::CommandEncoderDescriptor encoderDesc = {};
+    encoderDesc.label = "Mipmap Command Encoder";
+    wgpu::CommandEncoder encoder = mDevice.CreateCommandEncoder(&encoderDesc);

-        uint32_t dstWidth = std::max(1u, width >> 1);
-        uint32_t dstHeight = std::max(1u, height >> 1);
-        uint32_t dstDepth = std::max(1u, depth >> 1);
+    uint32_t mipsLeftToGenerate = totalMipLevels - 1;
+    uint32_t currentSourceMipLevel = 0;

-        wgpu::Extent3D copySize{ .width = dstWidth,
-            .height = dstHeight,
-            .depthOrArrayLayers = (texture->target == SamplerType::SAMPLER_3D)
-                                          ? dstDepth
-                                          : texture->getArrayLayerCount() };
-        mCommandEncoder.CopyTextureToTexture(&sourceCopyInfo, &destinationCopyInfo, &copySize);
+    while (mipsLeftToGenerate > 0) {
+        uint32_t mipsThisPass = std::min(mipsLeftToGenerate, maxMipsPerPass);

-        width = dstWidth;
-        height = dstHeight;
-        depth = dstDepth;
+        spd::SPDPassConfig config = {};
+        config.filter = spd::SPDFilter::Average;
+        config.targetTexture = wgpuTexture;
+        config.numMips = mipsThisPass;
+        config.sourceMipLevel = currentSourceMipLevel;
+
+        // The generator needs to create pipelines for the number of mips in THIS pass.
+        mipmapGenerator.PreparePipelines(wgpuTexture.GetFormat(), config.filter);
+
+        // Generate one batch of mipmaps.
+        mipmapGenerator.Generate(encoder, wgpuTexture, config);
+
+        mipsLeftToGenerate -= mipsThisPass;
+        currentSourceMipLevel += mipsThisPass;
    }
+
+    // Finish the encoder and submit all the passes at once.
+    wgpu::CommandBufferDescriptor cmdBufferDesc = {};
+    cmdBufferDesc.label = "Mipmap Command Buffer";
+    wgpu::CommandBuffer commandBuffer = encoder.Finish(&cmdBufferDesc);
+    mQueue.Submit(1, &commandBuffer);
 }

 void WebGPUDriver::compilePrograms(CompilerPriorityQueue priority,
--- a/filament/backend/src/webgpu/WebGPUTexture.cpp
+++ b/filament/backend/src/webgpu/WebGPUTexture.cpp
@@ -34,6 +34,84 @@
 namespace filament::backend {

 namespace {
+[[nodiscard]] bool IsFormatStorageCompatible(wgpu::TextureFormat format) {
+    switch (format) {
+        // List of formats that support storage binding
+        case wgpu::TextureFormat::R32Float:
+        case wgpu::TextureFormat::R32Sint:
+        case wgpu::TextureFormat::R32Uint:
+        case wgpu::TextureFormat::RG32Float:
+        case wgpu::TextureFormat::RG32Sint:
+        case wgpu::TextureFormat::RG32Uint:
+        case wgpu::TextureFormat::RGBA16Float:
+        case wgpu::TextureFormat::RGBA16Sint:
+        case wgpu::TextureFormat::RGBA16Uint:
+        case wgpu::TextureFormat::RGBA32Float:
+        case wgpu::TextureFormat::RGBA32Sint:
+        case wgpu::TextureFormat::RGBA32Uint:
+        case wgpu::TextureFormat::RGBA8Unorm:
+        case wgpu::TextureFormat::RGBA8Snorm:
+        case wgpu::TextureFormat::RGBA8Uint:
+        case wgpu::TextureFormat::RGBA8Sint:
+            return true;
+        default:
+            // All other formats, including packed floats (RG11B10Ufloat),
+            // depth/stencil, and sRGB formats do not support storage.
+            return false;
+    }
+}
+wgpu::TextureFormat GetStorageCompatibleFormat(wgpu::TextureFormat srgbFormat) {
+    switch (srgbFormat) {
+        case wgpu::TextureFormat::RGBA8UnormSrgb:
+            return wgpu::TextureFormat::RGBA8Unorm;
+        case wgpu::TextureFormat::BGRA8UnormSrgb:
+            return wgpu::TextureFormat::BGRA8Unorm;
+        case wgpu::TextureFormat::BC1RGBAUnormSrgb:
+            return wgpu::TextureFormat::BC1RGBAUnorm;
+        case wgpu::TextureFormat::BC2RGBAUnormSrgb:
+            return wgpu::TextureFormat::BC2RGBAUnorm;
+        case wgpu::TextureFormat::BC3RGBAUnormSrgb:
+            return wgpu::TextureFormat::BC3RGBAUnorm;
+        case wgpu::TextureFormat::BC7RGBAUnormSrgb:
+            return wgpu::TextureFormat::BC7RGBAUnorm;
+        case wgpu::TextureFormat::ETC2RGB8UnormSrgb:
+            return wgpu::TextureFormat::ETC2RGB8Unorm;
+        case wgpu::TextureFormat::ETC2RGB8A1UnormSrgb:
+            return wgpu::TextureFormat::ETC2RGB8A1Unorm;
+        case wgpu::TextureFormat::ETC2RGBA8UnormSrgb:
+            return wgpu::TextureFormat::ETC2RGBA8Unorm;
+        case wgpu::TextureFormat::ASTC4x4UnormSrgb:
+            return wgpu::TextureFormat::ASTC4x4Unorm;
+        case wgpu::TextureFormat::ASTC5x4UnormSrgb:
+            return wgpu::TextureFormat::ASTC5x4Unorm;
+        case wgpu::TextureFormat::ASTC5x5UnormSrgb:
+            return wgpu::TextureFormat::ASTC5x5Unorm;
+        case wgpu::TextureFormat::ASTC6x5UnormSrgb:
+            return wgpu::TextureFormat::ASTC6x5Unorm;
+        case wgpu::TextureFormat::ASTC6x6UnormSrgb:
+            return wgpu::TextureFormat::ASTC6x6Unorm;
+        case wgpu::TextureFormat::ASTC8x5UnormSrgb:
+            return wgpu::TextureFormat::ASTC8x5Unorm;
+        case wgpu::TextureFormat::ASTC8x6UnormSrgb:
+            return wgpu::TextureFormat::ASTC8x6Unorm;
+        case wgpu::TextureFormat::ASTC8x8UnormSrgb:
+            return wgpu::TextureFormat::ASTC8x8Unorm;
+        case wgpu::TextureFormat::ASTC10x5UnormSrgb:
+            return wgpu::TextureFormat::ASTC10x5Unorm;
+        case wgpu::TextureFormat::ASTC10x6UnormSrgb:
+            return wgpu::TextureFormat::ASTC10x6Unorm;
+        case wgpu::TextureFormat::ASTC10x8UnormSrgb:
+            return wgpu::TextureFormat::ASTC10x8Unorm;
+        case wgpu::TextureFormat::ASTC10x10UnormSrgb:
+            return wgpu::TextureFormat::ASTC10x10Unorm;
+        case wgpu::TextureFormat::ASTC12x10UnormSrgb:
+            return wgpu::TextureFormat::ASTC12x10Unorm;
+        case wgpu::TextureFormat::ASTC12x12UnormSrgb:
+            return wgpu::TextureFormat::ASTC12x12Unorm;
+        default:
+            return srgbFormat; // If not an sRGB format, return the same input
+    }
+}

 [[nodiscard]] constexpr wgpu::StringView getUserTextureLabel(const SamplerType target) {
    // TODO will be helpful to get more useful info than this
@@ -210,6 +288,11 @@ WebGPUTexture::WebGPUTexture(const SamplerType samplerType, const uint8_t levels
      mWebGPUUsage{ fToWGPUTextureUsage(usage, samples) },
      mBlockWidth{ filament::backend::getBlockWidth(format) },
      mBlockHeight{ filament::backend::getBlockHeight(format) } {
+    mCompatFormat = GetStorageCompatibleFormat(mWebGPUFormat);
+        if(levels > 1 && (mWebGPUUsage & wgpu::TextureUsage::TextureBinding) && (mWebGPUUsage & wgpu::TextureUsage::CopyDst) && IsFormatStorageCompatible(mCompatFormat)){
+        mWebGPUUsage |= wgpu::TextureUsage::StorageBinding;
+    }
+
    assert_invariant(
            samples == 1 ||
            samples == 4 &&
@@ -217,7 +300,7 @@ WebGPUTexture::WebGPUTexture(const SamplerType samplerType, const uint8_t levels
                    "count to either be 1 (no multisampling) or 4, at least as of April 2025 of "
                    "the spec. See https://www.w3.org/TR/webgpu/#texture-creation or "
                    "https://gpuweb.github.io/gpuweb/#multisample-state");
-    const wgpu::TextureDescriptor textureDescriptor{
+    wgpu::TextureDescriptor textureDescriptor{
        .label = getUserTextureLabel(samplerType),
        .usage = mWebGPUUsage,
        .dimension = toWebGPUTextureDimension(samplerType),
@@ -230,6 +313,16 @@ WebGPUTexture::WebGPUTexture(const SamplerType samplerType, const uint8_t levels
        .viewFormatCount = 0,
        .viewFormats = nullptr,
    };
+
+    //If our main format is not srgb, make the srgb a view into a non-srgb format
+    if(mCompatFormat != mWebGPUFormat){
+        textureDescriptor.viewFormatCount = 1;
+        std::swap(mCompatFormat, mWebGPUFormat);
+        textureDescriptor.format = mWebGPUFormat;
+        textureDescriptor.viewFormats = &mCompatFormat;
+        mWebGPUUsage|=wgpu::TextureUsage::StorageBinding;
+        textureDescriptor.usage = mWebGPUUsage;
+    }
    mArrayLayerCount = toArrayLayerCount(samplerType, textureDescriptor.size.depthOrArrayLayers);
    assert_invariant(textureDescriptor.format != wgpu::TextureFormat::Undefined &&
                     "Could not find appropriate WebGPU format");
@@ -455,9 +548,9 @@ wgpu::TextureFormat WebGPUTexture::fToWGPUTextureFormat(TextureFormat const& fFo
 wgpu::TextureView WebGPUTexture::makeTextureView(const uint8_t& baseLevel,
        const uint8_t& levelCount, const uint32_t& baseArrayLayer, const uint32_t& arrayLayerCount,
        const SamplerType samplerType) const noexcept {
-    const wgpu::TextureViewDescriptor textureViewDescriptor{
+    wgpu::TextureViewDescriptor textureViewDescriptor{
        .label = getUserTextureViewLabel(target),
-        .format = mWebGPUFormat,
+        .format = mCompatFormat,
        .dimension = toWebGPUTextureViewDimension(samplerType),
        .baseMipLevel = baseLevel,
        .mipLevelCount = levelCount,
@@ -465,6 +558,7 @@ wgpu::TextureView WebGPUTexture::makeTextureView(const uint8_t& baseLevel,
        .arrayLayerCount = arrayLayerCount,
        .aspect = mAspect,
        .usage = mWebGPUUsage };
+    textureViewDescriptor.usage &= (~wgpu::TextureUsage::StorageBinding);
    wgpu::TextureView textureView = mTexture.CreateView(&textureViewDescriptor);
    FILAMENT_CHECK_POSTCONDITION(textureView)
            << "Failed to create texture view " << textureViewDescriptor.label;
--- a/filament/backend/src/webgpu/WebGPUTexture.h
+++ b/filament/backend/src/webgpu/WebGPUTexture.h
@@ -61,6 +61,7 @@ private:
    wgpu::Texture mTexture = nullptr;
    // format is inherited from HwTexture. This naming is to distinguish it from Filament's format
    wgpu::TextureFormat mWebGPUFormat = wgpu::TextureFormat::Undefined;
+    wgpu::TextureFormat mCompatFormat = wgpu::TextureFormat::Undefined;
    wgpu::TextureAspect mAspect = wgpu::TextureAspect::Undefined;
    // usage is inherited from HwTexture. This naming is to distinguish it from Filament's usage
    wgpu::TextureUsage mWebGPUUsage = wgpu::TextureUsage::None;
--- a/filament/backend/src/webgpu/platform/WebGPUPlatform.cpp
+++ b/filament/backend/src/webgpu/platform/WebGPUPlatform.cpp
@@ -87,6 +87,7 @@ constexpr wgpu::Limits REQUIRED_LIMITS = {
    .maxBindingsPerBindGroup = filament::backend::MAX_DESCRIPTOR_COUNT,
    .maxSamplersPerShaderStage = 16, // TODO should be set to filament::backend::MAX_SAMPLER_COUNT,
    .maxStorageBuffersPerShaderStage = filament::backend::MAX_SSBO_COUNT,
+    .maxStorageTexturesPerShaderStage = 8,
    .maxVertexBuffers = 8, // TODO should be set to filament::backend::MAX_VERTEX_BUFFER_COUNT,
    .maxVertexAttributes = filament::backend::MAX_VERTEX_ATTRIBUTE_COUNT,
 };
Author	SHA1	Message	Date
bridgewaterrobbie	c611b4df6f	Fixup shader code	2025-06-10 18:04:41 -04:00
bridgewaterrobbie	6b91d7ca2d	Reduce the number of prepared mipmap pipelines- this should be configurable really if we want to preserve this logic in the long run.	2025-06-10 16:54:28 -04:00
bridgewaterrobbie	df264372d9	Add check for if the format (Or non srgb format) is storage compatible	2025-06-10 16:54:03 -04:00
bridgewaterrobbie	bf9435d0fd	Never merge: Arbitrarilly raise max storage texutres per stage for easier tinkering, though this s houldn't be relied upon, spec limit is 4	2025-06-10 16:18:17 -04:00
bridgewaterrobbie	726177fead	Hacky version of playing with formats and usages.	2025-06-10 15:58:02 -04:00
bridgewaterrobbie	2ef01ae1e8	Try to go in multiple passes to avoid hitting limits	2025-06-10 15:57:48 -04:00
bridgewaterrobbie	1374e8acef	tinker2	2025-06-10 14:20:27 -04:00
bridgewaterrobbie	622beb211a	WIP: Use converted JS library for compute pass mipmap generation	2025-06-10 14:16:57 -04:00