webgpu: use staging buffers for updateGPUBuffer (#9360)

BUGS=450620535
This commit is contained in:
yrcloud
2025-11-03 17:28:59 -05:00
committed by GitHub
parent 804ee87356
commit 2a2caad1bf
3 changed files with 37 additions and 30 deletions

View File

@@ -17,6 +17,7 @@
#include "WebGPUBufferBase.h"
#include "WebGPUConstants.h"
#include "WebGPUQueueManager.h"
#include "DriverBase.h"
#include <backend/BufferDescriptor.h>
@@ -59,12 +60,12 @@ WebGPUBufferBase::WebGPUBufferBase(wgpu::Device const& device, const wgpu::Buffe
: mBuffer{ createBuffer(device, usage, size, label) } {}
// Updates the GPU buffer with data from a BufferDescriptor.
// WebGPU requires that the size of the data written to a buffer is a multiple of 4.
// This function handles cases where the buffer descriptor's size is not a multiple of 4
// by writing the bulk of the data first, and then copying the remaining bytes into a
// padded temporary chunk which is then written to the buffer.
// WebGPU requires that the size of the data copied from the staging buffer to the GPU buffer is a
// multiple of 4. When the buffer descriptor's size is not a multiple of 4, the staging buffer is
// rounded up to the next multiple of 4 and the extra bytes are zero-filled before the copy.
void WebGPUBufferBase::updateGPUBuffer(BufferDescriptor const& bufferDescriptor,
const uint32_t byteOffset, wgpu::Queue const& queue) {
const uint32_t byteOffset, wgpu::Device const& device,
WebGPUQueueManager* const webGPUQueueManager) {
FILAMENT_CHECK_PRECONDITION(bufferDescriptor.buffer)
<< "updateGPUBuffer called with a null buffer";
FILAMENT_CHECK_PRECONDITION(bufferDescriptor.size + byteOffset <= mBuffer.GetSize())
@@ -78,24 +79,34 @@ void WebGPUBufferBase::updateGPUBuffer(BufferDescriptor const& bufferDescriptor,
// This may have some performance implications. That should be investigated later.
assert_invariant(mBuffer.GetUsage() & wgpu::BufferUsage::CopyDst);
// Calculate the alignment-related sizes
const size_t remainder = bufferDescriptor.size % FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS;
const size_t mainBulk = bufferDescriptor.size - remainder;
const size_t stagingBufferSize =
remainder == 0 ? bufferDescriptor.size : mainBulk + FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS;
// WriteBuffer is an asynchronous call, but the CPU buffer data has already been copied to the
// staging buffer by the time WriteBuffer returns.
const size_t legalSize = bufferDescriptor.size - remainder;
queue.WriteBuffer(mBuffer, byteOffset, bufferDescriptor.buffer, legalSize);
// create a staging buffer
wgpu::BufferDescriptor descriptor{
.label = "Filament WebGPU Staging Buffer",
.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc,
.size = stagingBufferSize,
.mappedAtCreation = true };
wgpu::Buffer stagingBuffer = device.CreateBuffer(&descriptor);
void* mappedRange = stagingBuffer.GetMappedRange();
memcpy(mappedRange, bufferDescriptor.buffer, bufferDescriptor.size);
// Zero the padding bytes so that the contents of the staging buffer are deterministic
if (remainder != 0) {
const uint8_t* remainderStart =
static_cast<const uint8_t*>(bufferDescriptor.buffer) + legalSize;
memcpy(mRemainderChunk.data(), remainderStart, remainder);
// Pad the remainder with zeros to ensure deterministic behavior, though GPU shouldn't
// access this
std::memset(mRemainderChunk.data() + remainder, 0,
FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS - remainder);
queue.WriteBuffer(mBuffer, byteOffset + legalSize, &mRemainderChunk,
FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS);
uint8_t* paddingStart = static_cast<uint8_t*>(mappedRange) + bufferDescriptor.size;
memset(paddingStart, 0, FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS - remainder);
}
stagingBuffer.Unmap();
// Copy the staging buffer contents to the destination buffer.
webGPUQueueManager->getCommandEncoder().CopyBufferToBuffer(stagingBuffer, 0, mBuffer,
byteOffset, stagingBufferSize);
}
}// namespace filament::backend
} // namespace filament::backend

View File

@@ -17,16 +17,14 @@
#ifndef TNT_FILAMENT_BACKEND_WEBGPUBUFFERBASE_H
#define TNT_FILAMENT_BACKEND_WEBGPUBUFFERBASE_H
#include "WebGPUConstants.h"
#include <webgpu/webgpu_cpp.h>
#include <array>
#include <cstdint>
namespace filament::backend {
class BufferDescriptor;
class WebGPUQueueManager;
/**
* A base class for WebGPU buffer objects, providing common functionality for creating and
@@ -41,7 +39,8 @@ public:
* happen after draw commands encoded in the encoder. Submitting any commands up to this point
* ensures the calls happen in the expected sequence.
*/
void updateGPUBuffer(BufferDescriptor const&, uint32_t byteOffset, wgpu::Queue const&);
void updateGPUBuffer(BufferDescriptor const&, uint32_t byteOffset, wgpu::Device const& device,
WebGPUQueueManager* const webGPUQueueManager);
[[nodiscard]] wgpu::Buffer const& getBuffer() const { return mBuffer; }
@@ -50,9 +49,6 @@ protected:
private:
const wgpu::Buffer mBuffer;
// WebGPU requires that the source buffer of a writeBuffer call has a size that is a multiple
// of 4. This member is used to pad the data if the source size is not a multiple of 4.
std::array<uint8_t, FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS> mRemainderChunk{};
};
} // namespace filament::backend

View File

@@ -852,7 +852,7 @@ void WebGPUDriver::updateIndexBuffer(Handle<HwIndexBuffer> indexBufferHandle,
// draw calls are made.
flush();
handleCast<WebGPUIndexBuffer>(indexBufferHandle)
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice.GetQueue());
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
scheduleDestroy(std::move(bufferDescriptor));
}
@@ -863,14 +863,14 @@ void WebGPUDriver::updateBufferObject(Handle<HwBufferObject> bufferObjectHandle,
// draw calls are made.
flush();
handleCast<WebGPUBufferObject>(bufferObjectHandle)
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice.GetQueue());
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
scheduleDestroy(std::move(bufferDescriptor));
}
void WebGPUDriver::updateBufferObjectUnsynchronized(Handle<HwBufferObject> bufferObjectHandle,
BufferDescriptor&& bufferDescriptor, const uint32_t byteOffset) {
handleCast<WebGPUBufferObject>(bufferObjectHandle)
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice.GetQueue());
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
scheduleDestroy(std::move(bufferDescriptor));
}