Backend: Centralize fence synchronization and fix potential data races. (#9899)

* Backend: Centralize fence synchronization and fix potential data races.

Transitioned fence synchronization from per-fence condition variables to a
centralized shared condition variable in DriverBase, enabling future global
error propagation and interruption.

Encapsulated synchronization by hiding the mutex and condition
variable within DriverBase and exposing `waitForFence` and `signalFence`.

Fixed a critical bug in Metal backend where spurious wakeups would reset the
relative timeout duration, potentially leading to much longer waits than
intended. Resolved by using absolute time points (wait_until).

Fixed potential data races in Metal, OpenGL, and WebGPU backends by ensuring
that shared state (status or state pointers) is always accessed under the
protection of the mutex, specifically by capturing it inside the predicate.

Simplified Vulkan and WebGPU fences to be purely passive data holders.

Trade-off: Spurious wakeups are increased due to the shared condition variable,
but are handled correctly by mandatory predicates in all backends.

* Update filament/backend/src/metal/MetalHandles.mm

Co-authored-by: Ben Doherty <bendoherty@google.com>

* Update filament/backend/src/metal/MetalHandles.mm

Co-authored-by: Ben Doherty <bendoherty@google.com>

---------

Co-authored-by: Ben Doherty <bendoherty@google.com>
This commit is contained in:
Mathias Agopian
2026-04-16 11:30:41 -07:00
committed by GitHub
parent bfbb651ecd
commit a2547731c0
10 changed files with 133 additions and 125 deletions

View File

@@ -214,6 +214,25 @@ public:
void scheduleCallback(CallbackHandler* handler, void* user, CallbackHandler::Callback callback) final;
template<typename Predicate>
bool waitForFence(Predicate predicate, std::chrono::steady_clock::time_point until) {
std::unique_lock lock(mFenceMutex);
return mFenceCondition.wait_until(lock, until, predicate);
}
template<typename Predicate>
void waitForFence(Predicate predicate) {
std::unique_lock lock(mFenceMutex);
mFenceCondition.wait(lock, predicate);
}
template<typename Action>
void signalFence(Action action) {
std::lock_guard lock(mFenceMutex);
action();
mFenceCondition.notify_all();
}
// --------------------------------------------------------------------------------------------
// Privates
// --------------------------------------------------------------------------------------------
@@ -253,6 +272,9 @@ private:
std::condition_variable mServiceThreadCondition;
std::vector<std::tuple<CallbackHandler*, CallbackHandler::Callback, void*>> mServiceThreadCallbackQueue;
bool mExitRequested = false;
std::condition_variable mFenceCondition;
std::mutex mFenceMutex;
};

View File

@@ -478,8 +478,6 @@ private:
struct State {
FenceStatus status { FenceStatus::TIMEOUT_EXPIRED };
std::condition_variable cv;
std::mutex mutex;
};
std::shared_ptr<State> state { std::make_shared<State>() };

View File

@@ -20,6 +20,7 @@
#include "MetalEnums.h"
#include "MetalUtils.h"
#include "MetalBufferPool.h"
#include "MetalDriver.h"
#include <filament/SwapChain.h>
@@ -1400,12 +1401,13 @@ void MetalFence::encode() {
// Using a weak_ptr here because the Fence could be deleted before the block executes.
std::weak_ptr<State> weakState = state;
MetalDriver* driver = context.driver;
[event notifyListener:context.eventListener atValue:value block:^(id <MTLSharedEvent> o,
uint64_t value) {
if (auto s = weakState.lock()) {
std::lock_guard<std::mutex> guard(s->mutex);
s->status = FenceStatus::CONDITION_SATISFIED;
s->cv.notify_all();
driver->signalFence([&] {
s->status = FenceStatus::CONDITION_SATISFIED;
});
}
}];
}
@@ -1418,24 +1420,29 @@ void MetalFence::onSignal(MetalFenceSignalBlock block) {
FenceStatus MetalFence::wait(uint64_t timeoutNs) {
if (@available(iOS 12, *)) {
using ns = std::chrono::nanoseconds;
std::unique_lock<std::mutex> guard(state->mutex);
while (state->status == FenceStatus::TIMEOUT_EXPIRED) {
if (timeoutNs == FENCE_WAIT_FOR_EVER) {
state->cv.wait(guard);
} else if (timeoutNs == 0 ||
state->cv.wait_for(guard, ns(timeoutNs)) == std::cv_status::timeout) {
return state->status;
FenceStatus result = FenceStatus::TIMEOUT_EXPIRED;
auto predicate = [&] {
if (state->status != FenceStatus::TIMEOUT_EXPIRED) {
result = state->status;
return true;
}
return false;
};
if (timeoutNs == FENCE_WAIT_FOR_EVER) {
context.driver->waitForFence(predicate);
} else {
auto const until = std::chrono::steady_clock::now() + ns(timeoutNs);
context.driver->waitForFence(predicate, until);
}
return state->status;
return result;
}
return FenceStatus::ERROR;
}
void MetalFence::cancel() {
std::unique_lock guard(state->mutex);
state->status = FenceStatus::ERROR;
state->cv.notify_all();
context.driver->signalFence([&] {
state->status = FenceStatus::ERROR;
});
}
MetalDescriptorSetLayout::MetalDescriptorSetLayout(DescriptorSetLayout&& l) noexcept

View File

@@ -2074,9 +2074,9 @@ void OpenGLDriver::createFenceR(Handle<HwFence> fh, ImmutableCString&& tag) {
assert_invariant(f->state);
if (mPlatform.canCreateFence()) {
std::lock_guard const lock(f->state->lock);
f->fence = mPlatform.createFence();
f->state->cond.notify_all();
signalFence([&] {
f->fence = mPlatform.createFence();
});
return;
}
@@ -2090,11 +2090,11 @@ void OpenGLDriver::createFenceR(Handle<HwFence> fh, ImmutableCString&& tag) {
// This is the case where we need to use OpenGL fences, as soon as we return, the user
// is allowed to destroy the fence, so we need to keep a reference to the internal state.
std::weak_ptr<GLFence::State> const weak = f->state;
whenGpuCommandsComplete([weak] {
whenGpuCommandsComplete([weak, this] {
if (auto const state = weak.lock()) {
std::lock_guard const lock(state->lock);
state->status = FenceStatus::CONDITION_SATISFIED;
state->cond.notify_all();
signalFence([&] {
state->status = FenceStatus::CONDITION_SATISFIED;
});
}
});
#else
@@ -2633,9 +2633,9 @@ void OpenGLDriver::fenceCancel(Handle<HwFence> fh) {
GLFence const* const f = handle_cast<GLFence*>(fh);
assert_invariant(f->state);
std::lock_guard const lock(f->state->lock);
f->state->status = FenceStatus::ERROR;
f->state->cond.notify_all();
signalFence([&] {
f->state->status = FenceStatus::ERROR;
});
}
FenceStatus OpenGLDriver::getFenceStatus(Handle<HwFence> fh) {
@@ -2661,22 +2661,18 @@ FenceStatus OpenGLDriver::fenceWait(FenceHandle fh, uint64_t const timeout) {
// `f` is not supposed to become invalid while we wait.
if (mPlatform.canCreateFence()) {
std::unique_lock lock(f->state->lock);
// we've been called before the fence was created asynchronously,
// so we need to wait for that, before using the real fence.
// By construction, "f" can't be destroyed while we wait, because its
// construction call is in the queue and a destroy call will have to come later.
waitForFence([f] {
return f->fence != nullptr;
}, until);
if (f->fence == nullptr) {
// we've been called before the fence was created asynchronously,
// so we need to wait for that, before using the real fence.
// By construction, "f" can't be destroyed while we wait, because its
// construction call is in the queue and a destroy call will have to come later.
f->state->cond.wait_until(lock, until, [f] {
return f->fence != nullptr;
});
if (f->fence == nullptr) {
// the only possible choice here is that we timed out
assert_invariant(f->state->status == FenceStatus::TIMEOUT_EXPIRED);
return FenceStatus::TIMEOUT_EXPIRED;
}
// the only possible choice here is that we timed out
assert_invariant(f->state->status == FenceStatus::TIMEOUT_EXPIRED);
return FenceStatus::TIMEOUT_EXPIRED;
}
lock.unlock();
// here we know that we have the platform fence
assert_invariant(f->fence);
return mPlatform.waitFence(f->fence, timeout);
@@ -2689,11 +2685,15 @@ FenceStatus OpenGLDriver::fenceWait(FenceHandle fh, uint64_t const timeout) {
// This is the case where we need to use OpenGL fences
#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
std::unique_lock lock(f->state->lock);
f->state->cond.wait_until(lock, until, [f] {
return f->state->status != FenceStatus::TIMEOUT_EXPIRED;
});
return f->state->status;
FenceStatus result = FenceStatus::TIMEOUT_EXPIRED;
waitForFence([&] {
if (f->state->status != FenceStatus::TIMEOUT_EXPIRED) {
result = f->state->status;
return true;
}
return false;
}, until);
return result;
#else
return FenceStatus::ERROR;
#endif

View File

@@ -197,8 +197,6 @@ public:
struct GLFence : public HwFence {
using HwFence::HwFence;
struct State {
utils::Mutex lock; // NOLINT(*-include-cleaner)
utils::Condition cond; // NOLINT(*-include-cleaner)
FenceStatus status{ FenceStatus::TIMEOUT_EXPIRED };
};
std::shared_ptr<State> state{ std::make_shared<State>() };

View File

@@ -204,39 +204,21 @@ struct VulkanFence : public HwFence, fvkmemory::ThreadSafeResource {
VulkanFence() {}
void setFence(std::shared_ptr<VulkanCmdFence> fence) {
std::lock_guard const l(lock);
sharedFence = std::move(fence);
cond.notify_all();
}
std::shared_ptr<VulkanCmdFence>& getSharedFence() {
std::lock_guard const l(lock);
return sharedFence;
}
std::pair<std::shared_ptr<VulkanCmdFence>, bool>
wait(std::chrono::steady_clock::time_point const until) {
// hold a reference so that our state doesn't disappear while we wait
std::unique_lock l(lock);
cond.wait_until(l, until, [this] {
return bool(sharedFence) || canceled;
});
// here mSharedFence will be null if we timed out
std::pair<std::shared_ptr<VulkanCmdFence>, bool> getStatus() const {
return { sharedFence, canceled };
}
void cancel() const {
std::lock_guard const l(lock);
if (sharedFence) {
sharedFence->cancel();
}
canceled = true;
cond.notify_all();
}
private:
mutable std::mutex lock;
mutable std::condition_variable cond;
mutable bool canceled = false;
std::shared_ptr<VulkanCmdFence> sharedFence;
};

View File

@@ -1008,7 +1008,9 @@ void VulkanDriver::createFenceR(Handle<HwFence> fh, utils::ImmutableCString&& ta
// it with appropriate VulkanCmdFence, which is associated with the current, recording command
// buffer.
auto fence = resource_ptr<VulkanFence>::cast(&mResourceManager, fh);
fence->setFence(cmdbuf->getFenceStatus());
signalFence([&] {
fence->setFence(cmdbuf->getFenceStatus());
});
mResourceManager.associateHandle(fh.getId(), std::move(tag));
}
@@ -1459,7 +1461,9 @@ void VulkanDriver::fenceCancel(FenceHandle const fh) {
// Even though this is a synchronous call, the fence handle must be (and stay) valid
assert_invariant(fh);
auto fence = resource_ptr<VulkanFence>::cast(&mResourceManager, fh);
fence->cancel();
signalFence([&] {
fence->cancel();
});
}
FenceStatus VulkanDriver::getFenceStatus(Handle<HwFence> const fh) {
@@ -1486,7 +1490,18 @@ FenceStatus VulkanDriver::fenceWait(FenceHandle const fh, uint64_t const timeout
until = now + nanoseconds(timeout);
}
auto const [cmdfence, canceled] = fence->wait(until);
std::shared_ptr<VulkanCmdFence> cmdfence;
bool canceled = false;
waitForFence([&] {
auto status = fence->getStatus();
if (bool(status.first) || status.second) {
cmdfence = status.first;
canceled = status.second;
return true;
}
return false;
}, until);
if (!cmdfence || canceled) {
return canceled ? FenceStatus::ERROR : FenceStatus::TIMEOUT_EXPIRED;
}

View File

@@ -702,7 +702,9 @@ void WebGPUDriver::createRenderTargetR(Handle<HwRenderTarget> renderTargetHandle
void WebGPUDriver::createFenceR(Handle<HwFence> fenceHandle, utils::ImmutableCString&& tag) {
// The handle is constructed synchronously in createFenceS.
const auto fence = handleCast<WebGPUFence>(fenceHandle);
fence->setSubmissionState(mQueueManager.getLatestSubmissionState());
signalFence([&] {
fence->setSubmissionState(mQueueManager.getLatestSubmissionState());
});
setDebugTag(fenceHandle.getId(), std::move(tag));
}
@@ -772,11 +774,7 @@ void WebGPUDriver::fenceCancel(FenceHandle fh) {
}
FenceStatus WebGPUDriver::getFenceStatus(Handle<HwFence> fenceHandle) {
const auto fence = handleCast<WebGPUFence>(fenceHandle);
if (!fence) {
return FenceStatus::ERROR;
}
return fence->getStatus();
return fenceWait(fenceHandle, 0);
}
FenceStatus WebGPUDriver::fenceWait(FenceHandle fenceHandle, uint64_t const timeout) {
@@ -784,7 +782,38 @@ FenceStatus WebGPUDriver::fenceWait(FenceHandle fenceHandle, uint64_t const time
if (!fence) {
return FenceStatus::ERROR;
}
return fence->wait(timeout);
using namespace std::chrono;
auto now = steady_clock::now();
steady_clock::time_point until = steady_clock::time_point::max();
using TimeoutType = decltype(timeout);
constexpr TimeoutType maxTimeout = std::numeric_limits<TimeoutType>::max();
constexpr nanoseconds maxNano = nanoseconds::max();
if (timeout < maxNano.count() && timeout < maxTimeout && // Need to account for overflow
now <= steady_clock::time_point::max() - nanoseconds(timeout)) {
until = now + nanoseconds(timeout);
}
std::shared_ptr<WebGPUSubmissionState> state;
bool const success = waitForFence([&] {
state = fence->getState();
return bool(state);
}, until);
if (!success) {
return FenceStatus::TIMEOUT_EXPIRED;
}
auto duration_ns = static_cast<uint64_t>(
std::chrono::duration_cast<std::chrono::nanoseconds>(steady_clock::now() - now)
.count());
// In the unlikely event that duration_ns is too close to timeout, we just assume the leftover
// timeout is 0.
if (timeout == 0 || duration_ns > timeout) {
duration_ns = timeout;
}
return state->waitForCompletion(timeout - duration_ns);
}
void WebGPUDriver::destroySync(Handle<HwSync> syncHandle) {

View File

@@ -23,49 +23,6 @@ namespace filament::backend {
WebGPUFence::WebGPUFence() = default;
WebGPUFence::~WebGPUFence() = default;
void WebGPUFence::setSubmissionState(std::shared_ptr<WebGPUSubmissionState> state) {
std::lock_guard<std::mutex> const lock(mLock);
mState = state;
mCond.notify_one();
}
FenceStatus WebGPUFence::getStatus() {
return wait(0);
}
FenceStatus WebGPUFence::wait(uint64_t timeout) {
// we have to take into account that the STL's wait_for() actually works with
// time_points relative to steady_clock::now() internally.
using namespace std::chrono;
auto now = steady_clock::now();
steady_clock::time_point until = steady_clock::time_point::max();
using TimeoutType = decltype(timeout);
constexpr TimeoutType maxTimeout = std::numeric_limits<TimeoutType>::max();
constexpr nanoseconds maxNano = nanoseconds::max();
if (timeout < maxNano.count() && timeout < maxTimeout && // Need to account for overflow
now <= steady_clock::time_point::max() - nanoseconds(timeout)) {
until = now + nanoseconds(timeout);
}
{
std::unique_lock<std::mutex> lock(mLock);
bool const success = mCond.wait_until(lock, until, [this] {
return bool(mState);
});
if (!success) {
return FenceStatus::TIMEOUT_EXPIRED;
}
}
auto duration_ns = static_cast<uint64_t>(
std::chrono::duration_cast<std::chrono::nanoseconds>(steady_clock::now() - now)
.count());
// In the unlikely event that duration_ns is too close to timeout, we just assume the leftover
// timeout is 0.
if (timeout == 0 || duration_ns > timeout) {
duration_ns = timeout;
}
return mState->waitForCompletion(timeout - duration_ns);
}
} // namespace filament::backend

View File

@@ -33,15 +33,15 @@ public:
~WebGPUFence();
// Initialize the fence with a submission state from the queue manager.
void setSubmissionState(std::shared_ptr<WebGPUSubmissionState> state);
void setSubmissionState(std::shared_ptr<WebGPUSubmissionState> state) {
mState = state;
}
// Note that getStatus cannot be const since it's implemented with wait();
FenceStatus getStatus();
FenceStatus wait(uint64_t timeout);
std::shared_ptr<WebGPUSubmissionState> getState() const {
return mState;
}
private:
std::mutex mLock;
std::condition_variable mCond;
std::shared_ptr<WebGPUSubmissionState> mState;
};