Log GPU errors through Platform debugStat API (#9434)
This commit is contained in:
@@ -8,5 +8,5 @@ appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md).
|
||||
## Release notes for next branch cut
|
||||
|
||||
- engine: add `View::getLastDynamicResolutionScale()` (b/457753622)
|
||||
|
||||
- Metal: report GPU errors to the platform via `debugUpdateStat` (b/431665753).
|
||||
- materials: Make Material Instances' UBO descriptor use dynamic offsets. [⚠️ **Recompile Materials**]
|
||||
|
||||
@@ -19,13 +19,17 @@
|
||||
#ifndef TNT_FILAMENT_BACKEND_PLATFORM_H
|
||||
#define TNT_FILAMENT_BACKEND_PLATFORM_H
|
||||
|
||||
#include <utils/CString.h>
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/Invocable.h>
|
||||
#include <utils/Mutex.h>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
namespace filament::backend {
|
||||
|
||||
@@ -508,13 +512,22 @@ public:
|
||||
// --------------------------------------------------------------------------------------------
|
||||
// Debugging APIs
|
||||
|
||||
using DebugUpdateStatFunc = utils::Invocable<void(const char* UTILS_NONNULL key, uint64_t value)>;
|
||||
using DebugUpdateStatFunc = utils::Invocable<void(const char* UTILS_NONNULL key,
|
||||
uint64_t intValue, utils::CString stringValue)>;
|
||||
|
||||
/**
|
||||
* Sets the callback function that the backend can use to update backend-specific statistics
|
||||
* to aid with debugging. This callback is guaranteed to be called on the Filament driver
|
||||
* thread.
|
||||
*
|
||||
* The callback signature is (key, intValue, stringValue). Note that for any given call,
|
||||
* only one of the value parameters (intValue or stringValue) will be meaningful, depending on
|
||||
* the specific key.
|
||||
*
|
||||
* IMPORTANT_NOTE: because the callback is called on the driver thread, only quick, non-blocking
|
||||
* work should be done inside it. Furthermore, no graphics API calls (such as GL calls) should
|
||||
* be made, which could interfere with Filament's driver state.
|
||||
*
|
||||
* @param debugUpdateStat an Invocable that updates debug statistics
|
||||
*/
|
||||
void setDebugUpdateStatFunc(DebugUpdateStatFunc&& debugUpdateStat) noexcept;
|
||||
@@ -533,15 +546,32 @@ public:
|
||||
* This function is guaranteed to be called only on a single thread, the Filament driver
|
||||
* thread.
|
||||
*
|
||||
* @param key a null-terminated C-string with the key of the debug statistic
|
||||
* @param value the updated value of key
|
||||
* @param key a null-terminated C-string with the key of the debug statistic
|
||||
* @param intValue the updated integer value of key (the string value passed to the
|
||||
* callback will be empty)
|
||||
*/
|
||||
void debugUpdateStat(const char* UTILS_NONNULL key, uint64_t value);
|
||||
void debugUpdateStat(const char* UTILS_NONNULL key, uint64_t intValue);
|
||||
|
||||
/**
|
||||
* To track backend-specific statistics, the backend implementation can call the
|
||||
* application-provided callback function debugUpdateStatFunc to associate or update a value
|
||||
* with a given key. It is possible for this function to be called multiple times with the
|
||||
* same key, in which case newer values should overwrite older values.
|
||||
*
|
||||
* This function is guaranteed to be called only on a single thread, the Filament driver
|
||||
* thread.
|
||||
*
|
||||
* @param key a null-terminated C-string with the key of the debug statistic
|
||||
* @param stringValue the updated string value of key (the integer value passed to the
|
||||
* callback will be 0)
|
||||
*/
|
||||
void debugUpdateStat(const char* UTILS_NONNULL key, utils::CString stringValue);
|
||||
|
||||
private:
|
||||
InsertBlobFunc mInsertBlob;
|
||||
RetrieveBlobFunc mRetrieveBlob;
|
||||
DebugUpdateStatFunc mDebugUpdateStat;
|
||||
std::shared_ptr<InsertBlobFunc> mInsertBlob;
|
||||
std::shared_ptr<RetrieveBlobFunc> mRetrieveBlob;
|
||||
std::shared_ptr<DebugUpdateStatFunc> mDebugUpdateStat;
|
||||
mutable utils::Mutex mMutex;
|
||||
};
|
||||
|
||||
} // namespace filament::backend
|
||||
|
||||
@@ -139,42 +139,73 @@ bool Platform::queryFrameTimestamps(SwapChain const*, uint64_t, FrameTimestamps*
|
||||
}
|
||||
|
||||
void Platform::setBlobFunc(InsertBlobFunc&& insertBlob, RetrieveBlobFunc&& retrieveBlob) noexcept {
|
||||
mInsertBlob = std::move(insertBlob);
|
||||
mRetrieveBlob = std::move(retrieveBlob);
|
||||
std::lock_guard<decltype(mMutex)> lock(mMutex);
|
||||
mInsertBlob = std::make_shared<InsertBlobFunc>(std::move(insertBlob));
|
||||
mRetrieveBlob = std::make_shared<RetrieveBlobFunc>(std::move(retrieveBlob));
|
||||
}
|
||||
|
||||
bool Platform::hasInsertBlobFunc() const noexcept {
|
||||
std::lock_guard<decltype(mMutex)> lock(mMutex);
|
||||
return bool(mInsertBlob);
|
||||
}
|
||||
|
||||
bool Platform::hasRetrieveBlobFunc() const noexcept {
|
||||
std::lock_guard<decltype(mMutex)> lock(mMutex);
|
||||
return bool(mRetrieveBlob);
|
||||
}
|
||||
|
||||
void Platform::insertBlob(void const* key, size_t keySize, void const* value, size_t valueSize) {
|
||||
if (mInsertBlob) {
|
||||
mInsertBlob(key, keySize, value, valueSize);
|
||||
std::shared_ptr<InsertBlobFunc> callback;
|
||||
{
|
||||
std::unique_lock<decltype(mMutex)> lock(mMutex);
|
||||
callback = mInsertBlob;
|
||||
}
|
||||
if (callback) {
|
||||
(*callback)(key, keySize, value, valueSize);
|
||||
}
|
||||
}
|
||||
|
||||
size_t Platform::retrieveBlob(void const* key, size_t keySize, void* value, size_t valueSize) {
|
||||
if (mRetrieveBlob) {
|
||||
return mRetrieveBlob(key, keySize, value, valueSize);
|
||||
std::shared_ptr<RetrieveBlobFunc> callback;
|
||||
{
|
||||
std::unique_lock<decltype(mMutex)> lock(mMutex);
|
||||
callback = mRetrieveBlob;
|
||||
}
|
||||
if (callback) {
|
||||
return (*callback)(key, keySize, value, valueSize);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Platform::setDebugUpdateStatFunc(DebugUpdateStatFunc&& debugUpdateStat) noexcept {
|
||||
mDebugUpdateStat = std::move(debugUpdateStat);
|
||||
std::lock_guard<decltype(mMutex)> lock(mMutex);
|
||||
mDebugUpdateStat = std::make_shared<DebugUpdateStatFunc>(std::move(debugUpdateStat));
|
||||
}
|
||||
|
||||
bool Platform::hasDebugUpdateStatFunc() const noexcept {
|
||||
return bool(mDebugUpdateStat);
|
||||
std::lock_guard<decltype(mMutex)> lock(mMutex);
|
||||
return mDebugUpdateStat != nullptr;
|
||||
}
|
||||
|
||||
void Platform::debugUpdateStat(const char* key, uint64_t value) {
|
||||
if (mDebugUpdateStat) {
|
||||
mDebugUpdateStat(key, value);
|
||||
void Platform::debugUpdateStat(const char* key, uint64_t intValue) {
|
||||
std::shared_ptr<DebugUpdateStatFunc> callback;
|
||||
{
|
||||
std::unique_lock<decltype(mMutex)> lock(mMutex);
|
||||
callback = mDebugUpdateStat;
|
||||
}
|
||||
if (callback) {
|
||||
(*callback)(key, intValue, "");
|
||||
}
|
||||
}
|
||||
|
||||
void Platform::debugUpdateStat(const char* key, utils::CString stringValue) {
|
||||
std::shared_ptr<DebugUpdateStatFunc> callback;
|
||||
{
|
||||
std::unique_lock<decltype(mMutex)> lock(mMutex);
|
||||
callback = mDebugUpdateStat;
|
||||
}
|
||||
if (callback) {
|
||||
(*callback)(key, 0, stringValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#ifndef TNT_METALCONTEXT_H
|
||||
#define TNT_METALCONTEXT_H
|
||||
|
||||
#include "MetalErrorQueue.h"
|
||||
#include "MetalResourceTracker.h"
|
||||
#include "MetalShaderCompiler.h"
|
||||
#include "MetalState.h"
|
||||
@@ -129,6 +130,7 @@ struct MetalContext {
|
||||
id<MTLCommandBuffer> pendingCommandBuffer = nil;
|
||||
id<MTLRenderCommandEncoder> currentRenderPassEncoder = nil;
|
||||
uint32_t currentFrame = 0;
|
||||
MetalErrorQueue commandBufferErrors;
|
||||
|
||||
std::atomic<bool> memorylessLimitsReached = false;
|
||||
|
||||
|
||||
@@ -153,6 +153,7 @@ id<MTLCommandBuffer> getPendingCommandBuffer(MetalContext* context) {
|
||||
|
||||
if (UTILS_UNLIKELY(errorCode != MTLCommandBufferErrorNone)) {
|
||||
logMTLCommandBufferError(errorCode);
|
||||
context->commandBufferErrors.push(buffer.error);
|
||||
}
|
||||
}];
|
||||
FILAMENT_CHECK_POSTCONDITION(context->pendingCommandBuffer)
|
||||
|
||||
@@ -254,6 +254,18 @@ MetalDriver::~MetalDriver() noexcept {
|
||||
// Runs the per-tick and deferred operations, then reports any queued Metal command buffer
// errors to the platform through debugUpdateStat.
void MetalDriver::tick(int) {
    executeTickOps();
    executeDeferredOps();

    auto& targetPlatform = mPlatform;
    auto& pendingErrors = mContext->commandBufferErrors;

    // Common case: no command buffer reported an error, so there is nothing to forward.
    if (UTILS_LIKELY(pendingErrors.isEmpty())) {
        return;
    }

    // Each queued error is reported under the same key, using its localized description.
    pendingErrors.flush([&targetPlatform](NSError* gpuError) {
        if (UTILS_VERY_UNLIKELY(gpuError == nil)) {
            return;
        }
        const char* const description = gpuError.localizedDescription.UTF8String;
        targetPlatform.debugUpdateStat("filament.metal.command_buffer_error",
                utils::CString(description));
    });
}
|
||||
|
||||
void MetalDriver::beginFrame(int64_t monotonic_clock_ns,
|
||||
|
||||
66
filament/backend/src/metal/MetalErrorQueue.h
Normal file
66
filament/backend/src/metal/MetalErrorQueue.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (C) 2025 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TNT_FILAMENT_DRIVER_METALERRORQUEUE_H
|
||||
#define TNT_FILAMENT_DRIVER_METALERRORQUEUE_H
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
#include <utils/compiler.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
/**
 * A queue of NSError objects that are collected with push() and later handed, in insertion
 * order, to a callback by flush().
 *
 * push() and flush() serialize access to the stored errors with a mutex. isEmpty() only reads
 * an atomic flag and never takes the mutex.
 *
 * NOTE(review): the errors are stored as plain NSError* in a std::vector; this presumably
 * relies on ARC to keep them alive while queued -- confirm every translation unit including
 * this header is compiled with ARC.
 */
class MetalErrorQueue {
public:
    /**
     * Lock-free hint telling whether any error has been pushed since the last flush().
     *
     * The flag is read with relaxed ordering and is only a hint: the error list itself is
     * always accessed under mMutex.
     *
     * @return true if no error is known to be pending
     */
    bool isEmpty() const {
        return !mHasErrors.load(std::memory_order_relaxed);
    }

    /**
     * Appends an error to the queue and marks the queue as non-empty.
     *
     * @param error the error to enqueue; it is stored as-is (no nil check is done here)
     */
    void push(NSError* error) {
        std::lock_guard<std::mutex> lock(mMutex);
        mErrors.push_back(error);
        mHasErrors.store(true, std::memory_order_relaxed);
    }

    /**
     * Removes all queued errors and invokes callback once per error, in insertion order.
     * Returns immediately, without locking, when isEmpty() reports no pending error.
     *
     * @param callback invoked for each dequeued error; it runs after the mutex has been
     *                 released
     */
    void flush(const std::function<void(NSError*)>& callback) {
        if (UTILS_LIKELY(isEmpty())) {
            return;
        }
        // Take the whole batch under the lock, then run the callbacks without holding it.
        std::vector<NSError*> errors;
        {
            std::lock_guard<std::mutex> lock(mMutex);
            std::swap(mErrors, errors);
            mHasErrors.store(false, std::memory_order_relaxed);
        }

        for (const auto& error: errors) {
            callback(error);
        }
    }

private:
    std::vector<NSError*> mErrors;
    std::mutex mMutex;

    // Optimization to avoid locking the mutex at each call to flush.
    // Explicitly initialized: before C++20 a default-constructed std::atomic<bool> holds an
    // indeterminate value, which would make the first isEmpty()/flush() read garbage.
    std::atomic<bool> mHasErrors{false};
};
|
||||
|
||||
|
||||
#endif // TNT_FILAMENT_DRIVER_METALERRORQUEUE_H
|
||||
Reference in New Issue
Block a user