mirror of
https://github.com/wolfpld/tracy.git
synced 2026-06-08 00:23:47 +00:00
Compare commits
29 Commits
fc5318dcad
...
slomp/trac
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd13487b86 | ||
|
|
cf81651ea1 | ||
|
|
5597b79f1f | ||
|
|
dd7060a4d9 | ||
|
|
4094c89ef6 | ||
|
|
c47f69a694 | ||
|
|
0f22144e4b | ||
|
|
5a1629d36c | ||
|
|
5f82102fba | ||
|
|
4cf3160c2b | ||
|
|
47397fc9b2 | ||
|
|
3bfe769675 | ||
|
|
77dedb7bb1 | ||
|
|
4ba1c7ea57 | ||
|
|
746f1d028c | ||
|
|
388a94fc79 | ||
|
|
f657f1e99d | ||
|
|
88e87a3348 | ||
|
|
ecbdfde549 | ||
|
|
7951f9a8db | ||
|
|
0091a6b0a8 | ||
|
|
e5aa8eba51 | ||
|
|
7437c41514 | ||
|
|
f441a5070b | ||
|
|
00b6abd67b | ||
|
|
e4e3d75eb8 | ||
|
|
7cb98245ce | ||
|
|
55d5436fb9 | ||
|
|
2b11785b05 |
@@ -4,7 +4,7 @@
|
||||
|
||||
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
|
||||
|
||||
Tracy supports profiling CPU (Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/tealsnow/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
||||
Tracy supports profiling CPU (Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/tealsnow/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphics/compute APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA, WebGPU.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
||||
|
||||
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
|
||||
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
|
||||
|
||||
191
examples/webgpu/triangle/CMakeLists.txt
Normal file
191
examples/webgpu/triangle/CMakeLists.txt
Normal file
@@ -0,0 +1,191 @@
|
||||
# CMakeLists.txt — WebGPU spinning triangle demo
|
||||
#
|
||||
# macOS:
|
||||
# clang++ -std=c++17 -ObjC++ spinning_triangle.cpp platform/platform_macos.mm \
|
||||
# -I/path/to/wgpu/include -L/path/to/wgpu/lib -lwgpu_native \
|
||||
# -Wl,-rpath,@executable_path \
|
||||
# -framework Cocoa -framework Metal -framework QuartzCore \
|
||||
# -framework Foundation -framework IOKit -framework IOSurface \
|
||||
# -o spinning_triangle
|
||||
#
|
||||
# Windows (MSVC):
|
||||
# cl /std:c++17 spinning_triangle.cpp platform/platform_windows.cpp \
|
||||
# /I\path\to\wgpu\include \path\to\wgpu\lib\wgpu_native.lib \
|
||||
# user32.lib gdi32.lib /Fe:spinning_triangle.exe
|
||||
#
|
||||
# Linux / Wayland:
|
||||
# g++ -std=c++17 spinning_triangle.cpp platform/platform_wayland.cpp \
|
||||
# xdg-shell-protocol.c \
|
||||
# -I/path/to/wgpu/include -L/path/to/wgpu/lib -lwgpu_native \
|
||||
# -lwayland-client -o spinning_triangle
|
||||
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
project(spinning_triangle LANGUAGES C CXX)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebGPU backend — set WGPU_PATH to your wgpu-native or Dawn installation.
|
||||
# The library name differs between backends:
|
||||
# wgpu-native → wgpu_native
|
||||
# Dawn → webgpu_dawn
|
||||
# ---------------------------------------------------------------------------
|
||||
# User-facing cache entries. WGPU_PATH is mandatory; WGPU_LIB may be left empty
# to let the script probe "${WGPU_PATH}/lib" for a known library name.
set(_WGPU_LIB_DOC "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty")
set(WGPU_PATH "" CACHE PATH "Root of the WebGPU native installation (contains include/ and lib/)")
set(WGPU_LIB "" CACHE STRING "${_WGPU_LIB_DOC}")

if(NOT WGPU_PATH)
    message(FATAL_ERROR "Set WGPU_PATH to the root of your WebGPU native installation.")
endif()

# A WGPU_LIB value that was auto-detected for a previous WGPU_PATH is stale once
# the path changes: reset it so the probe below runs again. A value the user
# typed in (_WGPU_LIB_AUTO is FALSE) is never touched.
if(_WGPU_LIB_AUTO AND NOT "${WGPU_PATH}" STREQUAL "${_WGPU_PATH_LAST}")
    unset(WGPU_LIB CACHE)
    set(WGPU_LIB "" CACHE STRING "${_WGPU_LIB_DOC}")
endif()
set(_WGPU_PATH_LAST "${WGPU_PATH}" CACHE INTERNAL "")

if(WGPU_LIB)
    # Explicit choice: remember that it did not come from auto-detection.
    set(_WGPU_LIB_AUTO FALSE CACHE INTERNAL "")
else()
    # find_library caches its result, so clear both probes first to make them
    # look at the current WGPU_PATH.
    unset(_WGPU_NATIVE_LIB CACHE)
    unset(_WEBGPU_DAWN_LIB CACHE)
    find_library(_WGPU_NATIVE_LIB
        NAMES wgpu_native wgpu_native.dll
        PATHS "${WGPU_PATH}/lib"
        NO_DEFAULT_PATH
    )
    find_library(_WEBGPU_DAWN_LIB
        NAMES webgpu_dawn
        PATHS "${WGPU_PATH}/lib"
        NO_DEFAULT_PATH
    )

    # wgpu-native takes precedence when both libraries are present.
    if(_WGPU_NATIVE_LIB)
        set(WGPU_LIB "wgpu_native" CACHE STRING "${_WGPU_LIB_DOC}" FORCE)
    elseif(_WEBGPU_DAWN_LIB)
        set(WGPU_LIB "webgpu_dawn" CACHE STRING "${_WGPU_LIB_DOC}" FORCE)
    else()
        message(FATAL_ERROR "Could not detect a WebGPU library in ${WGPU_PATH}/lib. Set WGPU_LIB explicitly (wgpu_native or webgpu_dawn).")
    endif()

    set(_WGPU_LIB_AUTO TRUE CACHE INTERNAL "")
    message(STATUS "WebGPU library auto-detected: ${WGPU_LIB}")
endif()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tracy root — the repository root, three directories above this CMakeLists.txt.
|
||||
# ---------------------------------------------------------------------------
|
||||
set(TRACY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
|
||||
option(TRACY_ENABLE "Enable Tracy profiling" ON)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# macOS quarantine — pre-built WebGPU binaries downloaded from the internet
|
||||
# carry a com.apple.quarantine extended attribute that prevents dyld from
|
||||
# loading them ("damaged or incomplete" / Gatekeeper block). Strip it once
|
||||
# at configure time so the linker and the runtime loader can both access the
|
||||
# library directory without further user intervention.
|
||||
# ---------------------------------------------------------------------------
|
||||
if(APPLE)
    # Best-effort: the exit status and output of xattr are not inspected, so a
    # failure here never stops the configure step.
    set(_WGPU_QUARANTINE_DIR "${WGPU_PATH}/lib")
    execute_process(COMMAND xattr -dr com.apple.quarantine "${_WGPU_QUARANTINE_DIR}")
endif()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Platform-specific source and link settings
|
||||
# ---------------------------------------------------------------------------
|
||||
# Outputs of this section, consumed by the target definition:
#   PLATFORM_SOURCES            - backend source files (plus generated protocol glue)
#   PLATFORM_LIBS               - system libraries / frameworks to link
#   PLATFORM_GENERATED_INCLUDES - extra include dir for generated headers (may be empty)
set(PLATFORM_GENERATED_INCLUDES "")

if(APPLE)
    set(PLATFORM_SOURCES platform/platform_macos.mm)
    set(PLATFORM_LIBS
        "-framework Cocoa"
        "-framework Metal"
        "-framework QuartzCore"
        "-framework Foundation"
        "-framework IOKit"
        "-framework IOSurface"
    )
    set_source_files_properties(platform/platform_macos.mm
        PROPERTIES COMPILE_FLAGS "-ObjC++"
    )
elseif(WIN32)
    set(PLATFORM_SOURCES platform/platform_windows.cpp)
    set(PLATFORM_LIBS user32 gdi32)
else()
    # Linux / Wayland — generate xdg-shell protocol glue via wayland-scanner.
    find_package(PkgConfig REQUIRED)
    pkg_check_modules(WAYLAND_PROTOCOLS REQUIRED wayland-protocols)
    pkg_get_variable(WAYLAND_PROTOCOLS_DIR wayland-protocols pkgdatadir)

    # The REQUIRED keyword of find_program() only exists since CMake 3.18, but
    # this project declares 3.16 as its minimum, so check the result by hand.
    find_program(WAYLAND_SCANNER wayland-scanner)
    if(NOT WAYLAND_SCANNER)
        message(FATAL_ERROR "wayland-scanner not found; it is required to generate the xdg-shell protocol sources.")
    endif()

    set(XDG_SHELL_XML "${WAYLAND_PROTOCOLS_DIR}/stable/xdg-shell/xdg-shell.xml")
    set(XDG_SHELL_H   "${CMAKE_CURRENT_BINARY_DIR}/xdg-shell-client-protocol.h")
    set(XDG_SHELL_C   "${CMAKE_CURRENT_BINARY_DIR}/xdg-shell-protocol.c")

    add_custom_command(
        OUTPUT  "${XDG_SHELL_H}"
        COMMAND "${WAYLAND_SCANNER}" client-header "${XDG_SHELL_XML}" "${XDG_SHELL_H}"
        DEPENDS "${XDG_SHELL_XML}"
        COMMENT "Generating xdg-shell-client-protocol.h"
        VERBATIM
    )
    add_custom_command(
        OUTPUT  "${XDG_SHELL_C}"
        COMMAND "${WAYLAND_SCANNER}" private-code "${XDG_SHELL_XML}" "${XDG_SHELL_C}"
        DEPENDS "${XDG_SHELL_XML}"
        COMMENT "Generating xdg-shell-protocol.c"
        VERBATIM
    )

    # Listing the generated files as sources makes the target depend on the
    # custom commands above.
    set(PLATFORM_SOURCES
        platform/platform_wayland.cpp
        "${XDG_SHELL_C}"
        "${XDG_SHELL_H}"
    )
    set(PLATFORM_LIBS wayland-client)
    set(PLATFORM_GENERATED_INCLUDES "${CMAKE_CURRENT_BINARY_DIR}")
endif()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Target
|
||||
# ---------------------------------------------------------------------------
|
||||
set(_TRACY_CLIENT_SRC "${TRACY_DIR}/public/TracyClient.cpp")

add_executable(spinning_triangle
    spinning_triangle.cpp
    "${_TRACY_CLIENT_SRC}"
    ${PLATFORM_SOURCES}
)

# TracyClient.cpp is third-party code: silence every warning for that one file
# so upstream changes do not clutter this example's build output.
if(MSVC)
    set(_TRACY_NO_WARNINGS_FLAG "/w")
else()
    set(_TRACY_NO_WARNINGS_FLAG "-w")
endif()
set_source_files_properties("${_TRACY_CLIENT_SRC}"
    PROPERTIES COMPILE_FLAGS "${_TRACY_NO_WARNINGS_FLAG}"
)

target_compile_features(spinning_triangle PRIVATE cxx_std_17)

# The TRACY_ENABLE option is forwarded as a preprocessor definition.
if(TRACY_ENABLE)
    target_compile_definitions(spinning_triangle PRIVATE TRACY_ENABLE)
endif()

target_include_directories(spinning_triangle PRIVATE
    "${WGPU_PATH}/include"
    "${TRACY_DIR}/public"
    ${PLATFORM_GENERATED_INCLUDES}
)

target_link_directories(spinning_triangle PRIVATE "${WGPU_PATH}/lib")

target_link_libraries(spinning_triangle PRIVATE
    ${WGPU_LIB}
    ${PLATFORM_LIBS}
)

# Runtime search path for the WebGPU shared library: the build tree points at
# WGPU_PATH/lib, an installed binary looks next to the executable.
# APPLE implies UNIX, so one outer check covers both branches.
if(UNIX)
    if(APPLE)
        set(_SELF_RELATIVE_RPATH "@executable_path")
    else()
        set(_SELF_RELATIVE_RPATH "$ORIGIN")
    endif()
    set_target_properties(spinning_triangle PROPERTIES
        BUILD_RPATH   "${WGPU_PATH}/lib"
        INSTALL_RPATH "${_SELF_RELATIVE_RPATH}"
    )
endif()
|
||||
23
examples/webgpu/triangle/platform/platform.h
Normal file
23
examples/webgpu/triangle/platform/platform.h
Normal file
@@ -0,0 +1,23 @@
|
||||
// platform.h — interface between platform-agnostic code and platform backends
|
||||
//
|
||||
// Each platform_*.mm / platform_*.cpp file implements these four functions.
|
||||
// Exactly one backend must be linked into the final binary.
|
||||
|
||||
#pragma once
|
||||
#include <webgpu/webgpu.h>
|
||||
|
||||
// Initialize the windowing system and create a window of the given dimensions.
|
||||
// Returns true on success.
|
||||
bool platformInit(int width, int height, const char* title);
|
||||
|
||||
// Create a WebGPU surface backed by the platform window.
|
||||
// Must be called after wgpuCreateInstance() and platformInit().
|
||||
WGPUSurface platformCreateSurface(WGPUInstance instance);
|
||||
|
||||
// Elapsed wall-clock time in seconds since platformInit().
|
||||
double platformGetTime();
|
||||
|
||||
// Enter the platform event/render loop.
|
||||
// Calls render() each frame at ~60 fps.
|
||||
// Calls shutdown() exactly once before returning.
|
||||
void platformRunLoop(void (*render)(), void (*shutdown)());
|
||||
120
examples/webgpu/triangle/platform/platform_macos.mm
Normal file
120
examples/webgpu/triangle/platform/platform_macos.mm
Normal file
@@ -0,0 +1,120 @@
|
||||
// platform_macos.mm — macOS backend (Cocoa + CAMetalLayer)
|
||||
//
|
||||
// Compile flags (see the CMakeLists.txt header for the full invocation):
|
||||
// -ObjC++ -framework Cocoa -framework Metal -framework QuartzCore \
|
||||
// -framework Foundation -framework IOKit -framework IOSurface
|
||||
|
||||
#import <Cocoa/Cocoa.h>
|
||||
#import <QuartzCore/CAMetalLayer.h>
|
||||
#include <CoreFoundation/CFDate.h>
|
||||
#include <webgpu/webgpu.h>
|
||||
#include "platform.h"
|
||||
|
||||
static CAMetalLayer* sMetalLayer = nullptr;
|
||||
static CFAbsoluteTime sStartTime = 0;
|
||||
static void (*sRenderCb)() = nullptr;
|
||||
static void (*sShutdownCb)() = nullptr;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Cocoa app — window, metal layer, render timer
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Application delegate: owns the window and the frame timer, and forwards
// timer ticks / termination to the callbacks registered by platformRunLoop().
@interface AppDelegate : NSObject <NSApplicationDelegate, NSWindowDelegate>
@property (strong) NSWindow* window;
@property (strong) NSTimer*  timer;
@end

@implementation AppDelegate

// Starts the frame timer, installs the Escape-to-quit key monitor and shows
// the window.
- (void)applicationDidFinishLaunching:(NSNotification*)notification {
    static const NSTimeInterval kFrameInterval = 1.0 / 60.0;  // ~60 fps
    static const unsigned short kEscapeKeyCode = 53;          // kVK_Escape

    NSTimer* frameTimer = [NSTimer scheduledTimerWithTimeInterval:kFrameInterval
                                                           target:self
                                                         selector:@selector(tick:)
                                                         userInfo:nil
                                                          repeats:YES];
    self.timer = frameTimer;
    // Also register the timer for the common run-loop modes.
    [[NSRunLoop currentRunLoop] addTimer:frameTimer forMode:NSRunLoopCommonModes];

    // Returning nil from the monitor swallows the event; every other key press
    // is passed through unchanged.
    [NSEvent addLocalMonitorForEventsMatchingMask:NSEventMaskKeyDown
                                          handler:^NSEvent*(NSEvent* event) {
        if (event.keyCode != kEscapeKeyCode) {
            return event;
        }
        [NSApp terminate:nil];
        return nil;
    }];

    [self.window makeKeyAndOrderFront:nil];
}

// Timer callback: renders one frame if a render callback has been registered.
- (void)tick:(NSTimer*)t {
    if (sRenderCb != nullptr) {
        sRenderCb();
    }
}

// Closing the only window quits the application.
- (BOOL)applicationShouldTerminateAfterLastWindowClosed:(NSApplication*)app {
    return YES;
}

// Stops the frame timer first so no further tick can fire, then runs the
// registered shutdown callback.
- (void)applicationWillTerminate:(NSNotification*)notification {
    [self.timer invalidate];
    if (sShutdownCb != nullptr) {
        sShutdownCb();
    }
}

@end
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Platform interface implementation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Creates the NSApplication, a fixed-size titled window with a CAMetalLayer
// attached to its content view, and the application delegate.
//
// width, height - requested content size of the window, in points.
// title         - UTF-8 window title; a null or undecodable title yields an
//                 empty title instead of passing nil to -setTitle:.
// Returns true (this backend has no failure path in the visible code).
bool platformInit(int width, int height, const char* title) {
    // Keeps the delegate alive for the lifetime of the process. A local
    // variable alone is only sufficient under manual reference counting;
    // NOTE(review): NSApplication is understood not to retain its delegate,
    // so an ARC build would otherwise lose it on return — confirm.
    static AppDelegate* sAppDelegate = nil;

    NSApplication* app = [NSApplication sharedApplication];
    [app setActivationPolicy:NSApplicationActivationPolicyRegular];

    NSRect frame = NSMakeRect(200, 200, width, height);
    NSWindow* window = [[NSWindow alloc]
        initWithContentRect:frame
                  styleMask:(NSWindowStyleMaskTitled |
                             NSWindowStyleMaskClosable |
                             NSWindowStyleMaskMiniaturizable)
                    backing:NSBackingStoreBuffered
                      defer:NO];

    NSString* windowTitle = (title != nullptr) ? [NSString stringWithUTF8String:title] : nil;
    [window setTitle:(windowTitle != nil ? windowTitle : @"")];

    // Metal-backed layer that platformCreateSurface() later hands to WebGPU.
    NSView* contentView = [window contentView];
    [contentView setWantsLayer:YES];
    sMetalLayer = [CAMetalLayer layer];
    sMetalLayer.frame         = contentView.bounds;
    sMetalLayer.contentsScale = [window backingScaleFactor];
    sMetalLayer.pixelFormat   = MTLPixelFormatBGRA8Unorm;
    [contentView.layer addSublayer:sMetalLayer];

    sAppDelegate = [[AppDelegate alloc] init];
    sAppDelegate.window = window;
    [app setDelegate:sAppDelegate];

    // Reference point for platformGetTime().
    sStartTime = CFAbsoluteTimeGetCurrent();
    return true;
}
|
||||
|
||||
// Builds a WebGPU surface on top of the CAMetalLayer created by platformInit().
// Returns whatever wgpuInstanceCreateSurface() returns for that layer.
WGPUSurface platformCreateSurface(WGPUInstance instance) {
    WGPUSurfaceSourceMetalLayer layerSource = {};
    layerSource.chain.sType = WGPUSType_SurfaceSourceMetalLayer;
    layerSource.layer       = sMetalLayer;

    // The platform-specific source is passed as a chained extension struct.
    WGPUSurfaceDescriptor descriptor = {};
    descriptor.nextInChain = (WGPUChainedStruct*)&layerSource;

    WGPUSurface surface = wgpuInstanceCreateSurface(instance, &descriptor);
    return surface;
}
|
||||
|
||||
// Seconds elapsed since platformInit() recorded sStartTime.
double platformGetTime() {
    const CFAbsoluteTime now = CFAbsoluteTimeGetCurrent();
    return now - sStartTime;
}
|
||||
|
||||
// Registers the per-frame and shutdown callbacks, then hands control to the
// Cocoa run loop. The callbacks are invoked by AppDelegate (timer tick and
// applicationWillTerminate:), not from this function directly.
void platformRunLoop(void (*render)(), void (*shutdown)()) {
    sShutdownCb = shutdown;
    sRenderCb   = render;

    @autoreleasepool {
        NSApplication* app = [NSApplication sharedApplication];
        [app run];
    }
}
|
||||
213
examples/webgpu/triangle/platform/platform_wayland.cpp
Normal file
213
examples/webgpu/triangle/platform/platform_wayland.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
// platform_wayland.cpp — Linux/Wayland backend
|
||||
//
|
||||
// Dependencies:
|
||||
// libwayland-client, wayland-protocols (for xdg-shell)
|
||||
//
|
||||
// Generate xdg-shell protocol glue before building:
|
||||
// XML=$(pkg-config --variable=pkgdatadir wayland-protocols)/stable/xdg-shell/xdg-shell.xml
|
||||
// wayland-scanner client-header $XML xdg-shell-client-protocol.h
|
||||
// wayland-scanner private-code $XML xdg-shell-protocol.c
|
||||
//
|
||||
// Compile flags (see the CMakeLists.txt header for the full invocation):
|
||||
// g++ -std=c++17 spinning_triangle.cpp platform_wayland.cpp \
|
||||
// xdg-shell-protocol.c \
|
||||
// -I/path/to/wgpu/include -L/path/to/wgpu/lib -lwgpu_native \
|
||||
// $(pkg-config --cflags --libs wayland-client) \
|
||||
// -o spinning_triangle
|
||||
|
||||
#include <wayland-client.h>
|
||||
#include "xdg-shell-client-protocol.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <time.h>
|
||||
#include <webgpu/webgpu.h>
|
||||
#include "platform.h"
|
||||
|
||||
static wl_display* sDisplay = nullptr;
|
||||
static wl_compositor* sCompositor = nullptr;
|
||||
static xdg_wm_base* sWmBase = nullptr;
|
||||
static wl_seat* sSeat = nullptr;
|
||||
static wl_keyboard* sKeyboard = nullptr;
|
||||
static wl_surface* sSurface = nullptr;
|
||||
static xdg_surface* sXdgSurface = nullptr;
|
||||
static xdg_toplevel* sToplevel = nullptr;
|
||||
static bool sConfigured = false;
|
||||
static bool sRunning = false;
|
||||
static struct timespec sStartTime = {};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// xdg_wm_base listener — ping/pong keepalive
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static void wmBasePing(void*, xdg_wm_base* wm, uint32_t serial) {
|
||||
xdg_wm_base_pong(wm, serial);
|
||||
}
|
||||
static const xdg_wm_base_listener kWmBaseListener = { wmBasePing };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// xdg_surface listener — acknowledge configure events
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static void xdgSurfaceConfigure(void*, xdg_surface* surf, uint32_t serial) {
|
||||
xdg_surface_ack_configure(surf, serial);
|
||||
sConfigured = true;
|
||||
}
|
||||
static const xdg_surface_listener kXdgSurfaceListener = { xdgSurfaceConfigure };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// xdg_toplevel listener — window close / resize
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static void toplevelClose(void*, xdg_toplevel*) {
|
||||
sRunning = false;
|
||||
}
|
||||
static void toplevelConfigure(void*, xdg_toplevel*, int32_t, int32_t, wl_array*) {}
|
||||
static const xdg_toplevel_listener kToplevelListener = { toplevelConfigure, toplevelClose };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Keyboard listener — Escape to quit
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static void kbdKeymap(void*, wl_keyboard*, uint32_t, int32_t, uint32_t) {}
|
||||
static void kbdEnter(void*, wl_keyboard*, uint32_t, wl_surface*, wl_array*) {}
|
||||
static void kbdLeave(void*, wl_keyboard*, uint32_t, wl_surface*) {}
|
||||
static void kbdKey(void*, wl_keyboard*, uint32_t, uint32_t, uint32_t key, uint32_t state) {
|
||||
// key 1 == KEY_ESC in Linux evdev (linux/input-event-codes.h)
|
||||
if (key == 1 && state == WL_KEYBOARD_KEY_STATE_PRESSED)
|
||||
sRunning = false;
|
||||
}
|
||||
static void kbdModifiers(void*, wl_keyboard*, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t) {}
|
||||
static void kbdRepeatInfo(void*, wl_keyboard*, int32_t, int32_t) {}
|
||||
static const wl_keyboard_listener kKbdListener = {
|
||||
kbdKeymap, kbdEnter, kbdLeave, kbdKey, kbdModifiers, kbdRepeatInfo
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// wl_seat listener — grab keyboard capability
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static void seatCapabilities(void*, wl_seat* seat, uint32_t caps) {
|
||||
if ((caps & WL_SEAT_CAPABILITY_KEYBOARD) && !sKeyboard) {
|
||||
sKeyboard = wl_seat_get_keyboard(seat);
|
||||
wl_keyboard_add_listener(sKeyboard, &kKbdListener, nullptr);
|
||||
} else if (!(caps & WL_SEAT_CAPABILITY_KEYBOARD) && sKeyboard) {
|
||||
wl_keyboard_release(sKeyboard);
|
||||
sKeyboard = nullptr;
|
||||
}
|
||||
}
|
||||
static void seatName(void*, wl_seat*, const char*) {}
|
||||
static const wl_seat_listener kSeatListener = { seatCapabilities, seatName };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Registry listener — bind global interfaces
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static void registryGlobal(void*, wl_registry* reg,
|
||||
uint32_t name, const char* iface, uint32_t ver) {
|
||||
if (strcmp(iface, wl_compositor_interface.name) == 0)
|
||||
sCompositor = (wl_compositor*)wl_registry_bind(reg, name, &wl_compositor_interface, 4);
|
||||
else if (strcmp(iface, xdg_wm_base_interface.name) == 0) {
|
||||
sWmBase = (xdg_wm_base*)wl_registry_bind(reg, name, &xdg_wm_base_interface, 1);
|
||||
xdg_wm_base_add_listener(sWmBase, &kWmBaseListener, nullptr);
|
||||
} else if (strcmp(iface, wl_seat_interface.name) == 0) {
|
||||
sSeat = (wl_seat*)wl_registry_bind(reg, name, &wl_seat_interface, 5);
|
||||
wl_seat_add_listener(sSeat, &kSeatListener, nullptr);
|
||||
}
|
||||
}
|
||||
static void registryGlobalRemove(void*, wl_registry*, uint32_t) {}
|
||||
static const wl_registry_listener kRegistryListener = { registryGlobal, registryGlobalRemove };
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Platform interface implementation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
bool platformInit(int width, int height, const char* title) {
|
||||
sDisplay = wl_display_connect(nullptr);
|
||||
if (!sDisplay) { fprintf(stderr, "Cannot connect to Wayland display\n"); return false; }
|
||||
|
||||
wl_registry* registry = wl_display_get_registry(sDisplay);
|
||||
wl_registry_add_listener(registry, &kRegistryListener, nullptr);
|
||||
|
||||
// Two roundtrips: first to enumerate globals, second for seat capabilities
|
||||
wl_display_roundtrip(sDisplay);
|
||||
wl_display_roundtrip(sDisplay);
|
||||
|
||||
if (!sCompositor) { fprintf(stderr, "No wl_compositor\n"); return false; }
|
||||
if (!sWmBase) { fprintf(stderr, "No xdg_wm_base\n"); return false; }
|
||||
|
||||
sSurface = wl_compositor_create_surface(sCompositor);
|
||||
sXdgSurface = xdg_wm_base_get_xdg_surface(sWmBase, sSurface);
|
||||
sToplevel = xdg_surface_get_toplevel(sXdgSurface);
|
||||
|
||||
xdg_surface_add_listener(sXdgSurface, &kXdgSurfaceListener, nullptr);
|
||||
xdg_toplevel_add_listener(sToplevel, &kToplevelListener, nullptr);
|
||||
xdg_toplevel_set_title(sToplevel, title);
|
||||
xdg_toplevel_set_app_id(sToplevel, "spinning_triangle");
|
||||
|
||||
wl_surface_commit(sSurface);
|
||||
|
||||
// Wait for the compositor to send the first configure
|
||||
while (!sConfigured) wl_display_dispatch(sDisplay);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &sStartTime);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds a WebGPU surface from the Wayland display and wl_surface created by
// platformInit(). Returns whatever wgpuInstanceCreateSurface() returns.
WGPUSurface platformCreateSurface(WGPUInstance instance) {
    WGPUSurfaceSourceWaylandSurface source = {};
    source.chain.sType = WGPUSType_SurfaceSourceWaylandSurface;
    source.surface     = sSurface;
    source.display     = sDisplay;

    // The platform-specific source is passed as a chained extension struct.
    WGPUSurfaceDescriptor descriptor = {};
    descriptor.nextInChain = (WGPUChainedStruct*)&source;

    WGPUSurface surface = wgpuInstanceCreateSurface(instance, &descriptor);
    return surface;
}
|
||||
|
||||
double platformGetTime() {
|
||||
struct timespec now;
|
||||
clock_gettime(CLOCK_MONOTONIC, &now);
|
||||
return (double)(now.tv_sec - sStartTime.tv_sec)
|
||||
+ (double)(now.tv_nsec - sStartTime.tv_nsec) * 1e-9;
|
||||
}
|
||||
|
||||
void platformRunLoop(void (*render)(), void (*shutdown)()) {
|
||||
// Target ~16.67 ms per frame (60 fps)
|
||||
static const long kFrameNs = 1000000000L / 60;
|
||||
|
||||
sRunning = true;
|
||||
while (sRunning) {
|
||||
struct timespec frameStart;
|
||||
clock_gettime(CLOCK_MONOTONIC, &frameStart);
|
||||
|
||||
// Dispatch pending Wayland events without blocking
|
||||
if (wl_display_dispatch_pending(sDisplay) < 0) break;
|
||||
wl_display_flush(sDisplay);
|
||||
|
||||
if (sRunning) render();
|
||||
|
||||
// Sleep for the remainder of the frame budget
|
||||
struct timespec frameEnd;
|
||||
clock_gettime(CLOCK_MONOTONIC, &frameEnd);
|
||||
long elapsed = (frameEnd.tv_sec - frameStart.tv_sec) * 1000000000L
|
||||
+ (frameEnd.tv_nsec - frameStart.tv_nsec);
|
||||
long remaining = kFrameNs - elapsed;
|
||||
if (remaining > 0) {
|
||||
struct timespec ts = { 0, remaining };
|
||||
nanosleep(&ts, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
shutdown();
|
||||
|
||||
// Cleanup Wayland objects
|
||||
if (sKeyboard) { wl_keyboard_release(sKeyboard); sKeyboard = nullptr; }
|
||||
if (sToplevel) { xdg_toplevel_destroy(sToplevel); sToplevel = nullptr; }
|
||||
if (sXdgSurface) { xdg_surface_destroy(sXdgSurface); sXdgSurface = nullptr; }
|
||||
if (sSurface) { wl_surface_destroy(sSurface); sSurface = nullptr; }
|
||||
if (sWmBase) { xdg_wm_base_destroy(sWmBase); sWmBase = nullptr; }
|
||||
if (sSeat) { wl_seat_release(sSeat); sSeat = nullptr; }
|
||||
if (sCompositor) { wl_compositor_destroy(sCompositor); sCompositor = nullptr; }
|
||||
wl_display_disconnect(sDisplay);
|
||||
}
|
||||
135
examples/webgpu/triangle/platform/platform_windows.cpp
Normal file
135
examples/webgpu/triangle/platform/platform_windows.cpp
Normal file
@@ -0,0 +1,135 @@
|
||||
// platform_windows.cpp — Windows backend (Win32)
|
||||
//
|
||||
// Compile flags (MSVC, console subsystem):
|
||||
// cl /std:c++17 spinning_triangle.cpp platform_windows.cpp \
|
||||
// /I\path\to\wgpu\include \path\to\wgpu\lib\wgpu_native.lib \
|
||||
// user32.lib gdi32.lib /Fe:spinning_triangle.exe
|
||||
//
|
||||
// MinGW/Clang equivalent:
|
||||
// clang++ -std=c++17 spinning_triangle.cpp platform_windows.cpp \
|
||||
// -I/path/to/wgpu/include -L/path/to/wgpu/lib -lwgpu_native \
|
||||
// -luser32 -lgdi32 -o spinning_triangle.exe
|
||||
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#include <webgpu/webgpu.h>
|
||||
#include <stdio.h>
|
||||
#include "platform.h"
|
||||
|
||||
#pragma comment(lib, "user32.lib")
|
||||
#pragma comment(lib, "gdi32.lib")
|
||||
#pragma comment(lib, "dxguid.lib") // Dawn: WKPDID_D3DDebugObjectName
|
||||
#pragma comment(lib, "OneCore") // Dawn: CompareObjectHandles
|
||||
#pragma comment(lib, "ntdll.lib") // wgpu-native: NtReadFile et al.
|
||||
|
||||
static HWND sHwnd = nullptr;
|
||||
static bool sRunning = false;
|
||||
static LARGE_INTEGER sFreq = {};
|
||||
static LARGE_INTEGER sStartTime = {};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Win32 window procedure
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Window procedure: Escape, WM_CLOSE and WM_DESTROY stop the run loop; every
// other message goes to DefWindowProcA.
static LRESULT CALLBACK wndProc(HWND hwnd, UINT msg, WPARAM wp, LPARAM lp) {
    if (msg == WM_KEYDOWN && wp == VK_ESCAPE) {
        sRunning = false;
        return 0;
    }

    if (msg == WM_CLOSE || msg == WM_DESTROY) {
        sRunning = false;
        PostQuitMessage(0);
        return 0;
    }

    // Includes WM_KEYDOWN for keys other than Escape.
    return DefWindowProcA(hwnd, msg, wp, lp);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Platform interface implementation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
bool platformInit(int width, int height, const char* title) {
|
||||
WNDCLASSEXA wc = {};
|
||||
wc.cbSize = sizeof(wc);
|
||||
wc.style = CS_HREDRAW | CS_VREDRAW;
|
||||
wc.lpfnWndProc = wndProc;
|
||||
wc.hInstance = GetModuleHandleA(nullptr);
|
||||
wc.hCursor = LoadCursor(nullptr, IDC_ARROW);
|
||||
wc.lpszClassName = "SpinningTriangle";
|
||||
if (!RegisterClassExA(&wc)) {
|
||||
fprintf(stderr, "RegisterClassExA failed (%lu)\n", GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
// Adjust client area to match the requested dimensions
|
||||
RECT rect = { 0, 0, width, height };
|
||||
AdjustWindowRect(&rect, WS_OVERLAPPEDWINDOW & ~(WS_THICKFRAME | WS_MAXIMIZEBOX), FALSE);
|
||||
|
||||
sHwnd = CreateWindowExA(
|
||||
0, "SpinningTriangle", title,
|
||||
WS_OVERLAPPEDWINDOW & ~(WS_THICKFRAME | WS_MAXIMIZEBOX),
|
||||
CW_USEDEFAULT, CW_USEDEFAULT,
|
||||
rect.right - rect.left, rect.bottom - rect.top,
|
||||
nullptr, nullptr, GetModuleHandleA(nullptr), nullptr);
|
||||
if (!sHwnd) {
|
||||
fprintf(stderr, "CreateWindowExA failed (%lu)\n", GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
ShowWindow(sHwnd, SW_SHOW);
|
||||
UpdateWindow(sHwnd);
|
||||
|
||||
QueryPerformanceFrequency(&sFreq);
|
||||
QueryPerformanceCounter(&sStartTime);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds a WebGPU surface for the window created by platformInit().
// Returns whatever wgpuInstanceCreateSurface() returns for that HWND.
WGPUSurface platformCreateSurface(WGPUInstance instance) {
    WGPUSurfaceSourceWindowsHWND windowSource = {};
    windowSource.chain.sType = WGPUSType_SurfaceSourceWindowsHWND;
    windowSource.hwnd        = sHwnd;
    windowSource.hinstance   = GetModuleHandleA(nullptr);

    // The platform-specific source is passed as a chained extension struct.
    WGPUSurfaceDescriptor descriptor = {};
    descriptor.nextInChain = (WGPUChainedStruct*)&windowSource;

    WGPUSurface surface = wgpuInstanceCreateSurface(instance, &descriptor);
    return surface;
}
|
||||
|
||||
double platformGetTime() {
|
||||
LARGE_INTEGER now;
|
||||
QueryPerformanceCounter(&now);
|
||||
return (double)(now.QuadPart - sStartTime.QuadPart) / (double)sFreq.QuadPart;
|
||||
}
|
||||
|
||||
void platformRunLoop(void (*render)(), void (*shutdown)()) {
|
||||
// Target ~16.67 ms per frame (60 fps)
|
||||
static const double kFrameTime = 1.0 / 60.0;
|
||||
|
||||
sRunning = true;
|
||||
while (sRunning) {
|
||||
double frameStart = platformGetTime();
|
||||
|
||||
// Drain the Win32 message queue
|
||||
MSG msg;
|
||||
while (PeekMessageA(&msg, nullptr, 0, 0, PM_REMOVE)) {
|
||||
if (msg.message == WM_QUIT) { sRunning = false; break; }
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessageA(&msg);
|
||||
}
|
||||
|
||||
if (sRunning) render();
|
||||
|
||||
// Sleep for the remainder of the frame budget
|
||||
double elapsed = platformGetTime() - frameStart;
|
||||
if (elapsed < kFrameTime) {
|
||||
DWORD ms = (DWORD)((kFrameTime - elapsed) * 1000.0);
|
||||
if (ms > 0) Sleep(ms);
|
||||
}
|
||||
}
|
||||
|
||||
shutdown();
|
||||
if (sHwnd) DestroyWindow(sHwnd);
|
||||
}
|
||||
364
examples/webgpu/triangle/spinning_triangle.cpp
Normal file
364
examples/webgpu/triangle/spinning_triangle.cpp
Normal file
@@ -0,0 +1,364 @@
|
||||
// spinning_triangle.cpp — platform-agnostic WebGPU spinning triangle demo.
|
||||
|
||||
#include "platform/platform.h"
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <webgpu/webgpu.h>
|
||||
|
||||
#include <tracy/Tracy.hpp>
|
||||
#include <tracy/TracyWebGPU.hpp>
|
||||
|
||||
#ifndef __TRACYWEBGPU_HPP__
|
||||
#define TracyWebGPUCtx void*
|
||||
#define TracyWebGPUSetupDevice(...)
|
||||
#define TracyWebGPUContext(...) nullptr
|
||||
#define TracyWebGPUContextName(...)
|
||||
#define TracyWebGPUNamedZone(...)
|
||||
#define TracyWebGPUCollect(...)
|
||||
#define TracyWebGPUDestroy(...)
|
||||
namespace tracy { struct WebGPUQueueCtx { static void SetupDevice(WGPUDeviceDescriptor) {} }; }
|
||||
#endif//__TRACYWEBGPU_HPP__
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Globals
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static const int kWidth = 800;
|
||||
static const int kHeight = 600;
|
||||
|
||||
static WGPUInstance gInstance = nullptr;
|
||||
static WGPUSurface gSurface = nullptr;
|
||||
static WGPUAdapter gAdapter = nullptr;
|
||||
static WGPUDevice gDevice = nullptr;
|
||||
static WGPUQueue gQueue = nullptr;
|
||||
static WGPURenderPipeline gPipeline = nullptr;
|
||||
static WGPUBuffer gUniformBuf = nullptr;
|
||||
static WGPUBindGroup gBindGroup = nullptr;
|
||||
|
||||
static TracyWebGPUCtx gTracyCtx = nullptr;
|
||||
|
||||
static WGPUTextureFormat gSurfaceFormat = WGPUTextureFormat_BGRA8Unorm;
|
||||
|
||||
// TODO: this can become platformError() instead
|
||||
// Prints `message` and its numeric `code` to stderr, then returns `code`
// unchanged so callers can write `return error(code, "...")`.
int error(int code, const char* message) {
    const int result = code;
    fprintf(stderr, "ERROR: %s (code: %d)\n", message, result);
    return result;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WGSL shader — vertex colours baked in, rotation via a uniform float.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static const char* kShaderSource = R"(
|
||||
struct Uniforms {
|
||||
angle: f32,
|
||||
};
|
||||
@group(0) @binding(0) var<uniform> u: Uniforms;
|
||||
|
||||
struct VSOut {
|
||||
@builtin(position) pos: vec4f,
|
||||
@location(0) color: vec3f,
|
||||
};
|
||||
|
||||
@vertex
|
||||
fn vs_main(@builtin(vertex_index) vi: u32) -> VSOut {
|
||||
var positions = array<vec2f, 3>(
|
||||
vec2f( 0.0, 0.5),
|
||||
vec2f(-0.433, -0.25),
|
||||
vec2f( 0.433, -0.25),
|
||||
);
|
||||
var colors = array<vec3f, 3>(
|
||||
vec3f(1.0, 0.0, 0.0),
|
||||
vec3f(0.0, 1.0, 0.0),
|
||||
vec3f(0.0, 0.0, 1.0),
|
||||
);
|
||||
|
||||
let c = cos(u.angle);
|
||||
let s = sin(u.angle);
|
||||
let p = positions[vi];
|
||||
let rotated = vec2f(p.x * c - p.y * s, p.x * s + p.y * c);
|
||||
|
||||
var out: VSOut;
|
||||
out.pos = vec4f(rotated, 0.0, 1.0);
|
||||
out.color = colors[vi];
|
||||
return out;
|
||||
}
|
||||
|
||||
@fragment
|
||||
fn fs_main(@location(0) color: vec3f) -> @location(0) vec4f {
|
||||
return vec4f(color, 1.0);
|
||||
}
|
||||
)";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Adapter / Device request callbacks (current wgpu-native API)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Adapter request callback.
//
// On success the adapter handle is written through `userdata1`, which is
// treated as a WGPUAdapter*. On any other status the destination is left
// untouched and the implementation-provided message is logged to stderr.
static void onAdapterReady(WGPURequestAdapterStatus status,
                           WGPUAdapter adapter,
                           WGPUStringView message,
                           void* userdata1, void* /*userdata2*/) {
    if (status != WGPURequestAdapterStatus_Success) {
        fprintf(stderr, "Adapter request failed: %.*s\n",
                (int)message.length, message.data);
        return;
    }

    WGPUAdapter* destination = (WGPUAdapter*)userdata1;
    *destination = adapter;
}
|
||||
|
||||
// Device request callback.
//
// On success the device handle is written through `userdata1`, which is
// treated as a WGPUDevice*. On any other status the destination is left
// untouched and the implementation-provided message is logged to stderr.
static void onDeviceReady(WGPURequestDeviceStatus status,
                          WGPUDevice device,
                          WGPUStringView message,
                          void* userdata1, void* /*userdata2*/) {
    if (status != WGPURequestDeviceStatus_Success) {
        fprintf(stderr, "Device request failed: %.*s\n",
                (int)message.length, message.data);
        return;
    }

    WGPUDevice* destination = (WGPUDevice*)userdata1;
    *destination = device;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WebGPU init
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static int initWebGPU() {
|
||||
// Adapter
|
||||
WGPURequestAdapterOptions adapterOpts = {};
|
||||
adapterOpts.compatibleSurface = gSurface;
|
||||
|
||||
WGPURequestAdapterCallbackInfo adapterCB = {};
|
||||
adapterCB.mode = WGPUCallbackMode_AllowProcessEvents;
|
||||
adapterCB.callback = onAdapterReady;
|
||||
adapterCB.userdata1 = &gAdapter;
|
||||
wgpuInstanceRequestAdapter(gInstance, &adapterOpts, adapterCB);
|
||||
while (!gAdapter) { wgpuInstanceProcessEvents(gInstance); }
|
||||
if (!gAdapter) return error(11, "No adapter");
|
||||
|
||||
WGPUUncapturedErrorCallbackInfo errorCB = {};
|
||||
errorCB.callback = [](WGPUDevice const*, WGPUErrorType type,
|
||||
WGPUStringView message, void*, void*) {
|
||||
fprintf(stderr, "[WGPU ERROR] type=%d %.*s\n",
|
||||
(int)type, (int)message.length, message.data);
|
||||
};
|
||||
|
||||
WGPUDeviceDescriptor deviceDesc = {};
|
||||
deviceDesc.uncapturedErrorCallbackInfo = errorCB;
|
||||
|
||||
TracyWebGPUSetupDevice(deviceDesc);
|
||||
|
||||
WGPURequestDeviceCallbackInfo deviceCB = {};
|
||||
deviceCB.mode = WGPUCallbackMode_AllowProcessEvents;
|
||||
deviceCB.callback = onDeviceReady;
|
||||
deviceCB.userdata1 = &gDevice;
|
||||
wgpuAdapterRequestDevice(gAdapter, &deviceDesc, deviceCB);
|
||||
while (!gDevice) { wgpuInstanceProcessEvents(gInstance); }
|
||||
if (!gDevice) return error(12, "No device");
|
||||
|
||||
gQueue = wgpuDeviceGetQueue(gDevice);
|
||||
gTracyCtx = TracyWebGPUContext(gInstance, gDevice, gQueue);
|
||||
TracyWebGPUContextName(gTracyCtx, "WebGPU", 6);
|
||||
|
||||
// Configure surface
|
||||
WGPUSurfaceConfiguration config = {};
|
||||
config.device = gDevice;
|
||||
config.format = gSurfaceFormat;
|
||||
config.usage = WGPUTextureUsage_RenderAttachment;
|
||||
config.alphaMode = WGPUCompositeAlphaMode_Opaque;
|
||||
config.width = kWidth;
|
||||
config.height = kHeight;
|
||||
config.presentMode = WGPUPresentMode_Fifo;
|
||||
wgpuSurfaceConfigure(gSurface, &config);
|
||||
|
||||
// Shader module
|
||||
WGPUShaderSourceWGSL wgslSrc = {};
|
||||
wgslSrc.chain.sType = WGPUSType_ShaderSourceWGSL;
|
||||
wgslSrc.code = { kShaderSource, WGPU_STRLEN };
|
||||
|
||||
WGPUShaderModuleDescriptor smDesc = {};
|
||||
smDesc.nextInChain = (WGPUChainedStruct*)&wgslSrc;
|
||||
WGPUShaderModule shaderMod = wgpuDeviceCreateShaderModule(gDevice, &smDesc);
|
||||
|
||||
// Uniform buffer (one f32 for rotation angle)
|
||||
WGPUBufferDescriptor bufDesc = {};
|
||||
bufDesc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
|
||||
bufDesc.size = sizeof(float);
|
||||
gUniformBuf = wgpuDeviceCreateBuffer(gDevice, &bufDesc);
|
||||
|
||||
// Bind group layout + bind group
|
||||
WGPUBindGroupLayoutEntry bglEntry = {};
|
||||
bglEntry.binding = 0;
|
||||
bglEntry.visibility = WGPUShaderStage_Vertex;
|
||||
bglEntry.buffer.type = WGPUBufferBindingType_Uniform;
|
||||
bglEntry.buffer.minBindingSize = sizeof(float);
|
||||
|
||||
WGPUBindGroupLayoutDescriptor bglDesc = {};
|
||||
bglDesc.entryCount = 1;
|
||||
bglDesc.entries = &bglEntry;
|
||||
WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(gDevice, &bglDesc);
|
||||
|
||||
WGPUBindGroupEntry bgEntry = {};
|
||||
bgEntry.binding = 0;
|
||||
bgEntry.buffer = gUniformBuf;
|
||||
bgEntry.size = sizeof(float);
|
||||
|
||||
WGPUBindGroupDescriptor bgDesc = {};
|
||||
bgDesc.layout = bgl;
|
||||
bgDesc.entryCount = 1;
|
||||
bgDesc.entries = &bgEntry;
|
||||
gBindGroup = wgpuDeviceCreateBindGroup(gDevice, &bgDesc);
|
||||
|
||||
// Pipeline layout
|
||||
WGPUPipelineLayoutDescriptor plDesc = {};
|
||||
plDesc.bindGroupLayoutCount = 1;
|
||||
plDesc.bindGroupLayouts = &bgl;
|
||||
WGPUPipelineLayout pipelineLayout = wgpuDeviceCreatePipelineLayout(gDevice, &plDesc);
|
||||
|
||||
// Render pipeline
|
||||
WGPUColorTargetState colorTarget = {};
|
||||
colorTarget.format = gSurfaceFormat;
|
||||
colorTarget.writeMask = WGPUColorWriteMask_All;
|
||||
|
||||
WGPUFragmentState fragState = {};
|
||||
fragState.module = shaderMod;
|
||||
fragState.entryPoint = { "fs_main", WGPU_STRLEN };
|
||||
fragState.targetCount = 1;
|
||||
fragState.targets = &colorTarget;
|
||||
|
||||
WGPURenderPipelineDescriptor rpDesc = {};
|
||||
rpDesc.layout = pipelineLayout;
|
||||
rpDesc.vertex.module = shaderMod;
|
||||
rpDesc.vertex.entryPoint = { "vs_main", WGPU_STRLEN };
|
||||
rpDesc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
|
||||
rpDesc.multisample.count = 1;
|
||||
rpDesc.multisample.mask = 0xFFFFFFFF;
|
||||
rpDesc.fragment = &fragState;
|
||||
|
||||
gPipeline = wgpuDeviceCreateRenderPipeline(gDevice, &rpDesc);
|
||||
|
||||
// Cleanup intermediates
|
||||
wgpuShaderModuleRelease(shaderMod);
|
||||
wgpuPipelineLayoutRelease(pipelineLayout);
|
||||
wgpuBindGroupLayoutRelease(bgl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Frame rendering
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Returns the surface texture for the current frame, or {.texture=nullptr} on
|
||||
// a skippable condition (timeout, occlusion) or an error.
|
||||
static WGPUSurfaceTexture getWindowSurface() {
|
||||
WGPUSurfaceTexture surfTex = {};
|
||||
wgpuSurfaceGetCurrentTexture(gSurface, &surfTex);
|
||||
if (surfTex.status == WGPUSurfaceGetCurrentTextureStatus_SuccessOptimal ||
|
||||
surfTex.status == WGPUSurfaceGetCurrentTextureStatus_SuccessSuboptimal)
|
||||
return surfTex;
|
||||
|
||||
// Timeout and Occluded are normal OS events (window covered / on a different Space).
|
||||
bool silent = surfTex.status == WGPUSurfaceGetCurrentTextureStatus_Timeout;
|
||||
#ifdef WGPU_H_
|
||||
silent = silent || surfTex.status == (WGPUSurfaceGetCurrentTextureStatus)WGPUSurfaceGetCurrentTextureStatus_Occluded;
|
||||
#endif
|
||||
if (!silent)
|
||||
fprintf(stderr, "Failed to get surface texture (status %d)\n", surfTex.status);
|
||||
if (surfTex.texture) wgpuTextureRelease(surfTex.texture);
|
||||
surfTex.texture = nullptr;
|
||||
return surfTex;
|
||||
}
|
||||
|
||||
static void renderFrame() {
|
||||
ZoneScoped;
|
||||
|
||||
// Update rotation angle
|
||||
float angle = (float)platformGetTime();
|
||||
wgpuQueueWriteBuffer(gQueue, gUniformBuf, 0, &angle, sizeof(float));
|
||||
|
||||
WGPUSurfaceTexture surfTex = getWindowSurface();
|
||||
if (!surfTex.texture) return;
|
||||
|
||||
WGPUTextureView view = wgpuTextureCreateView(surfTex.texture, nullptr);
|
||||
|
||||
// Command encoder
|
||||
WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(gDevice, nullptr);
|
||||
|
||||
// Render pass
|
||||
WGPURenderPassColorAttachment colorAtt = {};
|
||||
colorAtt.view = view;
|
||||
colorAtt.loadOp = WGPULoadOp_Clear;
|
||||
colorAtt.storeOp = WGPUStoreOp_Store;
|
||||
colorAtt.clearValue = { 0.05, 0.05, 0.08, 1.0 };
|
||||
colorAtt.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
|
||||
|
||||
WGPURenderPassDescriptor passDesc = {};
|
||||
passDesc.colorAttachmentCount = 1;
|
||||
passDesc.colorAttachments = &colorAtt;
|
||||
|
||||
{
|
||||
ZoneScopedN("render-pass");
|
||||
TracyWebGPUNamedZone(gTracyCtx, tracyZone, encoder, passDesc, "triangle draw", true);
|
||||
WGPURenderPassEncoder pass = wgpuCommandEncoderBeginRenderPass(encoder, &passDesc);
|
||||
wgpuRenderPassEncoderSetPipeline(pass, gPipeline);
|
||||
wgpuRenderPassEncoderSetBindGroup(pass, 0, gBindGroup, 0, nullptr);
|
||||
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
|
||||
wgpuRenderPassEncoderEnd(pass);
|
||||
wgpuRenderPassEncoderRelease(pass);
|
||||
}
|
||||
|
||||
// Submit
|
||||
WGPUCommandBuffer cmdBuf = wgpuCommandEncoderFinish(encoder, nullptr);
|
||||
wgpuQueueSubmit(gQueue, 1, &cmdBuf);
|
||||
|
||||
// Present
|
||||
wgpuSurfacePresent(gSurface);
|
||||
|
||||
// Process Events
|
||||
wgpuInstanceProcessEvents(gInstance);
|
||||
TracyWebGPUCollect(gTracyCtx);
|
||||
|
||||
// Cleanup
|
||||
wgpuCommandBufferRelease(cmdBuf);
|
||||
wgpuCommandEncoderRelease(encoder);
|
||||
wgpuTextureViewRelease(view);
|
||||
wgpuTextureRelease(surfTex.texture);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shutdown
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static void shutdown() {
|
||||
fprintf(stderr, "application is shutting down...\n");
|
||||
TracyWebGPUDestroy(gTracyCtx);
|
||||
if (gBindGroup) wgpuBindGroupRelease(gBindGroup);
|
||||
if (gUniformBuf) wgpuBufferRelease(gUniformBuf);
|
||||
if (gPipeline) wgpuRenderPipelineRelease(gPipeline);
|
||||
if (gQueue) wgpuQueueRelease(gQueue);
|
||||
if (gDevice) wgpuDeviceRelease(gDevice);
|
||||
if (gAdapter) wgpuAdapterRelease(gAdapter);
|
||||
if (gSurface) wgpuSurfaceRelease(gSurface);
|
||||
if (gInstance) wgpuInstanceRelease(gInstance);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// main
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (!platformInit(kWidth, kHeight, "WebGPU Spinning Triangle"))
|
||||
return 1;
|
||||
|
||||
gInstance = wgpuCreateInstance(nullptr);
|
||||
if (!gInstance) return error(2, "Failed to create WebGPU instance.");
|
||||
|
||||
gSurface = platformCreateSurface(gInstance);
|
||||
if (!gSurface) return error(3, "Failed to create surface.");
|
||||
|
||||
if (initWebGPU() != 0) return 4;
|
||||
|
||||
platformRunLoop(renderFrame, shutdown);
|
||||
return 0;
|
||||
}
|
||||
@@ -141,7 +141,7 @@ There's much more Tracy can do, which can be explored by carefully reading this
|
||||
\section{A quick look at Tracy Profiler}
|
||||
\label{quicklook}
|
||||
|
||||
Tracy is a real-time, nanosecond resolution \emph{hybrid frame and sampling profiler} that you can use for remote or embedded telemetry of games and other applications. It can profile CPU\footnote{Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as Rust, Zig, C\#, OCaml, Odin, etc.}, GPU\footnote{All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL.}, memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
||||
Tracy is a real-time, nanosecond resolution \emph{hybrid frame and sampling profiler} that you can use for remote or embedded telemetry of games and other applications. It can profile CPU\footnote{Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as Rust, Zig, C\#, OCaml, Odin, etc.}, GPU\footnote{All major graphics/compute APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA, WebGPU.}, memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
||||
|
||||
While Tracy can perform statistical analysis of sampled call stack data, just like other \emph{statistical profilers} (such as VTune, perf, or Very Sleepy), it mainly focuses on manual markup of the source code. Such markup allows frame-by-frame inspection of the program execution. For example, you will be able to see exactly which functions are called, how much time they require, and how they interact with each other in a multi-threaded environment. In contrast, the statistical analysis may show you the hot spots in your code, but it cannot accurately pinpoint the underlying cause for semi-random frame stutter that may occur every couple of seconds.
|
||||
|
||||
@@ -1050,6 +1050,8 @@ Memory & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faXm
|
||||
GPU zones (OpenGL) & \faCheck & \faCheck & \faCheck & \faPoo & \faPoo & & \faXmark \\
|
||||
GPU zones (Vulkan) & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & & \faXmark \\
|
||||
GPU zones (Metal) & \faXmark & \faXmark & \faXmark & \faCheck\textsuperscript{\emph{b}} & \faCheck\textsuperscript{\emph{b}} & \faXmark & \faXmark \\
|
||||
GPU zones (CUDA) & \faCheck & \faCheck & \faXmark & \faXmark & \faXmark & \faQuestion & \faXmark \\
|
||||
GPU zones (WebGPU) & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faQuestion & \faQuestion \\
|
||||
Call stacks & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faXmark \\
|
||||
Symbol resolution & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck \\
|
||||
Crash handling & \faCheck & \faCheck & \faCheck & \faXmark & \faXmark & \faXmark & \faXmark \\
|
||||
@@ -1645,7 +1647,7 @@ To mark that a separate memory pool is to be tracked you should use the named ve
|
||||
\subsection{GPU profiling}
|
||||
\label{gpuprofiling}
|
||||
|
||||
Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 11, Direct3D 12, Metal, OpenCL and CUDA execution time on GPU.
|
||||
Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 11, Direct3D 12, Metal, OpenCL, CUDA and WebGPU execution time on GPU.
|
||||
|
||||
Note that the CPU and GPU timers may be unsynchronized unless you create a calibrated context, but the availability of calibrated contexts is limited. You can try to correct the desynchronization of uncalibrated contexts in the profiler's options (section~\ref{options}).
|
||||
|
||||
@@ -1785,6 +1787,16 @@ Unlike other GPU backends in Tracy, there is no need to call \texttt{TracyCUDACo
|
||||
|
||||
To stop profiling, call the \texttt{TracyCUDAStopProfiling(ctx)} macro.
|
||||
|
||||
\subsubsection{WebGPU}
|
||||
|
||||
WebGPU support is enabled by including the \texttt{public/tracy/TracyWebGPU.hpp} header file. Both major implementations of WebGPU (Dawn and wgpu-native) are supported.
|
||||
|
||||
Before creating the WebGPU device, make sure to call \texttt{TracyWebGPUSetupDevice()} to let Tracy request the device features and extensions necessary for profiling. After the device is created, use the \texttt{TracyWebGPUContext()} macro to instantiate the \texttt{WebGPUQueueCtx} object required for GPU instrumentation. The object should later be cleaned up with the \texttt{TracyWebGPUDestroy()} macro. To set a custom name for the context, use the \texttt{TracyWebGPUContextName()} macro.
|
||||
|
||||
To instrument a GPU zone, use the various \texttt{TracyWebGPU*Zone*()} macros. Note that WebGPU only offers command instrumentation at the ``pass'' level. While command-level granularity is possible through implementation-specific WebGPU extensions, Tracy does not support it at the moment. Supply the corresponding WebGPU pass descriptor to the instrumentation macro \textit{before} creating the WebGPU pass encoder.
|
||||
|
||||
You are required to periodically collect the GPU events using the \texttt{TracyWebGPUCollect()} macro. Good places for collection are: after synchronous waits, after event processing (\texttt{wgpuInstanceProcessEvents}), after present drawable calls (\texttt{wgpuSurfacePresent}), and inside the completion callback of command queues (\texttt{wgpuQueueOnSubmittedWorkDone}).
|
||||
|
||||
\subsubsection{ROCm}
|
||||
|
||||
On Linux, if rocprofiler-sdk is installed, tracy can automatically trace GPU dispatches and collect
|
||||
@@ -1818,13 +1830,13 @@ sudo amd-smi set -g 0 -l stable_std
|
||||
|
||||
Putting more than one GPU zone macro in a single scope features the same issue as with the \texttt{ZoneScoped} macros, described in section~\ref{multizone} (but this time the variable name is \texttt{\_\_\_tracy\_gpu\_zone}).
|
||||
|
||||
To solve this problem, in case of OpenGL use the \texttt{TracyGpuNamedZone} macro in place of \texttt{TracyGpuZone} (or the color variant). The same applies to Vulkan, Direct3D 11/12 and Metal -- replace \texttt{TracyVkZone} with \texttt{TracyVkNamedZone}, \texttt{TracyD3D11Zone}/\texttt{TracyD3D12Zone} with \texttt{TracyD3D11NamedZone}/\texttt{TracyD3D12NamedZone}, and \texttt{TracyMetalZone} with \texttt{TracyMetalNamedZone}.
|
||||
To solve this problem, in case of OpenGL use the \texttt{TracyGpuNamedZone} macro in place of \texttt{TracyGpuZone} (or the color variant). The same applies to Vulkan, Direct3D 11/12, Metal and WebGPU -- replace \texttt{TracyVkZone} with \texttt{TracyVkNamedZone}, \texttt{TracyD3D11Zone}/\texttt{TracyD3D12Zone} with \texttt{TracyD3D11NamedZone}/\texttt{TracyD3D12NamedZone}, \texttt{TracyMetalZone} with \texttt{TracyMetalNamedZone}, and \texttt{TracyWebGPUZone} with \texttt{TracyWebGPUNamedZone}.
|
||||
|
||||
Remember to provide your name for the created stack variable as the first parameter to the macros.
|
||||
|
||||
\subsubsection{Transient GPU zones}
|
||||
|
||||
Transient zones (see section~\ref{transientzones} for details) are available in OpenGL, Vulkan, and Direct3D 11/12 macros. Transient zones are not available for Metal at this moment.
|
||||
Transient zones (see section~\ref{transientzones} for details) are available in OpenGL, Vulkan, Direct3D 11/12 and WebGPU macros. Transient zones are not available for Metal at this moment.
|
||||
|
||||
\subsection{Fibers}
|
||||
\label{fibers}
|
||||
@@ -2041,6 +2053,20 @@ filesystem setup as the one used to run the tracy instrumented application).
|
||||
You can do path substitution with the \texttt{-p} option to perform any number of path
|
||||
substitutions in order to use symbols located elsewhere.
|
||||
|
||||
By default symbol resolution is performed with the platform's native facility: the DbgHelp
|
||||
library on Windows, and the \texttt{addr2line} tool found in \texttt{PATH} elsewhere. You can
|
||||
override this with the \texttt{-a} option, passing the path to a custom
|
||||
\texttt{addr2line}-compatible tool (for instance an \texttt{addr2line} from a cross-compilation
|
||||
toolchain, or \texttt{llvm-addr2line}). The \texttt{-a} option works on all platforms, including
|
||||
Windows, and takes precedence over the platform default.
|
||||
|
||||
Extra arguments can be passed verbatim to the resolution tool with the \texttt{-A} option. Tracy
|
||||
records callstack frame offsets relative to the image base, but \texttt{addr2line}-compatible
|
||||
tools expect a full virtual address for images that have a non-zero preferred image base (such as
|
||||
PE on Windows or Mach-O on Apple). For these, pass \texttt{-A "--relative-address"} so that
|
||||
\texttt{llvm-addr2line} or \texttt{llvm-symbolizer} adds the image base back. ELF images need no
|
||||
such adjustment.
|
||||
|
||||
\begin{bclogo}[
|
||||
noborder=true,
|
||||
couleur=black!5,
|
||||
@@ -3832,7 +3858,7 @@ You will find the zones with locks and their associated threads on this combined
|
||||
The left-hand side \emph{index area} of the timeline view displays various labels (threads, locks), which can be categorized in the following way:
|
||||
|
||||
\begin{itemize}
|
||||
\item \emph{Light blue label} -- GPU context. Multi-threaded Vulkan, OpenCL, Direct3D 12 and Metal contexts are additionally split into separate threads.
|
||||
\item \emph{Light blue label} -- GPU context. Multi-threaded Vulkan, OpenCL, Direct3D 12, Metal and WebGPU contexts are additionally split into separate threads.
|
||||
\item \emph{Pink label} -- CPU data graph.
|
||||
\item \emph{White label} -- A CPU thread. It will be replaced by a bright red label in a thread that has crashed (section~\ref{crashhandling}). If automated sampling was performed, clicking the~\LMB{}~left mouse button on the \emph{\faGhost{}~ghost zones} button will switch zone display mode between 'instrumented' and 'ghost.'
|
||||
\item \emph{Green label} -- Fiber, coroutine, or any other sort of cooperative multitasking 'green thread.'
|
||||
@@ -3854,7 +3880,7 @@ In an example in figure~\ref{zoneslocks} you can see that there are two threads:
|
||||
|
||||
Meanwhile, the \emph{Streaming thread} is performing some \emph{Streaming jobs}. The first \emph{Streaming job} sent a message (section~\ref{messagelog}). In addition to being listed in the message log, it is indicated by a triangle over the thread separator. When multiple messages are in one place, the triangle outline shape changes to a filled triangle.
|
||||
|
||||
The GPU zones are displayed just like CPU zones, with an OpenGL/Vulkan/Direct3D/Metal/OpenCL context in place of a thread name.
|
||||
The GPU zones are displayed just like CPU zones, with an OpenGL/Vulkan/Direct3D/Metal/OpenCL/CUDA/WebGPU context in place of a thread name.
|
||||
|
||||
Hovering the \faArrowPointer{} mouse pointer over a zone will highlight all other zones that have the exact source location with a white outline. Clicking the \LMB{}~left mouse button on a zone will open the zone information window (section~\ref{zoneinfo}). Holding the \keys{\ctrl} key and clicking the \LMB{}~left mouse button on a zone will open the zone statistics window (section~\ref{findzone}). Clicking the \MMB{}~middle mouse button on a zone will zoom the view to the extent of the zone.
|
||||
|
||||
@@ -4063,7 +4089,7 @@ In this window, you can set various trace-related options. For example, the time
|
||||
\begin{itemize}
|
||||
\item \emph{\faSignature{} Draw CPU usage graph} -- You can disable drawing of the CPU usage graph here.
|
||||
\end{itemize}
|
||||
\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Metal/Direct3D/OpenCL zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets of uncalibrated contexts (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}.
|
||||
\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Metal/Direct3D/OpenCL/CUDA/WebGPU zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets of uncalibrated contexts (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}.
|
||||
\item \emph{\faMicrochip{} Draw CPU zones} -- Determines whether CPU zones are displayed.
|
||||
\begin{itemize}
|
||||
\item \emph{\faGhost{} Draw ghost zones} -- Controls if ghost zones should be displayed in threads which don't have any instrumented zones available.
|
||||
|
||||
@@ -149,6 +149,7 @@ Embed(PROFILER_FILES SystemPrompt src/llm/system.prompt.md)
|
||||
Embed(PROFILER_FILES SkillCallstack src/llm/skill.callstack.md)
|
||||
Embed(PROFILER_FILES SkillOptimization src/llm/skill.optimization.md)
|
||||
Embed(PROFILER_FILES ToolsJson src/llm/tools.json)
|
||||
|
||||
Embed(PROFILER_FILES FontFixed src/font/FiraCode-Retina.ttf)
|
||||
Embed(PROFILER_FILES FontIcons src/font/Font\ Awesome\ 7\ Free-Solid-900.otf)
|
||||
Embed(PROFILER_FILES FontNormal src/font/Roboto-Regular.ttf)
|
||||
@@ -156,8 +157,22 @@ Embed(PROFILER_FILES FontBold src/font/Roboto-Bold.ttf)
|
||||
Embed(PROFILER_FILES FontItalic src/font/Roboto-Italic.ttf)
|
||||
Embed(PROFILER_FILES FontBoldItalic src/font/Roboto-BoldItalic.ttf)
|
||||
Embed(PROFILER_FILES FontEmoji src/font/NotoEmoji-Regular.ttf)
|
||||
|
||||
Embed(PROFILER_FILES Manual ../manual/tracy.md)
|
||||
|
||||
Embed(PROFILER_FILES Text100Million src/achievements/100Million.md)
|
||||
Embed(PROFILER_FILES TextConnectToClient src/achievements/ConnectToClient.md)
|
||||
Embed(PROFILER_FILES TextFindZone src/achievements/FindZone.md)
|
||||
Embed(PROFILER_FILES TextFrameImages src/achievements/FrameImages.md)
|
||||
Embed(PROFILER_FILES TextGlobalSettings src/achievements/GlobalSettings.md)
|
||||
Embed(PROFILER_FILES TextInstrumentationIntro src/achievements/InstrumentationIntro.md)
|
||||
Embed(PROFILER_FILES TextInstrumentationStatistics src/achievements/InstrumentationStatistics.md)
|
||||
Embed(PROFILER_FILES TextInstrumentFrames src/achievements/InstrumentFrames.md)
|
||||
Embed(PROFILER_FILES TextIntro src/achievements/Intro.md)
|
||||
Embed(PROFILER_FILES TextLoadTrace src/achievements/LoadTrace.md)
|
||||
Embed(PROFILER_FILES TextSamplingIntro src/achievements/SamplingIntro.md)
|
||||
Embed(PROFILER_FILES TextSaveTrace src/achievements/SaveTrace.md)
|
||||
|
||||
set(INCLUDES "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
set(LIBS "")
|
||||
|
||||
|
||||
12
profiler/src/achievements/100Million.md
Normal file
12
profiler/src/achievements/100Million.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# It's over 100 million!
|
||||
|
||||
Tracy can handle a lot of data. How about 100 million zones in a single trace? Add a lot of zones to your program and see how it handles it!
|
||||
|
||||
Capturing a long-running profile trace is easy. Need to profile an hour of your program execution? You can do it.
|
||||
|
||||
Note that it doesn't make much sense to instrument every little function you might have. The cost of the instrumentation itself will be higher than the cost of the function in such a case.
|
||||
|
||||
> [!TIP]
|
||||
> Keep in mind that the more zones you have, the more memory and CPU time the profiler will use. Be careful not to run out of memory.
|
||||
>
|
||||
> To capture 100 million zones, you will need approximately 4 GB of RAM.
|
||||
10
profiler/src/achievements/ConnectToClient.md
Normal file
10
profiler/src/achievements/ConnectToClient.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# First profiling session
|
||||
|
||||
Let's start our adventure by instrumenting your application and connecting it to the profiler. Here's a quick refresher:
|
||||
|
||||
1. Integrate Tracy Profiler into your application. This can be done using CMake, Meson, or simply by adding the source files to your project.
|
||||
2. Make sure that `TracyClient.cpp` (or the Tracy library) is included in your build.
|
||||
3. Define `TRACY_ENABLE` in your build configuration, for the whole application. Do not do it in a single source file because it won't work.
|
||||
4. Start your application, and * Connect* to it with the profiler.
|
||||
|
||||
Please refer to the [user manual](https://github.com/wolfpld/tracy/releases) for more details.
|
||||
11
profiler/src/achievements/FindZone.md
Normal file
11
profiler/src/achievements/FindZone.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Find some zones
|
||||
|
||||
You can search for zones in the trace by opening the search window with the * Find zone* button on the top bar. It will ask you for the zone name, which in most cases will be the function name in the code.
|
||||
|
||||
The search may find more than one zone with the same name. A list of all the zones found is displayed, and you can select any of them.
|
||||
|
||||
Alternatively, you can open the Statistics window and click an entry there. This will open the Find zone window as if you had searched for that zone.
|
||||
|
||||
When a zone is selected, a number of statistics are displayed to help you understand the performance of your application. In addition, a histogram of the zone execution times is displayed to make it easier for you to determine the performance of the profiled code. Be sure to select a zone with a large number of calls to make the histogram look interesting!
|
||||
|
||||
Note that you can draw a range on the histogram to limit the number of entries displayed in the zone list below. This list allows you to examine each zone individually. There are also a number of zone groupings that you can select. Each group can be selected and the time associated with the selected group will be highlighted on the histogram.
|
||||
11
profiler/src/achievements/FrameImages.md
Normal file
11
profiler/src/achievements/FrameImages.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# A picture is worth a thousand words
|
||||
|
||||
Tracy allows you to add context to each frame, by attaching a screenshot. You can do this with the `FrameImage` macro.
|
||||
|
||||
You will have to do the screen capture and resizing yourself, which can be a bit complicated. The manual provides sample code that shows how to do this in a performant way.
|
||||
|
||||
The frame images are displayed in the context of a frame, for example, when you hover over the frame in the timeline or in the frame graph at the top of the screen.
|
||||
|
||||
You can even view a recording of what your application was doing by clicking the * Tools* icon and then selecting the * Playback* option. Try it out!
|
||||
|
||||
The `FrameImage` macro is a great way to see what happened in your application at a particular time. Maybe you have a performance problem that only occurs when a certain object is on the screen?
|
||||
5
profiler/src/achievements/GlobalSettings.md
Normal file
5
profiler/src/achievements/GlobalSettings.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Global settings
|
||||
|
||||
Tracy has a variety of settings that can be adjusted to suit your needs. These settings can be found by clicking on the * Wrench* icon on the welcome screen. This will open the about window, where you can expand the * Global settings* menu.
|
||||
|
||||
The settings are saved between sessions, so you only need to set them once.
|
||||
22
profiler/src/achievements/InstrumentFrames.md
Normal file
22
profiler/src/achievements/InstrumentFrames.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Instrumenting frames
|
||||
|
||||
In addition to instrumenting functions, you can also instrument frames. This allows you to see how much time is spent in each frame of your application.
|
||||
|
||||
To instrument frames, you need to add the `FrameMark` macro at the beginning of each frame. This can be done in the main loop of your application, or in a separate function that is called at the beginning of each frame.
|
||||
|
||||
```c++
|
||||
#include "Tracy.hpp"
|
||||
|
||||
void Render()
|
||||
{
|
||||
// Render the frame
|
||||
SwapBuffers();
|
||||
FrameMark;
|
||||
}
|
||||
```
|
||||
|
||||
When you profile your application, you will see a new frame appear on the timeline each time the `FrameMark` macro is called. This allows you to see how much time is spent in each frame and how many frames are rendered per second.
|
||||
|
||||
The `FrameMark` macro is a great way to see at a glance how your application is performing over time. Maybe there are some performance problems that only appear after a few minutes of running the application? A frame graph is drawn at the top of the profiler window where you can see the timing of all frames.
|
||||
|
||||
Note that some applications do not have a frame-based structure, and in such cases, frame instrumentation may not be useful. That's ok.
|
||||
22
profiler/src/achievements/InstrumentationIntro.md
Normal file
22
profiler/src/achievements/InstrumentationIntro.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Instrumenting your application
|
||||
|
||||
Instrumentation is a powerful feature that allows you to see the exact runtime of each call to the selected set of functions. The downside is that it takes a bit of manual work to get it set up.
|
||||
|
||||
To get started, open a source file and include the `Tracy.hpp` header. This will give you access to a variety of macros provided by Tracy. Next, add the `ZoneScoped` macro to the beginning of one of your functions, like this:
|
||||
|
||||
```c++
|
||||
#include "Tracy.hpp"
|
||||
|
||||
void SomeFunction()
|
||||
{
|
||||
ZoneScoped;
|
||||
// Your code here
|
||||
}
|
||||
```
|
||||
|
||||
Now, when you profile your application, you will see a new zone appear on the timeline for each call to the function. This allows you to see how much time is spent in each call and how many times the function is called.
|
||||
|
||||
> [!NOTE]
|
||||
> The `ZoneScoped` macro is just one of the many macros provided by Tracy. See the documentation for more information.
|
||||
|
||||
The above description applies to C++ code, but things are done similarly in other programming languages. Refer to the documentation for your language for more information.
|
||||
5
profiler/src/achievements/InstrumentationStatistics.md
Normal file
5
profiler/src/achievements/InstrumentationStatistics.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Show me the stats!
|
||||
|
||||
Once you have instrumented your application, you can view the statistics for each zone in the timeline. This allows you to see how much time is spent in each zone and how many times it is called.
|
||||
|
||||
To view the statistics, click on the * Statistics* button on the top bar. This will open a new window with a list of all zones in the trace.
|
||||
12
profiler/src/achievements/Intro.md
Normal file
12
profiler/src/achievements/Intro.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# Click here to discover achievements!
|
||||
|
||||
Clicking on the * Achievements* button opens the Achievements List. Here you can see the tasks to be completed along with a short description of what needs to be done.
|
||||
|
||||
As you complete each Achievement, new Achievements will appear, so be sure to keep checking the list for new ones!
|
||||
|
||||
To make the new things easier to spot, the Achievements List will show a marker next to them. The * Achievements* button will glow yellow when there are new things to see.
|
||||
|
||||
- New tasks: orange
|
||||
- Completed tasks: green
|
||||
|
||||
Good luck!
|
||||
3
profiler/src/achievements/LoadTrace.md
Normal file
3
profiler/src/achievements/LoadTrace.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Load a trace
|
||||
|
||||
You can open a previously saved trace file (or one received from a friend) with the * Open saved trace* button on the welcome screen.
|
||||
10
profiler/src/achievements/SamplingIntro.md
Normal file
10
profiler/src/achievements/SamplingIntro.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Sampling program execution
|
||||
|
||||
Sampling program execution is a great way to find out where the hot spots are in your program. It can be used to find out which functions take the most time, or which lines of code are executed the most often.
|
||||
|
||||
While instrumentation requires changes to your code, sampling does not. However, because of the way it works, the results are coarser and it's not possible to know when functions are called or when they return.
|
||||
|
||||
Sampling is automatic on Linux. On Windows, you must run the profiled application as an administrator for it to work.
|
||||
|
||||
> [!WARNING]
|
||||
> Depending on your system configuration, some additional steps may be required. Please refer to the user manual for more information.
|
||||
12
profiler/src/achievements/SaveTrace.md
Normal file
12
profiler/src/achievements/SaveTrace.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# Save a trace
|
||||
|
||||
Now that you have traced your application (or are in the process of doing so), you can save it to disk for future reference. You can do this by clicking on the * Connection* icon in the top left corner of the screen and then clicking on the * Save trace* button.
|
||||
|
||||
Keeping old traces on hand can be beneficial, as you can compare the performance of your optimizations with what you had before.
|
||||
|
||||
You can also share the trace with your friends or co-workers by sending them the trace file.
|
||||
|
||||
> [!WARNING]
|
||||
> **Warning**
|
||||
>
|
||||
> Trace files can contain sensitive information about your application, such as program code, or even the contents of source files. Be careful when sharing them with others.
|
||||
@@ -1466,9 +1466,17 @@ Would you like to enable achievements?
|
||||
{
|
||||
ImGui::Columns( 2 );
|
||||
ImGui::SetColumnWidth( 0, 300 * dpiScale );
|
||||
ImGui::BeginChild( "##achievementtoc", ImVec2( 0, 0 ), ImGuiChildFlags_AlwaysUseWindowPadding );
|
||||
DrawAchievements( c->items );
|
||||
ImGui::EndChild();
|
||||
ImGui::NextColumn();
|
||||
if( s_achievementItem ) s_achievementItem->description();
|
||||
ImGui::BeginChild( "##achievementtext", ImVec2( 0, 0 ), ImGuiChildFlags_AlwaysUseWindowPadding );
|
||||
if( s_achievementItem )
|
||||
{
|
||||
tracy::Markdown md( nullptr, nullptr );
|
||||
md.Print( s_achievementItem->text.c_str(), s_achievementItem->text.size() );
|
||||
}
|
||||
ImGui::EndChild();
|
||||
ImGui::EndColumns();
|
||||
ImGui::EndTabItem();
|
||||
}
|
||||
|
||||
@@ -1,52 +1,60 @@
|
||||
#include "IconsFontAwesome7.h"
|
||||
#include "TracyAchievements.hpp"
|
||||
#include "TracyImGui.hpp"
|
||||
#include "TracySourceContents.hpp"
|
||||
#include "TracyWeb.hpp"
|
||||
#include "../Fonts.hpp"
|
||||
#include "TracyEmbed.hpp"
|
||||
|
||||
#include "data/Text100Million.hpp"
|
||||
#include "data/TextConnectToClient.hpp"
|
||||
#include "data/TextFindZone.hpp"
|
||||
#include "data/TextFrameImages.hpp"
|
||||
#include "data/TextGlobalSettings.hpp"
|
||||
#include "data/TextInstrumentFrames.hpp"
|
||||
#include "data/TextInstrumentationIntro.hpp"
|
||||
#include "data/TextInstrumentationStatistics.hpp"
|
||||
#include "data/TextIntro.hpp"
|
||||
#include "data/TextLoadTrace.hpp"
|
||||
#include "data/TextSamplingIntro.hpp"
|
||||
#include "data/TextSaveTrace.hpp"
|
||||
|
||||
namespace tracy::data
|
||||
{
|
||||
|
||||
AchievementItem ai_samplingIntro = { "samplingIntro", "Sampling program execution", [](){
|
||||
ImGui::TextWrapped( "Sampling program execution is a great way to find out where the hot spots are in your program. It can be used to find out which functions take the most time, or which lines of code are executed the most often." );
|
||||
ImGui::TextWrapped( "While instrumentation requires changes to your code, sampling does not. However, because of the way it works, the results are coarser and it's not possible to know when functions are called or when they return." );
|
||||
ImGui::TextWrapped( "Sampling is automatic on Linux. On Windows, you must run the profiled application as an administrator for it to work." );
|
||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
||||
ImGui::TextWrapped( "Depending on your system configuration, some additional steps may be required. Please refer to the user manual for more information." );
|
||||
ImGui::PopStyleColor();
|
||||
ImGui::PopFont();
|
||||
} };
|
||||
static std::string UnpackImpl( size_t size, size_t lz4Size, const uint8_t* data )
|
||||
{
|
||||
std::string ret;
|
||||
const EmbedData unembed( size, lz4Size, data );
|
||||
ret.assign( unembed.data(), unembed.size() );
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define Unpack( name ) UnpackImpl( Embed::name##Size, Embed::name##Lz4Size, Embed::name##Data )
|
||||
|
||||
|
||||
AchievementItem ai_samplingIntro = {
|
||||
.id = "samplingIntro",
|
||||
.name = "Sampling program execution",
|
||||
.text = Unpack( TextSamplingIntro ),
|
||||
};
|
||||
|
||||
AchievementItem* ac_samplingItems[] = { &ai_samplingIntro, nullptr };
|
||||
AchievementCategory ac_sampling = { "sampling", "Sampling", ac_samplingItems };
|
||||
|
||||
|
||||
AchievementItem ai_100million = { "100million", "It's over 100 million!", [](){
|
||||
ImGui::TextWrapped( "Tracy can handle a lot of data. How about 100 million zones in a single trace? Add a lot of zones to your program and see how it handles it!" );
|
||||
ImGui::TextWrapped( "Capturing a long-running profile trace is easy. Need to profile an hour of your program execution? You can do it." );
|
||||
ImGui::TextWrapped( "Note that it doesn't make much sense to instrument every little function you might have. The cost of the instrumentation itself will be higher than the cost of the function in such a case." );
|
||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
||||
ImGui::TextWrapped( "Keep in mind that the more zones you have, the more memory and CPU time the profiler will use. Be careful not to run out of memory." );
|
||||
ImGui::TextWrapped( "To capture 100 million zones, you will need approximately 4 GB of RAM." );
|
||||
ImGui::PopStyleColor();
|
||||
ImGui::PopFont();
|
||||
} };
|
||||
AchievementItem ai_100million = {
|
||||
.id = "100million",
|
||||
.name = "It's over 100 million!",
|
||||
.text = Unpack( Text100Million )
|
||||
};
|
||||
|
||||
AchievementItem ai_instrumentationStatistics = { "instrumentationStatistics", "Show me the stats!", [](){
|
||||
ImGui::TextWrapped( "Once you have instrumented your application, you can view the statistics for each zone in the timeline. This allows you to see how much time is spent in each zone and how many times it is called." );
|
||||
ImGui::TextWrapped( "To view the statistics, click on the \"" ICON_FA_ARROW_UP_WIDE_SHORT " Statistics\" button on the top bar. This will open a new window with a list of all zones in the trace." );
|
||||
} };
|
||||
AchievementItem ai_instrumentationStatistics = {
|
||||
.id = "instrumentationStatistics",
|
||||
.name = "Show me the stats!",
|
||||
.text = Unpack( TextInstrumentationStatistics )
|
||||
};
|
||||
|
||||
AchievementItem ai_findZone = { "findZone", "Find some zones", [](){
|
||||
ImGui::TextWrapped( "You can search for zones in the trace by opening the search window with the \"" ICON_FA_MAGNIFYING_GLASS " Find zone\" button on the top bar. It will ask you for the zone name, which in most cases will be the function name in the code." );
|
||||
ImGui::TextWrapped( "The search may find more than one zone with the same name. A list of all the zones found is displayed, and you can select any of them." );
|
||||
ImGui::TextWrapped( "Alternatively, you can open the Statistics window and click an entry there. This will open the Find zone window as if you had searched for that zone." );
|
||||
ImGui::TextWrapped( "When a zone is selected, a number of statistics are displayed to help you understand the performance of your application. In addition, a histogram of the zone execution times is displayed to make it easier for you to determine the performance of the profiled code. Be sure to select a zone with a large number of calls to make the histogram look interesting!" );
|
||||
ImGui::TextWrapped( "Note that you can draw a range on the histogram to limit the number of entries displayed in the zone list below. This list allows you to examine each zone individually. There are also a number of zone groupings that you can select. Each group can be selected and the time associated with the selected group will be highlighted on the histogram." );
|
||||
} };
|
||||
AchievementItem ai_findZone = {
|
||||
.id = "findZone",
|
||||
.name = "Find some zones",
|
||||
.text = Unpack( TextFindZone )
|
||||
};
|
||||
|
||||
AchievementItem* ac_instrumentationIntroItems[] = {
|
||||
&ai_100million,
|
||||
@@ -55,90 +63,46 @@ AchievementItem* ac_instrumentationIntroItems[] = {
|
||||
nullptr
|
||||
};
|
||||
|
||||
AchievementItem ai_instrumentationIntro = { "instrumentationIntro", "Instrumentating your application", [](){
|
||||
constexpr const char* src = R"(#include "Tracy.hpp"
|
||||
// Achievement entry for the instrumentation introduction. Its text is unpacked
// from the embedded TextInstrumentationIntro data, and its sub-items are the
// entries of ac_instrumentationIntroItems.
// The display name previously read "Instrumentating"; only the name is
// corrected, the id is kept as-is.
AchievementItem ai_instrumentationIntro = {
    .id = "instrumentationIntro",
    .name = "Instrumenting your application",
    .text = Unpack( TextInstrumentationIntro ),
    .items = ac_instrumentationIntroItems
};
|
||||
|
||||
void SomeFunction()
|
||||
{
|
||||
ZoneScoped;
|
||||
// Your code here
|
||||
}
|
||||
)";
|
||||
|
||||
static SourceContents sc;
|
||||
sc.Parse( src );
|
||||
|
||||
ImGui::TextWrapped( "Instrumentation is a powerful feature that allows you to see the exact runtime of each call to the selected set of functions. The downside is that it takes a bit of manual work to get it set up." );
|
||||
ImGui::TextWrapped( "To get started, open a source file and include the Tracy.hpp header. This will give you access to a variety of macros provided by Tracy. Next, add the ZoneScoped macro to the beginning of one of your functions, like this:" );
|
||||
ImGui::PushFont( g_fonts.mono, FontNormal );
|
||||
PrintSource( sc.get() );
|
||||
ImGui::PopFont();
|
||||
ImGui::TextWrapped( "Now, when you profile your application, you will see a new zone appear on the timeline for each call to the function. This allows you to see how much time is spent in each call and how many times the function is called." );
|
||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
||||
ImGui::TextWrapped( "Note: The ZoneScoped macro is just one of the many macros provided by Tracy. See the documentation for more information." );
|
||||
ImGui::TextWrapped( "The above description applies to C++ code, but things are done similarly in other programming languages. Refer to the documentation for your language for more information." );
|
||||
ImGui::PopStyleColor();
|
||||
ImGui::PopFont();
|
||||
}, ac_instrumentationIntroItems };
|
||||
|
||||
AchievementItem ai_frameImages = { "frameImages", "A picture is worth a thousand words", [](){
|
||||
ImGui::TextWrapped( "Tracy allows you to add context to each frame, by attaching a screenshot. You can do this with the FrameImage macro." );
|
||||
ImGui::TextWrapped( "You will have to do the screen capture and resizing yourself, which can be a bit complicated. The manual provides a sample code that shows how to do this in a performant way." );
|
||||
ImGui::TextWrapped( "The frame images are displayed in the context of a frame, for example, when you hover over the frame in the timeline or in the frame graph at the top of the screen." );
|
||||
ImGui::TextWrapped( "You can even view a recording of what your application was doing by clicking the " ICON_FA_SCREWDRIVER_WRENCH " icon and then selecting the \"" ICON_FA_PLAY " Playback\" option. Try it out!" );
|
||||
ImGui::TextWrapped( "The FrameImage macro is a great way to see what happened in your application at a particular time. Maybe you have a performance problem that only occurs when a certain object is on the screen?" );
|
||||
} };
|
||||
AchievementItem ai_frameImages = {
|
||||
.id = "frameImages",
|
||||
.name = "A picture is worth a thousand words",
|
||||
.text = Unpack( TextFrameImages )
|
||||
};
|
||||
|
||||
AchievementItem* ac_instrumentFramesItems[] = {
|
||||
&ai_frameImages,
|
||||
nullptr
|
||||
};
|
||||
|
||||
AchievementItem ai_instrumentFrames = { "instrumentFrames", "Instrumenting frames", [](){
|
||||
constexpr const char* src = R"(#include "Tracy.hpp"
|
||||
|
||||
void Render()
|
||||
{
|
||||
// Render the frame
|
||||
SwapBuffers();
|
||||
FrameMark;
|
||||
}
|
||||
)";
|
||||
|
||||
static SourceContents sc;
|
||||
sc.Parse( src );
|
||||
|
||||
ImGui::TextWrapped( "In addition to instrumenting functions, you can also instrument frames. This allows you to see how much time is spent in each frame of your application." );
|
||||
ImGui::TextWrapped( "To instrument frames, you need to add the FrameMark macro at the beginning of each frame. This can be done in the main loop of your application, or in a separate function that is called at the beginning of each frame." );
|
||||
ImGui::PushFont( g_fonts.mono, FontNormal );
|
||||
PrintSource( sc.get() );
|
||||
ImGui::PopFont();
|
||||
ImGui::TextWrapped( "When you profile your application, you will see a new frame appear on the timeline each time the FrameMark macro is called. This allows you to see how much time is spent in each frame and how many frames are rendered per second." );
|
||||
ImGui::TextWrapped( "The FrameMark macro is a great way to see at a glance how your application is performing over time. Maybe there are some performance problems that only appear after a few minutes of running the application? A frame graph is drawn at the top of the profiler window where you can see the timing of all frames." );
|
||||
ImGui::TextWrapped( "Note that some applications do not have a frame-based structure, and in such cases, frame instrumentation may not be useful. That's ok." );
|
||||
}, ac_instrumentFramesItems };
|
||||
AchievementItem ai_instrumentFrames = {
|
||||
.id = "instrumentFrames",
|
||||
.name = "Instrumenting frames",
|
||||
.text = Unpack( TextInstrumentFrames ),
|
||||
.items = ac_instrumentFramesItems
|
||||
};
|
||||
|
||||
AchievementItem* ac_instrumentationItems[] = { &ai_instrumentationIntro, &ai_instrumentFrames, nullptr };
|
||||
AchievementCategory ac_instrumentation = { "instrumentation", "Instrumentation", ac_instrumentationItems };
|
||||
|
||||
|
||||
AchievementItem ai_loadTrace = { "loadTrace", "Load a trace", [](){
|
||||
ImGui::TextWrapped( "You can open a previously saved trace file (or one received from a friend) with the \"" ICON_FA_FOLDER_OPEN " Open saved trace\" button on the welcome screen." );
|
||||
} };
|
||||
AchievementItem ai_loadTrace = {
|
||||
.id = "loadTrace",
|
||||
.name = "Load a trace",
|
||||
.text = Unpack( TextLoadTrace )
|
||||
};
|
||||
|
||||
AchievementItem ai_saveTrace = { "saveTrace", "Save a trace", [](){
|
||||
ImGui::TextWrapped( "Now that you have traced your application (or are in the process of doing so), you can save it to disk for future reference. You can do this by clicking on the " ICON_FA_WIFI " icon in the top left corner of the screen and then clicking on the \"" ICON_FA_FLOPPY_DISK " Save trace\" button." );
|
||||
ImGui::TextWrapped( "Keeping old traces on hand can be beneficial, as you can compare the performance of your optimizations with what you had before." );
|
||||
ImGui::TextWrapped( "You can also share the trace with your friends or co-workers by sending them the trace file." );
|
||||
ImGui::Spacing();
|
||||
tracy::TextColoredUnformatted( 0xFF44FFFF, ICON_FA_TRIANGLE_EXCLAMATION );
|
||||
ImGui::SameLine();
|
||||
ImGui::TextUnformatted( "Warning" );
|
||||
ImGui::SameLine();
|
||||
tracy::TextColoredUnformatted( 0xFF44FFFF, ICON_FA_TRIANGLE_EXCLAMATION );
|
||||
ImGui::TextWrapped( "Trace files can contain sensitive information about your application, such as program code, or even the contents of source files. Be careful when sharing them with others." );
|
||||
} };
|
||||
AchievementItem ai_saveTrace = {
|
||||
.id = "saveTrace",
|
||||
.name = "Save a trace",
|
||||
.text = Unpack( TextSaveTrace )
|
||||
};
|
||||
|
||||
AchievementItem* ac_connectToServerItems[] = {
|
||||
&ai_saveTrace,
|
||||
@@ -152,23 +116,19 @@ AchievementItem* ac_connectToServerUnlock[] = {
|
||||
nullptr
|
||||
};
|
||||
|
||||
AchievementItem ai_connectToServer = { "connectToClient", "First profiling session", [](){
|
||||
ImGui::TextWrapped( "Let's start our adventure by instrumenting your application and connecting it to the profiler. Here's a quick refresher:" );
|
||||
ImGui::TextWrapped( " 1. Integrate Tracy Profiler into your application. This can be done using CMake, Meson, or simply by adding the source files to your project." );
|
||||
ImGui::TextWrapped( " 2. Make sure that TracyClient.cpp (or the Tracy library) is included in your build." );
|
||||
ImGui::TextWrapped( " 3. Define TRACY_ENABLE in your build configuration, for the whole application. Do not do it in a single source file because it won't work." );
|
||||
ImGui::TextWrapped( " 4. Start your application, and \"" ICON_FA_WIFI " Connect\" to it with the profiler." );
|
||||
ImGui::TextWrapped( "Please refer to the user manual for more details." );
|
||||
if( ImGui::SmallButton( "Download the user manual" ) )
|
||||
{
|
||||
tracy::OpenWebpage( "https://github.com/wolfpld/tracy/releases" );
|
||||
}
|
||||
}, ac_connectToServerItems, ac_connectToServerUnlock };
|
||||
AchievementItem ai_connectToServer = {
|
||||
.id = "connectToClient",
|
||||
.name = "First profiling session",
|
||||
.text = Unpack( TextConnectToClient ),
|
||||
.items = ac_connectToServerItems,
|
||||
.unlocks = ac_connectToServerUnlock
|
||||
};
|
||||
|
||||
AchievementItem ai_globalSettings = { "globalSettings", "Global settings", [](){
|
||||
ImGui::TextWrapped( "Tracy has a variety of settings that can be adjusted to suit your needs. These settings can be found by clicking on the " ICON_FA_WRENCH " icon on the welcome screen. This will open the about window, where you can expand the \"" ICON_FA_TOOLBOX " Global settings\" menu." );
|
||||
ImGui::TextWrapped( "The settings are saved between sessions, so you only need to set them once." );
|
||||
} };
|
||||
AchievementItem ai_globalSettings = {
|
||||
.id = "globalSettings",
|
||||
.name = "Global settings",
|
||||
.text = Unpack( TextGlobalSettings )
|
||||
};
|
||||
|
||||
AchievementItem* ac_achievementsIntroItems[] = {
|
||||
&ai_connectToServer,
|
||||
@@ -176,18 +136,14 @@ AchievementItem* ac_achievementsIntroItems[] = {
|
||||
nullptr
|
||||
};
|
||||
|
||||
AchievementItem ai_achievementsIntro = { "achievementsIntro", "Click here to discover achievements!", [](){
|
||||
ImGui::TextWrapped( "Clicking on the " ICON_FA_STAR " button opens the Achievements List. Here you can see the tasks to be completed along with a short description of what needs to be done." );
|
||||
ImGui::TextWrapped( "As you complete each Achievement, new Achievements will appear, so be sure to keep checking the list for new ones!" );
|
||||
ImGui::TextWrapped( "To make the new things easier to spot, the Achievements List will show a marker next to them. The achievements " ICON_FA_STAR " button will glow yellow when there are new things to see." );
|
||||
ImGui::TextUnformatted( "New tasks:" );
|
||||
ImGui::SameLine();
|
||||
TextColoredUnformatted( 0xFF4488FF, ICON_FA_CIRCLE_EXCLAMATION );
|
||||
ImGui::TextUnformatted( "Completed tasks:" );
|
||||
ImGui::SameLine();
|
||||
TextColoredUnformatted( 0xFF44FF44, ICON_FA_CIRCLE_CHECK );
|
||||
ImGui::TextWrapped( "Good luck!" );
|
||||
}, ac_achievementsIntroItems, nullptr, true, 1 };
|
||||
AchievementItem ai_achievementsIntro = {
|
||||
.id = "achievementsIntro",
|
||||
.name = "Click here to discover achievements!",
|
||||
.text = Unpack( TextIntro ),
|
||||
.items = ac_achievementsIntroItems,
|
||||
.keepOpen = true,
|
||||
.unlockTime = 1
|
||||
};
|
||||
|
||||
AchievementItem* ac_firstStepsItems[] = { &ai_achievementsIntro, nullptr };
|
||||
AchievementCategory ac_firstSteps = { "firstSteps", "First steps", ac_firstStepsItems, 1 };
|
||||
|
||||
@@ -20,7 +20,7 @@ struct AchievementItem
|
||||
{
|
||||
const char* id;
|
||||
const char* name;
|
||||
void(*description)();
|
||||
std::string text;
|
||||
AchievementItem** items;
|
||||
AchievementItem** unlocks;
|
||||
bool keepOpen;
|
||||
|
||||
@@ -49,7 +49,8 @@ constexpr const char* GpuContextNames[] = {
|
||||
"Metal",
|
||||
"Custom",
|
||||
"CUDA",
|
||||
"Rocprof"
|
||||
"Rocprof",
|
||||
"WebGPU"
|
||||
};
|
||||
|
||||
struct MemoryPage;
|
||||
|
||||
@@ -25,6 +25,8 @@ void View::DrawManual()
|
||||
ImGui::PopStyleColor();
|
||||
ImGui::SameLine();
|
||||
TextDisabledUnformatted( "This user manual is missing features. See the PDF file for the proper version." );
|
||||
ImGui::SameLine();
|
||||
if( ImGui::Button( ICON_FA_BOOK " PDF Manual" ) ) OpenWebpage( "https://github.com/wolfpld/tracy/releases" );
|
||||
|
||||
ImGui::Separator();
|
||||
ImGui::BeginChild( "##usermanual" );
|
||||
|
||||
@@ -492,7 +492,8 @@ enum class GpuContextType : uint8_t
|
||||
Metal,
|
||||
Custom,
|
||||
CUDA,
|
||||
Rocprof
|
||||
Rocprof,
|
||||
WebGPU
|
||||
};
|
||||
|
||||
enum GpuContextFlags : uint8_t
|
||||
|
||||
957
public/tracy/TracyWebGPU.hpp
Normal file
957
public/tracy/TracyWebGPU.hpp
Normal file
@@ -0,0 +1,957 @@
|
||||
#ifndef __TRACYWEBGPU_HPP__
|
||||
#define __TRACYWEBGPU_HPP__
|
||||
|
||||
// WebGPU, unlike other graphics APIs, has many annoying restrictions that complicate
|
||||
// the design of the Tracy WebGPU back-end:
|
||||
// - there's no CPU/GPU clock calibration API
|
||||
// - submitting GPU commands that touch a buffer that the host is mapping is not permitted
|
||||
// - resolving timestamps requires destination offsets aligned to 256 bytes
|
||||
// - timestamps are only available at pass granularity (implementations may need to emulate this)
|
||||
// - spec mandates timestamps to be in nanoseconds (implementations may need to emulate this)
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyWebGPUSetupDevice(deviceDescriptor)
|
||||
|
||||
#define TracyWebGPUContext(instance, device, queue) nullptr
|
||||
#define TracyWebGPUDestroy(ctx)
|
||||
#define TracyWebGPUContextName(ctx, name, size)
|
||||
|
||||
#define TracyWebGPUZone(ctx, encoder, passDesc, name)
|
||||
#define TracyWebGPUZoneC(ctx, encoder, passDesc, name, color)
|
||||
#define TracyWebGPUNamedZone(ctx, varname, encoder, passDesc, name, active)
|
||||
#define TracyWebGPUNamedZoneC(ctx, varname, encoder, passDesc, name, color, active)
|
||||
#define TracyWebGPUZoneTransient(ctx, varname, encoder, passDesc, name, active)
|
||||
|
||||
#define TracyWebGPUZoneS(ctx, encoder, passDesc, name, depth)
|
||||
#define TracyWebGPUZoneCS(ctx, encoder, passDesc, name, color, depth)
|
||||
#define TracyWebGPUNamedZoneS(ctx, varname, encoder, passDesc, name, depth, active)
|
||||
#define TracyWebGPUNamedZoneCS(ctx, varname, encoder, passDesc, name, color, depth, active)
|
||||
#define TracyWebGPUZoneTransientS(ctx, varname, encoder, passDesc, name, depth, active)
|
||||
|
||||
#define TracyWebGPUCollect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class WebGPUZoneScope {};
|
||||
}
|
||||
|
||||
using TracyWebGPUCtx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
#include "../client/TracyCallstack.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
#include <webgpu/webgpu.h>
|
||||
|
||||
// piggy-back on WGPU_DAWN_TOGGLES_DESCRIPTOR_INIT to detect Dawn header
|
||||
#ifdef WGPU_DAWN_TOGGLES_DESCRIPTOR_INIT
|
||||
#define TRACY_WEBGPU_DAWN_NATIVE (1)
|
||||
#include <dawn/native/DawnNative.h>
|
||||
#else
|
||||
#define TRACY_WEBGPU_WGPU_NATIVE (1)
|
||||
#include <webgpu/wgpu.h>
|
||||
#endif
|
||||
|
||||
#ifndef TRACY_WEBGPU_DEBUG_LEVEL
|
||||
#define TRACY_WEBGPU_DEBUG_LEVEL (0)
|
||||
#endif//TRACY_WEBGPU_DEBUG_LEVEL
|
||||
|
||||
#if TRACY_WEBGPU_DEBUG_LEVEL
|
||||
#define TracyWebGPUDebug(...) __VA_ARGS__;
|
||||
#if defined(_MSC_VER)
|
||||
extern "C" int32_t IsDebuggerPresent(void);
|
||||
#define TracyWebGPUBreak() if (IsDebuggerPresent()) __debugbreak()
|
||||
#else
|
||||
#define TracyWebGPUBreak() ((void)0)
|
||||
#endif
|
||||
#define TracyWebGPUAssert(predicate, ...) if (predicate) {} else { __VA_ARGS__; TracyWebGPUBreak(); }
|
||||
#else
|
||||
#define TracyWebGPUDebug(...)
|
||||
#define TracyWebGPUBreak()
|
||||
#define TracyWebGPUAssert(predicate, ...) assert(predicate);
|
||||
#endif
|
||||
|
||||
#define TracyWebGPULog(severity, msg) fprintf(stdout, "%s", msg), tracy::Profiler::LogString( tracy::MessageSourceType::Tracy, tracy::MessageSeverity::severity, tracy::Color::Red4, 0, msg );
|
||||
#define TracyWebGPUPanic(msg, ...) do { TracyWebGPULog(Error, msg); TracyWebGPUAssert(false && "TracyWebGPU: " msg); __VA_ARGS__; } while(false);
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class WebGPUQueueCtx
|
||||
{
|
||||
friend class WebGPUZoneScope;
|
||||
|
||||
uint8_t m_contextId = 255; // 255 represents "invalid id"
|
||||
|
||||
std::mutex m_collectionMutex;
|
||||
|
||||
WGPUInstance m_instance = nullptr;
|
||||
WGPUDevice m_device = nullptr;
|
||||
WGPUQueue m_queue = nullptr;
|
||||
|
||||
struct ReadbackStage
|
||||
{
|
||||
WGPUBuffer buffer = nullptr;
|
||||
std::atomic<uint64_t> copiedUpto {0};
|
||||
std::atomic<WGPUMapAsyncStatus> mapStatus = {};
|
||||
WGPUFuture pendingFuture = {};
|
||||
};
|
||||
static_assert(std::atomic<WGPUMapAsyncStatus>::is_always_lock_free, "WGPUMapAsyncStatus must be lock-free atomic");
|
||||
|
||||
WGPUQuerySet m_querySet = nullptr;
|
||||
WGPUBuffer m_resolveBuffer = nullptr;
|
||||
ReadbackStage m_readbackReel [3];
|
||||
std::atomic<int> m_writeIdx {0};
|
||||
|
||||
using atomic_counter = std::atomic<uint64_t>;
|
||||
atomic_counter m_queryCounter = 0;
|
||||
atomic_counter m_previousCheckpoint = 0;
|
||||
|
||||
uint32_t m_queryLimit = 0;
|
||||
|
||||
std::vector<uint64_t> m_shadowBuffer;
|
||||
|
||||
using WallTime = std::chrono::steady_clock::time_point;
|
||||
// Samples the monotonic clock behind the WallTime alias.
static tracy_force_inline auto GetWallTime()
{
    return std::chrono::steady_clock::now();
}
|
||||
// Converts an integer millisecond count into a std::chrono duration.
static tracy_force_inline auto Milliseconds(int value)
{
    std::chrono::milliseconds span(value);
    return span;
}
|
||||
|
||||
// Blocks (by pumping instance events) until the work submitted to `queue` so far
// has completed, or until a 2 second timeout expires.
// Returns true if the work-done callback fired, false on timeout.
static bool WaitQueueIdle(WGPUQueue queue, WGPUInstance instance)
{
    bool workDone = false;

    WGPUQueueWorkDoneCallbackInfo callbackInfo = {};
    callbackInfo.mode      = WGPUCallbackMode_AllowProcessEvents;
    callbackInfo.userdata1 = &workDone;
    // The completion status is ignored: the callback only raises the flag.
    callbackInfo.callback  = [](WGPUQueueWorkDoneStatus, WGPUStringView, void* flag, void*) {
        *static_cast<bool*>(flag) = true;
    };
    wgpuQueueOnSubmittedWorkDone(queue, callbackInfo);

    const auto giveUpAt = GetWallTime() + Milliseconds(2000);
    for (;;)
    {
        if (workDone) break;
        if (GetWallTime() >= giveUpAt) break;
        wgpuInstanceProcessEvents(instance);
    }
    return workDone;
}
|
||||
|
||||
// Maps the first two 64-bit values of `buffer` for reading and waits (pumping
// instance events, up to 2 seconds) for the mapping to complete.
// Returns a pointer to the mapped range, or nullptr if mapping did not succeed.
// On success the caller is expected to unmap the buffer when done.
static const uint64_t* MapBufferSync(WGPUBuffer buffer, WGPUInstance instance)
{
    const size_t rangeOffset = 0;
    const size_t rangeSize   = 2 * sizeof(uint64_t);

    WGPUMapAsyncStatus mapResult = {};   // zero: the callback has not reported yet

    WGPUBufferMapCallbackInfo callbackInfo = {};
    callbackInfo.mode      = WGPUCallbackMode_AllowProcessEvents;
    callbackInfo.userdata1 = &mapResult;
    callbackInfo.callback  = [](WGPUMapAsyncStatus status, WGPUStringView, void* out, void*) {
        *static_cast<WGPUMapAsyncStatus*>(out) = status;
    };
    wgpuBufferMapAsync(buffer, WGPUMapMode_Read, rangeOffset, rangeSize, callbackInfo);

    const auto giveUpAt = GetWallTime() + Milliseconds(2000);
    while (mapResult == WGPUMapAsyncStatus{} && GetWallTime() < giveUpAt)
        wgpuInstanceProcessEvents(instance);

    if (mapResult != WGPUMapAsyncStatus_Success)
        return nullptr;

    return static_cast<const uint64_t*>(
        wgpuBufferGetConstMappedRange(buffer, rangeOffset, rangeSize));
}
|
||||
|
||||
struct Calibration {
|
||||
int64_t minCpuRange = ~uint64_t(0) >> 1;
|
||||
// Incremental (one sample at a time) simple linear regression y = a*x + b,
// carried out entirely in 64-bit integer arithmetic.
struct Regression
{
    int64_t n      = 0;   // number of samples consumed so far
    int64_t mean_x = 0;   // running mean of x (integer division, truncated)
    int64_t mean_y = 0;   // running mean of y (integer division, truncated)
    int64_t S_xx   = 0;   // running sum of x deviations squared
    int64_t S_xy   = 0;   // running sum of x deviation times y deviation

    // Folds the sample (x, y) into the running means and sums.
    void Update(int64_t x, int64_t y)
    {
        ++n;
        const int64_t offX = x - mean_x;   // deviation from the previous mean
        const int64_t offY = y - mean_y;
        mean_x += offX / n;
        mean_y += offY / n;
        // the second factor uses the freshly updated mean
        S_xx += offX * (x - mean_x);
        S_xy += offX * (y - mean_y);
    }

    // Slope of the fitted line; not finite while S_xx is still zero.
    double Slope() const
    {
        return static_cast<double>(S_xy) / static_cast<double>(S_xx);
    }

    // Intercept of the fitted line (the line passes through the mean point).
    double Intercept() const
    {
        const double slope = Slope();
        return static_cast<double>(mean_y) - slope * static_cast<double>(mean_x);
    }
};
|
||||
Regression cpuToGpuModel; // cpu-ticks to gpu-ticks
|
||||
Regression cpuRangeModel; // cpu-tick interval uncertainty
|
||||
Regression wallToGpuModel; // nanoseconds to gpu-ticks
|
||||
void GetReferenceTime(uint64_t& cpuTime, uint64_t& gpuTime) const
|
||||
{
|
||||
// the mean belongs to the regression line
|
||||
cpuTime = cpuToGpuModel.mean_x;
|
||||
gpuTime = cpuToGpuModel.mean_y;
|
||||
}
|
||||
// Estimated GPU tick period in ns/tick: the reciprocal of the wall-to-gpu slope
// (gpu ticks per nanosecond). Not finite/meaningful if that slope is degenerate.
double Period() const
{
    const double ticksPerNanosecond = wallToGpuModel.Slope();
    return 1.0 / ticksPerNanosecond;
}
|
||||
bool AcceptX(const Regression& r, int64_t x, double threshold = 3.0) const {
|
||||
if (r.n < 2) return true;
|
||||
auto dx = x - r.mean_x;
|
||||
if (dx <= 0) return true; // always accept "tighter" outliers
|
||||
double variance = double(r.S_xx) / (r.n - 1);
|
||||
if (variance == 0.0) return true;
|
||||
// WARN: dx*dx "could" overflow, but very unlikely in practice
|
||||
double zz = (double)(dx*dx) / variance;
|
||||
return zz <= (threshold*threshold);
|
||||
}
|
||||
// Feeds one calibration sample into the models.
//   twall0/twall1 : wall-clock readings taken before/after the GPU work
//   tcpu0/tcpu1   : CPU tick readings taken before/after the GPU work
//   tgpu          : GPU timestamp associated with the sample
// The CPU interval length is always recorded in cpuRangeModel; the sample itself is
// only used (and true returned) if that interval is not an outlier per AcceptX.
bool Update(WallTime twall0, WallTime twall1, uint64_t tcpu0, uint64_t tcpu1, uint64_t tgpu)
{
    const int64_t cpuRange = tcpu1 - tcpu0;
    cpuRangeModel.Update(cpuRange, 0);
    if (!AcceptX(cpuRangeModel, cpuRange, 1.0))
        return false;

    // Use the mid-point of each interval as the CPU-side / wall-side timestamp.
    const int64_t tcpu = tcpu0 + (tcpu1 - tcpu0) / 2;
    const WallTime wallMid = twall0 + (twall1 - twall0) / 2;
    const int64_t twall =
        std::chrono::duration_cast<std::chrono::nanoseconds>(wallMid.time_since_epoch()).count();

    // incremental regression:
    cpuToGpuModel.Update(tcpu, tgpu);
    wallToGpuModel.Update(twall, tgpu);
    TracyWebGPUDebug( fprintf(stderr, "----- (sample accepted! wall = %lld | cpu = %lld | gpu = %lld | period = %f)\n", twall, tcpu, tgpu, Period()) );
    return true;
}
|
||||
} m_calibration;
|
||||
|
||||
// Commits an item previously obtained from Profiler::QueueSerial().
// With TRACY_ON_DEMAND the item is additionally handed to Profiler::DeferItem().
tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item)
{
#ifndef TRACY_ON_DEMAND
    (void)item;   // only needed by the on-demand path
#else
    GetProfiler().DeferItem(*item);
#endif
    Profiler::QueueSerialFinish();
}
|
||||
|
||||
bool CalibrateClocks(uint64_t& outCpuTime, uint64_t& outGpuTime, double& period)
|
||||
{
|
||||
// WebGPU does not have any clock calibration API.
|
||||
// This routine attempts to estimates a reasonable (cpuTime, gpuTime) correlation
|
||||
// by sampling CPU and GPU timestamps around a "synchronous" draw call.
|
||||
// Several samples are taken to tighten the estimation.
|
||||
|
||||
ZoneScoped;
|
||||
|
||||
WGPUShaderSourceWGSL wgslSrc = {};
|
||||
wgslSrc.chain.sType = WGPUSType_ShaderSourceWGSL;
|
||||
wgslSrc.code =
|
||||
{
|
||||
R"(
|
||||
@vertex fn vs(@builtin(vertex_index) i: u32) -> @builtin(position) vec4f {
|
||||
var p = array(vec4f(-1,-1,.5,1), vec4f(3,-1,.5,1), vec4f(-1,3,.5,1));
|
||||
return p[i];
|
||||
}
|
||||
@fragment fn fs() -> @location(0) vec4f { return vec4f(0.0); }
|
||||
)",
|
||||
WGPU_STRLEN
|
||||
};
|
||||
WGPUShaderModuleDescriptor smDesc = {};
|
||||
smDesc.nextInChain = reinterpret_cast<WGPUChainedStruct*>(&wgslSrc);
|
||||
WGPUShaderModule calibShader = wgpuDeviceCreateShaderModule(m_device, &smDesc);
|
||||
if (!calibShader) { TracyWebGPUPanic("Failed to create calibration shader.", return false); }
|
||||
|
||||
WGPUTextureDescriptor texDesc = {};
|
||||
texDesc.usage = WGPUTextureUsage_RenderAttachment;
|
||||
texDesc.dimension = WGPUTextureDimension_2D;
|
||||
texDesc.size = { 1, 1, 1 };
|
||||
texDesc.format = WGPUTextureFormat_BGRA8Unorm;
|
||||
texDesc.mipLevelCount = 1;
|
||||
texDesc.sampleCount = 1;
|
||||
WGPUTexture tex = wgpuDeviceCreateTexture(m_device, &texDesc);
|
||||
if (!tex) { wgpuShaderModuleRelease(calibShader); TracyWebGPUPanic("Failed to create calibration scratch texture.", return false); }
|
||||
WGPUTextureView texView = wgpuTextureCreateView(tex, nullptr);
|
||||
if (!texView) { wgpuTextureRelease(tex); wgpuShaderModuleRelease(calibShader); TracyWebGPUPanic("Failed to create calibration scratch texture view.", return false); }
|
||||
|
||||
WGPUColorTargetState colorTarget = {};
|
||||
colorTarget.format = WGPUTextureFormat_BGRA8Unorm;
|
||||
colorTarget.writeMask = WGPUColorWriteMask_All;
|
||||
WGPUFragmentState fragState = {};
|
||||
fragState.module = calibShader;
|
||||
fragState.entryPoint = { "fs", WGPU_STRLEN };
|
||||
fragState.targetCount = 1;
|
||||
fragState.targets = &colorTarget;
|
||||
WGPURenderPipelineDescriptor pipeDesc = {};
|
||||
pipeDesc.vertex.module = calibShader;
|
||||
pipeDesc.vertex.entryPoint = { "vs", WGPU_STRLEN };
|
||||
pipeDesc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
|
||||
pipeDesc.multisample.count = 1;
|
||||
pipeDesc.fragment = &fragState;
|
||||
WGPURenderPipeline calibPipeline = wgpuDeviceCreateRenderPipeline(m_device, &pipeDesc);
|
||||
if (!calibPipeline) { wgpuTextureViewRelease(texView); wgpuTextureRelease(tex); wgpuShaderModuleRelease(calibShader); TracyWebGPUPanic("Failed to create calibration pipeline.", return false); }
|
||||
|
||||
uint32_t queryId = 0;
|
||||
WGPUPassTimestampWrites anchorTs = {};
|
||||
anchorTs.querySet = m_querySet;
|
||||
anchorTs.beginningOfPassWriteIndex = queryId;
|
||||
anchorTs.endOfPassWriteIndex = queryId+1;
|
||||
|
||||
WGPURenderPassColorAttachment att = {};
|
||||
att.view = texView;
|
||||
att.loadOp = WGPULoadOp_Clear;
|
||||
att.storeOp = WGPUStoreOp_Store;
|
||||
att.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
|
||||
|
||||
WGPURenderPassDescriptor passDesc = {};
|
||||
passDesc.colorAttachmentCount = 1;
|
||||
passDesc.colorAttachments = &att;
|
||||
passDesc.timestampWrites = &anchorTs;
|
||||
|
||||
// calibration loop
|
||||
const auto deadline = GetWallTime() + Milliseconds(100);
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
{
|
||||
// loop until time budget (100ms) allows, but ensure at least 5 iterations
|
||||
if ((GetWallTime() >= deadline) && (i > 5))
|
||||
break;
|
||||
|
||||
WGPUCommandEncoder enc = wgpuDeviceCreateCommandEncoder(m_device, nullptr);
|
||||
if (!enc) { TracyWebGPUPanic("Failed to create command encoder for time calibration.", return false); }
|
||||
|
||||
WGPURenderPassEncoder pass = wgpuCommandEncoderBeginRenderPass(enc, &passDesc);
|
||||
wgpuRenderPassEncoderSetPipeline(pass, calibPipeline);
|
||||
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
|
||||
wgpuRenderPassEncoderEnd(pass);
|
||||
wgpuRenderPassEncoderRelease(pass);
|
||||
|
||||
WGPUBuffer readBackBuffer = m_readbackReel[0].buffer;
|
||||
uint32_t byteOffset = queryId * sizeof(uint64_t);
|
||||
uint32_t sizeInBytes = 2 * sizeof(uint64_t);
|
||||
wgpuCommandEncoderResolveQuerySet(enc, m_querySet, queryId, 2, m_resolveBuffer, byteOffset);
|
||||
wgpuCommandEncoderCopyBufferToBuffer(enc, m_resolveBuffer, byteOffset, readBackBuffer, byteOffset, sizeInBytes);
|
||||
|
||||
WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(enc, nullptr);
|
||||
wgpuCommandEncoderRelease(enc);
|
||||
if (!cmd) { TracyWebGPUPanic("Failed to finish calibration command encoder.", return false); }
|
||||
|
||||
WaitQueueIdle(m_queue, m_instance);
|
||||
int64_t cpu [2] = {};
|
||||
int64_t gpu [2] = {};
|
||||
WallTime wall [2] = {};
|
||||
cpu[0] = Profiler::GetTime();
|
||||
wall[0] = GetWallTime();
|
||||
wgpuQueueSubmit(m_queue, 1, &cmd);
|
||||
wgpuCommandBufferRelease(cmd);
|
||||
WaitQueueIdle(m_queue, m_instance);
|
||||
wall[1] = GetWallTime();
|
||||
cpu[1] = Profiler::GetTime();
|
||||
auto gpuTimestamps = MapBufferSync(readBackBuffer, m_instance);
|
||||
TracyWebGPUAssert(gpuTimestamps != nullptr);
|
||||
gpu[0] = gpuTimestamps[0];
|
||||
gpu[1] = gpuTimestamps[1];
|
||||
wgpuBufferUnmap(readBackBuffer);
|
||||
TracyWebGPUDebug(
|
||||
fprintf(stdout, "[%03d] CalibrateClocks() [CPU] %16lld | %16lld | /// %lld\n", i, cpu[0], cpu[1], cpu[1]-cpu[0]);
|
||||
fprintf(stdout, "----------------------- [GPU] %16llu | %16llu | /// %lld\n", gpu[0], gpu[1], gpu[1]-gpu[0]);
|
||||
uint64_t cpuTimeRef, gpuTimeRef;
|
||||
m_calibration.GetReferenceTime(cpuTimeRef, gpuTimeRef);
|
||||
if (gpu[0] < gpuTimeRef)
|
||||
fprintf(stdout, "!!!!! CalibrateClocks() -> WARNING!!! going backwards!\n%llu\n%llu\n%lld\n", gpuTimeRef, gpu[0], gpu[0] - gpuTimeRef);
|
||||
);
|
||||
|
||||
// skip first sample since it is quite jittery (lazy intialization of WebGPU objects)
|
||||
if (i == 0)
|
||||
continue;
|
||||
|
||||
m_calibration.Update(wall[0], wall[1], cpu[0], cpu[1], gpu[0]);
|
||||
};
|
||||
|
||||
TracyWebGPUDebug(
|
||||
fprintf(stdout, "##### CalibrateClocks() WALL = %lld | CPU = %lld | GPU = %lld | period = %f\n",
|
||||
m_calibration.wallToGpuModel.mean_x,
|
||||
m_calibration.cpuToGpuModel.mean_x,
|
||||
m_calibration.cpuToGpuModel.mean_y,
|
||||
m_calibration.Period());
|
||||
);
|
||||
|
||||
wgpuRenderPipelineRelease(calibPipeline);
|
||||
wgpuShaderModuleRelease(calibShader);
|
||||
wgpuTextureViewRelease(texView);
|
||||
wgpuTextureRelease(tex);
|
||||
|
||||
m_calibration.GetReferenceTime(outCpuTime, outGpuTime);
|
||||
period = m_calibration.Period();
|
||||
// assume 1 ns/tick if the period estimation is close enough to 1
|
||||
if (std::abs(period - 1.0) < 0.001)
|
||||
period = 1.0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
// Amends a device descriptor so that the created device supports GPU profiling.
// Call this before creating the device; VerifyDevice() checks the result afterwards.
//
// The caller's required features are copied (duplicates removed) into an internal static
// array, the timestamp-query features are appended, and the descriptor is re-pointed at
// that array. Because the storage is static, the descriptor stays valid only until the
// next call, and calls are not safe to make concurrently.
//
// Returns false (leaving the descriptor untouched) if the feature list does not fit.
static bool SetupDevice(WGPUDeviceDescriptor& deviceDescriptor)
{
    // TODO: pass features array/size as argument to better allow for repeated calls
    static constexpr size_t MaxFeatures = 128;
    static WGPUFeatureName features [MaxFeatures] = {};

    size_t n = 0;
    // Appends `feature` unless already present; returns false when the array is full.
    const auto appendFeature = [&n](WGPUFeatureName feature) -> bool
    {
        for (size_t k = 0; k < n; ++k)
            if (features[k] == feature) return true;
        if (n >= MaxFeatures) return false;
        features[n++] = feature;
        return true;
    };

    // A null feature pointer is treated as an empty list regardless of the count.
    // Element-wise copy (write index never passes the read index) also stays correct
    // when the descriptor already points at `features` from a previous call.
    const WGPUFeatureName* requested = deviceDescriptor.requiredFeatures;
    const size_t requestedCount = requested ? static_cast<size_t>(deviceDescriptor.requiredFeatureCount) : 0;
    bool fits = true;
    for (size_t k = 0; k < requestedCount && fits; ++k)
        fits = appendFeature(requested[k]);

    fits = fits && appendFeature(WGPUFeatureName_TimestampQuery);
#   if (!TRACY_WEBGPU_DAWN_NATIVE && TRACY_WEBGPU_WGPU_NATIVE)
    // wgpu-native: the non-standard TimestampQueryInsideEncoders device feature is
    // requested in addition to the standard TimestampQuery feature.
    TracyWebGPUDebug( fprintf(stderr, "[INFO] [WGPU] Requesting TimestampQueryInsideEncoders native feature\n") );
    fits = fits && appendFeature(static_cast<WGPUFeatureName>(WGPUNativeFeature_TimestampQueryInsideEncoders));
#   endif

    if (!fits)
    {
        assert(false && "Too many required features in WGPUDeviceDescriptor");
        return false;
    }

#   if (TRACY_WEBGPU_DAWN_NATIVE)
    TracyWebGPUDebug( fprintf(stderr, "[INFO] [DAWN] ENABLING RAW TIMESTAMP TICKS (disabling ns conversion + quantization)\n") );
    // disable_timestamp_query_conversion: resolve timestamps as raw GPU ticks, not nanoseconds.
    // timestamp_quantization: disabled defensively (off by default on Metal, but on elsewhere).
    static const char* dawnDisabledToggles[] = { "timestamp_quantization" };
    static const char* dawnEnabledToggles[] = { "disable_timestamp_query_conversion" };
    static WGPUDawnTogglesDescriptor togglesDesc = {};
    togglesDesc.chain.sType = WGPUSType_DawnTogglesDescriptor;
    togglesDesc.disabledToggles = dawnDisabledToggles;
    togglesDesc.disabledToggleCount = 1;
    togglesDesc.enabledToggles = dawnEnabledToggles;
    togglesDesc.enabledToggleCount = 1;

    // Prepend to the caller's extension chain instead of replacing it, unless the
    // toggles descriptor is already linked in (repeated call on the same descriptor).
    // NOTE(review): a caller-provided WGPUDawnTogglesDescriptor in the same chain would
    // now coexist with this one rather than being dropped; confirm Dawn's handling.
    bool alreadyChained = false;
    for (auto* link = deviceDescriptor.nextInChain; link != nullptr; link = link->next)
    {
        if (link == &togglesDesc.chain) { alreadyChained = true; break; }
    }
    if (!alreadyChained)
    {
        togglesDesc.chain.next = deviceDescriptor.nextInChain;
        deviceDescriptor.nextInChain = &togglesDesc.chain;
    }
#   endif

    deviceDescriptor.requiredFeatures = features;
    deviceDescriptor.requiredFeatureCount = static_cast<uint32_t>(n);
    return true;
}
|
||||
|
||||
// Checks that `device` was created with everything the profiler relies on:
// the TimestampQuery feature plus, depending on the WebGPU implementation,
// the expected Dawn toggles or the wgpu-native TimestampQueryInsideEncoders feature.
// Returns false for a null device.
bool VerifyDevice(WGPUDevice device)
{
    if (device == nullptr || wgpuDeviceHasFeature(device, WGPUFeatureName_TimestampQuery) == WGPU_FALSE)
        return false;
#   if (TRACY_WEBGPU_DAWN_NATIVE)
    // Raw-tick timestamps must be enabled and timestamp quantization must not be in use.
    bool rawTicks = false, quantized = false;
    for (const char* toggle : ::dawn::native::GetTogglesUsed(device))
    {
        rawTicks  = rawTicks  || (strcmp(toggle, "disable_timestamp_query_conversion") == 0);
        quantized = quantized || (strcmp(toggle, "timestamp_quantization") == 0);
    }
    return rawTicks && !quantized;
#   elif (TRACY_WEBGPU_WGPU_NATIVE)
    // wgpu-native: the native TimestampQueryInsideEncoders feature must be present as well.
    return wgpuDeviceHasFeature(device, (WGPUFeatureName)WGPUNativeFeature_TimestampQueryInsideEncoders) != WGPU_FALSE;
#   endif
    return false;
}
|
||||
|
||||
// Creates a profiling context for `queue`.
//
// On success the instance, device and queue are retained (AddRef), the query set and
// the resolve/readback buffers are created, the CPU/GPU clocks are calibrated and the
// new GPU context is announced to the profiler.
// On any failure the constructor returns early and GetId() keeps reporting 255
// ("invalid id"); whatever was acquired up to that point is released by the destructor.
WebGPUQueueCtx(WGPUInstance instance, WGPUDevice device, WGPUQueue queue)
    : m_instance(instance)
    , m_device(device)
    , m_queue(queue)
{
    ZoneScopedC(Color::Red4);

    // Nothing has been retained yet: on these early failures the stored handles must be
    // forgotten, otherwise the destructor would release references this object never took.
    const bool handlesValid = (m_instance != nullptr) && (m_device != nullptr) && (m_queue != nullptr);
    if (!handlesValid)
    {
        m_instance = nullptr; m_device = nullptr; m_queue = nullptr;
        TracyWebGPUPanic("GPU profiling disabled because a null instance, device or queue was provided.", return);
    }
    if (!VerifyDevice(m_device))
    {
        m_instance = nullptr; m_device = nullptr; m_queue = nullptr;
        TracyWebGPUPanic("GPU profiling disabled because the device did not enable the necessary features.", return);
    }

    wgpuInstanceAddRef(m_instance);
    wgpuDeviceAddRef(m_device);
    wgpuQueueAddRef(m_queue);

    // Setup Query Set: must have even size since queries are issued in pairs.
    // (The WebGPU spec mandates 4096, with no way to query the device limit.)
    // If creation fails, retry with half the size, down to 128 queries.
    WGPUQuerySetDescriptor qsDesc = {};
    qsDesc.type = WGPUQueryType_Timestamp;
    qsDesc.count = 4096;
    for (;;)
    {
        m_querySet = wgpuDeviceCreateQuerySet(m_device, &qsDesc);
        if (m_querySet) break;
        qsDesc.count /= 2;
        if (qsDesc.count < 128) break;
    }
    if (m_querySet == nullptr)
        TracyWebGPUPanic("Failed to create timestamp query set.", return);
    m_queryLimit = qsDesc.count;

    // One 64-bit timestamp per query.
    const uint64_t timestampBytes = static_cast<uint64_t>(m_queryLimit) * sizeof(uint64_t);

    WGPUBufferDescriptor resolveDesc = {};
    resolveDesc.usage = WGPUBufferUsage_QueryResolve | WGPUBufferUsage_CopySrc;
    resolveDesc.size = timestampBytes;
    m_resolveBuffer = wgpuDeviceCreateBuffer(m_device, &resolveDesc);
    if (!m_resolveBuffer)
        TracyWebGPUPanic("Failed to create timestamp resolve buffer.", return);

    WGPUBufferDescriptor readbackDesc = {};
    readbackDesc.usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead;
    readbackDesc.size = timestampBytes;
    for (auto& stage : m_readbackReel)
    {
        stage.buffer = wgpuDeviceCreateBuffer(m_device, &readbackDesc);
        stage.copiedUpto = 0;
        if (!stage.buffer) { TracyWebGPUPanic("Failed to create timestamp readback buffer.", return); }
    }

    uint64_t cpuTimestamp = 0;
    uint64_t gpuTimestamp = 0;
    double period = 0.0; // in nanoseconds per gpu-tick
    if (!CalibrateClocks(cpuTimestamp, gpuTimestamp, period))
        TracyWebGPUPanic("Failed to calibrate CPU/GPU clocks.", return);

    TracyWebGPUDebug( fprintf(stdout, "[WebGPUQueueCtx] cpuTimestamp: %llu | gpuTimestamp: %llu | period: %f\n", cpuTimestamp, gpuTimestamp, period) );
    // Every shadow slot starts at the reference GPU time; Collect() compares incoming
    // timestamps against the shadow copy to tell whether a slot has been written.
    m_shadowBuffer.resize(m_queryLimit, gpuTimestamp);

    // All setup completed: register the context.
    m_contextId = GetGpuCtxCounter().fetch_add(1);
    ZoneValue(m_contextId);

    auto* item = Profiler::QueueSerial();
    MemWrite(&item->hdr.type, QueueType::GpuNewContext);
    MemWrite(&item->gpuNewContext.cpuTime, static_cast<int64_t>(cpuTimestamp));
    MemWrite(&item->gpuNewContext.gpuTime, static_cast<int64_t>(gpuTimestamp));
    MemWrite(&item->gpuNewContext.thread, static_cast<uint32_t>(0));
    MemWrite(&item->gpuNewContext.period, static_cast<float>(period));
    MemWrite(&item->gpuNewContext.context, static_cast<uint8_t>(GetId()));
    MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0)); // no calibration available
    MemWrite(&item->gpuNewContext.type, GpuContextType::WebGPU);
    SubmitQueueItem(item);
}
|
||||
|
||||
// Runs one last Collect() and then releases every WebGPU object held by the context.
~WebGPUQueueCtx()
{
    // TODO: a few problems to address later during this final Collect():
    // 1. ensure "partial" query batches are collected
    // 2. ensure all readback stages are collected and empty
    // 3. ensure readback buffers are not mapped before deleting them
    Collect();

    for (ReadbackStage& stage : m_readbackReel)
    {
        if (stage.buffer == nullptr) continue;
        wgpuBufferRelease(stage.buffer);
        stage.buffer = nullptr;
    }

    if (m_resolveBuffer != nullptr)
    {
        wgpuBufferRelease(m_resolveBuffer);
        m_resolveBuffer = nullptr;
    }
    if (m_querySet != nullptr)
    {
        wgpuQuerySetRelease(m_querySet);
        m_querySet = nullptr;
    }
    if (m_queue != nullptr)
    {
        wgpuQueueRelease(m_queue);
        m_queue = nullptr;
    }
    if (m_device != nullptr)
    {
        wgpuDeviceRelease(m_device);
        m_device = nullptr;
    }
    if (m_instance != nullptr)
    {
        wgpuInstanceRelease(m_instance);
        m_instance = nullptr;
    }
}
|
||||
|
||||
// Profiler-side id of this GPU context; 255 means the context is invalid
// (it was never registered with the profiler).
tracy_force_inline uint8_t GetId() const
{
    return m_contextId;
}
|
||||
|
||||
void Name(const char* name, uint16_t len)
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc(len);
|
||||
memcpy(ptr, name, len);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuContextName);
|
||||
MemWrite(&item->gpuContextNameFat.context, GetId());
|
||||
MemWrite(&item->gpuContextNameFat.ptr, (uint64_t)ptr);
|
||||
MemWrite(&item->gpuContextNameFat.size, len);
|
||||
SubmitQueueItem(item);
|
||||
}
|
||||
|
||||
void Collect(bool webgpuProcessEvents=false)
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected()) return;
|
||||
#endif
|
||||
if (!m_collectionMutex.try_lock()) return;
|
||||
std::unique_lock<std::mutex> lock(m_collectionMutex, std::adopt_lock);
|
||||
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
if (Distance(m_previousCheckpoint, m_queryCounter) <= 0)
|
||||
return;
|
||||
|
||||
// Current Readback "Reel" Stages:
|
||||
const int state = m_writeIdx;
|
||||
const int fillingIdx = (state + 0) % 3; // this is where instrumentation is pushing new queries
|
||||
const int pendingIdx = (state + 1) % 3; // instrumentation is done here; ready to be collected
|
||||
const int collectIdx = (state + 2) % 3; // this is where queries are being collected right now
|
||||
|
||||
// Ensure readback buffer has been mapped to the host
|
||||
auto& collectStage = m_readbackReel[collectIdx];
|
||||
if (collectStage.pendingFuture.id != 0)
|
||||
{
|
||||
if (webgpuProcessEvents)
|
||||
wgpuInstanceProcessEvents(m_instance);
|
||||
if (collectStage.mapStatus == WGPUMapAsyncStatus{})
|
||||
return; // callback hasn't fired yet
|
||||
collectStage.pendingFuture = {};
|
||||
if (collectStage.mapStatus != WGPUMapAsyncStatus_Success)
|
||||
TracyWebGPUPanic("Colect(): unable to map readback buffer.", return);
|
||||
}
|
||||
|
||||
if (collectStage.mapStatus == WGPUMapAsyncStatus_Success)
|
||||
{
|
||||
const uint64_t* ts = static_cast<const uint64_t*>(
|
||||
wgpuBufferGetConstMappedRange(collectStage.buffer, 0,
|
||||
static_cast<uint64_t>(m_queryLimit) * sizeof(uint64_t)));
|
||||
if (ts)
|
||||
{
|
||||
uint64_t ticket = m_previousCheckpoint;
|
||||
const uint64_t end = collectStage.copiedUpto;
|
||||
TracyWebGPUDebug( fprintf(stdout, "[TWG] Collect [%d] (%llu, %llu)\n", collectIdx, ticket, end) );
|
||||
for (; Distance(ticket, end) > 0; ticket += 2)
|
||||
{
|
||||
const uint32_t slotB = RingIndex(ticket);
|
||||
const uint32_t slotE = slotB + 1;
|
||||
TracyWebGPUDebug(
|
||||
fprintf(stderr,
|
||||
"[TWG] slot B=%4u E=%4u ts[B]=%llu ts[E]=%llu shadow[E]=%llu ts-diff=%lld shadow-diff=%lld\n",
|
||||
slotB, slotE,
|
||||
ts[slotB], ts[slotE], m_shadowBuffer[slotE],
|
||||
Distance(ts[slotB], ts[slotE]),
|
||||
Distance(m_shadowBuffer[slotE], ts[slotE]));
|
||||
);
|
||||
if (Distance(m_shadowBuffer[slotE], ts[slotE]) <= 0)
|
||||
break; // GPU hasn't written this timestamp yet; retry next Collect()
|
||||
EmitGpuTime(ts[slotB], slotB);
|
||||
EmitGpuTime(ts[slotE], slotE);
|
||||
}
|
||||
m_previousCheckpoint = ticket;
|
||||
|
||||
if (Distance(ticket, end) > 0)
|
||||
return; // still unresolved queries in this buffer; come back next Collect()
|
||||
}
|
||||
|
||||
// All queries resolved (or getMappedRange failed): unmap and fall through to rotate.
|
||||
wgpuBufferUnmap(collectStage.buffer);
|
||||
collectStage.mapStatus = {};
|
||||
}
|
||||
|
||||
// At this point, all queries in the collect buffer have been processed.
|
||||
// (it's now tie to "rotate" the buffers around...)
|
||||
|
||||
// Has any ResolveQueryBatch call landed in this reel stage since it was last recycled?
|
||||
// (Are there any queries to resolve and collect at all?)
|
||||
if (m_readbackReel[fillingIdx].copiedUpto <= m_previousCheckpoint)
|
||||
return;
|
||||
|
||||
// Rotate/Cycle the Readback Pipeline State:
|
||||
// the buffer that was just collected shall now be used for instrumentation
|
||||
collectStage.copiedUpto = m_previousCheckpoint.load();
|
||||
m_writeIdx = collectIdx; // atomically commit the pipeline rotation
|
||||
|
||||
auto& nextToCollect = m_readbackReel[pendingIdx];
|
||||
WGPUBufferMapCallbackInfo cbInfo = {};
|
||||
// This readback buffer map callback can fire "spontaneously" (it just sets a flag)
|
||||
cbInfo.mode = WGPUCallbackMode_AllowSpontaneous;
|
||||
cbInfo.callback = [](WGPUMapAsyncStatus status, WGPUStringView, void* userData, void*)
|
||||
{
|
||||
auto* stage = static_cast<ReadbackStage*>(userData);
|
||||
stage->mapStatus = status;
|
||||
};
|
||||
cbInfo.userdata1 = &nextToCollect;
|
||||
nextToCollect.pendingFuture = wgpuBufferMapAsync(
|
||||
nextToCollect.buffer, WGPUMapMode_Read, 0,
|
||||
static_cast<uint64_t>(m_queryLimit) * sizeof(uint64_t), cbInfo);
|
||||
}
|
||||
|
||||
private:
|
||||
void EmitGpuTime(uint64_t gpuTimestamp, uint32_t queryId)
|
||||
{
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(gpuTimestamp));
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuTime.context, GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
m_shadowBuffer[queryId] = gpuTimestamp;
|
||||
}
|
||||
|
||||
// Number of slots in the query ring (the size of the timestamp query set).
tracy_force_inline uint32_t RingCapacity() const
{
    return m_queryLimit;
}
|
||||
|
||||
// Maps a monotonically increasing query ticket onto its slot in the query ring.
tracy_force_inline uint32_t RingIndex(uint64_t t) const
{
    const uint64_t capacity = RingCapacity();
    const uint64_t slot = t % capacity;
    return static_cast<uint32_t>(slot);
}
|
||||
|
||||
// Signed distance from `begin` to `end`: positive when `end` is ahead of `begin`.
// The subtraction is done on the unsigned values and then reinterpreted as signed.
tracy_force_inline static int64_t Distance(uint64_t begin, uint64_t end)
{
    const uint64_t delta = end - begin;
    return static_cast<int64_t>(delta);
}
|
||||
|
||||
// Reserves a pair of consecutive query tickets and returns the first one.
// If the reservation is a full ring (or more) ahead of the last collected ticket,
// a warning is logged and one Collect() is attempted; the ticket is returned regardless.
tracy_force_inline uint64_t NextQueryId()
{
    const uint64_t ticket = m_queryCounter.fetch_add(2, std::memory_order_relaxed);

    const int64_t pending  = Distance(m_previousCheckpoint, ticket);
    const int64_t capacity = static_cast<int64_t>(RingCapacity());
    if (pending < capacity)
        return ticket;

    TracyWebGPULog(Warning, "Too many pending GPU queries: stalling!");
    Collect();
    return ticket;
}
|
||||
};
|
||||
|
||||
class WebGPUZoneScope
|
||||
{
|
||||
const bool m_active;
|
||||
WebGPUQueueCtx* m_ctx = nullptr;
|
||||
WGPUCommandEncoder m_encoder = nullptr;
|
||||
uint64_t m_rawTicket = 0;
|
||||
uint32_t m_queryId = 0;
|
||||
|
||||
WGPUPassTimestampWrites m_timestampWrites = {};
|
||||
|
||||
void ResolveQueryBatch(uint32_t queryBatchStartId)
|
||||
{
|
||||
// 32 queries = 32 * 8 bytes = 256 bytes
|
||||
TracyWebGPUAssert(queryBatchStartId % 32 == 0, return);
|
||||
queryBatchStartId = m_ctx->RingIndex(queryBatchStartId);
|
||||
|
||||
const uint64_t blockOffset = static_cast<uint64_t>(queryBatchStartId) * sizeof(uint64_t);
|
||||
wgpuCommandEncoderResolveQuerySet(
|
||||
m_encoder,
|
||||
m_ctx->m_querySet,
|
||||
queryBatchStartId, 32,
|
||||
m_ctx->m_resolveBuffer,
|
||||
blockOffset // MUST be a multiple of (aligned to) 256...
|
||||
);
|
||||
|
||||
auto& stage = m_ctx->m_readbackReel[m_ctx->m_writeIdx];
|
||||
auto readbackBuffer = stage.buffer;
|
||||
wgpuCommandEncoderCopyBufferToBuffer(
|
||||
m_encoder,
|
||||
m_ctx->m_resolveBuffer,
|
||||
blockOffset,
|
||||
readbackBuffer,
|
||||
blockOffset,
|
||||
32 * sizeof(uint64_t)
|
||||
);
|
||||
|
||||
// Advance this stage's high-water mark to cover the block just encoded.
|
||||
// TODO: maybe we can use fetch_add to increment the atomic and not need
|
||||
// to keep track of the raw ticket; Collect would need to derive the raw
|
||||
// end ticket number.
|
||||
const uint64_t blockEnd = m_rawTicket;
|
||||
uint64_t prev = stage.copiedUpto;
|
||||
while ((WebGPUQueueCtx::Distance(prev, blockEnd) > 0) &&
|
||||
!stage.copiedUpto.compare_exchange_weak(prev, blockEnd)) {}
|
||||
TracyWebGPUDebug( fprintf(stdout, "[TWG] WebGPUZoneScope [%d] (%d,%d)\n", (int)m_ctx->m_writeIdx, queryBatchStartId, queryBatchStartId+32) );
|
||||
}
|
||||
|
||||
tracy_force_inline void WriteQueueItem(const SourceLocationData* srcLocation, int32_t callstackDepth, uint32_t sourceLine, const char* sourceFile, size_t sourceFileLen, const char* functionName, size_t functionNameLen, const char* zoneName, size_t zoneNameLen)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
const bool captureCallstack = callstackDepth > 0 && has_callstack();
|
||||
const bool transientZone = srcLocation == nullptr;
|
||||
uint64_t srcLocationAddr = reinterpret_cast<uint64_t>(srcLocation);
|
||||
|
||||
QueueItem* item = nullptr;
|
||||
QueueType itemType;
|
||||
if (transientZone)
|
||||
{
|
||||
srcLocationAddr = Profiler::AllocSourceLocation(sourceLine, sourceFile, sourceFileLen, functionName, functionNameLen, zoneName, zoneNameLen);
|
||||
if (captureCallstack)
|
||||
{
|
||||
item = Profiler::QueueSerialCallstack(Callstack(callstackDepth));
|
||||
itemType = QueueType::GpuZoneBeginAllocSrcLocCallstackSerial;
|
||||
}
|
||||
else
|
||||
{
|
||||
item = Profiler::QueueSerial();
|
||||
itemType = QueueType::GpuZoneBeginAllocSrcLocSerial;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (captureCallstack)
|
||||
{
|
||||
item = Profiler::QueueSerialCallstack(Callstack(callstackDepth));
|
||||
itemType = QueueType::GpuZoneBeginCallstackSerial;
|
||||
}
|
||||
else
|
||||
{
|
||||
item = Profiler::QueueSerial();
|
||||
itemType = QueueType::GpuZoneBeginSerial;
|
||||
}
|
||||
}
|
||||
|
||||
MemWrite(&item->hdr.type, itemType);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, srcLocationAddr);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
// Fills in m_timestampWrites and assigns its address to passDesc.timestampWrites.
|
||||
// Works with both WGPURenderPassDescriptor and WGPUComputePassDescriptor.
|
||||
template<typename PassDescriptor>
|
||||
tracy_force_inline void InitBase(WebGPUQueueCtx* ctx, WGPUCommandEncoder encoder, PassDescriptor& passDesc)
|
||||
{
|
||||
m_ctx = ctx;
|
||||
m_encoder = encoder;
|
||||
|
||||
m_rawTicket = m_ctx->NextQueryId();
|
||||
m_queryId = m_ctx->RingIndex(m_rawTicket);
|
||||
m_timestampWrites.querySet = m_ctx->m_querySet;
|
||||
m_timestampWrites.beginningOfPassWriteIndex = m_queryId;
|
||||
m_timestampWrites.endOfPassWriteIndex = m_queryId + 1;
|
||||
passDesc.timestampWrites = &m_timestampWrites;
|
||||
}
|
||||
|
||||
public:
|
||||
template<typename PassDescriptor>
|
||||
tracy_force_inline WebGPUZoneScope(WebGPUQueueCtx* ctx, WGPUCommandEncoder encoder, PassDescriptor& passDesc, const SourceLocationData* srcLocation, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
InitBase(ctx, encoder, passDesc);
|
||||
WriteQueueItem(srcLocation, 0, 0, nullptr, 0, nullptr, 0, nullptr, 0);
|
||||
}
|
||||
|
||||
template<typename PassDescriptor>
|
||||
tracy_force_inline WebGPUZoneScope(WebGPUQueueCtx* ctx, WGPUCommandEncoder encoder, PassDescriptor& passDesc, const SourceLocationData* srcLocation, int32_t depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
InitBase(ctx, encoder, passDesc);
|
||||
WriteQueueItem(srcLocation, depth, 0, nullptr, 0, nullptr, 0, nullptr, 0);
|
||||
}
|
||||
|
||||
template<typename PassDescriptor>
|
||||
tracy_force_inline WebGPUZoneScope(WebGPUQueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, WGPUCommandEncoder encoder, PassDescriptor& passDesc, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
InitBase(ctx, encoder, passDesc);
|
||||
WriteQueueItem(nullptr, 0, line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
}
|
||||
|
||||
template<typename PassDescriptor>
|
||||
tracy_force_inline WebGPUZoneScope(WebGPUQueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, WGPUCommandEncoder encoder, PassDescriptor& passDesc, int32_t depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
InitBase(ctx, encoder, passDesc);
|
||||
WriteQueueItem(nullptr, depth, line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
}
|
||||
|
||||
// Closes the zone: emits a GpuZoneEndSerial queue item carrying the CPU time,
// the calling thread, the end-of-pass query index (ring index + 1) and the
// context id. Inactive zones emit nothing.
tracy_force_inline ~WebGPUZoneScope()
{
    if (m_active)
    {
        auto* item = Profiler::QueueSerial();
        MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
        MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
        MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
        // The end timestamp lives in the slot right after the begin timestamp.
        MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(m_queryId + 1));
        MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
        Profiler::QueueSerialFinish();

        // Every time the ring index lands on a multiple of 32, ask for the
        // batch that starts 32 slots earlier to be resolved.
        // NOTE(review): when m_queryId is 0 the expression m_queryId - 32 goes
        // below zero (and wraps if m_queryId is unsigned); presumably
        // ResolveQueryBatch maps that back into the ring - confirm.
        const bool atBatchBoundary = (m_queryId % 32) == 0;
        if (atBatchBoundary)
        {
            ResolveQueryBatch(m_queryId-32);
        }
    }
}
|
||||
};
|
||||
|
||||
// Tears down a context made by CreateWebGPUContext: runs the destructor
// explicitly and then returns the storage through tracy_free, mirroring the
// tracy_malloc + placement-new construction.
static inline void DestroyWebGPUContext(WebGPUQueueCtx* ctx)
{
    TracyWebGPUAssert(ctx);

    void* const storage = ctx;
    ctx->~WebGPUQueueCtx();
    tracy_free(storage);
}
|
||||
|
||||
// Allocates storage with tracy_malloc and constructs a WebGPUQueueCtx in it.
// Returns the new context, or nullptr when the constructed context reports
// id 255 (presumably the "no context id available" sentinel - confirm against
// the WebGPUQueueCtx constructor); in that case the context is destroyed
// before returning.
static inline WebGPUQueueCtx* CreateWebGPUContext(WGPUInstance instance, WGPUDevice device, WGPUQueue queue)
{
    void* const storage = tracy_malloc(sizeof(WebGPUQueueCtx));
    auto* const ctx = new (storage) WebGPUQueueCtx{ instance, device, queue };

    if (ctx->GetId() != 255)
    {
        return ctx;
    }

    DestroyWebGPUContext(ctx);
    return nullptr;
}
|
||||
|
||||
}
|
||||
|
||||
#undef TracyWebGPUPanic
|
||||
#undef TracyWebGPULog
|
||||
#undef TracyWebGPUAssert
|
||||
#undef TracyWebGPUBreak
|
||||
#undef TracyWebGPUDebug
|
||||
#undef TRACY_WEBGPU_DEBUG_LEVEL
|
||||
|
||||
// Public handle type: a raw pointer to the WebGPU queue context.
using TracyWebGPUCtx = tracy::WebGPUQueueCtx*;
|
||||
|
||||
// Forwards the device descriptor to the static WebGPUQueueCtx::SetupDevice.
// This one expands to a plain expression (no trailing semicolon).
#define TracyWebGPUSetupDevice(deviceDescriptor) tracy::WebGPUQueueCtx::SetupDevice(deviceDescriptor)
|
||||
|
||||
// Context lifetime and naming. Each of these expands to a complete statement
// (the expansion already ends in ';'), so they cannot be nested inside a larger
// expression.
#define TracyWebGPUContext(instance, device, queue) tracy::CreateWebGPUContext(instance, device, queue);
|
||||
#define TracyWebGPUDestroy(ctx) tracy::DestroyWebGPUContext(ctx);
|
||||
#define TracyWebGPUContextName(ctx, name, size) ctx->Name(name, size);
|
||||
|
||||
// Variable name used for the scope object by the zone macros that do not take
// a varname argument.
#define TracyWebGPUUnnamedZone ___tracy_gpu_webgpu_zone
|
||||
// Name of the static source-location object; built by concatenating a fixed
// prefix with TracyLine (presumably the current line number - confirm where
// TracyLine is defined).
#define TracyWebGPUSrcLocSymbol TracyConcat(__tracy_webgpu_source_location,TracyLine)
|
||||
// Declares the static constexpr SourceLocationData for a zone. The expansion
// ends in ';'.
#define TracyWebGPUSrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyWebGPUSrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
|
||||
|
||||
// Zone macros without an explicit depth argument.
// When both TRACY_HAS_CALLSTACK and TRACY_CALLSTACK are defined, every zone is
// created with TRACY_CALLSTACK as its depth; otherwise the depth-less
// WebGPUZoneScope constructors are used.
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
// These two forward to the *S macros, which are defined further below in this file.
# define TracyWebGPUZone(ctx, encoder, passDesc, name) TracyWebGPUNamedZoneS(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, TRACY_CALLSTACK, true)
|
||||
# define TracyWebGPUZoneC(ctx, encoder, passDesc, name, color) TracyWebGPUNamedZoneCS(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, color, TRACY_CALLSTACK, true)
|
||||
// Named zones: declare the static source location, then the scope object `varname`.
# define TracyWebGPUNamedZone(ctx, varname, encoder, passDesc, name, active) TracyWebGPUSrcLocObject(name, 0); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, TRACY_CALLSTACK, active };
|
||||
# define TracyWebGPUNamedZoneC(ctx, varname, encoder, passDesc, name, color, active) TracyWebGPUSrcLocObject(name, color); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, TRACY_CALLSTACK, active };
|
||||
# define TracyWebGPUZoneTransient(ctx, varname, encoder, passDesc, name, active) TracyWebGPUZoneTransientS(ctx, varname, encoder, passDesc, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyWebGPUZone(ctx, encoder, passDesc, name) TracyWebGPUNamedZone(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, true)
|
||||
# define TracyWebGPUZoneC(ctx, encoder, passDesc, name, color) TracyWebGPUNamedZoneC(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, color, true)
|
||||
# define TracyWebGPUNamedZone(ctx, varname, encoder, passDesc, name, active) TracyWebGPUSrcLocObject(name, 0); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, active };
|
||||
# define TracyWebGPUNamedZoneC(ctx, varname, encoder, passDesc, name, color, active) TracyWebGPUSrcLocObject(name, color); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, active };
|
||||
// Transient zone: no static source location; file, function and name are passed
// with lengths computed by strlen at the point of use.
# define TracyWebGPUZoneTransient(ctx, varname, encoder, passDesc, name, active) tracy::WebGPUZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), encoder, passDesc, active };
|
||||
#endif
|
||||
|
||||
// Zone macros that take an explicit depth argument (the *S variants).
// With TRACY_HAS_CALLSTACK the depth is passed on to WebGPUZoneScope; without
// it the depth argument is dropped and the macros forward to the depth-less
// variants.
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyWebGPUZoneS(ctx, encoder, passDesc, name, depth) TracyWebGPUNamedZoneS(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, depth, true)
|
||||
# define TracyWebGPUZoneCS(ctx, encoder, passDesc, name, color, depth) TracyWebGPUNamedZoneCS(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, color, depth, true)
|
||||
# define TracyWebGPUNamedZoneS(ctx, varname, encoder, passDesc, name, depth, active) TracyWebGPUSrcLocObject(name, 0); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, depth, active };
|
||||
# define TracyWebGPUNamedZoneCS(ctx, varname, encoder, passDesc, name, color, depth, active) TracyWebGPUSrcLocObject(name, color); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, depth, active };
|
||||
# define TracyWebGPUZoneTransientS(ctx, varname, encoder, passDesc, name, depth, active) tracy::WebGPUZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), encoder, passDesc, depth, active };
|
||||
#else
|
||||
# define TracyWebGPUZoneS(ctx, encoder, passDesc, name, depth) TracyWebGPUZone(ctx, encoder, passDesc, name)
|
||||
# define TracyWebGPUZoneCS(ctx, encoder, passDesc, name, color, depth) TracyWebGPUZoneC(ctx, encoder, passDesc, name, color)
|
||||
# define TracyWebGPUNamedZoneS(ctx, varname, encoder, passDesc, name, depth, active) TracyWebGPUNamedZone(ctx, varname, encoder, passDesc, name, active)
|
||||
# define TracyWebGPUNamedZoneCS(ctx, varname, encoder, passDesc, name, color, depth, active) TracyWebGPUNamedZoneC(ctx, varname, encoder, passDesc, name, color, active)
|
||||
# define TracyWebGPUZoneTransientS(ctx, varname, encoder, passDesc, name, depth, active) TracyWebGPUZoneTransient(ctx, varname, encoder, passDesc, name, active)
|
||||
#endif
|
||||
|
||||
// Calls Collect() on the context. The expansion already ends in ';'.
#define TracyWebGPUCollect(ctx) ctx->Collect();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1033,14 +1033,15 @@ PYBIND11_MODULE( TracyServerBindings, m )
|
||||
// --- GPU contexts ---
|
||||
.def( "get_gpu_contexts", []( const Worker& w ) {
|
||||
static const char* gpuTypeStr[] = {
|
||||
"Invalid", "OpenGL", "Vulkan", "OpenCL", "Direct3D12", "Direct3D11", "Metal", "Custom", "CUDA", "Rocprof" };
|
||||
"Invalid", "OpenGL", "Vulkan", "OpenCL", "Direct3D12", "Direct3D11", "Metal", "Custom", "CUDA", "Rocprof", "WebGPU" };
|
||||
static size_t numTypes = sizeof(gpuTypeStr) / sizeof(gpuTypeStr[0]);
|
||||
std::vector<GpuContextSummary> result;
|
||||
for( const auto* ctx : w.GetGpuData() )
|
||||
{
|
||||
if( !ctx ) continue;
|
||||
const std::string name = ctx->name.Active() ? w.GetString( ctx->name ) : "";
|
||||
const uint8_t typeIdx = (uint8_t)ctx->type;
|
||||
const char* typeStr = typeIdx < 10 ? gpuTypeStr[typeIdx] : "Unknown";
|
||||
const char* typeStr = typeIdx < numTypes ? gpuTypeStr[typeIdx] : "Unknown";
|
||||
result.push_back( GpuContextSummary{
|
||||
name, ctx->count, std::string( typeStr ), ctx->thread } );
|
||||
}
|
||||
|
||||
@@ -11,6 +11,22 @@
|
||||
|
||||
#include "OfflineSymbolResolver.h"
|
||||
|
||||
bool ResolveSymbols( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// On Windows the default (no custom tool given) is the DbgHelp backend.
|
||||
if( addr2lineToolPath.empty() )
|
||||
{
|
||||
return ResolveSymbolsDbgHelp( imagePath, inputEntryList, resolvedEntries );
|
||||
}
|
||||
#endif
|
||||
// Everywhere else, and whenever a custom tool is given, use the addr2line-compatible backend.
|
||||
// An empty path lets that backend fall back to the 'addr2line' found in PATH.
|
||||
return ResolveSymbolsAddr2Line( addr2lineToolPath, addr2lineArgs, imagePath, inputEntryList, resolvedEntries );
|
||||
}
|
||||
|
||||
bool ApplyPathSubstitutions( std::string& path, const PathSubstitutionList& pathSubstitutionlist )
|
||||
{
|
||||
for( const auto& substitution : pathSubstitutionlist )
|
||||
@@ -31,7 +47,35 @@ tracy::StringIdx AddSymbolString( tracy::Worker& worker, const std::string& str
|
||||
return tracy::StringIdx( location.idx );
|
||||
}
|
||||
|
||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstitutionlist, bool verbose )
|
||||
// Overwrites every callstack frame stored in the worker with the placeholder
// name "[unresolved]", the placeholder file "[unknown]" and line 0, and reports
// how many frames were touched. Map entries holding a null frame-data pointer
// are skipped.
void ResetSymbols( tracy::Worker& worker )
{
    std::cout << "Resetting callstack frame symbols to the unresolved state..." << std::endl;

    // Register the two placeholder strings once and reuse their indices for every frame.
    const tracy::StringIdx placeholderName = AddSymbolString( worker, "[unresolved]" );
    const tracy::StringIdx placeholderFile = AddSymbolString( worker, "[unknown]" );

    uint64_t resetTotal = 0;
    for( auto& mapEntry : worker.GetCallstackFrameMap() )
    {
        if( mapEntry.second )
        {
            tracy::CallstackFrameData& frameData = *mapEntry.second;
            for( uint8_t idx = 0; idx < frameData.size; idx++ )
            {
                tracy::CallstackFrame& frame = frameData.data[idx];
                frame.name = placeholderName;
                frame.file = placeholderFile;
                frame.line = 0;
            }
            resetTotal += frameData.size;
        }
    }

    std::cout << "Reset " << resetTotal << " callstack frames." << std::endl;
}
|
||||
|
||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstitutionlist,
|
||||
const std::string& addr2lineToolPath, const std::string& addr2lineArgs, bool verbose )
|
||||
{
|
||||
uint64_t callstackFrameCount = worker.GetCallstackFrameCount();
|
||||
std::string relativeSoNameMatch = "[unresolved]";
|
||||
@@ -91,7 +135,7 @@ bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& p
|
||||
}
|
||||
|
||||
SymbolEntryList resolvedEntries;
|
||||
ResolveSymbols( imagePath, entries, resolvedEntries );
|
||||
ResolveSymbols( addr2lineToolPath, addr2lineArgs, imagePath, entries, resolvedEntries );
|
||||
|
||||
if( resolvedEntries.size() != entries.size() )
|
||||
{
|
||||
@@ -131,7 +175,8 @@ bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& p
|
||||
return true;
|
||||
}
|
||||
|
||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings, bool verbose )
|
||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings,
|
||||
const std::string& addr2lineToolPath, const std::string& addr2lineArgs, bool verbose )
|
||||
{
|
||||
std::cout << "Resolving and patching symbols..." << std::endl;
|
||||
|
||||
@@ -160,7 +205,7 @@ void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSu
|
||||
}
|
||||
}
|
||||
|
||||
if ( !PatchSymbolsWithRegex(worker, pathSubstitutionList, verbose) )
|
||||
if ( !PatchSymbolsWithRegex(worker, pathSubstitutionList, addr2lineToolPath, addr2lineArgs, verbose) )
|
||||
{
|
||||
std::cerr << "Failed to patch symbols" << std::endl;
|
||||
}
|
||||
|
||||
@@ -29,12 +29,41 @@ struct SymbolEntry
|
||||
|
||||
using SymbolEntryList = std::vector<SymbolEntry>;
|
||||
|
||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
// Dispatches to the appropriate backend depending on the platform and whether a custom
|
||||
// addr2line-compatible tool was specified. When addr2lineToolPath is non-empty, the tool at
|
||||
// that path is invoked (on any platform); otherwise the platform default is used (DbgHelp on
|
||||
// Windows, the 'addr2line' found in PATH elsewhere). addr2lineArgs are extra arguments passed
|
||||
// verbatim to the addr2line-compatible tool (e.g. "--relative-address").
|
||||
bool ResolveSymbols( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries );
|
||||
|
||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings, bool verbose = false );
|
||||
// Backend invoking an addr2line-compatible tool. Available on all platforms. An empty
|
||||
// addr2lineToolPath falls back to the 'addr2line' found in PATH. addr2lineArgs are inserted
|
||||
// verbatim into the tool's command line.
|
||||
bool ResolveSymbolsAddr2Line( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries );
|
||||
|
||||
#ifdef _WIN32
|
||||
// Backend using the Windows DbgHelp library.
|
||||
bool ResolveSymbolsDbgHelp( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries );
|
||||
#endif
|
||||
|
||||
// Resets all callstack frame symbols back to the unresolved state ("[unresolved]" / "[unknown]"),
|
||||
// so a subsequent PatchSymbols pass re-resolves every frame. This is useful to chain several
|
||||
// resolution passes with different path substitutions. Only meaningful for traces captured with
|
||||
// TRACY_SYMBOL_OFFLINE_RESOLVE, where each frame's symAddr holds the image-relative offset.
|
||||
void ResetSymbols( tracy::Worker& worker );
|
||||
|
||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings,
|
||||
const std::string& addr2lineToolPath = std::string(),
|
||||
const std::string& addr2lineArgs = std::string(), bool verbose = false );
|
||||
|
||||
using PathSubstitutionList = std::vector<std::pair<std::regex, std::string> >;
|
||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstituionlist, bool verbose = false );
|
||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstituionlist,
|
||||
const std::string& addr2lineToolPath = std::string(),
|
||||
const std::string& addr2lineArgs = std::string(), bool verbose = false );
|
||||
|
||||
#endif // __SYMBOLRESOLVER_HPP__
|
||||
@@ -1,5 +1,3 @@
|
||||
#ifndef _WIN32
|
||||
|
||||
#include "OfflineSymbolResolver.h"
|
||||
|
||||
#include <fstream>
|
||||
@@ -10,6 +8,11 @@
|
||||
#include <memory>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
# define popen _popen
|
||||
# define pclose _pclose
|
||||
#endif
|
||||
|
||||
std::string ExecShellCommand( const char* cmd )
|
||||
{
|
||||
std::array<char, 128> buffer;
|
||||
@@ -29,23 +32,66 @@ std::string ExecShellCommand( const char* cmd )
|
||||
class SymbolResolver
|
||||
{
|
||||
public:
|
||||
SymbolResolver()
|
||||
SymbolResolver( const std::string& addr2lineToolPath, const std::string& addr2lineArgs )
|
||||
{
|
||||
// Extra arguments are inserted verbatim into the tool invocation. Tracy records frame
|
||||
// offsets as RVAs; for images with a non-zero preferred image base (PE, Mach-O) the user
|
||||
// can pass "--relative-address" here so llvm-addr2line / llvm-symbolizer add the base back.
|
||||
if( !addr2lineArgs.empty() )
|
||||
{
|
||||
m_addr2LineArgs = " " + addr2lineArgs;
|
||||
}
|
||||
|
||||
if( !addr2lineToolPath.empty() )
|
||||
{
|
||||
// If the value looks like a path (not a bare command name resolved via PATH), verify
|
||||
// it exists so a wrong path fails with an actionable error instead of a cryptic shell one.
|
||||
const bool looksLikePath = addr2lineToolPath.find( '/' ) != std::string::npos ||
|
||||
addr2lineToolPath.find( '\\' ) != std::string::npos;
|
||||
if( looksLikePath && !std::ifstream( addr2lineToolPath ).good() )
|
||||
{
|
||||
std::cerr << "Specified symbol resolution tool not found: '" << addr2lineToolPath
|
||||
<< "' (check the path passed to the '-a' option)" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// A user-provided path may contain spaces or other shell-special characters.
|
||||
escapeShellParam( addr2lineToolPath, m_addr2LinePath );
|
||||
std::cout << "Using user-specified symbol resolution tool: '" << addr2lineToolPath.c_str() << "'" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
std::cerr << "No symbol resolution tool specified (use the '-a' option to provide one)" << std::endl;
|
||||
#else
|
||||
std::stringstream result( ExecShellCommand("which addr2line") );
|
||||
std::getline(result, m_addr2LinePath);
|
||||
|
||||
if( !m_addr2LinePath.length() )
|
||||
{
|
||||
std::cerr << "'addr2line' was not found in the system, please installed it" << std::endl;
|
||||
std::cerr << "'addr2line' was not found in the system, please install it" << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Using 'addr2line' found at: '" << m_addr2LinePath.c_str() << "'" << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void escapeShellParam(std::string const& s, std::string& out)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// cmd.exe / the CRT command parser do not understand POSIX backslash escapes, and
|
||||
// backslashes are path separators on Windows. Wrap the parameter in double quotes
|
||||
// (which handles spaces) and drop any embedded quotes, which cannot appear in a path.
|
||||
out.reserve( s.size() + 2 );
|
||||
out.push_back( '"' );
|
||||
for( char c : s )
|
||||
{
|
||||
if( c != '"' ) out.push_back( c );
|
||||
}
|
||||
out.push_back( '"' );
|
||||
#else
|
||||
out.reserve( s.size() + 2 );
|
||||
out.push_back( '"' );
|
||||
for( unsigned char c : s )
|
||||
@@ -73,34 +119,51 @@ public:
|
||||
}
|
||||
}
|
||||
out.push_back( '"' );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
{
|
||||
if( !m_addr2LinePath.length() ) return false;
|
||||
|
||||
|
||||
std:: string escapedPath;
|
||||
escapeShellParam( imagePath, escapedPath );
|
||||
|
||||
// Command-line length limits: cmd.exe (used by _popen on Windows) allows ~8191 characters;
|
||||
// a single POSIX 'sh -c' argument is capped by MAX_ARG_STRLEN (128 KiB on Linux).
|
||||
// 8000 stays under all of these, so a single conservative budget works on every platform.
|
||||
const size_t maxCmdLength = 8000;
|
||||
|
||||
size_t entryIdx = 0;
|
||||
while( entryIdx < inputEntryList.size() )
|
||||
{
|
||||
const size_t startIdx = entryIdx;
|
||||
const size_t batchEndIdx = std::min( inputEntryList.size(), startIdx + (size_t)1024 );
|
||||
|
||||
printf( "Resolving symbols [%zu-%zu]\n", startIdx, batchEndIdx );
|
||||
|
||||
// generate a single addr2line cmd line for all addresses in one invocation
|
||||
// generate a single addr2line cmd line for as many addresses as fit the length budget
|
||||
std::stringstream ss;
|
||||
ss << m_addr2LinePath << " -C -f -e " << escapedPath << " -a ";
|
||||
for( ; entryIdx < batchEndIdx; entryIdx++ )
|
||||
ss << m_addr2LinePath << " -C -f" << m_addr2LineArgs << " -e " << escapedPath << " -a ";
|
||||
while( entryIdx < inputEntryList.size() )
|
||||
{
|
||||
const FrameEntry& entry = inputEntryList[entryIdx];
|
||||
ss << " 0x" << std::hex << entry.symbolOffset;
|
||||
entryIdx++;
|
||||
// always include at least one address, then stop once near the length limit
|
||||
if( static_cast<size_t>( ss.tellp() ) >= maxCmdLength ) break;
|
||||
}
|
||||
const size_t batchEndIdx = entryIdx;
|
||||
|
||||
std::string resultStr = ExecShellCommand( ss.str().c_str() );
|
||||
printf( "Resolving symbols [%zu-%zu]\n", startIdx, batchEndIdx );
|
||||
|
||||
std::string cmd = ss.str();
|
||||
#ifdef _WIN32
|
||||
// _popen runs the command through 'cmd.exe /c', which strips the outermost pair of
|
||||
// quotes. Wrap the whole command so the quoting around the (possibly spaced) tool
|
||||
// and image paths survives.
|
||||
cmd = "\"" + cmd + "\"";
|
||||
#endif
|
||||
|
||||
std::string resultStr = ExecShellCommand( cmd.c_str() );
|
||||
std::stringstream result( resultStr );
|
||||
|
||||
//printf("executing: '%s' got '%s'\n", ss.str().c_str(), result.str().c_str());
|
||||
@@ -147,13 +210,13 @@ public:
|
||||
|
||||
private:
|
||||
std::string m_addr2LinePath;
|
||||
std::string m_addr2LineArgs;
|
||||
};
|
||||
|
||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
// Entry point of the addr2line-compatible backend: forwards the request to a
// single function-local SymbolResolver and returns its result.
bool ResolveSymbolsAddr2Line( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
{
|
||||
static SymbolResolver symbolResolver;
|
||||
// NOTE(review): a function-local static is constructed only on the first call, so the
// tool path and extra arguments of that first call are the ones the resolver keeps;
// different values passed on later calls do not reconfigure it.
static SymbolResolver symbolResolver( addr2lineToolPath, addr2lineArgs );
|
||||
return symbolResolver.ResolveSymbols( imagePath, inputEntryList, resolvedEntries );
|
||||
}
|
||||
|
||||
#endif // #ifndef _WIN32
|
||||
|
||||
@@ -122,8 +122,8 @@ private:
|
||||
|
||||
char SymbolResolver::s_symbolResolutionBuffer[symbolResolutionBufferSize];
|
||||
|
||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
bool ResolveSymbolsDbgHelp( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
{
|
||||
static SymbolResolver resolver;
|
||||
return resolver.ResolveSymbolsForModule( imagePath, inputEntryList, resolvedEntries );
|
||||
|
||||
@@ -38,7 +38,12 @@ void Usage()
|
||||
printf( " c: context switches, s: sampling data, C: symbol code, S: source cache\n" );
|
||||
printf( " -c: scan for source files missing in cache and add if found\n" );
|
||||
printf( " -r: resolve symbols and patch callstack frames\n");
|
||||
printf( " -R: reset all callstack frame symbols to unresolved (e.g. to re-run resolution)\n");
|
||||
printf( " -p: substitute symbol resolution path with an alternative: \"REGEX_MATCH;REPLACEMENT\"\n");
|
||||
printf( " -a: path to a custom addr2line-compatible tool to use for symbol resolution\n");
|
||||
printf( " -A: extra arguments passed verbatim to the symbol resolution tool,\n");
|
||||
printf( " e.g. \"--relative-address\" for llvm-addr2line on PE/Mach-O images\n");
|
||||
printf( " -v: verbose output while resolving symbols\n");
|
||||
printf( " -j: number of threads to use for compression (-1 to use all cores)\n" );
|
||||
|
||||
exit( 1 );
|
||||
@@ -61,10 +66,14 @@ int main( int argc, char** argv )
|
||||
bool buildDict = false;
|
||||
bool cacheSource = false;
|
||||
bool resolveSymbols = false;
|
||||
bool resetSymbols = false;
|
||||
std::vector<std::string> pathSubstitutions;
|
||||
std::string addr2lineToolPath;
|
||||
std::string addr2lineArgs;
|
||||
bool verboseSymbols = false;
|
||||
|
||||
int c;
|
||||
while( ( c = getopt( argc, argv, "4hez:ds:crp:j:" ) ) != -1 )
|
||||
while( ( c = getopt( argc, argv, "4hez:ds:crRp:a:A:vj:" ) ) != -1 )
|
||||
{
|
||||
switch( c )
|
||||
{
|
||||
@@ -137,9 +146,21 @@ int main( int argc, char** argv )
|
||||
case 'r':
|
||||
resolveSymbols = true;
|
||||
break;
|
||||
case 'R':
|
||||
resetSymbols = true;
|
||||
break;
|
||||
case 'p':
|
||||
pathSubstitutions.push_back(optarg);
|
||||
break;
|
||||
case 'a':
|
||||
addr2lineToolPath = optarg;
|
||||
break;
|
||||
case 'A':
|
||||
addr2lineArgs = optarg;
|
||||
break;
|
||||
case 'v':
|
||||
verboseSymbols = true;
|
||||
break;
|
||||
case 'j':
|
||||
streams = atoi( optarg );
|
||||
break;
|
||||
@@ -171,7 +192,7 @@ int main( int argc, char** argv )
|
||||
{
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
const bool allowBgThreads = false;
|
||||
const bool allowStringModification = resolveSymbols;
|
||||
const bool allowStringModification = resolveSymbols || resetSymbols;
|
||||
tracy::Worker worker( *f, (tracy::EventType::Type)events, allowBgThreads, allowStringModification );
|
||||
|
||||
#ifndef TRACY_NO_STATISTICS
|
||||
@@ -181,7 +202,8 @@ int main( int argc, char** argv )
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
if( cacheSource ) worker.CacheSourceFiles();
|
||||
if( resolveSymbols ) PatchSymbols( worker, pathSubstitutions );
|
||||
if( resetSymbols ) ResetSymbols( worker );
|
||||
if( resolveSymbols ) PatchSymbols( worker, pathSubstitutions, addr2lineToolPath, addr2lineArgs, verboseSymbols );
|
||||
|
||||
auto w = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, clev, zstdLevel, streams ) );
|
||||
if( !w )
|
||||
|
||||
Reference in New Issue
Block a user