mirror of
https://github.com/wolfpld/tracy.git
synced 2026-06-13 10:49:05 +00:00
Compare commits
91 Commits
slomp/seq-
...
slomp/webg
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4f68dbcbbc | ||
|
|
3c90c2d7f8 | ||
|
|
7e572c55fb | ||
|
|
07bfe3465e | ||
|
|
0544440a34 | ||
|
|
f287508772 | ||
|
|
f622b97436 | ||
|
|
dfded9d55d | ||
|
|
7180ea381f | ||
|
|
0c74658dd3 | ||
|
|
cc091a99a2 | ||
|
|
f89709e99e | ||
|
|
a4c5f15312 | ||
|
|
3455fd9f82 | ||
|
|
cfc046abcd | ||
|
|
9ab39d8af3 | ||
|
|
bfab6d03f4 | ||
|
|
0d848c3042 | ||
|
|
54270d3fd5 | ||
|
|
1341f98c61 | ||
|
|
6fc279eef4 | ||
|
|
66e4f5cef7 | ||
|
|
7637971e9e | ||
|
|
4e3cffc4ba | ||
|
|
28d3a91980 | ||
|
|
3956616fc2 | ||
|
|
0fbb2eaaa4 | ||
|
|
b27dab4584 | ||
|
|
75bee5370f | ||
|
|
e7499458e9 | ||
|
|
d34c45fa5a | ||
|
|
8fe5a511c9 | ||
|
|
afdd2e2f81 | ||
|
|
3c1b1b2f80 | ||
|
|
992134f85e | ||
|
|
37bc986584 | ||
|
|
feb4e7c989 | ||
|
|
4a8fe6f56e | ||
|
|
a960a25285 | ||
|
|
958cb8d7f8 | ||
|
|
59f17794a5 | ||
|
|
3b2c7dbacb | ||
|
|
56ed480ed2 | ||
|
|
0572c86551 | ||
|
|
6499e3383b | ||
|
|
8278ace0c1 | ||
|
|
5981eca141 | ||
|
|
1b2856b885 | ||
|
|
118f18cf4b | ||
|
|
bfbc1d3bee | ||
|
|
831779508f | ||
|
|
286309af3f | ||
|
|
3db70a2237 | ||
|
|
da952f3f38 | ||
|
|
efba4685ef | ||
|
|
598984c45d | ||
|
|
860011c604 | ||
|
|
0cdcbfc75d | ||
|
|
e5d4be95df | ||
|
|
7b3863d93d | ||
|
|
de2a18d964 | ||
|
|
9588912aa9 | ||
|
|
7ee4380f64 | ||
|
|
01e639db97 | ||
|
|
030e699eb5 | ||
|
|
16cdf3d645 | ||
|
|
2f143491eb | ||
|
|
796050ac1e | ||
|
|
31dbfef97d | ||
|
|
19519bbeb0 | ||
|
|
fc4f52e61d | ||
|
|
e2ac8f7973 | ||
|
|
e5aa8eba51 | ||
|
|
7437c41514 | ||
|
|
f441a5070b | ||
|
|
00b6abd67b | ||
|
|
e4e3d75eb8 | ||
|
|
fc5318dcad | ||
|
|
661c664b75 | ||
|
|
6dbebca666 | ||
|
|
73d78ad517 | ||
|
|
e5371d7987 | ||
|
|
9806f35714 | ||
|
|
d40289d594 | ||
|
|
86fbe529ed | ||
|
|
7cb98245ce | ||
|
|
55d5436fb9 | ||
|
|
2b11785b05 | ||
|
|
ebd3d9c3e6 | ||
|
|
bc8d8f5302 | ||
|
|
b049746853 |
2
.github/workflows/linux.yml
vendored
2
.github/workflows/linux.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
|||||||
if [ "${ACT:-}" != "true" ] && [ "${FORGEJO_ACTIONS:-}" != "true" ]; then
|
if [ "${ACT:-}" != "true" ] && [ "${FORGEJO_ACTIONS:-}" != "true" ]; then
|
||||||
cmake --build profiler/build
|
cmake --build profiler/build
|
||||||
else
|
else
|
||||||
cmake --build profiler/build --parallel
|
cmake --build profiler/build --parallel 2
|
||||||
fi
|
fi
|
||||||
- name: Update utility
|
- name: Update utility
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
2
.github/workflows/macos.yml
vendored
2
.github/workflows/macos.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
|||||||
- name: Build profiler
|
- name: Build profiler
|
||||||
run: |
|
run: |
|
||||||
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
||||||
cmake --build profiler/build --parallel --config Release
|
cmake --build profiler/build --parallel 2 --config Release
|
||||||
- name: Build update
|
- name: Build update
|
||||||
run: |
|
run: |
|
||||||
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
||||||
|
|||||||
2
.github/workflows/windows.yml
vendored
2
.github/workflows/windows.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
|||||||
- name: Build profiler
|
- name: Build profiler
|
||||||
run: |
|
run: |
|
||||||
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
||||||
cmake --build profiler/build --parallel --config Release
|
cmake --build profiler/build --parallel 2 --config Release
|
||||||
- name: Build update
|
- name: Build update
|
||||||
run: |
|
run: |
|
||||||
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
||||||
|
|||||||
@@ -137,6 +137,7 @@ set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resoluti
|
|||||||
set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution resolution after the first symbol resolve operation" OFF TracyClient)
|
set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution resolution after the first symbol resolve operation" OFF TracyClient)
|
||||||
set_option(TRACY_DEBUGINFOD "Enable debuginfod support" OFF TracyClient)
|
set_option(TRACY_DEBUGINFOD "Enable debuginfod support" OFF TracyClient)
|
||||||
set_option(TRACY_IGNORE_MEMORY_FAULTS "Ignore instrumentation errors from memory free events that do not have a matching allocation" OFF TracyClient)
|
set_option(TRACY_IGNORE_MEMORY_FAULTS "Ignore instrumentation errors from memory free events that do not have a matching allocation" OFF TracyClient)
|
||||||
|
set_option(TRACY_OPENGL_AUTO_CALIBRATION "Periodically recalibrate OpenGL GPU/CPU clock drift (forces a CPU/GPU sync each time)" OFF TracyClient)
|
||||||
|
|
||||||
# advanced
|
# advanced
|
||||||
set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF TracyClient)
|
set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF TracyClient)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
|
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
|
||||||
|
|
||||||
Tracy supports profiling CPU (Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/tealsnow/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
Tracy supports profiling CPU (Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/tealsnow/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphics/compute APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA, WebGPU.), memory allocations, locks, context switches, automatic attribution of screenshots to captured frames, and much more.
|
||||||
|
|
||||||
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
|
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
|
||||||
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
|
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
|
||||||
|
|||||||
164
examples/webgpu/triangle/CMakeLists.txt
Normal file
164
examples/webgpu/triangle/CMakeLists.txt
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
# CMakeLists.txt — WebGPU spinning triangle demo
#
# NOTE(review): the manual command lines below reference platform_macos.mm /
# platform_windows.cpp / platform_wayland.cpp and do not mention
# TracyClient.cpp or the Tracy include path, whereas this script builds
# platform/platform_rgfw.cpp together with Tracy's client — confirm they are
# still valid before relying on them.
#
# macOS:
#   clang++ -std=c++17 -ObjC++ spinning_triangle.cpp platform/platform_macos.mm \
#       -I/path/to/wgpu/include -L/path/to/wgpu/lib -lwgpu_native \
#       -Wl,-rpath,@executable_path \
#       -framework Cocoa -framework Metal -framework QuartzCore \
#       -framework Foundation -framework IOKit -framework IOSurface \
#       -o spinning_triangle
#
# Windows (MSVC):
#   cl /std:c++17 spinning_triangle.cpp platform/platform_windows.cpp \
#       /I\path\to\wgpu\include \path\to\wgpu\lib\wgpu_native.lib \
#       user32.lib gdi32.lib /Fe:spinning_triangle.exe
#
# Linux / Wayland:
#   g++ -std=c++17 spinning_triangle.cpp platform/platform_wayland.cpp \
#       xdg-shell-protocol.c \
#       -I/path/to/wgpu/include -L/path/to/wgpu/lib -lwgpu_native \
#       -lwayland-client -o spinning_triangle

cmake_minimum_required(VERSION 3.16)
project(spinning_triangle LANGUAGES C CXX)

# ---------------------------------------------------------------------------
# WebGPU backend — set WGPU_PATH to your wgpu-native or Dawn installation.
# The library name differs between backends:
#   wgpu-native  → wgpu_native
#   Dawn         → webgpu_dawn
# ---------------------------------------------------------------------------
set(WGPU_PATH "" CACHE PATH "Root of the WebGPU native installation (contains include/ and lib/)")
set(WGPU_LIB "" CACHE STRING "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty")

if(NOT WGPU_PATH)
    message(FATAL_ERROR "Set WGPU_PATH to the root of your WebGPU native installation.")
endif()

# When WGPU_PATH changes, discard any previously auto-detected WGPU_LIB so
# detection re-runs against the new path.
if(NOT "${WGPU_PATH}" STREQUAL "${_WGPU_PATH_LAST}")
    unset(WGPU_LIB CACHE)
    set(WGPU_LIB "" CACHE STRING "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty")
endif()
set(_WGPU_PATH_LAST "${WGPU_PATH}" CACHE INTERNAL "")

if(NOT WGPU_LIB)
    # Drop cached find_library() results so both probes run against the
    # current WGPU_PATH instead of returning a stale hit.
    unset(_WGPU_NATIVE_LIB CACHE)
    unset(_WEBGPU_DAWN_LIB CACHE)
    find_library(_WGPU_NATIVE_LIB NAMES wgpu_native wgpu_native.dll PATHS "${WGPU_PATH}/lib" NO_DEFAULT_PATH)
    find_library(_WEBGPU_DAWN_LIB NAMES webgpu_dawn PATHS "${WGPU_PATH}/lib" NO_DEFAULT_PATH)
    # wgpu-native takes precedence when both libraries are present.
    if(_WGPU_NATIVE_LIB)
        set(WGPU_LIB "wgpu_native" CACHE STRING "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty" FORCE)
    elseif(_WEBGPU_DAWN_LIB)
        set(WGPU_LIB "webgpu_dawn" CACHE STRING "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty" FORCE)
    else()
        message(FATAL_ERROR "Could not detect a WebGPU library in ${WGPU_PATH}/lib. Set WGPU_LIB explicitly (wgpu_native or webgpu_dawn).")
    endif()
    message(STATUS "WebGPU library auto-detected: ${WGPU_LIB}")
endif()

# ---------------------------------------------------------------------------
# Tracy root — three directories above this CMakeLists.txt
# (examples/webgpu/triangle → repository root).
# ---------------------------------------------------------------------------
set(TRACY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
option(TRACY_ENABLE "Enable Tracy profiling" ON)

# ---------------------------------------------------------------------------
# macOS quarantine — pre-built WebGPU binaries downloaded from the internet
# carry a com.apple.quarantine extended attribute that prevents dyld from
# loading them ("damaged or incomplete" / Gatekeeper block).  Strip it once
# at configure time so the linker and the runtime loader can both access the
# library directory without further user intervention.
# ---------------------------------------------------------------------------
if(APPLE)
    # Best effort: the attribute is usually absent after the first configure,
    # so silence xattr's output instead of reporting it on every run.
    execute_process(
        COMMAND xattr -dr com.apple.quarantine "${WGPU_PATH}/lib"
        OUTPUT_QUIET
        ERROR_QUIET
    )
endif()

# ---------------------------------------------------------------------------
# Platform — RGFW (cross-platform windowing, fetched automatically)
# ---------------------------------------------------------------------------
include(FetchContent)
FetchContent_Declare(rgfw
    GIT_REPOSITORY https://github.com/ColleagueRiley/RGFW.git
    GIT_TAG        main   # pin to a specific commit for reproducible builds
    GIT_SHALLOW    TRUE
)
FetchContent_MakeAvailable(rgfw)

set(PLATFORM_SOURCES  platform/platform_rgfw.cpp)
set(PLATFORM_INCLUDES ${rgfw_SOURCE_DIR})

if(APPLE)
    set(PLATFORM_LIBS
        "-framework Cocoa"
        "-framework Metal"
        "-framework QuartzCore"
        "-framework Foundation"
        "-framework IOKit"
        "-framework IOSurface"
    )
elseif(WIN32)
    set(PLATFORM_LIBS user32 gdi32)
else()
    find_package(X11 REQUIRED)
    if(NOT X11_Xrandr_FOUND)
        message(FATAL_ERROR "Xrandr not found — install libxrandr-dev")
    endif()
    set(PLATFORM_LIBS X11::X11 X11::Xrandr)
endif()

# Tracy's own CMake build links TracyClient against Threads::Threads and
# ${CMAKE_DL_LIBS}; mirror that here because TracyClient.cpp is compiled
# directly into this executable.
find_package(Threads REQUIRED)

# ---------------------------------------------------------------------------
# Target
# ---------------------------------------------------------------------------
add_executable(spinning_triangle
    spinning_triangle.cpp
    "${TRACY_DIR}/public/TracyClient.cpp"
    ${PLATFORM_SOURCES}
)

# Treat TracyClient.cpp as third-party code — suppress all warnings so that
# upstream changes don't pollute our build output.
if(MSVC)
    set_source_files_properties("${TRACY_DIR}/public/TracyClient.cpp"
        PROPERTIES COMPILE_FLAGS "/w"
    )
else()
    set_source_files_properties("${TRACY_DIR}/public/TracyClient.cpp"
        PROPERTIES COMPILE_FLAGS "-w"
    )
endif()

target_compile_features(spinning_triangle PRIVATE cxx_std_17)

if(TRACY_ENABLE)
    target_compile_definitions(spinning_triangle PRIVATE TRACY_ENABLE)
endif()

target_include_directories(spinning_triangle PRIVATE
    "${WGPU_PATH}/include"
    "${TRACY_DIR}/public"
    ${PLATFORM_INCLUDES}
)

target_link_directories(spinning_triangle PRIVATE "${WGPU_PATH}/lib")

target_link_libraries(spinning_triangle PRIVATE
    ${WGPU_LIB}
    ${PLATFORM_LIBS}
    Threads::Threads
    ${CMAKE_DL_LIBS}
)

# Embed the rpath so the binary finds the WebGPU dylib/so: the build tree
# points at WGPU_PATH/lib, an installed binary looks next to itself.
if(APPLE)
    set_target_properties(spinning_triangle PROPERTIES
        BUILD_RPATH "${WGPU_PATH}/lib"
        INSTALL_RPATH "@executable_path"
    )
elseif(UNIX)
    set_target_properties(spinning_triangle PROPERTIES
        BUILD_RPATH "${WGPU_PATH}/lib"
        INSTALL_RPATH "$ORIGIN"
    )
endif()
|
||||||
23
examples/webgpu/triangle/platform/platform.h
Normal file
23
examples/webgpu/triangle/platform/platform.h
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
// platform.h — interface between platform-agnostic code and platform backends
|
||||||
|
//
|
||||||
|
// Each platform_*.mm / platform_*.cpp file implements these four functions.
|
||||||
|
// Exactly one backend must be linked into the final binary.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include <webgpu/webgpu.h>
|
||||||
|
|
||||||
|
// Initialize the windowing system and create a window of the given dimensions.
|
||||||
|
// Returns true on success.
|
||||||
|
bool platformInit(int width, int height, const char* title);
|
||||||
|
|
||||||
|
// Create a WebGPU surface backed by the platform window.
|
||||||
|
// Must be called after wgpuCreateInstance() and platformInit().
|
||||||
|
WGPUSurface platformCreateSurface(WGPUInstance instance);
|
||||||
|
|
||||||
|
// Elapsed wall-clock time in seconds since platformInit().
|
||||||
|
double platformGetTime();
|
||||||
|
|
||||||
|
// Enter the platform event/render loop.
|
||||||
|
// Calls render() each frame at ~60 fps.
|
||||||
|
// Calls shutdown() exactly once before returning.
|
||||||
|
void platformRunLoop(void (*render)(), void (*shutdown)());
|
||||||
72
examples/webgpu/triangle/platform/platform_rgfw.cpp
Normal file
72
examples/webgpu/triangle/platform/platform_rgfw.cpp
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
// platform_rgfw.cpp — RGFW windowing backend for the WebGPU example
|
||||||
|
// https://github.com/ColleagueRiley/RGFW
|
||||||
|
|
||||||
|
#include "platform.h" // webgpu/webgpu.h first so RGFW sees WGPUSurface
|
||||||
|
|
||||||
|
#define RGFW_WEBGPU
|
||||||
|
#define RGFW_IMPLEMENTATION
|
||||||
|
#include <RGFW.h>
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
#if defined(__linux__)
#include <X11/Xlib.h>
#endif

// Reports whether a window can be attempted at all.
// On Linux this opens (and immediately closes) the default X11 display; on
// every other platform it unconditionally returns true.
static bool platformHasDisplay() {
#if defined(__linux__)
    // RGFW workaround: RGFW indiscriminately passes XOpenDisplay(0) unchecked
    // to X11 functions like XCreateWindow(), which will lead to SIGSEGV.
    Display* const probe = XOpenDisplay(nullptr);
    if (probe != nullptr) {
        XCloseDisplay(probe);
        return true;
    }
    fprintf(stderr, "ERROR: failed to open X11 display (is $DISPLAY set?)\n");
    return false;
#else
    return true;
#endif
}
|
||||||
|
|
||||||
|
static RGFW_window* sWin = nullptr;
|
||||||
|
static std::chrono::steady_clock::time_point sStartTime;
|
||||||
|
|
||||||
|
bool platformInit(int width, int height, const char* title) {
|
||||||
|
if (!platformHasDisplay()) {
|
||||||
|
fprintf(stderr, "ERROR: no display found\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
sWin = RGFW_createWindow(title, 0, 0, width, height, RGFW_windowCenter);
|
||||||
|
if (!sWin) {
|
||||||
|
fprintf(stderr, "ERROR: failed to create window\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
RGFW_window_setExitKey(sWin, RGFW_keyEscape);
|
||||||
|
sStartTime = std::chrono::steady_clock::now();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Asks RGFW to create a WebGPU surface for the backend's window (sWin) on the
// given instance and hands the result back unchanged.
WGPUSurface platformCreateSurface(WGPUInstance instance) {
    WGPUSurface windowSurface = RGFW_window_createSurface_WebGPU(sWin, instance);
    return windowSurface;
}
|
||||||
|
|
||||||
|
double platformGetTime() {
|
||||||
|
return std::chrono::duration<double>(
|
||||||
|
std::chrono::steady_clock::now() - sStartTime).count();
|
||||||
|
}
|
||||||
|
|
||||||
|
void platformRunLoop(void (*render)(), void (*shutdown)()) {
|
||||||
|
while (RGFW_window_shouldClose(sWin) == RGFW_FALSE) {
|
||||||
|
RGFW_event event;
|
||||||
|
while (RGFW_window_checkEvent(sWin, &event)) {
|
||||||
|
if (event.type == RGFW_windowClose) goto done;
|
||||||
|
}
|
||||||
|
render();
|
||||||
|
}
|
||||||
|
done:
|
||||||
|
shutdown();
|
||||||
|
RGFW_window_close(sWin);
|
||||||
|
sWin = nullptr;
|
||||||
|
}
|
||||||
352
examples/webgpu/triangle/spinning_triangle.cpp
Normal file
352
examples/webgpu/triangle/spinning_triangle.cpp
Normal file
@@ -0,0 +1,352 @@
|
|||||||
|
// spinning_triangle.cpp — platform-agnostic WebGPU spinning triangle demo.
|
||||||
|
|
||||||
|
#include "platform/platform.h"
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <webgpu/webgpu.h>
|
||||||
|
|
||||||
|
#include <tracy/Tracy.hpp>
|
||||||
|
#include <tracy/TracyWebGPU.hpp>
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Globals
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
static const int kWidth = 800;
|
||||||
|
static const int kHeight = 600;
|
||||||
|
|
||||||
|
static WGPUInstance gInstance = nullptr;
|
||||||
|
static WGPUSurface gSurface = nullptr;
|
||||||
|
static WGPUAdapter gAdapter = nullptr;
|
||||||
|
static WGPUDevice gDevice = nullptr;
|
||||||
|
static WGPUQueue gQueue = nullptr;
|
||||||
|
static WGPURenderPipeline gPipeline = nullptr;
|
||||||
|
static WGPUBuffer gUniformBuf = nullptr;
|
||||||
|
static WGPUBindGroup gBindGroup = nullptr;
|
||||||
|
|
||||||
|
static TracyWebGPUCtx gTracyCtx = nullptr;
|
||||||
|
|
||||||
|
static WGPUTextureFormat gSurfaceFormat = WGPUTextureFormat_BGRA8Unorm;
|
||||||
|
|
||||||
|
// TODO: this can become platformError() instead
|
||||||
|
// Logs "ERROR: <message> (code: <code>)" to stderr and returns `code`
// unchanged, so call sites can write `return error(n, "...")`.
int error(int code, const char* message) {
    std::fprintf(stderr, "ERROR: %s (code: %d)\n", message, code);
    return code;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// WGSL shader — vertex colours baked in, rotation via a uniform float.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
static const char* kShaderSource = R"(
|
||||||
|
struct Uniforms {
|
||||||
|
angle: f32,
|
||||||
|
};
|
||||||
|
@group(0) @binding(0) var<uniform> u: Uniforms;
|
||||||
|
|
||||||
|
struct VSOut {
|
||||||
|
@builtin(position) pos: vec4f,
|
||||||
|
@location(0) color: vec3f,
|
||||||
|
};
|
||||||
|
|
||||||
|
@vertex
|
||||||
|
fn vs_main(@builtin(vertex_index) vi: u32) -> VSOut {
|
||||||
|
var positions = array<vec2f, 3>(
|
||||||
|
vec2f( 0.0, 0.5),
|
||||||
|
vec2f(-0.433, -0.25),
|
||||||
|
vec2f( 0.433, -0.25),
|
||||||
|
);
|
||||||
|
var colors = array<vec3f, 3>(
|
||||||
|
vec3f(1.0, 0.0, 0.0),
|
||||||
|
vec3f(0.0, 1.0, 0.0),
|
||||||
|
vec3f(0.0, 0.0, 1.0),
|
||||||
|
);
|
||||||
|
|
||||||
|
let c = cos(u.angle);
|
||||||
|
let s = sin(u.angle);
|
||||||
|
let p = positions[vi];
|
||||||
|
let rotated = vec2f(p.x * c - p.y * s, p.x * s + p.y * c);
|
||||||
|
|
||||||
|
var out: VSOut;
|
||||||
|
out.pos = vec4f(rotated, 0.0, 1.0);
|
||||||
|
out.color = colors[vi];
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
@fragment
|
||||||
|
fn fs_main(@location(0) color: vec3f) -> @location(0) vec4f {
|
||||||
|
return vec4f(color, 1.0);
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Adapter / Device request callbacks (current wgpu-native API)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Adapter request callback.
// On success the adapter is stored through userdata1, which must point to a
// WGPUAdapter; on any other status the message is logged to stderr and the
// destination is left untouched. userdata2 is unused.
static void onAdapterReady(WGPURequestAdapterStatus status,
                           WGPUAdapter adapter,
                           WGPUStringView message,
                           void* userdata1, void* /*userdata2*/) {
    if (status != WGPURequestAdapterStatus_Success) {
        fprintf(stderr, "Adapter request failed: %.*s\n",
                static_cast<int>(message.length), message.data);
        return;
    }

    WGPUAdapter* const destination = static_cast<WGPUAdapter*>(userdata1);
    *destination = adapter;
}
|
||||||
|
|
||||||
|
// Device request callback.
// On success the device is stored through userdata1, which must point to a
// WGPUDevice; on any other status the message is logged to stderr and the
// destination is left untouched. userdata2 is unused.
static void onDeviceReady(WGPURequestDeviceStatus status,
                          WGPUDevice device,
                          WGPUStringView message,
                          void* userdata1, void* /*userdata2*/) {
    if (status != WGPURequestDeviceStatus_Success) {
        fprintf(stderr, "Device request failed: %.*s\n",
                static_cast<int>(message.length), message.data);
        return;
    }

    WGPUDevice* const destination = static_cast<WGPUDevice*>(userdata1);
    *destination = device;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// WebGPU init
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
static int initWebGPU() {
|
||||||
|
// Adapter
|
||||||
|
WGPURequestAdapterOptions adapterOpts = {};
|
||||||
|
adapterOpts.compatibleSurface = gSurface;
|
||||||
|
|
||||||
|
WGPURequestAdapterCallbackInfo adapterCB = {};
|
||||||
|
adapterCB.mode = WGPUCallbackMode_AllowProcessEvents;
|
||||||
|
adapterCB.callback = onAdapterReady;
|
||||||
|
adapterCB.userdata1 = &gAdapter;
|
||||||
|
wgpuInstanceRequestAdapter(gInstance, &adapterOpts, adapterCB);
|
||||||
|
while (!gAdapter) { wgpuInstanceProcessEvents(gInstance); }
|
||||||
|
if (!gAdapter) return error(11, "No adapter");
|
||||||
|
|
||||||
|
WGPUUncapturedErrorCallbackInfo errorCB = {};
|
||||||
|
errorCB.callback = [](WGPUDevice const*, WGPUErrorType type,
|
||||||
|
WGPUStringView message, void*, void*) {
|
||||||
|
fprintf(stderr, "[WGPU ERROR] type=%d %.*s\n",
|
||||||
|
(int)type, (int)message.length, message.data);
|
||||||
|
};
|
||||||
|
|
||||||
|
WGPUDeviceDescriptor deviceDesc = {};
|
||||||
|
deviceDesc.uncapturedErrorCallbackInfo = errorCB;
|
||||||
|
|
||||||
|
TracyWebGPUSetupDeviceDescriptor(deviceDesc);
|
||||||
|
|
||||||
|
WGPURequestDeviceCallbackInfo deviceCB = {};
|
||||||
|
deviceCB.mode = WGPUCallbackMode_AllowProcessEvents;
|
||||||
|
deviceCB.callback = onDeviceReady;
|
||||||
|
deviceCB.userdata1 = &gDevice;
|
||||||
|
wgpuAdapterRequestDevice(gAdapter, &deviceDesc, deviceCB);
|
||||||
|
while (!gDevice) { wgpuInstanceProcessEvents(gInstance); }
|
||||||
|
if (!gDevice) return error(12, "No device");
|
||||||
|
|
||||||
|
gQueue = wgpuDeviceGetQueue(gDevice);
|
||||||
|
gTracyCtx = TracyWebGPUContext(gInstance, gDevice, gQueue);
|
||||||
|
TracyWebGPUContextName(gTracyCtx, "WebGPU", 6);
|
||||||
|
|
||||||
|
// Configure surface
|
||||||
|
WGPUSurfaceConfiguration config = {};
|
||||||
|
config.device = gDevice;
|
||||||
|
config.format = gSurfaceFormat;
|
||||||
|
config.usage = WGPUTextureUsage_RenderAttachment;
|
||||||
|
config.alphaMode = WGPUCompositeAlphaMode_Opaque;
|
||||||
|
config.width = kWidth;
|
||||||
|
config.height = kHeight;
|
||||||
|
config.presentMode = WGPUPresentMode_Fifo;
|
||||||
|
wgpuSurfaceConfigure(gSurface, &config);
|
||||||
|
|
||||||
|
// Shader module
|
||||||
|
WGPUShaderSourceWGSL wgslSrc = {};
|
||||||
|
wgslSrc.chain.sType = WGPUSType_ShaderSourceWGSL;
|
||||||
|
wgslSrc.code = { kShaderSource, WGPU_STRLEN };
|
||||||
|
|
||||||
|
WGPUShaderModuleDescriptor smDesc = {};
|
||||||
|
smDesc.nextInChain = (WGPUChainedStruct*)&wgslSrc;
|
||||||
|
WGPUShaderModule shaderMod = wgpuDeviceCreateShaderModule(gDevice, &smDesc);
|
||||||
|
|
||||||
|
// Uniform buffer (one f32 for rotation angle)
|
||||||
|
WGPUBufferDescriptor bufDesc = {};
|
||||||
|
bufDesc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
|
||||||
|
bufDesc.size = sizeof(float);
|
||||||
|
gUniformBuf = wgpuDeviceCreateBuffer(gDevice, &bufDesc);
|
||||||
|
|
||||||
|
// Bind group layout + bind group
|
||||||
|
WGPUBindGroupLayoutEntry bglEntry = {};
|
||||||
|
bglEntry.binding = 0;
|
||||||
|
bglEntry.visibility = WGPUShaderStage_Vertex;
|
||||||
|
bglEntry.buffer.type = WGPUBufferBindingType_Uniform;
|
||||||
|
bglEntry.buffer.minBindingSize = sizeof(float);
|
||||||
|
|
||||||
|
WGPUBindGroupLayoutDescriptor bglDesc = {};
|
||||||
|
bglDesc.entryCount = 1;
|
||||||
|
bglDesc.entries = &bglEntry;
|
||||||
|
WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(gDevice, &bglDesc);
|
||||||
|
|
||||||
|
WGPUBindGroupEntry bgEntry = {};
|
||||||
|
bgEntry.binding = 0;
|
||||||
|
bgEntry.buffer = gUniformBuf;
|
||||||
|
bgEntry.size = sizeof(float);
|
||||||
|
|
||||||
|
WGPUBindGroupDescriptor bgDesc = {};
|
||||||
|
bgDesc.layout = bgl;
|
||||||
|
bgDesc.entryCount = 1;
|
||||||
|
bgDesc.entries = &bgEntry;
|
||||||
|
gBindGroup = wgpuDeviceCreateBindGroup(gDevice, &bgDesc);
|
||||||
|
|
||||||
|
// Pipeline layout
|
||||||
|
WGPUPipelineLayoutDescriptor plDesc = {};
|
||||||
|
plDesc.bindGroupLayoutCount = 1;
|
||||||
|
plDesc.bindGroupLayouts = &bgl;
|
||||||
|
WGPUPipelineLayout pipelineLayout = wgpuDeviceCreatePipelineLayout(gDevice, &plDesc);
|
||||||
|
|
||||||
|
// Render pipeline
|
||||||
|
WGPUColorTargetState colorTarget = {};
|
||||||
|
colorTarget.format = gSurfaceFormat;
|
||||||
|
colorTarget.writeMask = WGPUColorWriteMask_All;
|
||||||
|
|
||||||
|
WGPUFragmentState fragState = {};
|
||||||
|
fragState.module = shaderMod;
|
||||||
|
fragState.entryPoint = { "fs_main", WGPU_STRLEN };
|
||||||
|
fragState.targetCount = 1;
|
||||||
|
fragState.targets = &colorTarget;
|
||||||
|
|
||||||
|
WGPURenderPipelineDescriptor rpDesc = {};
|
||||||
|
rpDesc.layout = pipelineLayout;
|
||||||
|
rpDesc.vertex.module = shaderMod;
|
||||||
|
rpDesc.vertex.entryPoint = { "vs_main", WGPU_STRLEN };
|
||||||
|
rpDesc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
|
||||||
|
rpDesc.multisample.count = 1;
|
||||||
|
rpDesc.multisample.mask = 0xFFFFFFFF;
|
||||||
|
rpDesc.fragment = &fragState;
|
||||||
|
|
||||||
|
gPipeline = wgpuDeviceCreateRenderPipeline(gDevice, &rpDesc);
|
||||||
|
|
||||||
|
// Cleanup intermediates
|
||||||
|
wgpuShaderModuleRelease(shaderMod);
|
||||||
|
wgpuPipelineLayoutRelease(pipelineLayout);
|
||||||
|
wgpuBindGroupLayoutRelease(bgl);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Frame rendering
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Returns the surface texture for the current frame, or {.texture=nullptr} on
|
||||||
|
// a skippable condition (timeout, occlusion) or an error.
|
||||||
|
// Fetches the current surface texture from gSurface.
// With a SuccessOptimal / SuccessSuboptimal status the result is returned
// as-is. For any other status, a texture that was handed back is released and
// the returned struct carries texture == nullptr. A stderr message is printed
// unless the status is Timeout (or Occluded when WGPU_H_ is defined).
static WGPUSurfaceTexture getWindowSurface() {
    WGPUSurfaceTexture frame = {};
    wgpuSurfaceGetCurrentTexture(gSurface, &frame);

    const bool usable =
        frame.status == WGPUSurfaceGetCurrentTextureStatus_SuccessOptimal ||
        frame.status == WGPUSurfaceGetCurrentTextureStatus_SuccessSuboptimal;
    if (usable) {
        return frame;
    }

    // Timeout and Occluded are normal OS events (window covered / on a different Space).
    bool expected = (frame.status == WGPUSurfaceGetCurrentTextureStatus_Timeout);
#ifdef WGPU_H_
    if (!expected) {
        expected = frame.status ==
                   static_cast<WGPUSurfaceGetCurrentTextureStatus>(WGPUSurfaceGetCurrentTextureStatus_Occluded);
    }
#endif
    if (expected == false) {
        fprintf(stderr, "Failed to get surface texture (status %d)\n", frame.status);
    }

    if (frame.texture != nullptr) {
        wgpuTextureRelease(frame.texture);
    }
    frame.texture = nullptr;
    return frame;
}
|
||||||
|
|
||||||
|
static void renderFrame() {
|
||||||
|
ZoneScoped;
|
||||||
|
|
||||||
|
// Update rotation angle
|
||||||
|
float angle = (float)platformGetTime();
|
||||||
|
wgpuQueueWriteBuffer(gQueue, gUniformBuf, 0, &angle, sizeof(float));
|
||||||
|
|
||||||
|
WGPUSurfaceTexture surfTex = getWindowSurface();
|
||||||
|
if (!surfTex.texture) return;
|
||||||
|
|
||||||
|
WGPUTextureView view = wgpuTextureCreateView(surfTex.texture, nullptr);
|
||||||
|
|
||||||
|
// Command encoder
|
||||||
|
WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(gDevice, nullptr);
|
||||||
|
|
||||||
|
// Render pass
|
||||||
|
WGPURenderPassColorAttachment colorAtt = {};
|
||||||
|
colorAtt.view = view;
|
||||||
|
colorAtt.loadOp = WGPULoadOp_Clear;
|
||||||
|
colorAtt.storeOp = WGPUStoreOp_Store;
|
||||||
|
colorAtt.clearValue = { 0.05, 0.05, 0.08, 1.0 };
|
||||||
|
colorAtt.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
|
||||||
|
|
||||||
|
WGPURenderPassDescriptor passDesc = {};
|
||||||
|
passDesc.colorAttachmentCount = 1;
|
||||||
|
passDesc.colorAttachments = &colorAtt;
|
||||||
|
|
||||||
|
{
|
||||||
|
ZoneScopedN("render-pass");
|
||||||
|
TracyWebGPUNamedZone(gTracyCtx, tracyZone, encoder, passDesc, "triangle draw", true);
|
||||||
|
WGPURenderPassEncoder pass = wgpuCommandEncoderBeginRenderPass(encoder, &passDesc);
|
||||||
|
wgpuRenderPassEncoderSetPipeline(pass, gPipeline);
|
||||||
|
wgpuRenderPassEncoderSetBindGroup(pass, 0, gBindGroup, 0, nullptr);
|
||||||
|
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
|
||||||
|
wgpuRenderPassEncoderEnd(pass);
|
||||||
|
wgpuRenderPassEncoderRelease(pass);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Submit
|
||||||
|
WGPUCommandBuffer cmdBuf = wgpuCommandEncoderFinish(encoder, nullptr);
|
||||||
|
wgpuQueueSubmit(gQueue, 1, &cmdBuf);
|
||||||
|
|
||||||
|
// Present
|
||||||
|
wgpuSurfacePresent(gSurface);
|
||||||
|
|
||||||
|
// Process Events
|
||||||
|
wgpuInstanceProcessEvents(gInstance);
|
||||||
|
TracyWebGPUCollect(gTracyCtx);
|
||||||
|
|
||||||
|
// Cleanup
|
||||||
|
wgpuCommandBufferRelease(cmdBuf);
|
||||||
|
wgpuCommandEncoderRelease(encoder);
|
||||||
|
wgpuTextureViewRelease(view);
|
||||||
|
wgpuTextureRelease(surfTex.texture);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Shutdown
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
static void shutdown() {
|
||||||
|
fprintf(stderr, "application is shutting down...\n");
|
||||||
|
TracyWebGPUDestroy(gTracyCtx);
|
||||||
|
if (gBindGroup) wgpuBindGroupRelease(gBindGroup);
|
||||||
|
if (gUniformBuf) wgpuBufferRelease(gUniformBuf);
|
||||||
|
if (gPipeline) wgpuRenderPipelineRelease(gPipeline);
|
||||||
|
if (gQueue) wgpuQueueRelease(gQueue);
|
||||||
|
if (gDevice) wgpuDeviceRelease(gDevice);
|
||||||
|
if (gAdapter) wgpuAdapterRelease(gAdapter);
|
||||||
|
if (gSurface) wgpuSurfaceRelease(gSurface);
|
||||||
|
if (gInstance) wgpuInstanceRelease(gInstance);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// main
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
int main(int argc, char* argv[]) {
|
||||||
|
if (!platformInit(kWidth, kHeight, "WebGPU Spinning Triangle"))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
gInstance = wgpuCreateInstance(nullptr);
|
||||||
|
if (!gInstance) return error(2, "Failed to create WebGPU instance.");
|
||||||
|
|
||||||
|
gSurface = platformCreateSurface(gInstance);
|
||||||
|
if (!gSurface) return error(3, "Failed to create surface.");
|
||||||
|
|
||||||
|
if (initWebGPU() != 0) return 4;
|
||||||
|
|
||||||
|
platformRunLoop(renderFrame, shutdown);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -3,3 +3,151 @@ function Link(el)
|
|||||||
el.attributes['reference'] = nil
|
el.attributes['reference'] = nil
|
||||||
return el
|
return el
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Unwrap every Div (e.g. table/titlepage containers): the wrapper is replaced
-- by the content it held.
function Div(el)
    local inner = el.content
    return inner
end
|
||||||
|
|
||||||
|
-- ---------------------------------------------------------------------------
|
||||||
|
-- LaTeX math -> plain-text approximation.
|
||||||
|
--
|
||||||
|
-- The target Markdown renderer has no math support, so a raw "$\frac{1}{2}$"
|
||||||
|
-- would show verbatim. We turn each math node into the closest Unicode/ASCII
|
||||||
|
-- equivalent: fractions become "a/b", \times becomes "×", super/subscripts use
|
||||||
|
-- Unicode digits, and the one multi-line display equation becomes a fenced
|
||||||
|
-- code block (Markdown collapses plain newlines, a code block keeps them).
|
||||||
|
-- ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Unicode superscript forms of the digits and of the signs that have one.
local sup = {
    ['0'] = '⁰', ['1'] = '¹', ['2'] = '²', ['3'] = '³', ['4'] = '⁴',
    ['5'] = '⁵', ['6'] = '⁶', ['7'] = '⁷', ['8'] = '⁸', ['9'] = '⁹',
    ['+'] = '⁺', ['-'] = '⁻', ['='] = '⁼', ['('] = '⁽', [')'] = '⁾',
}

-- Unicode subscript forms of the same characters.
local sub = {
    ['0'] = '₀', ['1'] = '₁', ['2'] = '₂', ['3'] = '₃', ['4'] = '₄',
    ['5'] = '₅', ['6'] = '₆', ['7'] = '₇', ['8'] = '₈', ['9'] = '₉',
    ['+'] = '₊', ['-'] = '₋', ['='] = '₌', ['('] = '₍', [')'] = '₎',
}
|
||||||
|
|
||||||
|
-- Symbol replacements, applied in order as literal substitutions.
--
-- Ordering invariant: because matching is purely textual, a command that is a
-- prefix of a longer command must come AFTER the longer one, otherwise the
-- short command eats the start of the long one (e.g. "\le" applied first turns
-- "\left(" into "≤ft(", and "\cdot" applied first turns "\cdots" into "·s").
-- The prefix families present in this table are:
--   \le    < \leq, \left, \leftarrow, \leftrightarrow
--   \left  < \leftarrow, \leftrightarrow
--   \right < \rightarrow
--   \ge    < \geq
--   \cdot  < \cdots
-- When adding an entry, check it against every existing key in both directions.
local symbols = {
    {'\\leftrightarrow','↔'}, {'\\rightarrow','→'}, {'\\leftarrow','←'},
    {'\\Rightarrow','⇒'}, {'\\Leftarrow','⇐'}, {'\\to','→'}, {'\\mapsto','↦'},
    -- Delimiter sizing commands are dropped; listed after the arrows and
    -- before \le so neither side clobbers the other.
    {'\\left',''}, {'\\right',''},
    {'\\times','×'}, {'\\cdots','…'}, {'\\cdot','·'}, {'\\div','÷'}, {'\\ast','*'}, {'\\star','*'},
    {'\\leq','≤'}, {'\\geq','≥'}, {'\\neq','≠'}, {'\\approx','≈'}, {'\\equiv','≡'},
    {'\\ll','«'}, {'\\gg','»'}, {'\\le','≤'}, {'\\ge','≥'},
    {'\\ldots','…'}, {'\\dots','…'}, {'\\infty','∞'},
    {'\\pm','±'}, {'\\mp','∓'}, {'\\propto','∝'}, {'\\sum','Σ'}, {'\\prod','Π'},
    {'\\alpha','α'}, {'\\beta','β'}, {'\\gamma','γ'}, {'\\delta','δ'}, {'\\Delta','Δ'},
    {'\\mu','µ'}, {'\\sigma','σ'}, {'\\pi','π'}, {'\\lambda','λ'}, {'\\theta','θ'},
    -- Spacing commands.
    {'\\qquad',' '}, {'\\quad',' '}, {'\\,',' '}, {'\\;',' '}, {'\\:',' '},
    {'\\ ',' '}, {'\\!',''},
    -- Escaped literal characters.
    {'\\%','%'}, {'\\#','#'}, {'\\&','&'}, {'\\_','_'}, {'\\{','{'}, {'\\}','}'},
    {'\\$','$'},
}
|
||||||
|
|
||||||
|
-- Replace every occurrence of the literal string `a` in `s` with `b`.
--
-- Plain-text search (string.find with plain=true), so Lua pattern magic
-- characters in `a` have no special meaning. Matches are found left to right
-- and do not overlap; the replacement text is never re-scanned.
-- An empty `a` returns `s` unchanged: a plain find of '' succeeds at the
-- current position without advancing, which would otherwise loop forever.
local function lit_replace(s, a, b)
    if a == '' then return s end
    local pieces = {}
    local from = 1
    local hit = s:find(a, from, true)
    while hit do
        pieces[#pieces + 1] = s:sub(from, hit - 1)  -- text before the match
        pieces[#pieces + 1] = b
        from = hit + #a                             -- resume after the match
        hit = s:find(a, from, true)
    end
    pieces[#pieces + 1] = s:sub(from)               -- unmatched tail
    return table.concat(pieces)
end
|
||||||
|
|
||||||
|
-- Return a "%b{}" capture without its first and last character (the braces).
local function grp(b)
    return b:sub(2, -2)
end
|
||||||
|
|
||||||
|
-- Translate `txt` one character (byte) at a time through the lookup table
-- `map`. Returns the concatenated result, or nil as soon as a character has
-- no entry in `map`. An empty `txt` yields the empty string.
local function map_script(txt, map)
    local mapped = {}
    for ch in txt:gmatch('.') do
        local repl = map[ch]
        if not repl then
            return nil
        end
        mapped[#mapped + 1] = repl
    end
    return table.concat(mapped)
end
|
||||||
|
|
||||||
|
-- Turn one LaTeX math fragment into a plain-text approximation.
--
-- The passes run in a fixed order, each over the output of the previous one:
--   1. text/font wrapper commands are removed, keeping their argument;
--   2. \frac and its d/t/s variants become "num/den";
--   3. \sqrt{x} becomes "√(x)";
--   4. single-character scripts, then braced scripts;
--   5. the literal replacements from `symbols`.
-- Braced arguments are converted recursively before being spliced back in.
local function convert(s)
    -- Recursively convert the inside of a "%b{}" capture.
    local function inner(b)
        return convert(grp(b))
    end

    -- 1. Text/font wrappers: keep the content only.
    local wrappers = {'text', 'mathrm', 'mathit', 'mathbf', 'mathbb',
                      'mathsf', 'mathtt', 'mathcal', 'operatorname',
                      'textbf', 'textit', 'textrm'}
    for i = 1, #wrappers do
        s = s:gsub('\\' .. wrappers[i] .. '(%b{})', inner)
    end

    -- 2. Fractions -> "num/den"; the slash is spaced when either side
    --    contains a space.
    local function as_fraction(num, den)
        local n = inner(num)
        local d = inner(den)
        if n:find(' ', 1, true) or d:find(' ', 1, true) then
            return n .. ' / ' .. d
        end
        return n .. '/' .. d
    end
    for _, name in ipairs({'frac', 'dfrac', 'tfrac', 'sfrac'}) do
        s = s:gsub('\\' .. name .. '(%b{})(%b{})', as_fraction)
    end

    -- 3. Roots.
    s = s:gsub('\\sqrt(%b{})', function(b)
        return '√(' .. inner(b) .. ')'
    end)

    -- 4a. Single-character scripts go first, so that the text produced by the
    --     braced fallback below (e.g. "_native") is not scanned by this pass
    --     and mangled into Unicode subscripts.
    local function sup_char(c) return sup[c] or ('^' .. c) end
    local function sub_char(c) return sub[c] or ('_' .. c) end
    s = s:gsub('%^([%w])', sup_char)
    s = s:gsub('_([%w])', sub_char)

    -- 4b. Braced scripts: Unicode when every character has a script form,
    --     otherwise a readable "^(...)" / "_..." fallback.
    s = s:gsub('%^(%b{})', function(b)
        local body = inner(b)
        return map_script(body, sup) or ('^(' .. body .. ')')
    end)
    s = s:gsub('_(%b{})', function(b)
        local body = inner(b)
        return map_script(body, sub) or ('_' .. body)
    end)

    -- 5. Remaining symbols, in table order.
    for i = 1, #symbols do
        s = lit_replace(s, symbols[i][1], symbols[i][2])
    end
    return s
end
|
||||||
|
|
||||||
|
-- Convert a display equation to plain text while keeping its row structure,
-- so the result can be placed in a code block.
--
-- After the normal conversion, the \begin{...}/\end{...} markers of the known
-- multi-row environments are removed, "\\" becomes a newline and "&" becomes
-- spacing. Each resulting line is trimmed, empty lines are dropped, and every
-- line after the first is indented.
local function convert_display(s)
    local text = convert(s)

    local envs = {'cases', 'aligned', 'align', 'array', 'matrix',
                  'gathered', 'split'}
    for i = 1, #envs do
        text = lit_replace(text, '\\begin{' .. envs[i] .. '}', '')
        text = lit_replace(text, '\\end{' .. envs[i] .. '}', '')
    end

    text = lit_replace(text, '\\\\', '\n')   -- row break
    text = text:gsub('%s*&%s*', ' ')         -- column separator -> spacing

    local rows = {}
    for raw in (text .. '\n'):gmatch('(.-)\n') do
        local trimmed = raw:match('^%s*(.-)%s*$')
        if trimmed ~= '' then
            -- continuation rows (everything but the first kept row) are indented
            if #rows == 0 then
                rows[1] = trimmed
            else
                rows[#rows + 1] = ' ' .. trimmed
            end
        end
    end
    return table.concat(rows, '\n')
end
|
||||||
|
|
||||||
|
-- Inline math is replaced by a Str holding its plain-text conversion.
-- Display math is returned unchanged here; Para handles it at block level so
-- that it can be emitted as a code block.
function Math(el)
    if el.mathtype ~= 'DisplayMath' then
        return pandoc.Str(convert(el.text))
    end
    return el
end
|
||||||
|
|
||||||
|
-- A paragraph made up solely of display equations (optionally separated by
-- Space / SoftBreak / LineBreak) is replaced by one code block holding the
-- converted equations, separated by blank lines. Any other paragraph is left
-- alone (nil is returned).
function Para(el)
    local separators = { Space = true, SoftBreak = true, LineBreak = true }

    local equations = {}
    for _, node in ipairs(el.content) do
        if node.t == 'Math' and node.mathtype == 'DisplayMath' then
            equations[#equations + 1] = node
        elseif not separators[node.t] then
            -- anything else means this is an ordinary paragraph
            return nil
        end
    end
    if #equations == 0 then
        return nil
    end

    local rendered = {}
    for i, eq in ipairs(equations) do
        rendered[i] = convert_display(eq.text)
    end
    return pandoc.CodeBlock(table.concat(rendered, '\n\n'))
end
|
||||||
|
|||||||
@@ -7,12 +7,18 @@ sed -i -e 's@\\ctrl@Ctrl@g' _tmp.tex
|
|||||||
sed -i -e 's@\\shift@Shift@g' _tmp.tex
|
sed -i -e 's@\\shift@Shift@g' _tmp.tex
|
||||||
sed -i -e 's@\\Alt@Alt@g' _tmp.tex
|
sed -i -e 's@\\Alt@Alt@g' _tmp.tex
|
||||||
sed -i -e 's@\\del@Delete@g' _tmp.tex
|
sed -i -e 's@\\del@Delete@g' _tmp.tex
|
||||||
python3 fa-icons.py ../profiler/src/profiler/IconsFontAwesome6.h _tmp.tex
|
python3 fa-icons.py ../profiler/src/profiler/IconsFontAwesome7.h _tmp.tex
|
||||||
sed -i -e 's@\\LMB{}~@@g' _tmp.tex
|
sed -i -e 's@\\LMB{}~@@g' _tmp.tex
|
||||||
sed -i -e 's@\\MMB{}~@@g' _tmp.tex
|
sed -i -e 's@\\MMB{}~@@g' _tmp.tex
|
||||||
sed -i -e 's@\\RMB{}~@@g' _tmp.tex
|
sed -i -e 's@\\RMB{}~@@g' _tmp.tex
|
||||||
sed -i -e 's@\\Scroll{}~@@g' _tmp.tex
|
sed -i -e 's@\\Scroll{}~@@g' _tmp.tex
|
||||||
|
|
||||||
|
# Resolve \circled{} markers and lstlisting escapeinside (@...@) snippets, which
|
||||||
|
# pandoc would otherwise emit verbatim or drop, to their Unicode equivalents.
|
||||||
|
sed -i -e 's|@\\circled{a}@|(a)|g' -e 's|@\\circled{b}@|(b)|g' -e 's|@\\circled{c}@|(c)|g' _tmp.tex
|
||||||
|
sed -i -e 's|\\circled{a}|(a)|g' -e 's|\\circled{b}|(b)|g' -e 's|\\circled{c}|(c)|g' _tmp.tex
|
||||||
|
sed -i -e 's|@\\ldots@|…|g' _tmp.tex
|
||||||
|
|
||||||
sed -i -e 's@\\nameref{quicklook}@A quick look at Tracy Profiler@g' _tmp.tex
|
sed -i -e 's@\\nameref{quicklook}@A quick look at Tracy Profiler@g' _tmp.tex
|
||||||
sed -i -e 's@\\nameref{firststeps}@First steps@g' _tmp.tex
|
sed -i -e 's@\\nameref{firststeps}@First steps@g' _tmp.tex
|
||||||
sed -i -e 's@\\nameref{client}@Client markup@g' _tmp.tex
|
sed -i -e 's@\\nameref{client}@Client markup@g' _tmp.tex
|
||||||
@@ -26,7 +32,10 @@ sed -i -e 's@\\nameref{configurationfiles}@Configuration files@g' _tmp.tex
|
|||||||
awk -f bclogo2quote.awk _tmp.tex > _tmp_quoted.tex
|
awk -f bclogo2quote.awk _tmp.tex > _tmp_quoted.tex
|
||||||
mv _tmp_quoted.tex _tmp.tex
|
mv _tmp_quoted.tex _tmp.tex
|
||||||
|
|
||||||
pandoc --wrap=none --reference-location=block --number-sections -L filter.lua -s _tmp.tex -o tracy.md
|
pandoc --wrap=none --reference-location=block --number-sections -L filter.lua -t 'markdown-simple_tables-multiline_tables-grid_tables+pipe_tables' -s _tmp.tex -o tracy.md
|
||||||
|
|
||||||
|
awk -f tablecaption.awk tracy.md > _tmp_caption.md
|
||||||
|
mv _tmp_caption.md tracy.md
|
||||||
|
|
||||||
sed -i -e 's/^> \*\*IMPORTANT:\([^*]*\)\*\*/> [!IMPORTANT]\
|
sed -i -e 's/^> \*\*IMPORTANT:\([^*]*\)\*\*/> [!IMPORTANT]\
|
||||||
> **\1**/' tracy.md
|
> **\1**/' tracy.md
|
||||||
@@ -37,6 +46,6 @@ sed -i -e 's/^> \*\*CAUTION:\([^*]*\)\*\*/> [!CAUTION]\
|
|||||||
sed -i -e 's/^> \*\*NOTE:\([^*]*\)\*\*/> [!NOTE]\
|
sed -i -e 's/^> \*\*NOTE:\([^*]*\)\*\*/> [!NOTE]\
|
||||||
> **\1**/' tracy.md
|
> **\1**/' tracy.md
|
||||||
|
|
||||||
python3 icon-explain.py ../profiler/src/profiler/IconsFontAwesome6.h tracy.md
|
python3 icon-explain.py ../profiler/src/profiler/IconsFontAwesome7.h tracy.md
|
||||||
|
|
||||||
rm -f _tmp.tex
|
rm -f _tmp.tex
|
||||||
|
|||||||
16
manual/tablecaption.awk
Normal file
16
manual/tablecaption.awk
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# Pandoc emits table captions as a line beginning with ": ", which GitHub
|
||||||
|
# renders literally instead of as a caption. Strip the marker and italicize
|
||||||
|
# the caption instead. Captions may span several physical lines when they
|
||||||
|
# contain a hard line break (a trailing backslash). Underscores are used for
|
||||||
|
# the emphasis so captions that already contain "*...*" markup are left intact.
|
||||||
|
{
    # A caption starts on a line beginning with ": " while no caption is open:
    # the two-character marker is replaced by the opening "_".
    if (!incap && $0 ~ /^: /) {
        incap = 1
        $0 = "_" substr($0, 3)
    }

    # Inside a caption, a line that does not end in a backslash is the last
    # one: close the emphasis there. Lines ending in a backslash (hard line
    # break) keep the caption open and are printed as they are, as is
    # everything outside a caption.
    if (incap && $0 !~ /\\$/) {
        incap = 0
        print $0 "_"
    } else {
        print
    }
}
|
||||||
447
manual/tracy.md
447
manual/tracy.md
@@ -3,7 +3,6 @@ bibliography:
|
|||||||
- tracy.bib
|
- tracy.bib
|
||||||
---
|
---
|
||||||
|
|
||||||
::: titlepage
|
|
||||||
Tracy Profiler
|
Tracy Profiler
|
||||||
|
|
||||||
The user manual
|
The user manual
|
||||||
@@ -12,8 +11,7 @@ The user manual
|
|||||||
|
|
||||||
**Bartosz Taudul** [\<wolf@nereid.pl\>](mailto:wolf@nereid.pl)
|
**Bartosz Taudul** [\<wolf@nereid.pl\>](mailto:wolf@nereid.pl)
|
||||||
|
|
||||||
2026-06-05 <https://github.com/wolfpld/tracy>
|
2026-06-09 <https://github.com/wolfpld/tracy>
|
||||||
:::
|
|
||||||
|
|
||||||
# Quick overview {#quick-overview .unnumbered}
|
# Quick overview {#quick-overview .unnumbered}
|
||||||
|
|
||||||
@@ -95,11 +93,11 @@ The concept of Tracy being a real-time profiler may be explained in a couple of
|
|||||||
|
|
||||||
It is hard to imagine how long a nanosecond is. One good analogy is to compare it with a measure of length. Let's say that one second is one meter (the average doorknob is at the height of one meter).
|
It is hard to imagine how long a nanosecond is. One good analogy is to compare it with a measure of length. Let's say that one second is one meter (the average doorknob is at the height of one meter).
|
||||||
|
|
||||||
One millisecond ($\frac{1}{1000}$ of a second) would be then the length of a millimeter. The average size of a red ant or the width of a pencil is 5 or 6 mm. A modern game running at 60 frames per second has only 16 ms to update the game world and render the entire scene.
|
One millisecond (1/1000 of a second) would be then the length of a millimeter. The average size of a red ant or the width of a pencil is 5 or 6 mm. A modern game running at 60 frames per second has only 16 ms to update the game world and render the entire scene.
|
||||||
|
|
||||||
One microsecond ($\frac{1}{1000}$ of a millisecond) in our comparison equals one micron. The diameter of a typical bacterium ranges from 1 to 10 microns. The diameter of a red blood cell or width of a strand of spider web silk is about 7 μm.
|
One microsecond (1/1000 of a millisecond) in our comparison equals one micron. The diameter of a typical bacterium ranges from 1 to 10 microns. The diameter of a red blood cell or width of a strand of spider web silk is about 7 μm.
|
||||||
|
|
||||||
And finally, one nanosecond ($\frac{1}{1000}$ of a microsecond) would be one nanometer. The modern microprocessor transistor gate, the width of the DNA helix, or the thickness of a cell membrane are in the range of 5 nm. In one ns the light can travel only 30 cm.
|
And finally, one nanosecond (1/1000 of a microsecond) would be one nanometer. The modern microprocessor transistor gate, the width of the DNA helix, or the thickness of a cell membrane are in the range of 5 nm. In one ns the light can travel only 30 cm.
|
||||||
|
|
||||||
Tracy can achieve single-digit nanosecond measurement resolution due to usage of hardware timing mechanisms on the x86 and ARM architectures[^4]. Other profilers may rely on the timers provided by the operating system, which do have significantly reduced resolution (about 300 ns -- 1 μs). This is enough to hide the subtle impact of cache access optimization, etc.
|
Tracy can achieve single-digit nanosecond measurement resolution due to usage of hardware timing mechanisms on the x86 and ARM architectures[^4]. Other profilers may rely on the timers provided by the operating system, which do have significantly reduced resolution (about 300 ns -- 1 μs). This is enough to hide the subtle impact of cache access optimization, etc.
|
||||||
|
|
||||||
@@ -115,7 +113,7 @@ It is wrong to think so. Optimizing a function to execute in 430 ns, instead of
|
|||||||
|
|
||||||
[^6]: This is a real optimization case. The values are median function run times and do not reflect the real execution time, which explains the discrepancy in the total reported time.
|
[^6]: This is a real optimization case. The values are median function run times and do not reflect the real execution time, which explains the discrepancy in the total reported time.
|
||||||
|
|
||||||
You also need to understand how timer precision is reflected in measurement errors. Take a look at figure [1](#timer). There you can see three discrete timer tick events, which increase the value reported by the timer by 300 ns. You can also see four readings of time ranges, marked $A_1$, $A_2$; $B_1$, $B_2$; $C_1$, $C_2$ and $D_1$, $D_2$.
|
You also need to understand how timer precision is reflected in measurement errors. Take a look at figure [1](#timer). There you can see three discrete timer tick events, which increase the value reported by the timer by 300 ns. You can also see four readings of time ranges, marked A₁, A₂; B₁, B₂; C₁, C₂ and D₁, D₂.
|
||||||
|
|
||||||
<figure id="timer" data-latex-placement="h">
|
<figure id="timer" data-latex-placement="h">
|
||||||
|
|
||||||
@@ -124,11 +122,11 @@ You also need to understand how timer precision is reflected in measurement erro
|
|||||||
|
|
||||||
Now let's take a look at the timer readings.
|
Now let's take a look at the timer readings.
|
||||||
|
|
||||||
- The $A$ and $D$ ranges both take a very short amount of time (10 ns), but the $A$ range is reported as 300 ns, and the $D$ range is reported as 0 ns.
|
- The A and D ranges both take a very short amount of time (10 ns), but the A range is reported as 300 ns, and the D range is reported as 0 ns.
|
||||||
|
|
||||||
- The $B$ range takes a considerable amount of time (590 ns), but according to the timer readings, it took the same time (300 ns) as the short lived $A$ range.
|
- The B range takes a considerable amount of time (590 ns), but according to the timer readings, it took the same time (300 ns) as the short lived A range.
|
||||||
|
|
||||||
- The $C$ range (610 ns) is only 20 ns longer than the $B$ range, but it is reported as 900 ns, a 600 ns difference!
|
- The C range (610 ns) is only 20 ns longer than the B range, but it is reported as 900 ns, a 600 ns difference!
|
||||||
|
|
||||||
Here, you can see why using a high-precision timer is essential. While there is no escape from the measurement errors, a profiler can reduce their impact by increasing the timer accuracy.
|
Here, you can see why using a high-precision timer is essential. While there is no escape from the measurement errors, a profiler can reduce their impact by increasing the timer accuracy.
|
||||||
|
|
||||||
@@ -190,20 +188,18 @@ You may wonder why you should use Tracy when so many other profilers are availab
|
|||||||
|
|
||||||
## Performance impact {#perfimpact}
|
## Performance impact {#perfimpact}
|
||||||
|
|
||||||
Let's profile an example application to check how much slowdown is introduced by using Tracy. For this purpose we have used etcpak[^10]. The input data was a $16384 \times 16384$ pixels test image, and the $4 \times 4$ pixel block compression function was selected to be instrumented. The image was compressed on 12 parallel threads, and the timing data represents a mean compression time of a single image.
|
Let's profile an example application to check how much slowdown is introduced by using Tracy. For this purpose we have used etcpak[^10]. The input data was a 16384 × 16384 pixels test image, and the 4 × 4 pixel block compression function was selected to be instrumented. The image was compressed on 12 parallel threads, and the timing data represents a mean compression time of a single image.
|
||||||
|
|
||||||
[^10]: <https://github.com/wolfpld/etcpak>
|
[^10]: <https://github.com/wolfpld/etcpak>
|
||||||
|
|
||||||
The results are presented in table [1](#PerformanceImpact). Dividing the average of run time differences (37.7 ms) by the count of captured zones per single image (16777216) shows us that the impact of profiling is only 2.25 ns per zone (this includes two events: start and end of a zone).
|
The results are presented in table [1](#PerformanceImpact). Dividing the average of run time differences (37.7 ms) by the count of captured zones per single image (16777216) shows us that the impact of profiling is only 2.25 ns per zone (this includes two events: start and end of a zone).
|
||||||
|
|
||||||
::: {#PerformanceImpact}
|
| **Mode** | **Zones (total)** | **Zones (single image)** | **Clean run** | **Profiling run** | **Difference** |
|
||||||
**Mode** **Zones (total)** **Zones (single image)** **Clean run** **Profiling run** **Difference**
|
|:--:|:--:|:--:|:--:|:--:|:--:|
|
||||||
---------- ------------------- -------------------------- --------------- ------------------- ----------------
|
| ETC1 | 201326592 | 16777216 | 110.9 ms | 148.2 ms | +37.3 ms |
|
||||||
ETC1 201326592 16777216 110.9 ms 148.2 ms +37.3 ms
|
| ETC2 | 201326592 | 16777216 | 212.4 ms | 250.5 ms | +38.1 ms |
|
||||||
ETC2 201326592 16777216 212.4 ms 250.5 ms +38.1 ms
|
|
||||||
|
|
||||||
: Zone capture time cost.
|
_Zone capture time cost._
|
||||||
:::
|
|
||||||
|
|
||||||
### Assembly analysis
|
### Assembly analysis
|
||||||
|
|
||||||
@@ -401,7 +397,7 @@ Here's a sample command to set up a build directory with profiling enabled. The
|
|||||||
|
|
||||||
### Short-lived applications
|
### Short-lived applications
|
||||||
|
|
||||||
In case you want to profile a short-lived program (for example, a compression utility that finishes its work in one second), set the `TRACY_NO_EXIT` environment variable to $1$. With this option enabled, Tracy will not exit until an incoming connection is made, even if the application has already finished executing. If your platform doesn't support an easy setup of environment variables, you may also add the `TRACY_NO_EXIT` define to your build configuration, which has the same effect.
|
In case you want to profile a short-lived program (for example, a compression utility that finishes its work in one second), set the `TRACY_NO_EXIT` environment variable to 1. With this option enabled, Tracy will not exit until an incoming connection is made, even if the application has already finished executing. If your platform doesn't support an easy setup of environment variables, you may also add the `TRACY_NO_EXIT` define to your build configuration, which has the same effect.
|
||||||
|
|
||||||
### On-demand profiling {#ondemand}
|
### On-demand profiling {#ondemand}
|
||||||
|
|
||||||
@@ -426,11 +422,11 @@ The program name that is sent out in the broadcast messages can be customized by
|
|||||||
|
|
||||||
### Client network interface
|
### Client network interface
|
||||||
|
|
||||||
By default, the Tracy client will listen on all network interfaces. If you want to restrict it to only listening on the localhost interface, define the `TRACY_ONLY_LOCALHOST` macro at compile-time, or set the `TRACY_ONLY_LOCALHOST` environment variable to $1$ at runtime.
|
By default, the Tracy client will listen on all network interfaces. If you want to restrict it to only listening on the localhost interface, define the `TRACY_ONLY_LOCALHOST` macro at compile-time, or set the `TRACY_ONLY_LOCALHOST` environment variable to 1 at runtime.
|
||||||
|
|
||||||
If you need to use a specific Tracy client address, such as QNX requires, define the `TRACY_CLIENT_ADDRESS` macro at compile-time as the desired string address.
|
If you need to use a specific Tracy client address, such as QNX requires, define the `TRACY_CLIENT_ADDRESS` macro at compile-time as the desired string address.
|
||||||
|
|
||||||
By default, the Tracy client will listen on IPv6 interfaces, falling back to IPv4 only if IPv6 is unavailable. If you want to restrict it to only listening on IPv4 interfaces, define the `TRACY_ONLY_IPV4` macro at compile-time, or set the `TRACY_ONLY_IPV4` environment variable to $1$ at runtime.
|
By default, the Tracy client will listen on IPv6 interfaces, falling back to IPv4 only if IPv6 is unavailable. If you want to restrict it to only listening on IPv4 interfaces, define the `TRACY_ONLY_IPV4` macro at compile-time, or set the `TRACY_ONLY_IPV4` environment variable to 1 at runtime.
|
||||||
|
|
||||||
### Setup for multi-DLL projects
|
### Setup for multi-DLL projects
|
||||||
|
|
||||||
@@ -522,15 +518,13 @@ The best way to run Tracy is on bare metal. Avoid profiling applications in virt
|
|||||||
|
|
||||||
Additionally, you can rebuild your application with the `TRACY_DISALLOW_HW_TIMER` define, which will disable usage of the hardware timer, even if it *appears* to be available. See table [2](#timeroptions) for details.
|
Additionally, you can rebuild your application with the `TRACY_DISALLOW_HW_TIMER` define, which will disable usage of the hardware timer, even if it *appears* to be available. See table [2](#timeroptions) for details.
|
||||||
|
|
||||||
::: {#timeroptions}
|
| **Scenario** | **HW timer** | **Fallback timer** |
|
||||||
**Scenario** **HW timer** **Fallback timer**
|
|:--:|:--:|:--:|
|
||||||
---------------------------------------------------- -------------- -----------------------
|
| Neither defined | Used | Not compiled in |
|
||||||
Neither defined Used Not compiled in
|
| Only `TRACY_TIMER_FALLBACK` | Used | Compiled in as backup |
|
||||||
Only `TRACY_TIMER_FALLBACK` Used Compiled in as backup
|
| `TRACY_DISALLOW_HW_TIMER` + `TRACY_TIMER_FALLBACK` | Disabled | Used |
|
||||||
`TRACY_DISALLOW_HW_TIMER` + `TRACY_TIMER_FALLBACK` Disabled Used
|
|
||||||
|
|
||||||
: Timer options interaction
|
_Timer options interaction_
|
||||||
:::
|
|
||||||
|
|
||||||
#### Docker on Linux
|
#### Docker on Linux
|
||||||
|
|
||||||
@@ -558,13 +552,13 @@ Inside that header, enable any subset of the hooks you need by defining the corr
|
|||||||
|
|
||||||
The available hooks are:
|
The available hooks are:
|
||||||
|
|
||||||
- `TRACY_HAS_CUSTOM_THREAD_ID` $\rightarrow$ `tracy::PlatformGetThreadId()`. Required.
|
- `TRACY_HAS_CUSTOM_THREAD_ID` → `tracy::PlatformGetThreadId()`. Required.
|
||||||
|
|
||||||
- `TRACY_HAS_CUSTOM_USER_INFO` $\rightarrow$ `tracy::PlatformGetHostname()`, `tracy::PlatformGetUserLogin()`, `tracy::PlatformGetUserFullName()`.
|
- `TRACY_HAS_CUSTOM_USER_INFO` → `tracy::PlatformGetHostname()`, `tracy::PlatformGetUserLogin()`, `tracy::PlatformGetUserFullName()`.
|
||||||
|
|
||||||
- `TRACY_HAS_CUSTOM_SAFE_COPY` $\rightarrow$ `tracy::PlatformSafeMemcpy()`.
|
- `TRACY_HAS_CUSTOM_SAFE_COPY` → `tracy::PlatformSafeMemcpy()`.
|
||||||
|
|
||||||
- `TRACY_HAS_CUSTOM_ALLOCATOR` $\rightarrow$ `tracy::PlatformMalloc()`, `tracy::PlatformFree()`, `tracy::PlatformRealloc()`, `tracy::PlatformAllocatorInit()`, `tracy::PlatformAllocatorThreadInit()`, `tracy::PlatformAllocatorFinalize()`, `tracy::PlatformAllocatorThreadFinalize()`.
|
- `TRACY_HAS_CUSTOM_ALLOCATOR` → `tracy::PlatformMalloc()`, `tracy::PlatformFree()`, `tracy::PlatformRealloc()`, `tracy::PlatformAllocatorInit()`, `tracy::PlatformAllocatorThreadInit()`, `tracy::PlatformAllocatorFinalize()`, `tracy::PlatformAllocatorThreadFinalize()`.
|
||||||
|
|
||||||
Template files are provided in the repository ( `examples/CustomPlatform/CustomPlatform(.h|.cpp)` ). See `CustomPlatform.h` for the contract each `Platform*` function must satisfy (return values, threading guarantees, and footguns to avoid). Copy these files into your project, fill in the bodies for the hooks you enable, and point Tracy at the header.
|
Template files are provided in the repository ( `examples/CustomPlatform/CustomPlatform(.h|.cpp)` ). See `CustomPlatform.h` for the contract each `Platform*` function must satisfy (return values, threading guarantees, and footguns to avoid). Copy these files into your project, fill in the bodies for the hooks you enable, and point Tracy at the header.
|
||||||
|
|
||||||
@@ -604,11 +598,11 @@ When using Tracy Profiler, keep in mind the following requirements:
|
|||||||
|
|
||||||
- If there are recursive zones at any point in a zone stack, each unique zone source location should not appear more than 255 times.
|
- If there are recursive zones at any point in a zone stack, each unique zone source location should not appear more than 255 times.
|
||||||
|
|
||||||
- Profiling session cannot be longer than 1.6 days ($2^{47}$ ns). This also includes on-demand sessions.
|
- Profiling session cannot be longer than 1.6 days (2⁴⁷ ns). This also includes on-demand sessions.
|
||||||
|
|
||||||
- No more than 4 billion ($2^{32}$) memory free events may be recorded.
|
- No more than 4 billion (2³²) memory free events may be recorded.
|
||||||
|
|
||||||
- No more than 16 million ($2^{24}$) unique call stacks can be captured.
|
- No more than 16 million (2²⁴) unique call stacks can be captured.
|
||||||
|
|
||||||
[^18]: A source location is a place in the code, which is identified by source file name and line number, for example, when you markup a zone.
|
[^18]: A source location is a place in the code, which is identified by source file name and line number, for example, when you markup a zone.
|
||||||
|
|
||||||
@@ -900,31 +894,29 @@ This is an automatic process, and it doesn't require user interaction. If you ar
|
|||||||
|
|
||||||
Some features of the profiler are only available on selected platforms. Please refer to table [3](#featuretable) for details.
|
Some features of the profiler are only available on selected platforms. Please refer to table [3](#featuretable) for details.
|
||||||
|
|
||||||
::: {#featuretable}
|
| **Feature** | **Windows** | **Linux** | **Android** | **OSX** | **iOS** | **BSD** | **QNX** |
|
||||||
**Feature** **Windows** **Linux** **Android** **OSX** **iOS** **BSD** **QNX**
|
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
|
||||||
-------------------------- ------------- ----------- ------------- --------- --------- --------- ---------
|
| Profiling program init | | | | | | | |
|
||||||
Profiling program init
|
| CPU zones | | | | | | | |
|
||||||
CPU zones
|
| Locks | | | | | | | |
|
||||||
Locks
|
| Plots | | | | | | | |
|
||||||
Plots
|
| Messages | | | | | | | |
|
||||||
Messages
|
| Memory | | | | | | | |
|
||||||
Memory
|
| GPU zones (OpenGL) | | | | | | | |
|
||||||
GPU zones (OpenGL)
|
| GPU zones (Vulkan) | | | | | | | |
|
||||||
GPU zones (Vulkan)
|
| GPU zones (Metal) | | | | ^*b*^ | ^*b*^ | | |
|
||||||
GPU zones (Metal) ^*b*^ ^*b*^
|
| Call stacks | | | | | | | |
|
||||||
Call stacks
|
| Symbol resolution | | | | | | | |
|
||||||
Symbol resolution
|
| Crash handling | | | | | | | |
|
||||||
Crash handling
|
| CPU usage probing | | | | | | | |
|
||||||
CPU usage probing
|
| Context switches | | | | | | | |
|
||||||
Context switches
|
| Wait stacks | | | | | | | |
|
||||||
Wait stacks
|
| CPU topology information | | | | | | | |
|
||||||
CPU topology information
|
| Call stack sampling | | | | | | | |
|
||||||
Call stack sampling
|
| Hardware sampling | ^*a*^ | | | | | | |
|
||||||
Hardware sampling ^*a*^
|
| VSync capture | | | | | | | |
|
||||||
VSync capture
|
|
||||||
|
|
||||||
: Feature support matrix
|
_Feature support matrix_
|
||||||
:::
|
|
||||||
|
|
||||||
-- Not possible to support due to platform limitations.\
|
-- Not possible to support due to platform limitations.\
|
||||||
^*a*^Possible through WSL2. ^*b*^Only tested on Apple Silicon M1 series
|
^*a*^Possible through WSL2. ^*b*^Only tested on Apple Silicon M1 series
|
||||||
@@ -1045,7 +1037,7 @@ Images are sent using the `FrameImage(image, width, height, offset, flip)` macro
|
|||||||
|
|
||||||
[^36]: For example, OpenGL flips images, but Vulkan does not.
|
[^36]: For example, OpenGL flips images, but Vulkan does not.
|
||||||
|
|
||||||
Handling image data requires a lot of memory and bandwidth[^37]. To achieve sane memory usage, you should scale down taken screenshots to a suitable size, e.g., $320\times180$.
|
Handling image data requires a lot of memory and bandwidth[^37]. To achieve sane memory usage, you should scale down taken screenshots to a suitable size, e.g., 320×180.
|
||||||
|
|
||||||
[^37]: One uncompressed 1080p image takes 8 MB.
|
[^37]: One uncompressed 1080p image takes 8 MB.
|
||||||
|
|
||||||
@@ -1055,18 +1047,16 @@ To further reduce image data size, frame images are internally compressed using
|
|||||||
|
|
||||||
[^39]: One pixel is stored in a nibble (4 bits) instead of 32 bits.
|
[^39]: One pixel is stored in a nibble (4 bits) instead of 32 bits.
|
||||||
|
|
||||||
::: {#EtcSimd}
|
| **Implementation** | **Required define** | **Time** |
|
||||||
**Implementation** **Required define** **Time**
|
|:------------------:|:-------------------:|:--------:|
|
||||||
-------------------- --------------------- ----------
|
| x86 Reference | --- | 198.2 μs |
|
||||||
x86 Reference --- 198.2 μs
|
| x86 SSE4.1^a^ | `__SSE4_1__` | 25.4 μs |
|
||||||
x86 SSE4.1^a^ `__SSE4_1__` 25.4 μs
|
| x86 AVX2 | `__AVX2__` | 17.4 μs |
|
||||||
x86 AVX2 `__AVX2__` 17.4 μs
|
| ARM Reference | --- | 1.04 ms |
|
||||||
ARM Reference --- 1.04 ms
|
| ARM32 NEON^b^ | `__ARM_NEON` | 529 μs |
|
||||||
ARM32 NEON^b^ `__ARM_NEON` 529 μs
|
| ARM64 NEON | `__ARM_NEON` | 438 μs |
|
||||||
ARM64 NEON `__ARM_NEON` 438 μs
|
|
||||||
|
|
||||||
: Client compression time of $320\times180$ image. x86: Ryzen 9 3900X (MSVC); ARM: ODROID-C2 (gcc).
|
_Client compression time of 320×180 image. x86: Ryzen 9 3900X (MSVC); ARM: ODROID-C2 (gcc)._
|
||||||
:::
|
|
||||||
|
|
||||||
^a)^ VEX encoding; ^b)^ ARM32 NEON code compiled for ARM64
|
^a)^ VEX encoding; ^b)^ ARM32 NEON code compiled for ARM64
|
||||||
|
|
||||||
@@ -1077,7 +1067,7 @@ To further reduce image data size, frame images are internally compressed using
|
|||||||
>
|
>
|
||||||
> - This second thread will be periodically woken up, even if there are no frame images to compress[^41]. If you are not using the frame image capture functionality and you don't wish this thread to be running, you can define the `TRACY_NO_FRAME_IMAGE` macro.
|
> - This second thread will be periodically woken up, even if there are no frame images to compress[^41]. If you are not using the frame image capture functionality and you don't wish this thread to be running, you can define the `TRACY_NO_FRAME_IMAGE` macro.
|
||||||
>
|
>
|
||||||
> - Due to implementation details of the network buffer, a single frame image cannot be greater than 256 KB after compression. Note that a $960\times540$ image fits in this limit.
|
> - Due to implementation details of the network buffer, a single frame image cannot be greater than 256 KB after compression. Note that a 960×540 image fits in this limit.
|
||||||
|
|
||||||
[^40]: Small part of compression task is offloaded to the server.
|
[^40]: Small part of compression task is offloaded to the server.
|
||||||
|
|
||||||
@@ -1118,7 +1108,7 @@ Everything needs to be correctly initialized (the cleanup is left for the reader
|
|||||||
glBufferData(GL_PIXEL_PACK_BUFFER, 320*180*4, nullptr, GL_STREAM_READ);
|
glBufferData(GL_PIXEL_PACK_BUFFER, 320*180*4, nullptr, GL_STREAM_READ);
|
||||||
}
|
}
|
||||||
|
|
||||||
We will now set up a screen capture, which will downscale the screen contents to $320\times180$ pixels and copy the resulting image to a buffer accessible by the CPU when the operation is done. This should be placed right before *swap buffers* or *present* call.
|
We will now set up a screen capture, which will downscale the screen contents to 320×180 pixels and copy the resulting image to a buffer accessible by the CPU when the operation is done. This should be placed right before *swap buffers* or *present* call.
|
||||||
|
|
||||||
assert(m_fiQueue.empty() || m_fiQueue.front() != m_fiIdx); // check for buffer overrun
|
assert(m_fiQueue.empty() || m_fiQueue.front() != m_fiIdx); // check for buffer overrun
|
||||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_fiFramebuffer[m_fiIdx]);
|
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_fiFramebuffer[m_fiIdx]);
|
||||||
@@ -1179,25 +1169,19 @@ With all this done, you can perform the screen capture as follows:
|
|||||||
While this approach is much more complex than the previously discussed one, the resulting image quality increase makes it worthwhile.
|
While this approach is much more complex than the previously discussed one, the resulting image quality increase makes it worthwhile.
|
||||||
|
|
||||||
<figure id="highqualityss" data-latex-placement="h">
|
<figure id="highqualityss" data-latex-placement="h">
|
||||||
<div class="minipage">
|
|
||||||
<img src="images/screenshot-lo.png" style="width:90.0%" />
|
<img src="images/screenshot-lo.png" style="width:90.0%" />
|
||||||
</div>
|
|
||||||
<div class="minipage">
|
|
||||||
<img src="images/screenshot-hi.png" style="width:90.0%" />
|
<img src="images/screenshot-hi.png" style="width:90.0%" />
|
||||||
</div>
|
|
||||||
<figcaption>High-quality screen shot</figcaption>
|
<figcaption>High-quality screen shot</figcaption>
|
||||||
</figure>
|
</figure>
|
||||||
|
|
||||||
You can see the performance results you may expect in a simple application in table [5](#asynccapture). The naïve capture performs synchronous retrieval of full-screen image and resizes it using *stb_image_resize*. The proper and high-quality captures do things as described in this chapter.
|
You can see the performance results you may expect in a simple application in table [5](#asynccapture). The naïve capture performs synchronous retrieval of full-screen image and resizes it using *stb_image_resize*. The proper and high-quality captures do things as described in this chapter.
|
||||||
|
|
||||||
::: {#asynccapture}
|
| **Resolution** | **Naïve capture** | **Proper capture** | **High quality** |
|
||||||
**Resolution** **Naïve capture** **Proper capture** **High quality**
|
|:--------------:|:-----------------:|:------------------:|:----------------:|
|
||||||
------------------ ------------------- -------------------- ------------------
|
| 1280×720 | 80 FPS | 4200 FPS | 2800 FPS |
|
||||||
$1280\times720$ 80 FPS 4200 FPS 2800 FPS
|
| 2560×1440 | 23 FPS | 3300 FPS | 1600 FPS |
|
||||||
$2560\times1440$ 23 FPS 3300 FPS 1600 FPS
|
|
||||||
|
|
||||||
: Frame capture efficiency
|
_Frame capture efficiency_
|
||||||
:::
|
|
||||||
|
|
||||||
## Marking zones {#markingzones}
|
## Marking zones {#markingzones}
|
||||||
|
|
||||||
@@ -1241,15 +1225,15 @@ Zone objects can't be moved or copied.
|
|||||||
>
|
>
|
||||||
> {
|
> {
|
||||||
> ZoneNamed(Zone1, true);
|
> ZoneNamed(Zone1, true);
|
||||||
> @\circled{a}@
|
> (a)
|
||||||
> {
|
> {
|
||||||
> ZoneNamed(Zone2, true);
|
> ZoneNamed(Zone2, true);
|
||||||
> @\circled{b}@
|
> (b)
|
||||||
> }
|
> }
|
||||||
> @\circled{c}@
|
> (c)
|
||||||
> }
|
> }
|
||||||
>
|
>
|
||||||
> It is valid to set the `Zone1` text or name *only* in places or . After `Zone2` is created at you can no longer perform operations on `Zone1`, until `Zone2` is destroyed.
|
> It is valid to set the `Zone1` text or name *only* in places (a) or (c). After `Zone2` is created at (b) you can no longer perform operations on `Zone1`, until `Zone2` is destroyed.
|
||||||
|
|
||||||
### Filtering zones {#filteringzones}
|
### Filtering zones {#filteringzones}
|
||||||
|
|
||||||
@@ -1366,7 +1350,7 @@ To configure how plot values are presented by the profiler, you may use the `Tra
|
|||||||
|
|
||||||
- `tracy::PlotFormatType::Memory` -- treats the values as memory sizes. Will display kilobytes, megabytes, etc.
|
- `tracy::PlotFormatType::Memory` -- treats the values as memory sizes. Will display kilobytes, megabytes, etc.
|
||||||
|
|
||||||
- `tracy::PlotFormatType::Percentage` -- values will be displayed as percentage (with value $100$ being equal to $100\%$).
|
- `tracy::PlotFormatType::Percentage` -- values will be displayed as percentage (with value 100 being equal to 100%).
|
||||||
|
|
||||||
The `step` parameter determines whether the plot will be displayed as a staircase or will smoothly change between plot points (see figure [5](#plotconfig)). The `fill` parameter can be used to disable filling the area below the plot with a solid color.
|
The `step` parameter determines whether the plot will be displayed as a staircase or will smoothly change between plot points (see figure [5](#plotconfig)). The `fill` parameter can be used to disable filling the area below the plot with a solid color.
|
||||||
|
|
||||||
@@ -1511,6 +1495,12 @@ You also need to periodically collect the GPU events using the `TracyGpuCollect`
|
|||||||
|
|
||||||
[^49]: Because Apple is unable to implement standards properly.
|
[^49]: Because Apple is unable to implement standards properly.
|
||||||
|
|
||||||
|
##### Calibrated context
|
||||||
|
|
||||||
|
By default, the OpenGL context is uncalibrated: the CPU and GPU clocks are aligned only once, when the context is created, so over long captures the two time domains may drift apart (section [5.4](#options) describes correcting this drift manually). Defining `TRACY_OPENGL_AUTO_CALIBRATION` before including `TracyOpenGL.hpp` enables periodic recalibration instead: roughly once per second Tracy samples the GPU and CPU clocks together and emits a calibration event, allowing the profiler to track and remove the drift automatically.
|
||||||
|
|
||||||
|
This is opt-in because OpenGL exposes no atomic CPU+GPU timestamp query (unlike Vulkan's `VK_EXT_calibrated_timestamps` or Direct3D 12, whose contexts are always calibrated). Recalibration therefore reads the GPU clock with `glGetInteger64v(GL_TIMESTAMP)`, which forces a CPU/GPU synchronization (a pipeline stall) each time it runs. Enable it only when the improved long-capture alignment is worth the periodic stall.
|
||||||
|
|
||||||
### Vulkan
|
### Vulkan
|
||||||
|
|
||||||
Similarly, for Vulkan support you should include the `public/tracy/TracyVulkan.hpp` header file. Tracing Vulkan devices and queues is a bit more involved, and the Vulkan initialization macro `TracyVkContext(physdev, device, queue, cmdbuf)` returns an instance of `TracyVkCtx` object, which tracks an associated Vulkan queue. Cleanup is performed using the `TracyVkDestroy(ctx)` macro. You may create multiple Vulkan contexts. To set a custom name for the context, use the `TracyVkContextName(ctx, name, size)` macro.
|
Similarly, for Vulkan support you should include the `public/tracy/TracyVulkan.hpp` header file. Tracing Vulkan devices and queues is a bit more involved, and the Vulkan initialization macro `TracyVkContext(physdev, device, queue, cmdbuf)` returns an instance of `TracyVkCtx` object, which tracks an associated Vulkan queue. Cleanup is performed using the `TracyVkDestroy(ctx)` macro. You may create multiple Vulkan contexts. To set a custom name for the context, use the `TracyVkContextName(ctx, name, size)` macro.
|
||||||
@@ -1678,28 +1668,26 @@ Capture of true calls stacks can be performed by using macros with the `S` postf
|
|||||||
|
|
||||||
Be aware that call stack collection is a relatively slow operation. Table [6](#CallstackTimes) and figure [6](#CallstackPlot) show how long it took to perform a single capture of varying depth on multiple CPU architectures.
|
Be aware that call stack collection is a relatively slow operation. Table [6](#CallstackTimes) and figure [6](#CallstackPlot) show how long it took to perform a single capture of varying depth on multiple CPU architectures.
|
||||||
|
|
||||||
::: {#CallstackTimes}
|
| **Depth** | **x86** | **x64** | **ARM** | **ARM64** |
|
||||||
**Depth** **x86** **x64** **ARM** **ARM64**
|
|:---------:|:-------:|:-------:|:--------:|:---------:|
|
||||||
----------- --------- --------- ---------- -----------
|
| 1 | 34 ns | 98 ns | 6.62 μs | 6.63 μs |
|
||||||
1 34 ns 98 ns 6.62 μs 6.63 μs
|
| 2 | 35 ns | 150 ns | 8.08 μs | 8.25 μs |
|
||||||
2 35 ns 150 ns 8.08 μs 8.25 μs
|
| 3 | 36 ns | 168 ns | 9.75 μs | 10 μs |
|
||||||
3 36 ns 168 ns 9.75 μs 10 μs
|
| 4 | 39 ns | 190 ns | 10.92 μs | 11.58 μs |
|
||||||
4 39 ns 190 ns 10.92 μs 11.58 μs
|
| 5 | 42 ns | 206 ns | 12.5 μs | 13.33 μs |
|
||||||
5 42 ns 206 ns 12.5 μs 13.33 μs
|
| 10 | 52 ns | 306 ns | 19.62 μs | 21.71 μs |
|
||||||
10 52 ns 306 ns 19.62 μs 21.71 μs
|
| 15 | 63 ns | 415 ns | 26.83 μs | 30.13 μs |
|
||||||
15 63 ns 415 ns 26.83 μs 30.13 μs
|
| 20 | 77 ns | 531 ns | 34.25 μs | 38.71 μs |
|
||||||
20 77 ns 531 ns 34.25 μs 38.71 μs
|
| 25 | 89 ns | 630 ns | 41.17 μs | 47.17 μs |
|
||||||
25 89 ns 630 ns 41.17 μs 47.17 μs
|
| 30 | 109 ns | 735 ns | 48.33 μs | 55.63 μs |
|
||||||
30 109 ns 735 ns 48.33 μs 55.63 μs
|
| 35 | 123 ns | 843 ns | 55.87 μs | 64.09 μs |
|
||||||
35 123 ns 843 ns 55.87 μs 64.09 μs
|
| 40 | 142 ns | 950 ns | 63.12 μs | 72.59 μs |
|
||||||
40 142 ns 950 ns 63.12 μs 72.59 μs
|
| 45 | 154 ns | 1.05 μs | 70.54 μs | 81 μs |
|
||||||
45 154 ns 1.05 μs 70.54 μs 81 μs
|
| 50 | 167 ns | 1.16 μs | 78 μs | 89.5 μs |
|
||||||
50 167 ns 1.16 μs 78 μs 89.5 μs
|
| 55 | 179 ns | 1.26 μs | 85.04 μs | 98 μs |
|
||||||
55 179 ns 1.26 μs 85.04 μs 98 μs
|
| 60 | 193 ns | 1.37 μs | 92.75 μs | 106.59 μs |
|
||||||
60 193 ns 1.37 μs 92.75 μs 106.59 μs
|
|
||||||
|
|
||||||
: Median times of zone capture with call stack. x86, x64: i7 8700K; ARM: Banana Pi; ARM64: ODROID-C2. Selected architectures are plotted on figure [6](#CallstackPlot)
|
_Median times of zone capture with call stack. x86, x64: i7 8700K; ARM: Banana Pi; ARM64: ODROID-C2. Selected architectures are plotted on figure [6](#CallstackPlot)_
|
||||||
:::
|
|
||||||
|
|
||||||
<figure id="CallstackPlot" data-latex-placement="h">
|
<figure id="CallstackPlot" data-latex-placement="h">
|
||||||
|
|
||||||
@@ -1812,6 +1800,10 @@ By default, tracy client resolves callstack symbols in a background thread at ru
|
|||||||
|
|
||||||
The generated tracy capture will have callstack frame symbols showing `[unresolved]`. The `update` tool can be used to load that capture, perform symbol resolution offline (by passing `-r`) and write out a new capture with symbols resolved. By default `update` will use the original shared library paths that were recorded in the capture (which assumes running on the same machine or a machine with identical filesystem setup as the one used to run the tracy instrumented application). You can do path substitution with the `-p` option to perform any number of path substitutions in order to use symbols located elsewhere.
|
The generated tracy capture will have callstack frame symbols showing `[unresolved]`. The `update` tool can be used to load that capture, perform symbol resolution offline (by passing `-r`) and write out a new capture with symbols resolved. By default `update` will use the original shared library paths that were recorded in the capture (which assumes running on the same machine or a machine with identical filesystem setup as the one used to run the tracy instrumented application). You can do path substitution with the `-p` option to perform any number of path substitutions in order to use symbols located elsewhere.
|
||||||
|
|
||||||
|
By default, symbol resolution is performed with the platform's native facility: the DbgHelp library on Windows, and the `addr2line` tool found in `PATH` elsewhere. You can override this with the `-a` option, passing the path to a custom `addr2line`-compatible tool (for instance an `addr2line` from a cross-compilation toolchain, or `llvm-addr2line`). The `-a` option works on all platforms, including Windows, and takes precedence over the platform default.
|
||||||
|
|
||||||
|
Extra arguments can be passed verbatim to the resolution tool with the `-A` option. Tracy records callstack frame offsets relative to the image base, but `addr2line`-compatible tools expect a full virtual address for images that have a non-zero preferred image base (such as PE on Windows or Mach-O on Apple). For these, pass `-A "--relative-address"` so that `llvm-addr2line` or `llvm-symbolizer` adds the image base back. ELF images need no such adjustment.
|
||||||
|
|
||||||
> [!IMPORTANT]
|
> [!IMPORTANT]
|
||||||
> **Important**
|
> **Important**
|
||||||
>
|
>
|
||||||
@@ -1845,34 +1837,30 @@ Be aware that for Lua call stack retrieval to work, you need to be on a platform
|
|||||||
|
|
||||||
Cost of performing Lua call stack capture is presented in table [7](#CallstackTimesLua) and figure [7](#CallstackPlotLua). Lua call stacks include native call stacks, which have a capture cost of their own (table [6](#CallstackTimes)), and the `depth` parameter is applied for both captures. The presented data were captured with full Lua stack depth, but only 13 frames were available on the native call stack. Hence, to explain the non-linearity of the graph, you need to consider what was truly measured:
|
Cost of performing Lua call stack capture is presented in table [7](#CallstackTimesLua) and figure [7](#CallstackPlotLua). Lua call stacks include native call stacks, which have a capture cost of their own (table [6](#CallstackTimes)), and the `depth` parameter is applied for both captures. The presented data were captured with full Lua stack depth, but only 13 frames were available on the native call stack. Hence, to explain the non-linearity of the graph, you need to consider what was truly measured:
|
||||||
|
|
||||||
$$\text{Cost}_{\text{total}}(\text{depth}) =
|
Cost_total(depth) =
|
||||||
\begin{cases}
|
Cost_Lua(depth) + Cost_native(depth) when depth ≤ 13
|
||||||
\text{Cost}_{\text{Lua}}(\text{depth}) + \text{Cost}_{\text{native}}(\text{depth}) & \text{when depth} \leq 13 \\
|
Cost_Lua(depth) + Cost_native(13) when depth > 13
|
||||||
\text{Cost}_{\text{Lua}}(\text{depth}) + \text{Cost}_{\text{native}}(13) & \text{when depth} > 13
|
|
||||||
\end{cases}$$
|
|
||||||
|
|
||||||
::: {#CallstackTimesLua}
|
| **Depth** | **Time** |
|
||||||
**Depth** **Time**
|
|:---------:|:--------:|
|
||||||
----------- ----------
|
| 1 | 707 ns |
|
||||||
1 707 ns
|
| 2 | 699 ns |
|
||||||
2 699 ns
|
| 3 | 624 ns |
|
||||||
3 624 ns
|
| 4 | 727 ns |
|
||||||
4 727 ns
|
| 5 | 836 ns |
|
||||||
5 836 ns
|
| 10 | 1.77 μs |
|
||||||
10 1.77 μs
|
| 15 | 2.44 μs |
|
||||||
15 2.44 μs
|
| 20 | 2.51 μs |
|
||||||
20 2.51 μs
|
| 25 | 2.98 μs |
|
||||||
25 2.98 μs
|
| 30 | 3.6 μs |
|
||||||
30 3.6 μs
|
| 35 | 4.33 μs |
|
||||||
35 4.33 μs
|
| 40 | 5.17 μs |
|
||||||
40 5.17 μs
|
| 45 | 6.01 μs |
|
||||||
45 6.01 μs
|
| 50 | 6.99 μs |
|
||||||
50 6.99 μs
|
| 55 | 8.11 μs |
|
||||||
55 8.11 μs
|
| 60 | 9.17 μs |
|
||||||
60 9.17 μs
|
|
||||||
|
|
||||||
: Median times of Lua zone capture with call stack (x64, 13 native frames)
|
_Median times of Lua zone capture with call stack (x64, 13 native frames)_
|
||||||
:::
|
|
||||||
|
|
||||||
<figure id="CallstackPlotLua" data-latex-placement="h">
|
<figure id="CallstackPlotLua" data-latex-placement="h">
|
||||||
|
|
||||||
@@ -2010,6 +1998,39 @@ After you release the lock use the `TracyCLockAfterUnlock` macro:
|
|||||||
|
|
||||||
You can optionally mark the location where the lock is held by using the `TracyCLockMark` macro; this should be done after acquiring the lock.
|
You can optionally mark the location where the lock is held by using the `TracyCLockMark` macro; this should be done after acquiring the lock.
|
||||||
|
|
||||||
|
Similarly, you can use the following macros to mark a shared lock using the C API:
|
||||||
|
|
||||||
|
- `TracyCSharedLockAnnounce(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockTerminate(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockBeforeLock(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockAfterLock(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockAfterUnlock(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockAfterTryLock(lock_ctx, acquired)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockBeforeSharedLock(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockAfterSharedLock(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockAfterSharedUnlock(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockAfterTrySharedLock(lock_ctx, acquired)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockMark(lock_ctx)`
|
||||||
|
|
||||||
|
- `TracyCSharedLockCustomName(lock_ctx, name, size)`
|
||||||
|
|
||||||
|
A shared lock context has to be defined next to the shared lock that it will be marking:
|
||||||
|
|
||||||
|
TracyCSharedLockCtx tracy_shared_lock_ctx;
|
||||||
|
HANDLE shared_lock;
|
||||||
|
|
||||||
|
The same rules apply to shared locks as to regular locks, but you need to use the shared lock macros instead. Lock implementations in classes `Lockable` and `SharedLockable` show how to properly perform context handling.
|
||||||
|
|
||||||
### Memory profiling {#cmemoryprofiling}
|
### Memory profiling {#cmemoryprofiling}
|
||||||
|
|
||||||
Use the following macros in your implementations of `malloc` and `free`:
|
Use the following macros in your implementations of `malloc` and `free`:
|
||||||
@@ -2677,11 +2698,11 @@ While the call stack sampling is a generic software-implemented functionality of
|
|||||||
|
|
||||||
Tracy can use these counters to present you the following three statistics, which may help guide you in discovering why your code is not as fast as possible:
|
Tracy can use these counters to present you the following three statistics, which may help guide you in discovering why your code is not as fast as possible:
|
||||||
|
|
||||||
1. *Instructions Per Cycle (IPC)* -- shows how many instructions were executing concurrently within a single core cycle. Higher values are better. The maximum achievable value depends on the design of the CPU, including things such as the number of execution units and their individual capabilities. Calculated as $\frac{\text{\#instructions retired}}{\text{\#cycles}}$. You can disable it with the `TRACY_NO_SAMPLE_RETIREMENT` macro.
|
1. *Instructions Per Cycle (IPC)* -- shows how many instructions were executing concurrently within a single core cycle. Higher values are better. The maximum achievable value depends on the design of the CPU, including things such as the number of execution units and their individual capabilities. Calculated as #instructions retired / #cycles. You can disable it with the `TRACY_NO_SAMPLE_RETIREMENT` macro.
|
||||||
|
|
||||||
2. *Branch miss rate* -- shows how frequently the CPU branch predictor makes a wrong choice. Lower values are better. Calculated as $\frac{\text{\#branch misses}}{\text{\#branch instructions}}$. You can disable it with the `TRACY_NO_SAMPLE_BRANCH` macro.
|
2. *Branch miss rate* -- shows how frequently the CPU branch predictor makes a wrong choice. Lower values are better. Calculated as #branch misses / #branch instructions. You can disable it with the `TRACY_NO_SAMPLE_BRANCH` macro.
|
||||||
|
|
||||||
3. *Cache miss rate* -- shows how frequently the CPU has to retrieve data from memory. Lower values are better. The specifics of which cache level is taken into account here vary from one implementation to another. Calculated as $\frac{\text{\#cache misses}}{\text{\#cache references}}$. You can disable it with the `TRACY_NO_SAMPLE_CACHE` macro.
|
3. *Cache miss rate* -- shows how frequently the CPU has to retrieve data from memory. Lower values are better. The specifics of which cache level is taken into account here vary from one implementation to another. Calculated as #cache misses / #cache references. You can disable it with the `TRACY_NO_SAMPLE_CACHE` macro.
|
||||||
|
|
||||||
Each performance counter has to be collected by a dedicated Performance Monitoring Unit (PMU). However, the availability of PMUs is very limited, so you may not be able to capture all the statistics mentioned above at the same time (as each requires capture of two different counters). In such a case, you will need to manually select what needs to be sampled with the macros specified above.
|
Each performance counter has to be collected by a dedicated Performance Monitoring Unit (PMU). However, the availability of PMUs is very limited, so you may not be able to capture all the statistics mentioned above at the same time (as each requires capture of two different counters). In such a case, you will need to manually select what needs to be sampled with the macros specified above.
|
||||||
|
|
||||||
@@ -2918,7 +2939,7 @@ You can also adjust some settings that affect global profiler behavior in this w
|
|||||||
|
|
||||||
- *Zone name shortening* -- Sets the default zone name shortening behavior used in new traces. See section [5.4](#options) for more information.
|
- *Zone name shortening* -- Sets the default zone name shortening behavior used in new traces. See section [5.4](#options) for more information.
|
||||||
|
|
||||||
- *Scroll multipliers* -- Allows you to fine-tune the sensitivity of the horizontal and vertical scroll in the timeline. The default values ($1.0$) are an attempt at the best possible settings, but differences in hardware manufacturers, platform implementations, and user expectations may require adjustments.
|
- *Scroll multipliers* -- Allows you to fine-tune the sensitivity of the horizontal and vertical scroll in the timeline. The default values (1.0) are an attempt at the best possible settings, but differences in hardware manufacturers, platform implementations, and user expectations may require adjustments.
|
||||||
|
|
||||||
- *Memory limit* -- When enabled, profiler will stop recording data when memory usage exceeds the specified percentage of the total system memory. This mechanism does not measure the current system memory usage or limits. The upper value is not capped, as you may use swap. See section [4.6](#memoryusage) for more information.
|
- *Memory limit* -- When enabled, profiler will stop recording data when memory usage exceeds the specified percentage of the total system memory. This mechanism does not measure the current system memory usage or limits. The upper value is not capped, as you may use swap. See section [4.6](#memoryusage) for more information.
|
||||||
|
|
||||||
@@ -3004,52 +3025,46 @@ The `update` utility supports optional higher levels of data compression, which
|
|||||||
|
|
||||||
- `-z level` -- selects Zstandard algorithm, with a specified compression level.
|
- `-z level` -- selects Zstandard algorithm, with a specified compression level.
|
||||||
|
|
||||||
::: {#compressiontimes}
|
| **Mode** | **Size** | **Ratio** | **Save time** | **Load time** |
|
||||||
**Mode** **Size** **Ratio** **Save time** **Load time**
|
|:-----------:|:---------:|:---------:|:-------------:|:-------------:|
|
||||||
------------- ----------- ----------- --------------- ---------------
|
| lz4 | 162.48 MB | 17.19% | 1.91 s | 470 ms |
|
||||||
lz4 162.48 MB 17.19% 1.91 s 470 ms
|
| lz4 hc | 77.33 MB | 8.18% | 39.24 s | 401 ms |
|
||||||
lz4 hc 77.33 MB 8.18% 39.24 s 401 ms
|
| lz4 extreme | 72.67 MB | 7.68% | 4:30 | 406 ms |
|
||||||
lz4 extreme 72.67 MB 7.68% 4:30 406 ms
|
| zstd 1 | 63.17 MB | 6.68% | 2.27 s | 868 ms |
|
||||||
zstd 1 63.17 MB 6.68% 2.27 s 868 ms
|
| zstd 2 | 63.29 MB | 6.69% | 2.31 s | 884 ms |
|
||||||
zstd 2 63.29 MB 6.69% 2.31 s 884 ms
|
| zstd 3 | 62.94 MB | 6.65% | 2.43 s | 867 ms |
|
||||||
zstd 3 62.94 MB 6.65% 2.43 s 867 ms
|
| zstd 4 | 62.81 MB | 6.64% | 2.44 s | 855 ms |
|
||||||
zstd 4 62.81 MB 6.64% 2.44 s 855 ms
|
| zstd 5 | 61.04 MB | 6.45% | 3.98 s | 855 ms |
|
||||||
zstd 5 61.04 MB 6.45% 3.98 s 855 ms
|
| zstd 6 | 60.27 MB | 6.37% | 4.19 s | 827 ms |
|
||||||
zstd 6 60.27 MB 6.37% 4.19 s 827 ms
|
| zstd 7 | 61.53 MB | 6.5% | 6.6 s | 761 ms |
|
||||||
zstd 7 61.53 MB 6.5% 6.6 s 761 ms
|
| zstd 8 | 60.44 MB | 6.39% | 7.84 s | 746 ms |
|
||||||
zstd 8 60.44 MB 6.39% 7.84 s 746 ms
|
| zstd 9 | 59.58 MB | 6.3% | 9.6 s | 724 ms |
|
||||||
zstd 9 59.58 MB 6.3% 9.6 s 724 ms
|
| zstd 10 | 59.36 MB | 6.28% | 10.29 s | 706 ms |
|
||||||
zstd 10 59.36 MB 6.28% 10.29 s 706 ms
|
| zstd 11 | 59.2 MB | 6.26% | 11.23 s | 717 ms |
|
||||||
zstd 11 59.2 MB 6.26% 11.23 s 717 ms
|
| zstd 12 | 58.51 MB | 6.19% | 15.43 s | 695 ms |
|
||||||
zstd 12 58.51 MB 6.19% 15.43 s 695 ms
|
| zstd 13 | 56.16 MB | 5.94% | 35.55 s | 642 ms |
|
||||||
zstd 13 56.16 MB 5.94% 35.55 s 642 ms
|
| zstd 14 | 55.76 MB | 5.89% | 37.74 s | 627 ms |
|
||||||
zstd 14 55.76 MB 5.89% 37.74 s 627 ms
|
| zstd 15 | 54.65 MB | 5.78% | 1:01 | 600 ms |
|
||||||
zstd 15 54.65 MB 5.78% 1:01 600 ms
|
| zstd 16 | 50.94 MB | 5.38% | 1:34 | 537 ms |
|
||||||
zstd 16 50.94 MB 5.38% 1:34 537 ms
|
| zstd 17 | 50.18 MB | 5.30% | 1:44 | 542 ms |
|
||||||
zstd 17 50.18 MB 5.30% 1:44 542 ms
|
| zstd 18 | 49.91 MB | 5.28% | 2:17 | 554 ms |
|
||||||
zstd 18 49.91 MB 5.28% 2:17 554 ms
|
| zstd 19 | 46.99 MB | 4.97% | 7:09 | 605 ms |
|
||||||
zstd 19 46.99 MB 4.97% 7:09 605 ms
|
| zstd 20 | 46.81 MB | 4.95% | 7:08 | 608 ms |
|
||||||
zstd 20 46.81 MB 4.95% 7:08 608 ms
|
| zstd 21 | 45.77 MB | 4.84% | 13:01 | 614 ms |
|
||||||
zstd 21 45.77 MB 4.84% 13:01 614 ms
|
| zstd 22 | 45.52 MB | 4.81% | 15:11 | 621 ms |
|
||||||
zstd 22 45.52 MB 4.81% 15:11 621 ms
|
|
||||||
|
|
||||||
: Compression results for an example trace.\
|
_Compression results for an example trace.\
|
||||||
Tests performed on Ryzen 9 3900X.
|
Tests performed on Ryzen 9 3900X._
|
||||||
:::
|
|
||||||
|
|
||||||
<figure id="savetime">
|
<figure id="savetime">
|
||||||
<div class="minipage">
|
|
||||||
<figure id="savesize" data-latex-placement="H">
|
<figure id="savesize" data-latex-placement="H">
|
||||||
|
|
||||||
<figcaption>Plot of trace sizes for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
<figcaption>Plot of trace sizes for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
||||||
</figure>
|
</figure>
|
||||||
</div>
|
|
||||||
<div class="minipage">
|
|
||||||
<figure id="savetime" data-latex-placement="H">
|
<figure id="savetime" data-latex-placement="H">
|
||||||
|
|
||||||
<figcaption>Logarithmic plot of trace compression times for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
<figcaption>Logarithmic plot of trace compression times for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
||||||
</figure>
|
</figure>
|
||||||
</div>
|
|
||||||
<figcaption>Logarithmic plot of trace compression times for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
<figcaption>Logarithmic plot of trace compression times for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
||||||
</figure>
|
</figure>
|
||||||
|
|
||||||
@@ -3068,37 +3083,33 @@ Saving and loading trace data can be parallelized using the `-j streams` paramet
|
|||||||
|
|
||||||
Going overboard with the number of streams is not recommended, especially with the fast compression modes where it will be difficult to keep each stream busy. Also, complex compression codecs (e.g. zstd at level 22) have significantly worse compression rates when the work is divided. This is a fairly nuanced topic, and you are encouraged to do your own measurements, but for a rough guideline on the behavior, you can refer to tables [9](#streamsize) and [10](#streamspeedup).
|
Going overboard with the number of streams is not recommended, especially with the fast compression modes where it will be difficult to keep each stream busy. Also, complex compression codecs (e.g. zstd at level 22) have significantly worse compression rates when the work is divided. This is a fairly nuanced topic, and you are encouraged to do your own measurements, but for a rough guideline on the behavior, you can refer to tables [9](#streamsize) and [10](#streamspeedup).
|
||||||
|
|
||||||
::: {#streamsize}
|
| | **4** | **8** | **16** | **32** |
|
||||||
**4** **8** **16** **32**
|
|:-------:|:-------:|:-------:|:-------:|:-------:|
|
||||||
--------- --------- --------- --------- ---------
|
| lz4 | 100.30% | 100.30% | 100.61% | 102.73% |
|
||||||
lz4 100.30% 100.30% 100.61% 102.73%
|
| lz4 hc | 100.80% | 101.20% | 101.61% | 102.41% |
|
||||||
lz4 hc 100.80% 101.20% 101.61% 102.41%
|
| lz4 ext | 100.40% | 101.21% | 101.62% | 102.02% |
|
||||||
lz4 ext 100.40% 101.21% 101.62% 102.02%
|
| zstd 1 | 100.90% | 101.36% | 101.81% | 102.26% |
|
||||||
zstd 1 100.90% 101.36% 101.81% 102.26%
|
| zstd 3 | 100.51% | 101.02% | 101.53% | 102.04% |
|
||||||
zstd 3 100.51% 101.02% 101.53% 102.04%
|
| zstd 6 | 100.55% | 101.10% | 101.65% | 102.75% |
|
||||||
zstd 6 100.55% 101.10% 101.65% 102.75%
|
| zstd 9 | 101.27% | 103.16% | 105.06% | 108.23% |
|
||||||
zstd 9 101.27% 103.16% 105.06% 108.23%
|
| zstd 18 | 103.08% | 106.15% | 109.23% | 115.38% |
|
||||||
zstd 18 103.08% 106.15% 109.23% 115.38%
|
| zstd 22 | 107.08% | 113.27% | 122.12% | 130.97% |
|
||||||
zstd 22 107.08% 113.27% 122.12% 130.97%
|
|
||||||
|
|
||||||
: The increase in file size for different compression modes, as compared to a single stream.
|
_The increase in file size for different compression modes, as compared to a single stream._
|
||||||
:::
|
|
||||||
|
|
||||||
::: {#streamspeedup}
|
| | **4** | **8** | **16** | **32** |
|
||||||
**4** **8** **16** **32**
|
|:-------:|:-----:|:-----:|:------:|:------:|
|
||||||
--------- ------- ------- -------- --------
|
| lz4 | 2.04 | 2.52 | 2.11 | 3.24 |
|
||||||
lz4 2.04 2.52 2.11 3.24
|
| lz4 hc | 3.56 | 6.73 | 9.49 | 15.26 |
|
||||||
lz4 hc 3.56 6.73 9.49 15.26
|
| lz4 ext | 3.38 | 6.53 | 9.57 | 17.03 |
|
||||||
lz4 ext 3.38 6.53 9.57 17.03
|
| zstd 1 | 2.24 | 3.68 | 3.40 | 3.37 |
|
||||||
zstd 1 2.24 3.68 3.40 3.37
|
| zstd 3 | 3.23 | 4.13 | 4.07 | 4.50 |
|
||||||
zstd 3 3.23 4.13 4.07 4.50
|
| zstd 6 | 3.52 | 6.00 | 6.53 | 6.95 |
|
||||||
zstd 6 3.52 6.00 6.53 6.95
|
| zstd 9 | 3.10 | 4.26 | 5.12 | 5.40 |
|
||||||
zstd 9 3.10 4.26 5.12 5.40
|
| zstd 18 | 3.22 | 5.41 | 8.49 | 14.51 |
|
||||||
zstd 18 3.22 5.41 8.49 14.51
|
| zstd 22 | 3.99 | 7.47 | 11.10 | 18.20 |
|
||||||
zstd 22 3.99 7.47 11.10 18.20
|
|
||||||
|
|
||||||
: The speedup (*x* times faster) in saving time for different modes of compression, as compared to a single stream.
|
_The speedup (*x* times faster) in saving time for different modes of compression, as compared to a single stream._
|
||||||
:::
|
|
||||||
|
|
||||||
### Frame images dictionary {#fidict}
|
### Frame images dictionary {#fidict}
|
||||||
|
|
||||||
@@ -3152,7 +3163,7 @@ The workflow is identical, whether you are viewing a previously saved trace or i
|
|||||||
|
|
||||||
In most cases Tracy will display an approximation of time value, depending on how big it is. For example, a short time range will be displayed as 123 ns, and some longer ones will be shortened to 123.45 μs, 123.45 ms, 12.34 s, 1:23.4, 12:34:56, or even 1d12:34:56 to indicate more than a day has passed.
|
In most cases Tracy will display an approximation of time value, depending on how big it is. For example, a short time range will be displayed as 123 ns, and some longer ones will be shortened to 123.45 μs, 123.45 ms, 12.34 s, 1:23.4, 12:34:56, or even 1d12:34:56 to indicate more than a day has passed.
|
||||||
|
|
||||||
While such a presentation makes time values easy to read, it is not always appropriate. For example, you may have multiple events happen at a time approximated to 1:23.4, giving you the precision of only $\sfrac{1}{10}$ of a second. And there's certainly a lot that can happen in 100 ms.
|
While such a presentation makes time values easy to read, it is not always appropriate. For example, you may have multiple events happen at a time approximated to 1:23.4, giving you the precision of only 1/10 of a second. And there's certainly a lot that can happen in 100 ms.
|
||||||
|
|
||||||
An alternative time display is used in appropriate places to solve this problem. It combines a day--hour--minute--second value with full nanosecond resolution, resulting in values such as 1:23 456,789,012 ns.
|
An alternative time display is used in appropriate places to solve this problem. It combines a day--hour--minute--second value with full nanosecond resolution, resulting in values such as 1:23 456,789,012 ns.
|
||||||
|
|
||||||
@@ -3614,7 +3625,7 @@ You can freely adjust each time range on the timeline by clicking the left mouse
|
|||||||
|
|
||||||
Tracy allows adding custom notes to the trace. For example, you may want to mark a region to ignore because the application was out-of-focus or a region where a new user was connecting to the game, which resulted in a frame drop that needs to be investigated.
|
Tracy allows adding custom notes to the trace. For example, you may want to mark a region to ignore because the application was out-of-focus or a region where a new user was connecting to the game, which resulted in a frame drop that needs to be investigated.
|
||||||
|
|
||||||
Methods of specifying the annotation region are described in section [5.3](#timeranges). When a new annotation is added, a settings window is displayed (section [5.21](#annotationsettings)), allowing you to enter a description.
|
Methods of specifying the annotation region are described in section [5.3](#timeranges). When a new annotation is added, it is assigned a semi-unique random name to make it distinguishable. The settings window is also opened (section [5.21](#annotationsettings)), allowing you to enter your own description of the annotation.
|
||||||
|
|
||||||
Annotations are displayed on the timeline, as presented in figure [21](#annotation). Clicking on the circle next to the text description will open the annotation settings window, in which you can modify or remove the region. List of all annotations in the trace is available in the annotations list window described in section [5.22](#annotationlist), which is accessible through the * Tools* button on the control menu.
|
Annotations are displayed on the timeline, as presented in figure [21](#annotation). Clicking on the circle next to the text description will open the annotation settings window, in which you can modify or remove the region. List of all annotations in the trace is available in the annotations list window described in section [5.22](#annotationlist), which is accessible through the * Tools* button on the control menu.
|
||||||
|
|
||||||
@@ -4157,7 +4168,9 @@ The information about the selected memory allocation is displayed in this window
|
|||||||
|
|
||||||
## Trace information window {#traceinfo}
|
## Trace information window {#traceinfo}
|
||||||
|
|
||||||
This window contains information about the current trace: captured program name, time of the capture, profiler version which performed the capture, and a custom trace description, which you can fill in.
|
This window contains information about the current trace: captured program name, time of the capture, profiler version which performed the capture.
|
||||||
|
|
||||||
|
There's a text entry field for an optional custom description of the trace for you to fill in. This description will appear on the profiler window title bar, or when comparing two traces (section [5.8](#compare)), enabling you to quickly recognize what the trace contains. For some people it's fine to just have *any* semi-unique description to be able to identify a specific trace. For such purposes there's a * Generate name* button, which will set the trace description to an abstract meaningless identifier.
|
||||||
|
|
||||||
If the * Public sidecar* option is selected, the file containing trace-specific user settings (see section [9.2](#tracespecific)) will be saved on disk next to the trace file.
|
If the * Public sidecar* option is selected, the file containing trace-specific user settings (see section [9.2](#tracespecific)) will be saved on disk next to the trace file.
|
||||||
|
|
||||||
@@ -4191,6 +4204,7 @@ If an application should crash during profiling (section [2.5](#crashhandling))
|
|||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
- Dice icon
|
||||||
- User Gear icon
|
- User Gear icon
|
||||||
|
|
||||||
## Zone information window {#zoneinfo}
|
## Zone information window {#zoneinfo}
|
||||||
@@ -4314,8 +4328,8 @@ You need to take special care when reading call stacks. Contrary to their name,
|
|||||||
|
|
||||||
Let's say you are looking at the call stack of some function called within `Application::Run`. This is the result you might get:
|
Let's say you are looking at the call stack of some function called within `Application::Run`. This is the result you might get:
|
||||||
|
|
||||||
0. @\ldots@
|
0. …
|
||||||
1. @\ldots@
|
1. …
|
||||||
2. Application::Run
|
2. Application::Run
|
||||||
3. std::unique_ptr<Application>::reset
|
3. std::unique_ptr<Application>::reset
|
||||||
4. main
|
4. main
|
||||||
@@ -4513,9 +4527,9 @@ As described in chapter [3.17.6](#hardwaresampling), on some platforms, Tracy c
|
|||||||
|
|
||||||
- *Cycles* -- an option very similar to the *sample count*, but the data is collected directly by the CPU hardware counters. This may make the results more reliable.
|
- *Cycles* -- an option very similar to the *sample count*, but the data is collected directly by the CPU hardware counters. This may make the results more reliable.
|
||||||
|
|
||||||
- *Branch impact* -- indicates places where many branch instructions are issued, and at the same time, incorrectly predicted. Calculated as $\sqrt{\text{\#branch instructions}*\text{\#branch misses}}$. This is more useful than the raw branch miss rate, as it considers the number of events taking place.
|
- *Branch impact* -- indicates places where many branch instructions are issued, and at the same time, incorrectly predicted. Calculated as √(#branch instructions\*#branch misses). This is more useful than the raw branch miss rate, as it considers the number of events taking place.
|
||||||
|
|
||||||
- *Cache impact* -- similar to *branch impact*, but it shows cache miss data instead. These values are calculated as $\sqrt{\text{\#cache references}*\text{\#cache misses}}$ and will highlight places with lots of cache accesses that also miss.
|
- *Cache impact* -- similar to *branch impact*, but it shows cache miss data instead. These values are calculated as √(#cache references\*#cache misses) and will highlight places with lots of cache accesses that also miss.
|
||||||
|
|
||||||
- The rest of the available selections just show raw values gathered from the hardware counters. These are: *Retirements*, *Branches taken*, *Branch miss*, *Cache access* and *Cache miss*.
|
- The rest of the available selections just show raw values gathered from the hardware counters. These are: *Retirements*, *Branches taken*, *Branch miss*, *Cache access* and *Cache miss*.
|
||||||
|
|
||||||
@@ -4570,7 +4584,7 @@ This window presents information and statistics about a lock. The lock events co
|
|||||||
|
|
||||||
You may view a live replay of the profiled application screen captures (see section [3.3.3](#frameimages)) using this window. Playback is controlled by the * Play* and * Pause* buttons and the *Frame image* slider can be used to scrub to the desired timestamp. Alternatively you may use the ** and ** buttons to change single frame back or forward.
|
You may view a live replay of the profiled application screen captures (see section [3.3.3](#frameimages)) using this window. Playback is controlled by the * Play* and * Pause* buttons and the *Frame image* slider can be used to scrub to the desired timestamp. Alternatively you may use the ** and ** buttons to change single frame back or forward.
|
||||||
|
|
||||||
If the *Sync timeline* option is selected, the profiler will focus the timeline view on the frame corresponding to the currently displayed screenshot. The *Zoom 2$\times$* option enlarges the image for easier viewing.
|
If the *Sync timeline* option is selected, the profiler will focus the timeline view on the frame corresponding to the currently displayed screenshot. The *Zoom 2×* option enlarges the image for easier viewing.
|
||||||
|
|
||||||
The following parameters also accompany each displayed frame image: *timestamp*, showing at which time the image was captured, *frame*, displaying the numerical value of the corresponding frame, and *ratio*, telling how well the in-memory loss-less compression was able to reduce the image data size.
|
The following parameters also accompany each displayed frame image: *timestamp*, showing at which time the image was captured, *frame*, displaying the numerical value of the corresponding frame, and *ratio*, telling how well the in-memory loss-less compression was able to reduce the image data size.
|
||||||
|
|
||||||
@@ -4594,7 +4608,12 @@ The profiled program is highlighted using green color. Furthermore, the yellow h
|
|||||||
|
|
||||||
## Annotation settings window {#annotationsettings}
|
## Annotation settings window {#annotationsettings}
|
||||||
|
|
||||||
In this window, you may modify how a timeline annotation (section [5.3.1](#annotatingtrace)) is presented by setting its text description or selecting region highlight color. If the note is no longer needed, you may also remove it here.
|
In this window, you may modify how a timeline annotation (section [5.3.1](#annotatingtrace)) is presented by setting its text description or selecting region highlight color. A random annotation description can be set with the * Generate name* button. If the note is no longer needed, you may also remove it here.
|
||||||
|
|
||||||
|
|
||||||
|
-----
|
||||||
|
|
||||||
|
- Dice icon
|
||||||
|
|
||||||
## Annotation list window {#annotationlist}
|
## Annotation list window {#annotationlist}
|
||||||
|
|
||||||
@@ -4748,7 +4767,7 @@ So, which model should you run and what hardware you need to be able to do so? L
|
|||||||
|
|
||||||
As a rule of thumb, the specified number of parameters is how much total memory is needed to run the model with 8-bit quantization. Another way to get a rough estimate is to look at the model file size. Strive to fit the active parameters completely into VRAM, leaving space for computation scratch space and the context.
|
As a rule of thumb, the specified number of parameters is how much total memory is needed to run the model with 8-bit quantization. Another way to get a rough estimate is to look at the model file size. Strive to fit the active parameters completely into VRAM, leaving space for computation scratch space and the context.
|
||||||
|
|
||||||
To make this practical, the 35B-A3B model at 2 bit quantization requires $35 * 2 / 8 = 8.75$ GB, which fits into the 4 + 16 GB budget in the example above. The 3B active parameters similarly calculate to 0.75 GB, with additional 1 GB or so needed for computation buffer and another 1 GB for the 50K context, which is less than the 4 GB of VRAM available, making everything fit.
|
To make this practical, the 35B-A3B model at 2 bit quantization requires 35 \* 2 / 8 = 8.75 GB, which fits into the 4 + 16 GB budget in the example above. The 3B active parameters similarly calculate to 0.75 GB, with additional 1 GB or so needed for computation buffer and another 1 GB for the 50K context, which is less than the 4 GB of VRAM available, making everything fit.
|
||||||
|
|
||||||
## Usage {#llmusage}
|
## Usage {#llmusage}
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
\usepackage{verbatim}
|
\usepackage{verbatim}
|
||||||
\usepackage[hyphens]{url}
|
\usepackage[hyphens]{url}
|
||||||
\usepackage{hyperref} % For hyperlinks in the PDF
|
\usepackage{hyperref} % For hyperlinks in the PDF
|
||||||
\usepackage{fontawesome6}
|
\usepackage{fontawesome7}
|
||||||
\usepackage[os=win]{menukeys}
|
\usepackage[os=win]{menukeys}
|
||||||
\usepackage{xfrac}
|
\usepackage{xfrac}
|
||||||
\usepackage[euler]{textgreek}
|
\usepackage[euler]{textgreek}
|
||||||
@@ -141,7 +141,7 @@ There's much more Tracy can do, which can be explored by carefully reading this
|
|||||||
\section{A quick look at Tracy Profiler}
|
\section{A quick look at Tracy Profiler}
|
||||||
\label{quicklook}
|
\label{quicklook}
|
||||||
|
|
||||||
Tracy is a real-time, nanosecond resolution \emph{hybrid frame and sampling profiler} that you can use for remote or embedded telemetry of games and other applications. It can profile CPU\footnote{Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as Rust, Zig, C\#, OCaml, Odin, etc.}, GPU\footnote{All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL.}, memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
Tracy is a real-time, nanosecond resolution \emph{hybrid frame and sampling profiler} that you can use for remote or embedded telemetry of games and other applications. It can profile CPU\footnote{Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as Rust, Zig, C\#, OCaml, Odin, etc.}, GPU\footnote{All major graphics/compute APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA, WebGPU.}, memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
||||||
|
|
||||||
While Tracy can perform statistical analysis of sampled call stack data, just like other \emph{statistical profilers} (such as VTune, perf, or Very Sleepy), it mainly focuses on manual markup of the source code. Such markup allows frame-by-frame inspection of the program execution. For example, you will be able to see exactly which functions are called, how much time they require, and how they interact with each other in a multi-threaded environment. In contrast, the statistical analysis may show you the hot spots in your code, but it cannot accurately pinpoint the underlying cause for semi-random frame stutter that may occur every couple of seconds.
|
While Tracy can perform statistical analysis of sampled call stack data, just like other \emph{statistical profilers} (such as VTune, perf, or Very Sleepy), it mainly focuses on manual markup of the source code. Such markup allows frame-by-frame inspection of the program execution. For example, you will be able to see exactly which functions are called, how much time they require, and how they interact with each other in a multi-threaded environment. In contrast, the statistical analysis may show you the hot spots in your code, but it cannot accurately pinpoint the underlying cause for semi-random frame stutter that may occur every couple of seconds.
|
||||||
|
|
||||||
@@ -1050,6 +1050,8 @@ Memory & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faXm
|
|||||||
GPU zones (OpenGL) & \faCheck & \faCheck & \faCheck & \faPoo & \faPoo & & \faXmark \\
|
GPU zones (OpenGL) & \faCheck & \faCheck & \faCheck & \faPoo & \faPoo & & \faXmark \\
|
||||||
GPU zones (Vulkan) & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & & \faXmark \\
|
GPU zones (Vulkan) & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & & \faXmark \\
|
||||||
GPU zones (Metal) & \faXmark & \faXmark & \faXmark & \faCheck\textsuperscript{\emph{b}} & \faCheck\textsuperscript{\emph{b}} & \faXmark & \faXmark \\
|
GPU zones (Metal) & \faXmark & \faXmark & \faXmark & \faCheck\textsuperscript{\emph{b}} & \faCheck\textsuperscript{\emph{b}} & \faXmark & \faXmark \\
|
||||||
|
GPU zones (CUDA) & \faCheck & \faCheck & \faXmark & \faXmark & \faXmark & \faQuestion & \faXmark \\
|
||||||
|
GPU zones (WebGPU) & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faQuestion & \faQuestion \\
|
||||||
Call stacks & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faXmark \\
|
Call stacks & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faXmark \\
|
||||||
Symbol resolution & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck \\
|
Symbol resolution & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck & \faCheck \\
|
||||||
Crash handling & \faCheck & \faCheck & \faCheck & \faXmark & \faXmark & \faXmark & \faXmark \\
|
Crash handling & \faCheck & \faCheck & \faCheck & \faXmark & \faXmark & \faXmark & \faXmark \\
|
||||||
@@ -1645,7 +1647,7 @@ To mark that a separate memory pool is to be tracked you should use the named ve
|
|||||||
\subsection{GPU profiling}
|
\subsection{GPU profiling}
|
||||||
\label{gpuprofiling}
|
\label{gpuprofiling}
|
||||||
|
|
||||||
Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 11, Direct3D 12, Metal, OpenCL and CUDA execution time on GPU.
|
Tracy provides bindings for profiling OpenGL, Vulkan, Direct3D 11, Direct3D 12, Metal, OpenCL, CUDA and WebGPU execution time on GPU.
|
||||||
|
|
||||||
Note that the CPU and GPU timers may be unsynchronized unless you create a calibrated context, but the availability of calibrated contexts is limited. You can try to correct the desynchronization of uncalibrated contexts in the profiler's options (section~\ref{options}).
|
Note that the CPU and GPU timers may be unsynchronized unless you create a calibrated context, but the availability of calibrated contexts is limited. You can try to correct the desynchronization of uncalibrated contexts in the profiler's options (section~\ref{options}).
|
||||||
|
|
||||||
@@ -1701,6 +1703,12 @@ logo=\bcattention
|
|||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{bclogo}
|
\end{bclogo}
|
||||||
|
|
||||||
|
\subparagraph{Calibrated context}
|
||||||
|
|
||||||
|
By default, the OpenGL context is uncalibrated: the CPU and GPU clocks are aligned only once, when the context is created, so over long captures the two time domains may drift apart (section~\ref{options} describes correcting this drift manually). Defining \texttt{TRACY\_OPENGL\_AUTO\_CALIBRATION} before including \texttt{TracyOpenGL.hpp} enables periodic recalibration instead: roughly once per second Tracy samples the GPU and CPU clocks together and emits a calibration event, allowing the profiler to track and remove the drift automatically.
|
||||||
|
|
||||||
|
This is opt-in because OpenGL exposes no atomic CPU+GPU timestamp query (unlike Vulkan's \texttt{VK\_EXT\_calibrated\_timestamps} or Direct3D~12, whose contexts are always calibrated). Recalibration therefore reads the GPU clock with \texttt{glGetInteger64v(GL\_TIMESTAMP)}, which forces a CPU/GPU synchronization (a pipeline stall) each time it runs. Enable it only when the improved long-capture alignment is worth the periodic stall.
|
||||||
|
|
||||||
\subsubsection{Vulkan}
|
\subsubsection{Vulkan}
|
||||||
|
|
||||||
Similarly, for Vulkan support you should include the \texttt{public/tracy/TracyVulkan.hpp} header file. Tracing Vulkan devices and queues is a bit more involved, and the Vulkan initialization macro \texttt{TracyVkContext(physdev, device, queue, cmdbuf)} returns an instance of \texttt{TracyVkCtx} object, which tracks an associated Vulkan queue. Cleanup is performed using the \texttt{TracyVkDestroy(ctx)} macro. You may create multiple Vulkan contexts. To set a custom name for the context, use the \texttt{TracyVkContextName(ctx, name, size)} macro.
|
Similarly, for Vulkan support you should include the \texttt{public/tracy/TracyVulkan.hpp} header file. Tracing Vulkan devices and queues is a bit more involved, and the Vulkan initialization macro \texttt{TracyVkContext(physdev, device, queue, cmdbuf)} returns an instance of \texttt{TracyVkCtx} object, which tracks an associated Vulkan queue. Cleanup is performed using the \texttt{TracyVkDestroy(ctx)} macro. You may create multiple Vulkan contexts. To set a custom name for the context, use the \texttt{TracyVkContextName(ctx, name, size)} macro.
|
||||||
@@ -1785,6 +1793,16 @@ Unlike other GPU backends in Tracy, there is no need to call \texttt{TracyCUDACo
|
|||||||
|
|
||||||
To stop profiling, call the \texttt{TracyCUDAStopProfiling(ctx)} macro.
|
To stop profiling, call the \texttt{TracyCUDAStopProfiling(ctx)} macro.
|
||||||
|
|
||||||
|
\subsubsection{WebGPU}
|
||||||
|
|
||||||
|
WebGPU support is enabled by including the \texttt{public/tracy/TracyWebGPU.hpp} header file. Both major implementations of WebGPU (Dawn and wgpu-native) are supported.
|
||||||
|
|
||||||
|
Before creating the WebGPU device, make sure to call \texttt{TracyWebGPUSetupDeviceDescriptor()} to let Tracy request the device features and extensions necessary for profiling. After the device is created, use the \texttt{TracyWebGPUContext()} macro to instantiate the \texttt{WebGPUQueueCtx} object required for GPU instrumentation. The object should later be cleaned up with the \texttt{TracyWebGPUDestroy()} macro. To set a custom name for the context, use the \texttt{TracyWebGPUContextName()} macro.
|
||||||
|
|
||||||
|
To instrument a GPU zone, use the various \texttt{TracyWebGPU*Zone*()} macros. Note that WebGPU only offers command instrumentation at the ``pass''-level. While command-level granularity is possible through implementation-specific WebGPU extensions, Tracy does not support it at the moment. Supply the corresponding WebGPU pass descriptor to the instrumentation macro \textit{before} creating the WebGPU pass encoder.
|
||||||
|
|
||||||
|
You are required to periodically collect the GPU events using the \texttt{TracyWebGPUCollect()} macro. Good places for collection are: after synchronous waits, after event processing (\texttt{wgpuInstanceProcessEvents}), after present drawable calls (\texttt{wgpuSurfacePresent}), and inside the completion callback of command queues (\texttt{wgpuQueueOnSubmittedWorkDone}).
|
||||||
|
|
||||||
\subsubsection{ROCm}
|
\subsubsection{ROCm}
|
||||||
|
|
||||||
On Linux, if rocprofiler-sdk is installed, tracy can automatically trace GPU dispatches and collect
|
On Linux, if rocprofiler-sdk is installed, tracy can automatically trace GPU dispatches and collect
|
||||||
@@ -1818,13 +1836,13 @@ sudo amd-smi set -g 0 -l stable_std
|
|||||||
|
|
||||||
Putting more than one GPU zone macro in a single scope features the same issue as with the \texttt{ZoneScoped} macros, described in section~\ref{multizone} (but this time the variable name is \texttt{\_\_\_tracy\_gpu\_zone}).
|
Putting more than one GPU zone macro in a single scope features the same issue as with the \texttt{ZoneScoped} macros, described in section~\ref{multizone} (but this time the variable name is \texttt{\_\_\_tracy\_gpu\_zone}).
|
||||||
|
|
||||||
To solve this problem, in case of OpenGL use the \texttt{TracyGpuNamedZone} macro in place of \texttt{TracyGpuZone} (or the color variant). The same applies to Vulkan, Direct3D 11/12 and Metal -- replace \texttt{TracyVkZone} with \texttt{TracyVkNamedZone}, \texttt{TracyD3D11Zone}/\texttt{TracyD3D12Zone} with \texttt{TracyD3D11NamedZone}/\texttt{TracyD3D12NamedZone}, and \texttt{TracyMetalZone} with \texttt{TracyMetalNamedZone}.
|
To solve this problem, in case of OpenGL use the \texttt{TracyGpuNamedZone} macro in place of \texttt{TracyGpuZone} (or the color variant). The same applies to Vulkan, Direct3D 11/12, Metal and WebGPU -- replace \texttt{TracyVkZone} with \texttt{TracyVkNamedZone}, \texttt{TracyD3D11Zone}/\texttt{TracyD3D12Zone} with \texttt{TracyD3D11NamedZone}/\texttt{TracyD3D12NamedZone}, \texttt{TracyMetalZone} with \texttt{TracyMetalNamedZone}, and \texttt{TracyWebGPUZone} with \texttt{TracyWebGPUNamedZone}.
|
||||||
|
|
||||||
Remember to provide your name for the created stack variable as the first parameter to the macros.
|
Remember to provide your name for the created stack variable as the first parameter to the macros.
|
||||||
|
|
||||||
\subsubsection{Transient GPU zones}
|
\subsubsection{Transient GPU zones}
|
||||||
|
|
||||||
Transient zones (see section~\ref{transientzones} for details) are available in OpenGL, Vulkan, and Direct3D 11/12 macros. Transient zones are not available for Metal at this moment.
|
Transient zones (see section~\ref{transientzones} for details) are available in OpenGL, Vulkan, Direct3D 11/12 and WebGPU macros. Transient zones are not available for Metal at this moment.
|
||||||
|
|
||||||
\subsection{Fibers}
|
\subsection{Fibers}
|
||||||
\label{fibers}
|
\label{fibers}
|
||||||
@@ -2041,6 +2059,20 @@ filesystem setup as the one used to run the tracy instrumented application).
|
|||||||
You can do path substitution with the \texttt{-p} option to perform any number of path
|
You can do path substitution with the \texttt{-p} option to perform any number of path
|
||||||
substitutions in order to use symbols located elsewhere.
|
substitutions in order to use symbols located elsewhere.
|
||||||
|
|
||||||
|
By default, symbol resolution is performed with the platform's native facility: the DbgHelp
|
||||||
|
library on Windows, and the \texttt{addr2line} tool found in \texttt{PATH} elsewhere. You can
|
||||||
|
override this with the \texttt{-a} option, passing the path to a custom
|
||||||
|
\texttt{addr2line}-compatible tool (for instance an \texttt{addr2line} from a cross-compilation
|
||||||
|
toolchain, or \texttt{llvm-addr2line}). The \texttt{-a} option works on all platforms, including
|
||||||
|
Windows, and takes precedence over the platform default.
|
||||||
|
|
||||||
|
Extra arguments can be passed verbatim to the resolution tool with the \texttt{-A} option. Tracy
|
||||||
|
records callstack frame offsets relative to the image base, but \texttt{addr2line}-compatible
|
||||||
|
tools expect a full virtual address for images that have a non-zero preferred image base (such as
|
||||||
|
PE on Windows or Mach-O on Apple). For these, pass \texttt{-A "--relative-address"} so that
|
||||||
|
\texttt{llvm-addr2line} or \texttt{llvm-symbolizer} adds the image base back. ELF images need no
|
||||||
|
such adjustment.
|
||||||
|
|
||||||
\begin{bclogo}[
|
\begin{bclogo}[
|
||||||
noborder=true,
|
noborder=true,
|
||||||
couleur=black!5,
|
couleur=black!5,
|
||||||
@@ -2253,6 +2285,31 @@ TracyCLockAfterUnlock(tracy_lock_ctx);
|
|||||||
|
|
||||||
You can optionally mark the location of where the lock is held by using the \texttt{TracyCLockMark} macro, this should be done after acquiring the lock.
|
You can optionally mark the location of where the lock is held by using the \texttt{TracyCLockMark} macro, this should be done after acquiring the lock.
|
||||||
|
|
||||||
|
Similarly, you can use the following macros to mark a shared lock using the C API:
|
||||||
|
\begin{itemize}
|
||||||
|
\item \texttt{TracyCSharedLockAnnounce(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockTerminate(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockBeforeLock(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockAfterLock(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockAfterUnlock(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockAfterTryLock(lock\_ctx, acquired)}
|
||||||
|
\item \texttt{TracyCSharedLockBeforeSharedLock(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockAfterSharedLock(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockAfterSharedUnlock(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockAfterTrySharedLock(lock\_ctx, acquired)}
|
||||||
|
\item \texttt{TracyCSharedLockMark(lock\_ctx)}
|
||||||
|
\item \texttt{TracyCSharedLockCustomName(lock\_ctx, name, size)}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
A shared lock context has to be defined next to the shared lock that it will be marking:
|
||||||
|
\begin{lstlisting}
|
||||||
|
TracyCSharedLockCtx tracy_shared_lock_ctx;
|
||||||
|
HANDLE shared_lock;
|
||||||
|
\end{lstlisting}
|
||||||
|
|
||||||
|
The same rules apply to shared locks as to regular locks, but you need to use the shared lock macros instead.
|
||||||
|
Lock implementations in classes \texttt{Lockable} and \texttt{SharedLockable} show how to properly perform context handling.
|
||||||
|
|
||||||
\subsubsection{Memory profiling}
|
\subsubsection{Memory profiling}
|
||||||
\label{cmemoryprofiling}
|
\label{cmemoryprofiling}
|
||||||
|
|
||||||
@@ -3832,7 +3889,7 @@ You will find the zones with locks and their associated threads on this combined
|
|||||||
The left-hand side \emph{index area} of the timeline view displays various labels (threads, locks), which can be categorized in the following way:
|
The left-hand side \emph{index area} of the timeline view displays various labels (threads, locks), which can be categorized in the following way:
|
||||||
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item \emph{Light blue label} -- GPU context. Multi-threaded Vulkan, OpenCL, Direct3D 12 and Metal contexts are additionally split into separate threads.
|
\item \emph{Light blue label} -- GPU context. Multi-threaded Vulkan, OpenCL, Direct3D 12, Metal and WebGPU contexts are additionally split into separate threads.
|
||||||
\item \emph{Pink label} -- CPU data graph.
|
\item \emph{Pink label} -- CPU data graph.
|
||||||
\item \emph{White label} -- A CPU thread. It will be replaced by a bright red label in a thread that has crashed (section~\ref{crashhandling}). If automated sampling was performed, clicking the~\LMB{}~left mouse button on the \emph{\faGhost{}~ghost zones} button will switch zone display mode between 'instrumented' and 'ghost.'
|
\item \emph{White label} -- A CPU thread. It will be replaced by a bright red label in a thread that has crashed (section~\ref{crashhandling}). If automated sampling was performed, clicking the~\LMB{}~left mouse button on the \emph{\faGhost{}~ghost zones} button will switch zone display mode between 'instrumented' and 'ghost.'
|
||||||
\item \emph{Green label} -- Fiber, coroutine, or any other sort of cooperative multitasking 'green thread.'
|
\item \emph{Green label} -- Fiber, coroutine, or any other sort of cooperative multitasking 'green thread.'
|
||||||
@@ -3854,7 +3911,7 @@ In an example in figure~\ref{zoneslocks} you can see that there are two threads:
|
|||||||
|
|
||||||
Meanwhile, the \emph{Streaming thread} is performing some \emph{Streaming jobs}. The first \emph{Streaming job} sent a message (section~\ref{messagelog}). In addition to being listed in the message log, it is indicated by a triangle over the thread separator. When multiple messages are in one place, the triangle outline shape changes to a filled triangle.
|
Meanwhile, the \emph{Streaming thread} is performing some \emph{Streaming jobs}. The first \emph{Streaming job} sent a message (section~\ref{messagelog}). In addition to being listed in the message log, it is indicated by a triangle over the thread separator. When multiple messages are in one place, the triangle outline shape changes to a filled triangle.
|
||||||
|
|
||||||
The GPU zones are displayed just like CPU zones, with an OpenGL/Vulkan/Direct3D/Metal/OpenCL context in place of a thread name.
|
The GPU zones are displayed just like CPU zones, with an OpenGL/Vulkan/Direct3D/Metal/OpenCL/CUDA/WebGPU context in place of a thread name.
|
||||||
|
|
||||||
Hovering the \faArrowPointer{} mouse pointer over a zone will highlight all other zones that have the exact source location with a white outline. Clicking the \LMB{}~left mouse button on a zone will open the zone information window (section~\ref{zoneinfo}). Holding the \keys{\ctrl} key and clicking the \LMB{}~left mouse button on a zone will open the zone statistics window (section~\ref{findzone}). Clicking the \MMB{}~middle mouse button on a zone will zoom the view to the extent of the zone.
|
Hovering the \faArrowPointer{} mouse pointer over a zone will highlight all other zones that have the exact source location with a white outline. Clicking the \LMB{}~left mouse button on a zone will open the zone information window (section~\ref{zoneinfo}). Holding the \keys{\ctrl} key and clicking the \LMB{}~left mouse button on a zone will open the zone statistics window (section~\ref{findzone}). Clicking the \MMB{}~middle mouse button on a zone will zoom the view to the extent of the zone.
|
||||||
|
|
||||||
@@ -4026,7 +4083,7 @@ You can freely adjust each time range on the timeline by clicking the \LMB{}~lef
|
|||||||
|
|
||||||
Tracy allows adding custom notes to the trace. For example, you may want to mark a region to ignore because the application was out-of-focus or a region where a new user was connecting to the game, which resulted in a frame drop that needs to be investigated.
|
Tracy allows adding custom notes to the trace. For example, you may want to mark a region to ignore because the application was out-of-focus or a region where a new user was connecting to the game, which resulted in a frame drop that needs to be investigated.
|
||||||
|
|
||||||
Methods of specifying the annotation region are described in section~\ref{timeranges}. When a new annotation is added, a settings window is displayed (section~\ref{annotationsettings}), allowing you to enter a description.
|
Methods of specifying the annotation region are described in section~\ref{timeranges}. When a new annotation is added, it is assigned a semi-unique random name to make it distinguishable. The settings window is also opened (section~\ref{annotationsettings}), allowing you to enter your own description of the annotation.
|
||||||
|
|
||||||
Annotations are displayed on the timeline, as presented in figure~\ref{annotation}. Clicking on the circle next to the text description will open the annotation settings window, in which you can modify or remove the region. List of all annotations in the trace is available in the annotations list window described in section~\ref{annotationlist}, which is accessible through the \emph{\faScrewdriverWrench{} Tools} button on the control menu.
|
Annotations are displayed on the timeline, as presented in figure~\ref{annotation}. Clicking on the circle next to the text description will open the annotation settings window, in which you can modify or remove the region. List of all annotations in the trace is available in the annotations list window described in section~\ref{annotationlist}, which is accessible through the \emph{\faScrewdriverWrench{} Tools} button on the control menu.
|
||||||
|
|
||||||
@@ -4063,7 +4120,7 @@ In this window, you can set various trace-related options. For example, the time
|
|||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item \emph{\faSignature{} Draw CPU usage graph} -- You can disable drawing of the CPU usage graph here.
|
\item \emph{\faSignature{} Draw CPU usage graph} -- You can disable drawing of the CPU usage graph here.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Metal/Direct3D/OpenCL zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets of uncalibrated contexts (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}.
|
\item \emph{\faEye{} Draw GPU zones} -- Allows disabling display of OpenGL/Vulkan/Metal/Direct3D/OpenCL/CUDA/WebGPU zones. The \emph{GPU zones} drop-down allows disabling individual GPU contexts and setting CPU/GPU drift offsets of uncalibrated contexts (see section~\ref{gpuprofiling} for more information). The \emph{\faRobot~Auto} button automatically measures the GPU drift value\footnote{There is an assumption that drift is linear. Automated measurement calculates and removes change over time in delay-to-execution of GPU zones. Resulting value may still be incorrect.}.
|
||||||
\item \emph{\faMicrochip{} Draw CPU zones} -- Determines whether CPU zones are displayed.
|
\item \emph{\faMicrochip{} Draw CPU zones} -- Determines whether CPU zones are displayed.
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item \emph{\faGhost{} Draw ghost zones} -- Controls if ghost zones should be displayed in threads which don't have any instrumented zones available.
|
\item \emph{\faGhost{} Draw ghost zones} -- Controls if ghost zones should be displayed in threads which don't have any instrumented zones available.
|
||||||
@@ -4537,7 +4594,9 @@ The information about the selected memory allocation is displayed in this window
|
|||||||
\subsection{Trace information window}
|
\subsection{Trace information window}
|
||||||
\label{traceinfo}
|
\label{traceinfo}
|
||||||
|
|
||||||
This window contains information about the current trace: captured program name, time of the capture, profiler version which performed the capture, and a custom trace description, which you can fill in.
|
This window contains information about the current trace: the captured program name, the time of the capture, and the profiler version which performed the capture.
|
||||||
|
|
||||||
|
There's a text entry field for an optional custom description of the trace for you to fill in. This description will appear on the profiler window title bar, or when comparing two traces (section~\ref{compare}), enabling you to quickly recognize what the trace contains. For some people it's fine to just have \emph{any} semi-unique description to be able to identify a specific trace. For such purposes there's an \emph{\faDice{}~Generate name} button, which will set the trace description to an abstract meaningless identifier.
|
||||||
|
|
||||||
If the \emph{\faUserGear{}~Public sidecar} option is selected, the file containing trace-specific user settings (see section~\ref{tracespecific}) will be saved on disk next to the trace file.
|
If the \emph{\faUserGear{}~Public sidecar} option is selected, the file containing trace-specific user settings (see section~\ref{tracespecific}) will be saved on disk next to the trace file.
|
||||||
|
|
||||||
@@ -4877,7 +4936,7 @@ The profiled program is highlighted using green color. Furthermore, the yellow h
|
|||||||
\subsection{Annotation settings window}
|
\subsection{Annotation settings window}
|
||||||
\label{annotationsettings}
|
\label{annotationsettings}
|
||||||
|
|
||||||
In this window, you may modify how a timeline annotation (section~\ref{annotatingtrace}) is presented by setting its text description or selecting region highlight color. If the note is no longer needed, you may also remove it here.
|
In this window, you may modify how a timeline annotation (section~\ref{annotatingtrace}) is presented by setting its text description or selecting region highlight color. A random annotation description can be set with the \emph{\faDice{}~Generate name} button. If the note is no longer needed, you may also remove it here.
|
||||||
|
|
||||||
\subsection{Annotation list window}
|
\subsection{Annotation list window}
|
||||||
\label{annotationlist}
|
\label{annotationlist}
|
||||||
|
|||||||
@@ -135,6 +135,10 @@ if get_option('ignore_memory_faults')
|
|||||||
tracy_common_args += ['-DTRACY_IGNORE_MEMORY_FAULTS']
|
tracy_common_args += ['-DTRACY_IGNORE_MEMORY_FAULTS']
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if get_option('opengl_auto_calibration')
|
||||||
|
tracy_common_args += ['-DTRACY_OPENGL_AUTO_CALIBRATION']
|
||||||
|
endif
|
||||||
|
|
||||||
tracy_shared_libs = get_option('default_library') == 'shared'
|
tracy_shared_libs = get_option('default_library') == 'shared'
|
||||||
|
|
||||||
if tracy_shared_libs
|
if tracy_shared_libs
|
||||||
|
|||||||
@@ -29,3 +29,4 @@ option('verbose', type : 'boolean', value : false, description : 'Enable verbose
|
|||||||
option('no_internal_message', type : 'boolean', value : false, description : 'Prevent the profiler from logging messages')
|
option('no_internal_message', type : 'boolean', value : false, description : 'Prevent the profiler from logging messages')
|
||||||
option('debuginfod', type : 'boolean', value : false, description : 'Enable debuginfod support')
|
option('debuginfod', type : 'boolean', value : false, description : 'Enable debuginfod support')
|
||||||
option('ignore_memory_faults', type : 'boolean', value : false, description : 'Ignore instrumentation errors from memory free events that do not have a matching allocation')
|
option('ignore_memory_faults', type : 'boolean', value : false, description : 'Ignore instrumentation errors from memory free events that do not have a matching allocation')
|
||||||
|
option('opengl_auto_calibration', type : 'boolean', value : false, description : 'Periodically recalibrate OpenGL GPU/CPU clock drift (forces a CPU/GPU sync each time)')
|
||||||
|
|||||||
@@ -70,6 +70,7 @@ set(SERVER_FILES
|
|||||||
TracyMarkdown.cpp
|
TracyMarkdown.cpp
|
||||||
TracyMicroArchitecture.cpp
|
TracyMicroArchitecture.cpp
|
||||||
TracyMouse.cpp
|
TracyMouse.cpp
|
||||||
|
TracyNameGen.cpp
|
||||||
TracyProtoHistory.cpp
|
TracyProtoHistory.cpp
|
||||||
TracySourceContents.cpp
|
TracySourceContents.cpp
|
||||||
TracySourceTokenizer.cpp
|
TracySourceTokenizer.cpp
|
||||||
@@ -149,15 +150,30 @@ Embed(PROFILER_FILES SystemPrompt src/llm/system.prompt.md)
|
|||||||
Embed(PROFILER_FILES SkillCallstack src/llm/skill.callstack.md)
|
Embed(PROFILER_FILES SkillCallstack src/llm/skill.callstack.md)
|
||||||
Embed(PROFILER_FILES SkillOptimization src/llm/skill.optimization.md)
|
Embed(PROFILER_FILES SkillOptimization src/llm/skill.optimization.md)
|
||||||
Embed(PROFILER_FILES ToolsJson src/llm/tools.json)
|
Embed(PROFILER_FILES ToolsJson src/llm/tools.json)
|
||||||
|
|
||||||
Embed(PROFILER_FILES FontFixed src/font/FiraCode-Retina.ttf)
|
Embed(PROFILER_FILES FontFixed src/font/FiraCode-Retina.ttf)
|
||||||
Embed(PROFILER_FILES FontIcons src/font/Font\ Awesome\ 6\ Free-Solid-900.otf)
|
Embed(PROFILER_FILES FontIcons src/font/Font\ Awesome\ 7\ Free-Solid-900.otf)
|
||||||
Embed(PROFILER_FILES FontNormal src/font/Roboto-Regular.ttf)
|
Embed(PROFILER_FILES FontNormal src/font/Roboto-Regular.ttf)
|
||||||
Embed(PROFILER_FILES FontBold src/font/Roboto-Bold.ttf)
|
Embed(PROFILER_FILES FontBold src/font/Roboto-Bold.ttf)
|
||||||
Embed(PROFILER_FILES FontItalic src/font/Roboto-Italic.ttf)
|
Embed(PROFILER_FILES FontItalic src/font/Roboto-Italic.ttf)
|
||||||
Embed(PROFILER_FILES FontBoldItalic src/font/Roboto-BoldItalic.ttf)
|
Embed(PROFILER_FILES FontBoldItalic src/font/Roboto-BoldItalic.ttf)
|
||||||
Embed(PROFILER_FILES FontEmoji src/font/NotoEmoji-Regular.ttf)
|
Embed(PROFILER_FILES FontEmoji src/font/NotoEmoji-Regular.ttf)
|
||||||
|
|
||||||
Embed(PROFILER_FILES Manual ../manual/tracy.md)
|
Embed(PROFILER_FILES Manual ../manual/tracy.md)
|
||||||
|
|
||||||
|
Embed(PROFILER_FILES Text100Million src/achievements/100Million.md)
|
||||||
|
Embed(PROFILER_FILES TextConnectToClient src/achievements/ConnectToClient.md)
|
||||||
|
Embed(PROFILER_FILES TextFindZone src/achievements/FindZone.md)
|
||||||
|
Embed(PROFILER_FILES TextFrameImages src/achievements/FrameImages.md)
|
||||||
|
Embed(PROFILER_FILES TextGlobalSettings src/achievements/GlobalSettings.md)
|
||||||
|
Embed(PROFILER_FILES TextInstrumentationIntro src/achievements/InstrumentationIntro.md)
|
||||||
|
Embed(PROFILER_FILES TextInstrumentationStatistics src/achievements/InstrumentationStatistics.md)
|
||||||
|
Embed(PROFILER_FILES TextInstrumentFrames src/achievements/InstrumentFrames.md)
|
||||||
|
Embed(PROFILER_FILES TextIntro src/achievements/Intro.md)
|
||||||
|
Embed(PROFILER_FILES TextLoadTrace src/achievements/LoadTrace.md)
|
||||||
|
Embed(PROFILER_FILES TextSamplingIntro src/achievements/SamplingIntro.md)
|
||||||
|
Embed(PROFILER_FILES TextSaveTrace src/achievements/SaveTrace.md)
|
||||||
|
|
||||||
set(INCLUDES "${CMAKE_CURRENT_BINARY_DIR}")
|
set(INCLUDES "${CMAKE_CURRENT_BINARY_DIR}")
|
||||||
set(LIBS "")
|
set(LIBS "")
|
||||||
|
|
||||||
|
|||||||
@@ -162,6 +162,15 @@ static ImGuiKey TranslateKeyCode( const char* code )
|
|||||||
return ImGuiKey_None;
|
return ImGuiKey_None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Forwards the modifier state carried by an Emscripten keyboard event to ImGui.
// Each of the event's ctrlKey / shiftKey / altKey / metaKey flags is submitted as
// a key event for the matching ImGuiMod_* value (metaKey maps to ImGuiMod_Super).
static void UpdateKeyModifiers( const EmscriptenKeyboardEvent* e )
|
||||||
|
{
|
||||||
|
ImGuiIO& io = ImGui::GetIO();
|
||||||
|
io.AddKeyEvent( ImGuiMod_Ctrl, e->ctrlKey );
|
||||||
|
io.AddKeyEvent( ImGuiMod_Shift, e->shiftKey );
|
||||||
|
io.AddKeyEvent( ImGuiMod_Alt, e->altKey );
|
||||||
|
io.AddKeyEvent( ImGuiMod_Super, e->metaKey );
|
||||||
|
}
|
||||||
|
|
||||||
Backend::Backend( const char* title, const std::function<void()>& redraw, const std::function<void(float)>& scaleChanged, const std::function<int(void)>& isBusy, RunQueue* mainThreadTasks )
|
Backend::Backend( const char* title, const std::function<void()>& redraw, const std::function<void(float)>& scaleChanged, const std::function<int(void)>& isBusy, RunQueue* mainThreadTasks )
|
||||||
{
|
{
|
||||||
constexpr EGLint eglConfigAttrib[] = {
|
constexpr EGLint eglConfigAttrib[] = {
|
||||||
@@ -243,6 +252,7 @@ Backend::Backend( const char* title, const std::function<void()>& redraw, const
|
|||||||
return EM_TRUE;
|
return EM_TRUE;
|
||||||
} );
|
} );
|
||||||
emscripten_set_keydown_callback( EMSCRIPTEN_EVENT_TARGET_WINDOW, nullptr, EM_TRUE, [] ( int, const EmscriptenKeyboardEvent* e, void* ) -> EM_BOOL {
|
emscripten_set_keydown_callback( EMSCRIPTEN_EVENT_TARGET_WINDOW, nullptr, EM_TRUE, [] ( int, const EmscriptenKeyboardEvent* e, void* ) -> EM_BOOL {
|
||||||
|
UpdateKeyModifiers( e );
|
||||||
const auto code = TranslateKeyCode( e->code );
|
const auto code = TranslateKeyCode( e->code );
|
||||||
if( code == ImGuiKey_None ) return EM_FALSE;
|
if( code == ImGuiKey_None ) return EM_FALSE;
|
||||||
ImGui::GetIO().AddKeyEvent( code, true );
|
ImGui::GetIO().AddKeyEvent( code, true );
|
||||||
@@ -250,6 +260,7 @@ Backend::Backend( const char* title, const std::function<void()>& redraw, const
|
|||||||
return EM_TRUE;
|
return EM_TRUE;
|
||||||
} );
|
} );
|
||||||
emscripten_set_keyup_callback( EMSCRIPTEN_EVENT_TARGET_WINDOW, nullptr, EM_TRUE, [] ( int, const EmscriptenKeyboardEvent* e, void* ) -> EM_BOOL {
|
emscripten_set_keyup_callback( EMSCRIPTEN_EVENT_TARGET_WINDOW, nullptr, EM_TRUE, [] ( int, const EmscriptenKeyboardEvent* e, void* ) -> EM_BOOL {
|
||||||
|
UpdateKeyModifiers( e );
|
||||||
const auto code = TranslateKeyCode( e->code );
|
const auto code = TranslateKeyCode( e->code );
|
||||||
if( code == ImGuiKey_None ) return EM_FALSE;
|
if( code == ImGuiKey_None ) return EM_FALSE;
|
||||||
ImGui::GetIO().AddKeyEvent( code, false );
|
ImGui::GetIO().AddKeyEvent( code, false );
|
||||||
|
|||||||
@@ -4,7 +4,6 @@
|
|||||||
#include <misc/freetype/imgui_freetype.h>
|
#include <misc/freetype/imgui_freetype.h>
|
||||||
|
|
||||||
#include "Fonts.hpp"
|
#include "Fonts.hpp"
|
||||||
#include "profiler/IconsFontAwesome6.h"
|
|
||||||
#include "profiler/TracyEmbed.hpp"
|
#include "profiler/TracyEmbed.hpp"
|
||||||
|
|
||||||
#include "data/FontFixed.hpp"
|
#include "data/FontFixed.hpp"
|
||||||
|
|||||||
12
profiler/src/achievements/100Million.md
Normal file
12
profiler/src/achievements/100Million.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# It's over 100 million!
|
||||||
|
|
||||||
|
Tracy can handle a lot of data. How about 100 million zones in a single trace? Add a lot of zones to your program and see how it handles it!
|
||||||
|
|
||||||
|
Capturing a long-running profile trace is easy. Need to profile an hour of your program execution? You can do it.
|
||||||
|
|
||||||
|
Note that it doesn't make much sense to instrument every little function you might have. The cost of the instrumentation itself will be higher than the cost of the function in such a case.
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> Keep in mind that the more zones you have, the more memory and CPU time the profiler will use. Be careful not to run out of memory.
|
||||||
|
>
|
||||||
|
> To capture 100 million zones, you will need approximately 4 GB of RAM.
|
||||||
10
profiler/src/achievements/ConnectToClient.md
Normal file
10
profiler/src/achievements/ConnectToClient.md
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# First profiling session
|
||||||
|
|
||||||
|
Let's start our adventure by instrumenting your application and connecting it to the profiler. Here's a quick refresher:
|
||||||
|
|
||||||
|
1. Integrate Tracy Profiler into your application. This can be done using CMake, Meson, or simply by adding the source files to your project.
|
||||||
|
2. Make sure that `TracyClient.cpp` (or the Tracy library) is included in your build.
|
||||||
|
3. Define `TRACY_ENABLE` in your build configuration, for the whole application. Do not do it in a single source file because it won't work.
|
||||||
|
4. Start your application, and * Connect* to it with the profiler.
|
||||||
|
|
||||||
|
Please refer to the [user manual](https://github.com/wolfpld/tracy/releases) for more details.
|
||||||
11
profiler/src/achievements/FindZone.md
Normal file
11
profiler/src/achievements/FindZone.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# Find some zones
|
||||||
|
|
||||||
|
You can search for zones in the trace by opening the search window with the * Find zone* button on the top bar. It will ask you for the zone name, which in most cases will be the function name in the code.
|
||||||
|
|
||||||
|
The search may find more than one zone with the same name. A list of all the zones found is displayed, and you can select any of them.
|
||||||
|
|
||||||
|
Alternatively, you can open the Statistics window and click an entry there. This will open the Find zone window as if you had searched for that zone.
|
||||||
|
|
||||||
|
When a zone is selected, a number of statistics are displayed to help you understand the performance of your application. In addition, a histogram of the zone execution times is displayed to make it easier for you to determine the performance of the profiled code. Be sure to select a zone with a large number of calls to make the histogram look interesting!
|
||||||
|
|
||||||
|
Note that you can draw a range on the histogram to limit the number of entries displayed in the zone list below. This list allows you to examine each zone individually. There are also a number of zone groupings that you can select. Each group can be selected and the time associated with the selected group will be highlighted on the histogram.
|
||||||
11
profiler/src/achievements/FrameImages.md
Normal file
11
profiler/src/achievements/FrameImages.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# A picture is worth a thousand words
|
||||||
|
|
||||||
|
Tracy allows you to add context to each frame, by attaching a screenshot. You can do this with the `FrameImage` macro.
|
||||||
|
|
||||||
|
You will have to do the screen capture and resizing yourself, which can be a bit complicated. The manual provides sample code that shows how to do this in a performant way.
|
||||||
|
|
||||||
|
The frame images are displayed in the context of a frame, for example, when you hover over the frame in the timeline or in the frame graph at the top of the screen.
|
||||||
|
|
||||||
|
You can even view a recording of what your application was doing by clicking the * Tools* icon and then selecting the * Playback* option. Try it out!
|
||||||
|
|
||||||
|
The `FrameImage` macro is a great way to see what happened in your application at a particular time. Maybe you have a performance problem that only occurs when a certain object is on the screen?
|
||||||
5
profiler/src/achievements/GlobalSettings.md
Normal file
5
profiler/src/achievements/GlobalSettings.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Global settings
|
||||||
|
|
||||||
|
Tracy has a variety of settings that can be adjusted to suit your needs. These settings can be found by clicking on the * Wrench* icon on the welcome screen. This will open the about window, where you can expand the * Global settings* menu.
|
||||||
|
|
||||||
|
The settings are saved between sessions, so you only need to set them once.
|
||||||
22
profiler/src/achievements/InstrumentFrames.md
Normal file
22
profiler/src/achievements/InstrumentFrames.md
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Instrumenting frames
|
||||||
|
|
||||||
|
In addition to instrumenting functions, you can also instrument frames. This allows you to see how much time is spent in each frame of your application.
|
||||||
|
|
||||||
|
To instrument frames, you need to add the `FrameMark` macro at the beginning of each frame. This can be done in the main loop of your application, or in a separate function that is called at the beginning of each frame.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
#include "Tracy.hpp"
|
||||||
|
|
||||||
|
void Render()
|
||||||
|
{
|
||||||
|
// Render the frame
|
||||||
|
SwapBuffers();
|
||||||
|
FrameMark;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
When you profile your application, you will see a new frame appear on the timeline each time the `FrameMark` macro is called. This allows you to see how much time is spent in each frame and how many frames are rendered per second.
|
||||||
|
|
||||||
|
The `FrameMark` macro is a great way to see at a glance how your application is performing over time. Maybe there are some performance problems that only appear after a few minutes of running the application? A frame graph is drawn at the top of the profiler window where you can see the timing of all frames.
|
||||||
|
|
||||||
|
Note that some applications do not have a frame-based structure, and in such cases, frame instrumentation may not be useful. That's ok.
|
||||||
22
profiler/src/achievements/InstrumentationIntro.md
Normal file
22
profiler/src/achievements/InstrumentationIntro.md
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Instrumenting your application
|
||||||
|
|
||||||
|
Instrumentation is a powerful feature that allows you to see the exact runtime of each call to the selected set of functions. The downside is that it takes a bit of manual work to get it set up.
|
||||||
|
|
||||||
|
To get started, open a source file and include the `Tracy.hpp` header. This will give you access to a variety of macros provided by Tracy. Next, add the `ZoneScoped` macro to the beginning of one of your functions, like this:
|
||||||
|
|
||||||
|
```c++
|
||||||
|
#include "Tracy.hpp"
|
||||||
|
|
||||||
|
void SomeFunction()
|
||||||
|
{
|
||||||
|
ZoneScoped;
|
||||||
|
// Your code here
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Now, when you profile your application, you will see a new zone appear on the timeline for each call to the function. This allows you to see how much time is spent in each call and how many times the function is called.
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> The `ZoneScoped` macro is just one of the many macros provided by Tracy. See the documentation for more information.
|
||||||
|
|
||||||
|
The above description applies to C++ code, but things are done similarly in other programming languages. Refer to the documentation for your language for more information.
|
||||||
5
profiler/src/achievements/InstrumentationStatistics.md
Normal file
5
profiler/src/achievements/InstrumentationStatistics.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Show me the stats!
|
||||||
|
|
||||||
|
Once you have instrumented your application, you can view the statistics for each zone in the timeline. This allows you to see how much time is spent in each zone and how many times it is called.
|
||||||
|
|
||||||
|
To view the statistics, click on the * Statistics* button on the top bar. This will open a new window with a list of all zones in the trace.
|
||||||
12
profiler/src/achievements/Intro.md
Normal file
12
profiler/src/achievements/Intro.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Click here to discover achievements!
|
||||||
|
|
||||||
|
Clicking on the * Achievements* button opens the Achievements List. Here you can see the tasks to be completed along with a short description of what needs to be done.
|
||||||
|
|
||||||
|
As you complete each Achievement, new Achievements will appear, so be sure to keep checking the list for new ones!
|
||||||
|
|
||||||
|
To make the new things easier to spot, the Achievements List will show a marker next to them. The achievements * Achievements* button will glow yellow when there are new things to see.
|
||||||
|
|
||||||
|
- New tasks: orange
|
||||||
|
- Completed tasks: green
|
||||||
|
|
||||||
|
Good luck!
|
||||||
3
profiler/src/achievements/LoadTrace.md
Normal file
3
profiler/src/achievements/LoadTrace.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# Load a trace
|
||||||
|
|
||||||
|
You can open a previously saved trace file (or one received from a friend) with the * Open saved trace* button on the welcome screen.
|
||||||
10
profiler/src/achievements/SamplingIntro.md
Normal file
10
profiler/src/achievements/SamplingIntro.md
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Sampling program execution
|
||||||
|
|
||||||
|
Sampling program execution is a great way to find out where the hot spots are in your program. It can be used to find out which functions take the most time, or which lines of code are executed the most often.
|
||||||
|
|
||||||
|
While instrumentation requires changes to your code, sampling does not. However, because of the way it works, the results are coarser and it's not possible to know when functions are called or when they return.
|
||||||
|
|
||||||
|
Sampling is automatic on Linux. On Windows, you must run the profiled application as an administrator for it to work.
|
||||||
|
|
||||||
|
> [!WARNING]
|
||||||
|
> Depending on your system configuration, some additional steps may be required. Please refer to the user manual for more information.
|
||||||
12
profiler/src/achievements/SaveTrace.md
Normal file
12
profiler/src/achievements/SaveTrace.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Save a trace
|
||||||
|
|
||||||
|
Now that you have traced your application (or are in the process of doing so), you can save it to disk for future reference. You can do this by clicking on the *Connection* icon in the top left corner of the screen and then clicking on the *Save trace* button.
|
||||||
|
|
||||||
|
Keeping old traces on hand can be beneficial, as you can compare the performance of your optimizations with what you had before.
|
||||||
|
|
||||||
|
You can also share the trace with your friends or co-workers by sending them the trace file.
|
||||||
|
|
||||||
|
> [!WARNING]
|
||||||
|
> **Warning**
|
||||||
|
>
|
||||||
|
> Trace files can contain sensitive information about your application, such as program code, or even the contents of source files. Be careful when sharing them with others.
|
||||||
Binary file not shown.
BIN
profiler/src/font/Font Awesome 7 Free-Solid-900.otf
Normal file
BIN
profiler/src/font/Font Awesome 7 Free-Solid-900.otf
Normal file
Binary file not shown.
@@ -39,7 +39,7 @@
|
|||||||
#include "profiler/TracyTexture.hpp"
|
#include "profiler/TracyTexture.hpp"
|
||||||
#include "profiler/TracyView.hpp"
|
#include "profiler/TracyView.hpp"
|
||||||
#include "profiler/TracyWeb.hpp"
|
#include "profiler/TracyWeb.hpp"
|
||||||
#include "profiler/IconsFontAwesome6.h"
|
#include "profiler/IconsFontAwesome7.h"
|
||||||
#include "../../server/tracy_pdqsort.h"
|
#include "../../server/tracy_pdqsort.h"
|
||||||
#include "../../server/tracy_robin_hood.h"
|
#include "../../server/tracy_robin_hood.h"
|
||||||
#include "../../server/TracyFileHeader.hpp"
|
#include "../../server/TracyFileHeader.hpp"
|
||||||
@@ -1466,9 +1466,17 @@ Would you like to enable achievements?
|
|||||||
{
|
{
|
||||||
ImGui::Columns( 2 );
|
ImGui::Columns( 2 );
|
||||||
ImGui::SetColumnWidth( 0, 300 * dpiScale );
|
ImGui::SetColumnWidth( 0, 300 * dpiScale );
|
||||||
|
ImGui::BeginChild( "##achievementtoc", ImVec2( 0, 0 ), ImGuiChildFlags_AlwaysUseWindowPadding );
|
||||||
DrawAchievements( c->items );
|
DrawAchievements( c->items );
|
||||||
|
ImGui::EndChild();
|
||||||
ImGui::NextColumn();
|
ImGui::NextColumn();
|
||||||
if( s_achievementItem ) s_achievementItem->description();
|
ImGui::BeginChild( "##achievementtext", ImVec2( 0, 0 ), ImGuiChildFlags_AlwaysUseWindowPadding );
|
||||||
|
if( s_achievementItem )
|
||||||
|
{
|
||||||
|
tracy::Markdown md( nullptr, nullptr );
|
||||||
|
md.Print( s_achievementItem->text.c_str(), s_achievementItem->text.size() );
|
||||||
|
}
|
||||||
|
ImGui::EndChild();
|
||||||
ImGui::EndColumns();
|
ImGui::EndColumns();
|
||||||
ImGui::EndTabItem();
|
ImGui::EndTabItem();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,14 +1,17 @@
|
|||||||
// Generated by https://github.com/juliettef/IconFontCppHeaders script GenerateIconFontCppHeaders.py for languages C and C++
|
// Generated by https://github.com/juliettef/IconFontCppHeaders script GenerateIconFontCppHeaders.py
|
||||||
// from https://github.com/FortAwesome/Font-Awesome/raw/6.x/metadata/icons.yml
|
// for C and C++
|
||||||
// for use with https://github.com/FortAwesome/Font-Awesome/blob/6.x/webfonts/fa-regular-400.ttf, https://github.com/FortAwesome/Font-Awesome/blob/6.x/webfonts/fa-solid-900.ttf
|
// from codepoints https://github.com/FortAwesome/Font-Awesome/raw/7.x/metadata/icons.yml
|
||||||
|
// for use with font https://github.com/FortAwesome/Font-Awesome/blob/7.x/webfonts/fa-regular-400.woff2 (You may need to convert the .woff2 files to .ttf depending upon your loader.), https://github.com/FortAwesome/Font-Awesome/blob/7.x/webfonts/fa-solid-900.woff2 (You may need to convert the .woff2 files to .ttf depending upon your loader.)
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#define FONT_ICON_FILE_NAME_FAR "fa-regular-400.ttf"
|
#define FONT_ICON_FILE_NAME_FAR "fa-regular-400.woff2"
|
||||||
#define FONT_ICON_FILE_NAME_FAS "fa-solid-900.ttf"
|
#define FONT_ICON_FILE_NAME_FAS "fa-solid-900.woff2"
|
||||||
|
|
||||||
#define ICON_MIN_FA 0xe005
|
#define ICON_MIN_FA 0xe005
|
||||||
#define ICON_MAX_16_FA 0xf8ff
|
#define ICON_MAX_16_FA 0xf8ff
|
||||||
#define ICON_MAX_FA 0xf8ff
|
#define ICON_MAX_FA 0xf8ff
|
||||||
|
|
||||||
#define ICON_FA_0 "0" // U+0030
|
#define ICON_FA_0 "0" // U+0030
|
||||||
#define ICON_FA_1 "1" // U+0031
|
#define ICON_FA_1 "1" // U+0031
|
||||||
#define ICON_FA_2 "2" // U+0032
|
#define ICON_FA_2 "2" // U+0032
|
||||||
@@ -22,6 +25,7 @@
|
|||||||
#define ICON_FA_A "A" // U+0041
|
#define ICON_FA_A "A" // U+0041
|
||||||
#define ICON_FA_ADDRESS_BOOK "\xef\x8a\xb9" // U+f2b9
|
#define ICON_FA_ADDRESS_BOOK "\xef\x8a\xb9" // U+f2b9
|
||||||
#define ICON_FA_ADDRESS_CARD "\xef\x8a\xbb" // U+f2bb
|
#define ICON_FA_ADDRESS_CARD "\xef\x8a\xbb" // U+f2bb
|
||||||
|
#define ICON_FA_ALARM_CLOCK "\xef\x8d\x8e" // U+f34e
|
||||||
#define ICON_FA_ALIGN_CENTER "\xef\x80\xb7" // U+f037
|
#define ICON_FA_ALIGN_CENTER "\xef\x80\xb7" // U+f037
|
||||||
#define ICON_FA_ALIGN_JUSTIFY "\xef\x80\xb9" // U+f039
|
#define ICON_FA_ALIGN_JUSTIFY "\xef\x80\xb9" // U+f039
|
||||||
#define ICON_FA_ALIGN_LEFT "\xef\x80\xb6" // U+f036
|
#define ICON_FA_ALIGN_LEFT "\xef\x80\xb6" // U+f036
|
||||||
@@ -41,7 +45,9 @@
|
|||||||
#define ICON_FA_ANGLES_UP "\xef\x84\x82" // U+f102
|
#define ICON_FA_ANGLES_UP "\xef\x84\x82" // U+f102
|
||||||
#define ICON_FA_ANKH "\xef\x99\x84" // U+f644
|
#define ICON_FA_ANKH "\xef\x99\x84" // U+f644
|
||||||
#define ICON_FA_APPLE_WHOLE "\xef\x97\x91" // U+f5d1
|
#define ICON_FA_APPLE_WHOLE "\xef\x97\x91" // U+f5d1
|
||||||
|
#define ICON_FA_AQUARIUS "\xee\xa1\x85" // U+e845
|
||||||
#define ICON_FA_ARCHWAY "\xef\x95\x97" // U+f557
|
#define ICON_FA_ARCHWAY "\xef\x95\x97" // U+f557
|
||||||
|
#define ICON_FA_ARIES "\xee\xa1\x86" // U+e846
|
||||||
#define ICON_FA_ARROW_DOWN "\xef\x81\xa3" // U+f063
|
#define ICON_FA_ARROW_DOWN "\xef\x81\xa3" // U+f063
|
||||||
#define ICON_FA_ARROW_DOWN_1_9 "\xef\x85\xa2" // U+f162
|
#define ICON_FA_ARROW_DOWN_1_9 "\xef\x85\xa2" // U+f162
|
||||||
#define ICON_FA_ARROW_DOWN_9_1 "\xef\xa2\x86" // U+f886
|
#define ICON_FA_ARROW_DOWN_9_1 "\xef\xa2\x86" // U+f886
|
||||||
@@ -116,6 +122,7 @@
|
|||||||
#define ICON_FA_BAN "\xef\x81\x9e" // U+f05e
|
#define ICON_FA_BAN "\xef\x81\x9e" // U+f05e
|
||||||
#define ICON_FA_BAN_SMOKING "\xef\x95\x8d" // U+f54d
|
#define ICON_FA_BAN_SMOKING "\xef\x95\x8d" // U+f54d
|
||||||
#define ICON_FA_BANDAGE "\xef\x91\xa2" // U+f462
|
#define ICON_FA_BANDAGE "\xef\x91\xa2" // U+f462
|
||||||
|
#define ICON_FA_BANGLADESHI_TAKA_SIGN "\xee\x8b\xa6" // U+e2e6
|
||||||
#define ICON_FA_BARCODE "\xef\x80\xaa" // U+f02a
|
#define ICON_FA_BARCODE "\xef\x80\xaa" // U+f02a
|
||||||
#define ICON_FA_BARS "\xef\x83\x89" // U+f0c9
|
#define ICON_FA_BARS "\xef\x83\x89" // U+f0c9
|
||||||
#define ICON_FA_BARS_PROGRESS "\xef\xa0\xa8" // U+f828
|
#define ICON_FA_BARS_PROGRESS "\xef\xa0\xa8" // U+f828
|
||||||
@@ -214,6 +221,7 @@
|
|||||||
#define ICON_FA_BURGER "\xef\xa0\x85" // U+f805
|
#define ICON_FA_BURGER "\xef\xa0\x85" // U+f805
|
||||||
#define ICON_FA_BURST "\xee\x93\x9c" // U+e4dc
|
#define ICON_FA_BURST "\xee\x93\x9c" // U+e4dc
|
||||||
#define ICON_FA_BUS "\xef\x88\x87" // U+f207
|
#define ICON_FA_BUS "\xef\x88\x87" // U+f207
|
||||||
|
#define ICON_FA_BUS_SIDE "\xee\xa0\x9d" // U+e81d
|
||||||
#define ICON_FA_BUS_SIMPLE "\xef\x95\x9e" // U+f55e
|
#define ICON_FA_BUS_SIMPLE "\xef\x95\x9e" // U+f55e
|
||||||
#define ICON_FA_BUSINESS_TIME "\xef\x99\x8a" // U+f64a
|
#define ICON_FA_BUSINESS_TIME "\xef\x99\x8a" // U+f64a
|
||||||
#define ICON_FA_C "C" // U+0043
|
#define ICON_FA_C "C" // U+0043
|
||||||
@@ -232,8 +240,10 @@
|
|||||||
#define ICON_FA_CAMERA_RETRO "\xef\x82\x83" // U+f083
|
#define ICON_FA_CAMERA_RETRO "\xef\x82\x83" // U+f083
|
||||||
#define ICON_FA_CAMERA_ROTATE "\xee\x83\x98" // U+e0d8
|
#define ICON_FA_CAMERA_ROTATE "\xee\x83\x98" // U+e0d8
|
||||||
#define ICON_FA_CAMPGROUND "\xef\x9a\xbb" // U+f6bb
|
#define ICON_FA_CAMPGROUND "\xef\x9a\xbb" // U+f6bb
|
||||||
|
#define ICON_FA_CANCER "\xee\xa1\x87" // U+e847
|
||||||
#define ICON_FA_CANDY_CANE "\xef\x9e\x86" // U+f786
|
#define ICON_FA_CANDY_CANE "\xef\x9e\x86" // U+f786
|
||||||
#define ICON_FA_CANNABIS "\xef\x95\x9f" // U+f55f
|
#define ICON_FA_CANNABIS "\xef\x95\x9f" // U+f55f
|
||||||
|
#define ICON_FA_CAPRICORN "\xee\xa1\x88" // U+e848
|
||||||
#define ICON_FA_CAPSULES "\xef\x91\xab" // U+f46b
|
#define ICON_FA_CAPSULES "\xef\x91\xab" // U+f46b
|
||||||
#define ICON_FA_CAR "\xef\x86\xb9" // U+f1b9
|
#define ICON_FA_CAR "\xef\x86\xb9" // U+f1b9
|
||||||
#define ICON_FA_CAR_BATTERY "\xef\x97\x9f" // U+f5df
|
#define ICON_FA_CAR_BATTERY "\xef\x97\x9f" // U+f5df
|
||||||
@@ -266,6 +276,7 @@
|
|||||||
#define ICON_FA_CHART_AREA "\xef\x87\xbe" // U+f1fe
|
#define ICON_FA_CHART_AREA "\xef\x87\xbe" // U+f1fe
|
||||||
#define ICON_FA_CHART_BAR "\xef\x82\x80" // U+f080
|
#define ICON_FA_CHART_BAR "\xef\x82\x80" // U+f080
|
||||||
#define ICON_FA_CHART_COLUMN "\xee\x83\xa3" // U+e0e3
|
#define ICON_FA_CHART_COLUMN "\xee\x83\xa3" // U+e0e3
|
||||||
|
#define ICON_FA_CHART_DIAGRAM "\xee\x9a\x95" // U+e695
|
||||||
#define ICON_FA_CHART_GANTT "\xee\x83\xa4" // U+e0e4
|
#define ICON_FA_CHART_GANTT "\xee\x83\xa4" // U+e0e4
|
||||||
#define ICON_FA_CHART_LINE "\xef\x88\x81" // U+f201
|
#define ICON_FA_CHART_LINE "\xef\x88\x81" // U+f201
|
||||||
#define ICON_FA_CHART_PIE "\xef\x88\x80" // U+f200
|
#define ICON_FA_CHART_PIE "\xef\x88\x80" // U+f200
|
||||||
@@ -287,9 +298,9 @@
|
|||||||
#define ICON_FA_CHEVRON_RIGHT "\xef\x81\x94" // U+f054
|
#define ICON_FA_CHEVRON_RIGHT "\xef\x81\x94" // U+f054
|
||||||
#define ICON_FA_CHEVRON_UP "\xef\x81\xb7" // U+f077
|
#define ICON_FA_CHEVRON_UP "\xef\x81\xb7" // U+f077
|
||||||
#define ICON_FA_CHILD "\xef\x86\xae" // U+f1ae
|
#define ICON_FA_CHILD "\xef\x86\xae" // U+f1ae
|
||||||
|
#define ICON_FA_CHILD_COMBATANT "\xee\x93\xa0" // U+e4e0
|
||||||
#define ICON_FA_CHILD_DRESS "\xee\x96\x9c" // U+e59c
|
#define ICON_FA_CHILD_DRESS "\xee\x96\x9c" // U+e59c
|
||||||
#define ICON_FA_CHILD_REACHING "\xee\x96\x9d" // U+e59d
|
#define ICON_FA_CHILD_REACHING "\xee\x96\x9d" // U+e59d
|
||||||
#define ICON_FA_CHILD_RIFLE "\xee\x93\xa0" // U+e4e0
|
|
||||||
#define ICON_FA_CHILDREN "\xee\x93\xa1" // U+e4e1
|
#define ICON_FA_CHILDREN "\xee\x93\xa1" // U+e4e1
|
||||||
#define ICON_FA_CHURCH "\xef\x94\x9d" // U+f51d
|
#define ICON_FA_CHURCH "\xef\x94\x9d" // U+f51d
|
||||||
#define ICON_FA_CIRCLE "\xef\x84\x91" // U+f111
|
#define ICON_FA_CIRCLE "\xef\x84\x91" // U+f111
|
||||||
@@ -334,6 +345,7 @@
|
|||||||
#define ICON_FA_CLOCK_ROTATE_LEFT "\xef\x87\x9a" // U+f1da
|
#define ICON_FA_CLOCK_ROTATE_LEFT "\xef\x87\x9a" // U+f1da
|
||||||
#define ICON_FA_CLONE "\xef\x89\x8d" // U+f24d
|
#define ICON_FA_CLONE "\xef\x89\x8d" // U+f24d
|
||||||
#define ICON_FA_CLOSED_CAPTIONING "\xef\x88\x8a" // U+f20a
|
#define ICON_FA_CLOSED_CAPTIONING "\xef\x88\x8a" // U+f20a
|
||||||
|
#define ICON_FA_CLOSED_CAPTIONING_SLASH "\xee\x84\xb5" // U+e135
|
||||||
#define ICON_FA_CLOUD "\xef\x83\x82" // U+f0c2
|
#define ICON_FA_CLOUD "\xef\x83\x82" // U+f0c2
|
||||||
#define ICON_FA_CLOUD_ARROW_DOWN "\xef\x83\xad" // U+f0ed
|
#define ICON_FA_CLOUD_ARROW_DOWN "\xef\x83\xad" // U+f0ed
|
||||||
#define ICON_FA_CLOUD_ARROW_UP "\xef\x83\xae" // U+f0ee
|
#define ICON_FA_CLOUD_ARROW_UP "\xef\x83\xae" // U+f0ee
|
||||||
@@ -360,6 +372,7 @@
|
|||||||
#define ICON_FA_COMMENT_DOLLAR "\xef\x99\x91" // U+f651
|
#define ICON_FA_COMMENT_DOLLAR "\xef\x99\x91" // U+f651
|
||||||
#define ICON_FA_COMMENT_DOTS "\xef\x92\xad" // U+f4ad
|
#define ICON_FA_COMMENT_DOTS "\xef\x92\xad" // U+f4ad
|
||||||
#define ICON_FA_COMMENT_MEDICAL "\xef\x9f\xb5" // U+f7f5
|
#define ICON_FA_COMMENT_MEDICAL "\xef\x9f\xb5" // U+f7f5
|
||||||
|
#define ICON_FA_COMMENT_NODES "\xee\x9a\x96" // U+e696
|
||||||
#define ICON_FA_COMMENT_SLASH "\xef\x92\xb3" // U+f4b3
|
#define ICON_FA_COMMENT_SLASH "\xef\x92\xb3" // U+f4b3
|
||||||
#define ICON_FA_COMMENT_SMS "\xef\x9f\x8d" // U+f7cd
|
#define ICON_FA_COMMENT_SMS "\xef\x9f\x8d" // U+f7cd
|
||||||
#define ICON_FA_COMMENTS "\xef\x82\x86" // U+f086
|
#define ICON_FA_COMMENTS "\xef\x82\x86" // U+f086
|
||||||
@@ -522,6 +535,8 @@
|
|||||||
#define ICON_FA_FILE_CSV "\xef\x9b\x9d" // U+f6dd
|
#define ICON_FA_FILE_CSV "\xef\x9b\x9d" // U+f6dd
|
||||||
#define ICON_FA_FILE_EXCEL "\xef\x87\x83" // U+f1c3
|
#define ICON_FA_FILE_EXCEL "\xef\x87\x83" // U+f1c3
|
||||||
#define ICON_FA_FILE_EXPORT "\xef\x95\xae" // U+f56e
|
#define ICON_FA_FILE_EXPORT "\xef\x95\xae" // U+f56e
|
||||||
|
#define ICON_FA_FILE_FRAGMENT "\xee\x9a\x97" // U+e697
|
||||||
|
#define ICON_FA_FILE_HALF_DASHED "\xee\x9a\x98" // U+e698
|
||||||
#define ICON_FA_FILE_IMAGE "\xef\x87\x85" // U+f1c5
|
#define ICON_FA_FILE_IMAGE "\xef\x87\x85" // U+f1c5
|
||||||
#define ICON_FA_FILE_IMPORT "\xef\x95\xaf" // U+f56f
|
#define ICON_FA_FILE_IMPORT "\xef\x95\xaf" // U+f56f
|
||||||
#define ICON_FA_FILE_INVOICE "\xef\x95\xb0" // U+f570
|
#define ICON_FA_FILE_INVOICE "\xef\x95\xb0" // U+f570
|
||||||
@@ -585,6 +600,7 @@
|
|||||||
#define ICON_FA_GEAR "\xef\x80\x93" // U+f013
|
#define ICON_FA_GEAR "\xef\x80\x93" // U+f013
|
||||||
#define ICON_FA_GEARS "\xef\x82\x85" // U+f085
|
#define ICON_FA_GEARS "\xef\x82\x85" // U+f085
|
||||||
#define ICON_FA_GEM "\xef\x8e\xa5" // U+f3a5
|
#define ICON_FA_GEM "\xef\x8e\xa5" // U+f3a5
|
||||||
|
#define ICON_FA_GEMINI "\xee\xa1\x89" // U+e849
|
||||||
#define ICON_FA_GENDERLESS "\xef\x88\xad" // U+f22d
|
#define ICON_FA_GENDERLESS "\xef\x88\xad" // U+f22d
|
||||||
#define ICON_FA_GHOST "\xef\x9b\xa2" // U+f6e2
|
#define ICON_FA_GHOST "\xef\x9b\xa2" // U+f6e2
|
||||||
#define ICON_FA_GIFT "\xef\x81\xab" // U+f06b
|
#define ICON_FA_GIFT "\xef\x81\xab" // U+f06b
|
||||||
@@ -642,8 +658,6 @@
|
|||||||
#define ICON_FA_HANDS_PRAYING "\xef\x9a\x84" // U+f684
|
#define ICON_FA_HANDS_PRAYING "\xef\x9a\x84" // U+f684
|
||||||
#define ICON_FA_HANDSHAKE "\xef\x8a\xb5" // U+f2b5
|
#define ICON_FA_HANDSHAKE "\xef\x8a\xb5" // U+f2b5
|
||||||
#define ICON_FA_HANDSHAKE_ANGLE "\xef\x93\x84" // U+f4c4
|
#define ICON_FA_HANDSHAKE_ANGLE "\xef\x93\x84" // U+f4c4
|
||||||
#define ICON_FA_HANDSHAKE_SIMPLE "\xef\x93\x86" // U+f4c6
|
|
||||||
#define ICON_FA_HANDSHAKE_SIMPLE_SLASH "\xee\x81\x9f" // U+e05f
|
|
||||||
#define ICON_FA_HANDSHAKE_SLASH "\xee\x81\xa0" // U+e060
|
#define ICON_FA_HANDSHAKE_SLASH "\xee\x81\xa0" // U+e060
|
||||||
#define ICON_FA_HANUKIAH "\xef\x9b\xa6" // U+f6e6
|
#define ICON_FA_HANUKIAH "\xef\x9b\xa6" // U+f6e6
|
||||||
#define ICON_FA_HARD_DRIVE "\xef\x82\xa0" // U+f0a0
|
#define ICON_FA_HARD_DRIVE "\xef\x82\xa0" // U+f0a0
|
||||||
@@ -657,7 +671,6 @@
|
|||||||
#define ICON_FA_HEAD_SIDE_VIRUS "\xee\x81\xa4" // U+e064
|
#define ICON_FA_HEAD_SIDE_VIRUS "\xee\x81\xa4" // U+e064
|
||||||
#define ICON_FA_HEADING "\xef\x87\x9c" // U+f1dc
|
#define ICON_FA_HEADING "\xef\x87\x9c" // U+f1dc
|
||||||
#define ICON_FA_HEADPHONES "\xef\x80\xa5" // U+f025
|
#define ICON_FA_HEADPHONES "\xef\x80\xa5" // U+f025
|
||||||
#define ICON_FA_HEADPHONES_SIMPLE "\xef\x96\x8f" // U+f58f
|
|
||||||
#define ICON_FA_HEADSET "\xef\x96\x90" // U+f590
|
#define ICON_FA_HEADSET "\xef\x96\x90" // U+f590
|
||||||
#define ICON_FA_HEART "\xef\x80\x84" // U+f004
|
#define ICON_FA_HEART "\xef\x80\x84" // U+f004
|
||||||
#define ICON_FA_HEART_CIRCLE_BOLT "\xee\x93\xbc" // U+e4fc
|
#define ICON_FA_HEART_CIRCLE_BOLT "\xee\x93\xbc" // U+e4fc
|
||||||
@@ -672,6 +685,9 @@
|
|||||||
#define ICON_FA_HELICOPTER_SYMBOL "\xee\x94\x82" // U+e502
|
#define ICON_FA_HELICOPTER_SYMBOL "\xee\x94\x82" // U+e502
|
||||||
#define ICON_FA_HELMET_SAFETY "\xef\xa0\x87" // U+f807
|
#define ICON_FA_HELMET_SAFETY "\xef\xa0\x87" // U+f807
|
||||||
#define ICON_FA_HELMET_UN "\xee\x94\x83" // U+e503
|
#define ICON_FA_HELMET_UN "\xee\x94\x83" // U+e503
|
||||||
|
#define ICON_FA_HEXAGON "\xef\x8c\x92" // U+f312
|
||||||
|
#define ICON_FA_HEXAGON_NODES "\xee\x9a\x99" // U+e699
|
||||||
|
#define ICON_FA_HEXAGON_NODES_BOLT "\xee\x9a\x9a" // U+e69a
|
||||||
#define ICON_FA_HIGHLIGHTER "\xef\x96\x91" // U+f591
|
#define ICON_FA_HIGHLIGHTER "\xef\x96\x91" // U+f591
|
||||||
#define ICON_FA_HILL_AVALANCHE "\xee\x94\x87" // U+e507
|
#define ICON_FA_HILL_AVALANCHE "\xee\x94\x87" // U+e507
|
||||||
#define ICON_FA_HILL_ROCKSLIDE "\xee\x94\x88" // U+e508
|
#define ICON_FA_HILL_ROCKSLIDE "\xee\x94\x88" // U+e508
|
||||||
@@ -767,8 +783,10 @@
|
|||||||
#define ICON_FA_LEFT_LONG "\xef\x8c\x8a" // U+f30a
|
#define ICON_FA_LEFT_LONG "\xef\x8c\x8a" // U+f30a
|
||||||
#define ICON_FA_LEFT_RIGHT "\xef\x8c\xb7" // U+f337
|
#define ICON_FA_LEFT_RIGHT "\xef\x8c\xb7" // U+f337
|
||||||
#define ICON_FA_LEMON "\xef\x82\x94" // U+f094
|
#define ICON_FA_LEMON "\xef\x82\x94" // U+f094
|
||||||
|
#define ICON_FA_LEO "\xee\xa1\x8a" // U+e84a
|
||||||
#define ICON_FA_LESS_THAN "<" // U+003c
|
#define ICON_FA_LESS_THAN "<" // U+003c
|
||||||
#define ICON_FA_LESS_THAN_EQUAL "\xef\x94\xb7" // U+f537
|
#define ICON_FA_LESS_THAN_EQUAL "\xef\x94\xb7" // U+f537
|
||||||
|
#define ICON_FA_LIBRA "\xee\xa1\x8b" // U+e84b
|
||||||
#define ICON_FA_LIFE_RING "\xef\x87\x8d" // U+f1cd
|
#define ICON_FA_LIFE_RING "\xef\x87\x8d" // U+f1cd
|
||||||
#define ICON_FA_LIGHTBULB "\xef\x83\xab" // U+f0eb
|
#define ICON_FA_LIGHTBULB "\xef\x83\xab" // U+f0eb
|
||||||
#define ICON_FA_LINES_LEANING "\xee\x94\x9e" // U+e51e
|
#define ICON_FA_LINES_LEANING "\xee\x94\x9e" // U+e51e
|
||||||
@@ -842,6 +860,7 @@
|
|||||||
#define ICON_FA_MOBILE_RETRO "\xee\x94\xa7" // U+e527
|
#define ICON_FA_MOBILE_RETRO "\xee\x94\xa7" // U+e527
|
||||||
#define ICON_FA_MOBILE_SCREEN "\xef\x8f\x8f" // U+f3cf
|
#define ICON_FA_MOBILE_SCREEN "\xef\x8f\x8f" // U+f3cf
|
||||||
#define ICON_FA_MOBILE_SCREEN_BUTTON "\xef\x8f\x8d" // U+f3cd
|
#define ICON_FA_MOBILE_SCREEN_BUTTON "\xef\x8f\x8d" // U+f3cd
|
||||||
|
#define ICON_FA_MOBILE_VIBRATE "\xee\xa0\x96" // U+e816
|
||||||
#define ICON_FA_MONEY_BILL "\xef\x83\x96" // U+f0d6
|
#define ICON_FA_MONEY_BILL "\xef\x83\x96" // U+f0d6
|
||||||
#define ICON_FA_MONEY_BILL_1 "\xef\x8f\x91" // U+f3d1
|
#define ICON_FA_MONEY_BILL_1 "\xef\x8f\x91" // U+f3d1
|
||||||
#define ICON_FA_MONEY_BILL_1_WAVE "\xef\x94\xbb" // U+f53b
|
#define ICON_FA_MONEY_BILL_1_WAVE "\xef\x94\xbb" // U+f53b
|
||||||
@@ -871,6 +890,7 @@
|
|||||||
#define ICON_FA_NETWORK_WIRED "\xef\x9b\xbf" // U+f6ff
|
#define ICON_FA_NETWORK_WIRED "\xef\x9b\xbf" // U+f6ff
|
||||||
#define ICON_FA_NEUTER "\xef\x88\xac" // U+f22c
|
#define ICON_FA_NEUTER "\xef\x88\xac" // U+f22c
|
||||||
#define ICON_FA_NEWSPAPER "\xef\x87\xaa" // U+f1ea
|
#define ICON_FA_NEWSPAPER "\xef\x87\xaa" // U+f1ea
|
||||||
|
#define ICON_FA_NON_BINARY "\xee\xa0\x87" // U+e807
|
||||||
#define ICON_FA_NOT_EQUAL "\xef\x94\xbe" // U+f53e
|
#define ICON_FA_NOT_EQUAL "\xef\x94\xbe" // U+f53e
|
||||||
#define ICON_FA_NOTDEF "\xee\x87\xbe" // U+e1fe
|
#define ICON_FA_NOTDEF "\xee\x87\xbe" // U+e1fe
|
||||||
#define ICON_FA_NOTE_STICKY "\xef\x89\x89" // U+f249
|
#define ICON_FA_NOTE_STICKY "\xef\x89\x89" // U+f249
|
||||||
@@ -878,6 +898,7 @@
|
|||||||
#define ICON_FA_O "O" // U+004f
|
#define ICON_FA_O "O" // U+004f
|
||||||
#define ICON_FA_OBJECT_GROUP "\xef\x89\x87" // U+f247
|
#define ICON_FA_OBJECT_GROUP "\xef\x89\x87" // U+f247
|
||||||
#define ICON_FA_OBJECT_UNGROUP "\xef\x89\x88" // U+f248
|
#define ICON_FA_OBJECT_UNGROUP "\xef\x89\x88" // U+f248
|
||||||
|
#define ICON_FA_OCTAGON "\xef\x8c\x86" // U+f306
|
||||||
#define ICON_FA_OIL_CAN "\xef\x98\x93" // U+f613
|
#define ICON_FA_OIL_CAN "\xef\x98\x93" // U+f613
|
||||||
#define ICON_FA_OIL_WELL "\xee\x94\xb2" // U+e532
|
#define ICON_FA_OIL_WELL "\xee\x94\xb2" // U+e532
|
||||||
#define ICON_FA_OM "\xef\x99\xb9" // U+f679
|
#define ICON_FA_OM "\xef\x99\xb9" // U+f679
|
||||||
@@ -906,6 +927,7 @@
|
|||||||
#define ICON_FA_PEN_RULER "\xef\x96\xae" // U+f5ae
|
#define ICON_FA_PEN_RULER "\xef\x96\xae" // U+f5ae
|
||||||
#define ICON_FA_PEN_TO_SQUARE "\xef\x81\x84" // U+f044
|
#define ICON_FA_PEN_TO_SQUARE "\xef\x81\x84" // U+f044
|
||||||
#define ICON_FA_PENCIL "\xef\x8c\x83" // U+f303
|
#define ICON_FA_PENCIL "\xef\x8c\x83" // U+f303
|
||||||
|
#define ICON_FA_PENTAGON "\xee\x9e\x90" // U+e790
|
||||||
#define ICON_FA_PEOPLE_ARROWS "\xee\x81\xa8" // U+e068
|
#define ICON_FA_PEOPLE_ARROWS "\xee\x81\xa8" // U+e068
|
||||||
#define ICON_FA_PEOPLE_CARRY_BOX "\xef\x93\x8e" // U+f4ce
|
#define ICON_FA_PEOPLE_CARRY_BOX "\xef\x93\x8e" // U+f4ce
|
||||||
#define ICON_FA_PEOPLE_GROUP "\xee\x94\xb3" // U+e533
|
#define ICON_FA_PEOPLE_GROUP "\xee\x94\xb3" // U+e533
|
||||||
@@ -968,8 +990,10 @@
|
|||||||
#define ICON_FA_PHONE_SLASH "\xef\x8f\x9d" // U+f3dd
|
#define ICON_FA_PHONE_SLASH "\xef\x8f\x9d" // U+f3dd
|
||||||
#define ICON_FA_PHONE_VOLUME "\xef\x8a\xa0" // U+f2a0
|
#define ICON_FA_PHONE_VOLUME "\xef\x8a\xa0" // U+f2a0
|
||||||
#define ICON_FA_PHOTO_FILM "\xef\xa1\xbc" // U+f87c
|
#define ICON_FA_PHOTO_FILM "\xef\xa1\xbc" // U+f87c
|
||||||
|
#define ICON_FA_PICTURE_IN_PICTURE "\xee\xa0\x8b" // U+e80b
|
||||||
#define ICON_FA_PIGGY_BANK "\xef\x93\x93" // U+f4d3
|
#define ICON_FA_PIGGY_BANK "\xef\x93\x93" // U+f4d3
|
||||||
#define ICON_FA_PILLS "\xef\x92\x84" // U+f484
|
#define ICON_FA_PILLS "\xef\x92\x84" // U+f484
|
||||||
|
#define ICON_FA_PISCES "\xee\xa1\x8c" // U+e84c
|
||||||
#define ICON_FA_PIZZA_SLICE "\xef\xa0\x98" // U+f818
|
#define ICON_FA_PIZZA_SLICE "\xef\xa0\x98" // U+f818
|
||||||
#define ICON_FA_PLACE_OF_WORSHIP "\xef\x99\xbf" // U+f67f
|
#define ICON_FA_PLACE_OF_WORSHIP "\xef\x99\xbf" // U+f67f
|
||||||
#define ICON_FA_PLANE "\xef\x81\xb2" // U+f072
|
#define ICON_FA_PLANE "\xef\x81\xb2" // U+f072
|
||||||
@@ -1060,6 +1084,7 @@
|
|||||||
#define ICON_FA_S "S" // U+0053
|
#define ICON_FA_S "S" // U+0053
|
||||||
#define ICON_FA_SACK_DOLLAR "\xef\xa0\x9d" // U+f81d
|
#define ICON_FA_SACK_DOLLAR "\xef\xa0\x9d" // U+f81d
|
||||||
#define ICON_FA_SACK_XMARK "\xee\x95\xaa" // U+e56a
|
#define ICON_FA_SACK_XMARK "\xee\x95\xaa" // U+e56a
|
||||||
|
#define ICON_FA_SAGITTARIUS "\xee\xa1\x8d" // U+e84d
|
||||||
#define ICON_FA_SAILBOAT "\xee\x91\x85" // U+e445
|
#define ICON_FA_SAILBOAT "\xee\x91\x85" // U+e445
|
||||||
#define ICON_FA_SATELLITE "\xef\x9e\xbf" // U+f7bf
|
#define ICON_FA_SATELLITE "\xef\x9e\xbf" // U+f7bf
|
||||||
#define ICON_FA_SATELLITE_DISH "\xef\x9f\x80" // U+f7c0
|
#define ICON_FA_SATELLITE_DISH "\xef\x9f\x80" // U+f7c0
|
||||||
@@ -1073,6 +1098,7 @@
|
|||||||
#define ICON_FA_SCHOOL_FLAG "\xee\x95\xae" // U+e56e
|
#define ICON_FA_SCHOOL_FLAG "\xee\x95\xae" // U+e56e
|
||||||
#define ICON_FA_SCHOOL_LOCK "\xee\x95\xaf" // U+e56f
|
#define ICON_FA_SCHOOL_LOCK "\xee\x95\xaf" // U+e56f
|
||||||
#define ICON_FA_SCISSORS "\xef\x83\x84" // U+f0c4
|
#define ICON_FA_SCISSORS "\xef\x83\x84" // U+f0c4
|
||||||
|
#define ICON_FA_SCORPIO "\xee\xa1\x8e" // U+e84e
|
||||||
#define ICON_FA_SCREWDRIVER "\xef\x95\x8a" // U+f54a
|
#define ICON_FA_SCREWDRIVER "\xef\x95\x8a" // U+f54a
|
||||||
#define ICON_FA_SCREWDRIVER_WRENCH "\xef\x9f\x99" // U+f7d9
|
#define ICON_FA_SCREWDRIVER_WRENCH "\xef\x9f\x99" // U+f7d9
|
||||||
#define ICON_FA_SCROLL "\xef\x9c\x8e" // U+f70e
|
#define ICON_FA_SCROLL "\xef\x9c\x8e" // U+f70e
|
||||||
@@ -1080,6 +1106,7 @@
|
|||||||
#define ICON_FA_SD_CARD "\xef\x9f\x82" // U+f7c2
|
#define ICON_FA_SD_CARD "\xef\x9f\x82" // U+f7c2
|
||||||
#define ICON_FA_SECTION "\xee\x91\x87" // U+e447
|
#define ICON_FA_SECTION "\xee\x91\x87" // U+e447
|
||||||
#define ICON_FA_SEEDLING "\xef\x93\x98" // U+f4d8
|
#define ICON_FA_SEEDLING "\xef\x93\x98" // U+f4d8
|
||||||
|
#define ICON_FA_SEPTAGON "\xee\xa0\xa0" // U+e820
|
||||||
#define ICON_FA_SERVER "\xef\x88\xb3" // U+f233
|
#define ICON_FA_SERVER "\xef\x88\xb3" // U+f233
|
||||||
#define ICON_FA_SHAPES "\xef\x98\x9f" // U+f61f
|
#define ICON_FA_SHAPES "\xef\x98\x9f" // U+f61f
|
||||||
#define ICON_FA_SHARE "\xef\x81\xa4" // U+f064
|
#define ICON_FA_SHARE "\xef\x81\xa4" // U+f064
|
||||||
@@ -1108,6 +1135,8 @@
|
|||||||
#define ICON_FA_SIGNATURE "\xef\x96\xb7" // U+f5b7
|
#define ICON_FA_SIGNATURE "\xef\x96\xb7" // U+f5b7
|
||||||
#define ICON_FA_SIGNS_POST "\xef\x89\xb7" // U+f277
|
#define ICON_FA_SIGNS_POST "\xef\x89\xb7" // U+f277
|
||||||
#define ICON_FA_SIM_CARD "\xef\x9f\x84" // U+f7c4
|
#define ICON_FA_SIM_CARD "\xef\x9f\x84" // U+f7c4
|
||||||
|
#define ICON_FA_SINGLE_QUOTE_LEFT "\xee\xa0\x9b" // U+e81b
|
||||||
|
#define ICON_FA_SINGLE_QUOTE_RIGHT "\xee\xa0\x9c" // U+e81c
|
||||||
#define ICON_FA_SINK "\xee\x81\xad" // U+e06d
|
#define ICON_FA_SINK "\xee\x81\xad" // U+e06d
|
||||||
#define ICON_FA_SITEMAP "\xef\x83\xa8" // U+f0e8
|
#define ICON_FA_SITEMAP "\xef\x83\xa8" // U+f0e8
|
||||||
#define ICON_FA_SKULL "\xef\x95\x8c" // U+f54c
|
#define ICON_FA_SKULL "\xef\x95\x8c" // U+f54c
|
||||||
@@ -1131,12 +1160,14 @@
|
|||||||
#define ICON_FA_SPELL_CHECK "\xef\xa2\x91" // U+f891
|
#define ICON_FA_SPELL_CHECK "\xef\xa2\x91" // U+f891
|
||||||
#define ICON_FA_SPIDER "\xef\x9c\x97" // U+f717
|
#define ICON_FA_SPIDER "\xef\x9c\x97" // U+f717
|
||||||
#define ICON_FA_SPINNER "\xef\x84\x90" // U+f110
|
#define ICON_FA_SPINNER "\xef\x84\x90" // U+f110
|
||||||
|
#define ICON_FA_SPIRAL "\xee\xa0\x8a" // U+e80a
|
||||||
#define ICON_FA_SPLOTCH "\xef\x96\xbc" // U+f5bc
|
#define ICON_FA_SPLOTCH "\xef\x96\xbc" // U+f5bc
|
||||||
#define ICON_FA_SPOON "\xef\x8b\xa5" // U+f2e5
|
#define ICON_FA_SPOON "\xef\x8b\xa5" // U+f2e5
|
||||||
#define ICON_FA_SPRAY_CAN "\xef\x96\xbd" // U+f5bd
|
#define ICON_FA_SPRAY_CAN "\xef\x96\xbd" // U+f5bd
|
||||||
#define ICON_FA_SPRAY_CAN_SPARKLES "\xef\x97\x90" // U+f5d0
|
#define ICON_FA_SPRAY_CAN_SPARKLES "\xef\x97\x90" // U+f5d0
|
||||||
#define ICON_FA_SQUARE "\xef\x83\x88" // U+f0c8
|
#define ICON_FA_SQUARE "\xef\x83\x88" // U+f0c8
|
||||||
#define ICON_FA_SQUARE_ARROW_UP_RIGHT "\xef\x85\x8c" // U+f14c
|
#define ICON_FA_SQUARE_ARROW_UP_RIGHT "\xef\x85\x8c" // U+f14c
|
||||||
|
#define ICON_FA_SQUARE_BINARY "\xee\x9a\x9b" // U+e69b
|
||||||
#define ICON_FA_SQUARE_CARET_DOWN "\xef\x85\x90" // U+f150
|
#define ICON_FA_SQUARE_CARET_DOWN "\xef\x85\x90" // U+f150
|
||||||
#define ICON_FA_SQUARE_CARET_LEFT "\xef\x86\x91" // U+f191
|
#define ICON_FA_SQUARE_CARET_LEFT "\xef\x86\x91" // U+f191
|
||||||
#define ICON_FA_SQUARE_CARET_RIGHT "\xef\x85\x92" // U+f152
|
#define ICON_FA_SQUARE_CARET_RIGHT "\xef\x85\x92" // U+f152
|
||||||
@@ -1194,7 +1225,10 @@
|
|||||||
#define ICON_FA_T "T" // U+0054
|
#define ICON_FA_T "T" // U+0054
|
||||||
#define ICON_FA_TABLE "\xef\x83\x8e" // U+f0ce
|
#define ICON_FA_TABLE "\xef\x83\x8e" // U+f0ce
|
||||||
#define ICON_FA_TABLE_CELLS "\xef\x80\x8a" // U+f00a
|
#define ICON_FA_TABLE_CELLS "\xef\x80\x8a" // U+f00a
|
||||||
|
#define ICON_FA_TABLE_CELLS_COLUMN_LOCK "\xee\x99\xb8" // U+e678
|
||||||
#define ICON_FA_TABLE_CELLS_LARGE "\xef\x80\x89" // U+f009
|
#define ICON_FA_TABLE_CELLS_LARGE "\xef\x80\x89" // U+f009
|
||||||
|
#define ICON_FA_TABLE_CELLS_ROW_LOCK "\xee\x99\xba" // U+e67a
|
||||||
|
#define ICON_FA_TABLE_CELLS_ROW_UNLOCK "\xee\x9a\x91" // U+e691
|
||||||
#define ICON_FA_TABLE_COLUMNS "\xef\x83\x9b" // U+f0db
|
#define ICON_FA_TABLE_COLUMNS "\xef\x83\x9b" // U+f0db
|
||||||
#define ICON_FA_TABLE_LIST "\xef\x80\x8b" // U+f00b
|
#define ICON_FA_TABLE_LIST "\xef\x80\x8b" // U+f00b
|
||||||
#define ICON_FA_TABLE_TENNIS_PADDLE_BALL "\xef\x91\x9d" // U+f45d
|
#define ICON_FA_TABLE_TENNIS_PADDLE_BALL "\xef\x91\x9d" // U+f45d
|
||||||
@@ -1208,6 +1242,7 @@
|
|||||||
#define ICON_FA_TAPE "\xef\x93\x9b" // U+f4db
|
#define ICON_FA_TAPE "\xef\x93\x9b" // U+f4db
|
||||||
#define ICON_FA_TARP "\xee\x95\xbb" // U+e57b
|
#define ICON_FA_TARP "\xee\x95\xbb" // U+e57b
|
||||||
#define ICON_FA_TARP_DROPLET "\xee\x95\xbc" // U+e57c
|
#define ICON_FA_TARP_DROPLET "\xee\x95\xbc" // U+e57c
|
||||||
|
#define ICON_FA_TAURUS "\xee\xa1\x8f" // U+e84f
|
||||||
#define ICON_FA_TAXI "\xef\x86\xba" // U+f1ba
|
#define ICON_FA_TAXI "\xef\x86\xba" // U+f1ba
|
||||||
#define ICON_FA_TEETH "\xef\x98\xae" // U+f62e
|
#define ICON_FA_TEETH "\xef\x98\xae" // U+f62e
|
||||||
#define ICON_FA_TEETH_OPEN "\xef\x98\xaf" // U+f62f
|
#define ICON_FA_TEETH_OPEN "\xef\x98\xaf" // U+f62f
|
||||||
@@ -1235,6 +1270,7 @@
|
|||||||
#define ICON_FA_THUMBS_DOWN "\xef\x85\xa5" // U+f165
|
#define ICON_FA_THUMBS_DOWN "\xef\x85\xa5" // U+f165
|
||||||
#define ICON_FA_THUMBS_UP "\xef\x85\xa4" // U+f164
|
#define ICON_FA_THUMBS_UP "\xef\x85\xa4" // U+f164
|
||||||
#define ICON_FA_THUMBTACK "\xef\x82\x8d" // U+f08d
|
#define ICON_FA_THUMBTACK "\xef\x82\x8d" // U+f08d
|
||||||
|
#define ICON_FA_THUMBTACK_SLASH "\xee\x9a\x8f" // U+e68f
|
||||||
#define ICON_FA_TICKET "\xef\x85\x85" // U+f145
|
#define ICON_FA_TICKET "\xef\x85\x85" // U+f145
|
||||||
#define ICON_FA_TICKET_SIMPLE "\xef\x8f\xbf" // U+f3ff
|
#define ICON_FA_TICKET_SIMPLE "\xef\x8f\xbf" // U+f3ff
|
||||||
#define ICON_FA_TIMELINE "\xee\x8a\x9c" // U+e29c
|
#define ICON_FA_TIMELINE "\xee\x8a\x9c" // U+e29c
|
||||||
@@ -1310,8 +1346,6 @@
|
|||||||
#define ICON_FA_USER_GRADUATE "\xef\x94\x81" // U+f501
|
#define ICON_FA_USER_GRADUATE "\xef\x94\x81" // U+f501
|
||||||
#define ICON_FA_USER_GROUP "\xef\x94\x80" // U+f500
|
#define ICON_FA_USER_GROUP "\xef\x94\x80" // U+f500
|
||||||
#define ICON_FA_USER_INJURED "\xef\x9c\xa8" // U+f728
|
#define ICON_FA_USER_INJURED "\xef\x9c\xa8" // U+f728
|
||||||
#define ICON_FA_USER_LARGE "\xef\x90\x86" // U+f406
|
|
||||||
#define ICON_FA_USER_LARGE_SLASH "\xef\x93\xba" // U+f4fa
|
|
||||||
#define ICON_FA_USER_LOCK "\xef\x94\x82" // U+f502
|
#define ICON_FA_USER_LOCK "\xef\x94\x82" // U+f502
|
||||||
#define ICON_FA_USER_MINUS "\xef\x94\x83" // U+f503
|
#define ICON_FA_USER_MINUS "\xef\x94\x83" // U+f503
|
||||||
#define ICON_FA_USER_NINJA "\xef\x94\x84" // U+f504
|
#define ICON_FA_USER_NINJA "\xef\x94\x84" // U+f504
|
||||||
@@ -1336,7 +1370,6 @@
|
|||||||
#define ICON_FA_V "V" // U+0056
|
#define ICON_FA_V "V" // U+0056
|
||||||
#define ICON_FA_VAN_SHUTTLE "\xef\x96\xb6" // U+f5b6
|
#define ICON_FA_VAN_SHUTTLE "\xef\x96\xb6" // U+f5b6
|
||||||
#define ICON_FA_VAULT "\xee\x8b\x85" // U+e2c5
|
#define ICON_FA_VAULT "\xee\x8b\x85" // U+e2c5
|
||||||
#define ICON_FA_VECTOR_SQUARE "\xef\x97\x8b" // U+f5cb
|
|
||||||
#define ICON_FA_VENUS "\xef\x88\xa1" // U+f221
|
#define ICON_FA_VENUS "\xef\x88\xa1" // U+f221
|
||||||
#define ICON_FA_VENUS_DOUBLE "\xef\x88\xa6" // U+f226
|
#define ICON_FA_VENUS_DOUBLE "\xef\x88\xa6" // U+f226
|
||||||
#define ICON_FA_VENUS_MARS "\xef\x88\xa8" // U+f228
|
#define ICON_FA_VENUS_MARS "\xef\x88\xa8" // U+f228
|
||||||
@@ -1349,6 +1382,7 @@
|
|||||||
#define ICON_FA_VIDEO "\xef\x80\xbd" // U+f03d
|
#define ICON_FA_VIDEO "\xef\x80\xbd" // U+f03d
|
||||||
#define ICON_FA_VIDEO_SLASH "\xef\x93\xa2" // U+f4e2
|
#define ICON_FA_VIDEO_SLASH "\xef\x93\xa2" // U+f4e2
|
||||||
#define ICON_FA_VIHARA "\xef\x9a\xa7" // U+f6a7
|
#define ICON_FA_VIHARA "\xef\x9a\xa7" // U+f6a7
|
||||||
|
#define ICON_FA_VIRGO "\xee\xa1\x90" // U+e850
|
||||||
#define ICON_FA_VIRUS "\xee\x81\xb4" // U+e074
|
#define ICON_FA_VIRUS "\xee\x81\xb4" // U+e074
|
||||||
#define ICON_FA_VIRUS_COVID "\xee\x92\xa8" // U+e4a8
|
#define ICON_FA_VIRUS_COVID "\xee\x92\xa8" // U+e4a8
|
||||||
#define ICON_FA_VIRUS_COVID_SLASH "\xee\x92\xa9" // U+e4a9
|
#define ICON_FA_VIRUS_COVID_SLASH "\xee\x92\xa9" // U+e4a9
|
||||||
@@ -1357,6 +1391,7 @@
|
|||||||
#define ICON_FA_VOICEMAIL "\xef\xa2\x97" // U+f897
|
#define ICON_FA_VOICEMAIL "\xef\xa2\x97" // U+f897
|
||||||
#define ICON_FA_VOLCANO "\xef\x9d\xb0" // U+f770
|
#define ICON_FA_VOLCANO "\xef\x9d\xb0" // U+f770
|
||||||
#define ICON_FA_VOLLEYBALL "\xef\x91\x9f" // U+f45f
|
#define ICON_FA_VOLLEYBALL "\xef\x91\x9f" // U+f45f
|
||||||
|
#define ICON_FA_VOLUME "\xef\x9a\xa8" // U+f6a8
|
||||||
#define ICON_FA_VOLUME_HIGH "\xef\x80\xa8" // U+f028
|
#define ICON_FA_VOLUME_HIGH "\xef\x80\xa8" // U+f028
|
||||||
#define ICON_FA_VOLUME_LOW "\xef\x80\xa7" // U+f027
|
#define ICON_FA_VOLUME_LOW "\xef\x80\xa7" // U+f027
|
||||||
#define ICON_FA_VOLUME_OFF "\xef\x80\xa6" // U+f026
|
#define ICON_FA_VOLUME_OFF "\xef\x80\xa6" // U+f026
|
||||||
@@ -1372,6 +1407,7 @@
|
|||||||
#define ICON_FA_WATER "\xef\x9d\xb3" // U+f773
|
#define ICON_FA_WATER "\xef\x9d\xb3" // U+f773
|
||||||
#define ICON_FA_WATER_LADDER "\xef\x97\x85" // U+f5c5
|
#define ICON_FA_WATER_LADDER "\xef\x97\x85" // U+f5c5
|
||||||
#define ICON_FA_WAVE_SQUARE "\xef\xa0\xbe" // U+f83e
|
#define ICON_FA_WAVE_SQUARE "\xef\xa0\xbe" // U+f83e
|
||||||
|
#define ICON_FA_WEB_AWESOME "\xee\x9a\x82" // U+e682
|
||||||
#define ICON_FA_WEIGHT_HANGING "\xef\x97\x8d" // U+f5cd
|
#define ICON_FA_WEIGHT_HANGING "\xef\x97\x8d" // U+f5cd
|
||||||
#define ICON_FA_WEIGHT_SCALE "\xef\x92\x96" // U+f496
|
#define ICON_FA_WEIGHT_SCALE "\xef\x92\x96" // U+f496
|
||||||
#define ICON_FA_WHEAT_AWN "\xee\x8b\x8d" // U+e2cd
|
#define ICON_FA_WHEAT_AWN "\xee\x8b\x8d" // U+e2cd
|
||||||
@@ -1,52 +1,60 @@
|
|||||||
#include "IconsFontAwesome6.h"
|
|
||||||
#include "TracyAchievements.hpp"
|
#include "TracyAchievements.hpp"
|
||||||
#include "TracyImGui.hpp"
|
#include "TracyEmbed.hpp"
|
||||||
#include "TracySourceContents.hpp"
|
|
||||||
#include "TracyWeb.hpp"
|
#include "data/Text100Million.hpp"
|
||||||
#include "../Fonts.hpp"
|
#include "data/TextConnectToClient.hpp"
|
||||||
|
#include "data/TextFindZone.hpp"
|
||||||
|
#include "data/TextFrameImages.hpp"
|
||||||
|
#include "data/TextGlobalSettings.hpp"
|
||||||
|
#include "data/TextInstrumentFrames.hpp"
|
||||||
|
#include "data/TextInstrumentationIntro.hpp"
|
||||||
|
#include "data/TextInstrumentationStatistics.hpp"
|
||||||
|
#include "data/TextIntro.hpp"
|
||||||
|
#include "data/TextLoadTrace.hpp"
|
||||||
|
#include "data/TextSamplingIntro.hpp"
|
||||||
|
#include "data/TextSaveTrace.hpp"
|
||||||
|
|
||||||
namespace tracy::data
|
namespace tracy::data
|
||||||
{
|
{
|
||||||
|
|
||||||
AchievementItem ai_samplingIntro = { "samplingIntro", "Sampling program execution", [](){
|
static std::string UnpackImpl( size_t size, size_t lz4Size, const uint8_t* data )
|
||||||
ImGui::TextWrapped( "Sampling program execution is a great way to find out where the hot spots are in your program. It can be used to find out which functions take the most time, or which lines of code are executed the most often." );
|
{
|
||||||
ImGui::TextWrapped( "While instrumentation requires changes to your code, sampling does not. However, because of the way it works, the results are coarser and it's not possible to know when functions are called or when they return." );
|
std::string ret;
|
||||||
ImGui::TextWrapped( "Sampling is automatic on Linux. On Windows, you must run the profiled application as an administrator for it to work." );
|
const EmbedData unembed( size, lz4Size, data );
|
||||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
ret.assign( unembed.data(), unembed.size() );
|
||||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
return ret;
|
||||||
ImGui::TextWrapped( "Depending on your system configuration, some additional steps may be required. Please refer to the user manual for more information." );
|
}
|
||||||
ImGui::PopStyleColor();
|
|
||||||
ImGui::PopFont();
|
#define Unpack( name ) UnpackImpl( Embed::name##Size, Embed::name##Lz4Size, Embed::name##Data )
|
||||||
} };
|
|
||||||
|
|
||||||
|
AchievementItem ai_samplingIntro = {
|
||||||
|
.id = "samplingIntro",
|
||||||
|
.name = "Sampling program execution",
|
||||||
|
.text = Unpack( TextSamplingIntro ),
|
||||||
|
};
|
||||||
|
|
||||||
AchievementItem* ac_samplingItems[] = { &ai_samplingIntro, nullptr };
|
AchievementItem* ac_samplingItems[] = { &ai_samplingIntro, nullptr };
|
||||||
AchievementCategory ac_sampling = { "sampling", "Sampling", ac_samplingItems };
|
AchievementCategory ac_sampling = { "sampling", "Sampling", ac_samplingItems };
|
||||||
|
|
||||||
|
|
||||||
AchievementItem ai_100million = { "100million", "It's over 100 million!", [](){
|
AchievementItem ai_100million = {
|
||||||
ImGui::TextWrapped( "Tracy can handle a lot of data. How about 100 million zones in a single trace? Add a lot of zones to your program and see how it handles it!" );
|
.id = "100million",
|
||||||
ImGui::TextWrapped( "Capturing a long-running profile trace is easy. Need to profile an hour of your program execution? You can do it." );
|
.name = "It's over 100 million!",
|
||||||
ImGui::TextWrapped( "Note that it doesn't make much sense to instrument every little function you might have. The cost of the instrumentation itself will be higher than the cost of the function in such a case." );
|
.text = Unpack( Text100Million )
|
||||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
};
|
||||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
|
||||||
ImGui::TextWrapped( "Keep in mind that the more zones you have, the more memory and CPU time the profiler will use. Be careful not to run out of memory." );
|
|
||||||
ImGui::TextWrapped( "To capture 100 million zones, you will need approximately 4 GB of RAM." );
|
|
||||||
ImGui::PopStyleColor();
|
|
||||||
ImGui::PopFont();
|
|
||||||
} };
|
|
||||||
|
|
||||||
AchievementItem ai_instrumentationStatistics = { "instrumentationStatistics", "Show me the stats!", [](){
|
AchievementItem ai_instrumentationStatistics = {
|
||||||
ImGui::TextWrapped( "Once you have instrumented your application, you can view the statistics for each zone in the timeline. This allows you to see how much time is spent in each zone and how many times it is called." );
|
.id = "instrumentationStatistics",
|
||||||
ImGui::TextWrapped( "To view the statistics, click on the \"" ICON_FA_ARROW_UP_WIDE_SHORT " Statistics\" button on the top bar. This will open a new window with a list of all zones in the trace." );
|
.name = "Show me the stats!",
|
||||||
} };
|
.text = Unpack( TextInstrumentationStatistics )
|
||||||
|
};
|
||||||
|
|
||||||
AchievementItem ai_findZone = { "findZone", "Find some zones", [](){
|
AchievementItem ai_findZone = {
|
||||||
ImGui::TextWrapped( "You can search for zones in the trace by opening the search window with the \"" ICON_FA_MAGNIFYING_GLASS " Find zone\" button on the top bar. It will ask you for the zone name, which in most cases will be the function name in the code." );
|
.id = "findZone",
|
||||||
ImGui::TextWrapped( "The search may find more than one zone with the same name. A list of all the zones found is displayed, and you can select any of them." );
|
.name = "Find some zones",
|
||||||
ImGui::TextWrapped( "Alternatively, you can open the Statistics window and click an entry there. This will open the Find zone window as if you had searched for that zone." );
|
.text = Unpack( TextFindZone )
|
||||||
ImGui::TextWrapped( "When a zone is selected, a number of statistics are displayed to help you understand the performance of your application. In addition, a histogram of the zone execution times is displayed to make it easier for you to determine the performance of the profiled code. Be sure to select a zone with a large number of calls to make the histogram look interesting!" );
|
};
|
||||||
ImGui::TextWrapped( "Note that you can draw a range on the histogram to limit the number of entries displayed in the zone list below. This list allows you to examine each zone individually. There are also a number of zone groupings that you can select. Each group can be selected and the time associated with the selected group will be highlighted on the histogram." );
|
|
||||||
} };
|
|
||||||
|
|
||||||
AchievementItem* ac_instrumentationIntroItems[] = {
|
AchievementItem* ac_instrumentationIntroItems[] = {
|
||||||
&ai_100million,
|
&ai_100million,
|
||||||
@@ -55,90 +63,46 @@ AchievementItem* ac_instrumentationIntroItems[] = {
|
|||||||
nullptr
|
nullptr
|
||||||
};
|
};
|
||||||
|
|
||||||
AchievementItem ai_instrumentationIntro = { "instrumentationIntro", "Instrumentating your application", [](){
|
AchievementItem ai_instrumentationIntro = {
|
||||||
constexpr const char* src = R"(#include "Tracy.hpp"
|
.id = "instrumentationIntro",
|
||||||
|
.name = "Instrumentating your application",
|
||||||
|
.text = Unpack( TextInstrumentationIntro ),
|
||||||
|
.items = ac_instrumentationIntroItems
|
||||||
|
};
|
||||||
|
|
||||||
void SomeFunction()
|
AchievementItem ai_frameImages = {
|
||||||
{
|
.id = "frameImages",
|
||||||
ZoneScoped;
|
.name = "A picture is worth a thousand words",
|
||||||
// Your code here
|
.text = Unpack( TextFrameImages )
|
||||||
}
|
};
|
||||||
)";
|
|
||||||
|
|
||||||
static SourceContents sc;
|
|
||||||
sc.Parse( src );
|
|
||||||
|
|
||||||
ImGui::TextWrapped( "Instrumentation is a powerful feature that allows you to see the exact runtime of each call to the selected set of functions. The downside is that it takes a bit of manual work to get it set up." );
|
|
||||||
ImGui::TextWrapped( "To get started, open a source file and include the Tracy.hpp header. This will give you access to a variety of macros provided by Tracy. Next, add the ZoneScoped macro to the beginning of one of your functions, like this:" );
|
|
||||||
ImGui::PushFont( g_fonts.mono, FontNormal );
|
|
||||||
PrintSource( sc.get() );
|
|
||||||
ImGui::PopFont();
|
|
||||||
ImGui::TextWrapped( "Now, when you profile your application, you will see a new zone appear on the timeline for each call to the function. This allows you to see how much time is spent in each call and how many times the function is called." );
|
|
||||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
|
||||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
|
||||||
ImGui::TextWrapped( "Note: The ZoneScoped macro is just one of the many macros provided by Tracy. See the documentation for more information." );
|
|
||||||
ImGui::TextWrapped( "The above description applies to C++ code, but things are done similarly in other programming languages. Refer to the documentation for your language for more information." );
|
|
||||||
ImGui::PopStyleColor();
|
|
||||||
ImGui::PopFont();
|
|
||||||
}, ac_instrumentationIntroItems };
|
|
||||||
|
|
||||||
AchievementItem ai_frameImages = { "frameImages", "A picture is worth a thousand words", [](){
|
|
||||||
ImGui::TextWrapped( "Tracy allows you to add context to each frame, by attaching a screenshot. You can do this with the FrameImage macro." );
|
|
||||||
ImGui::TextWrapped( "You will have to do the screen capture and resizing yourself, which can be a bit complicated. The manual provides a sample code that shows how to do this in a performant way." );
|
|
||||||
ImGui::TextWrapped( "The frame images are displayed in the context of a frame, for example, when you hover over the frame in the timeline or in the frame graph at the top of the screen." );
|
|
||||||
ImGui::TextWrapped( "You can even view a recording of what your application was doing by clicking the " ICON_FA_SCREWDRIVER_WRENCH " icon and then selecting the \"" ICON_FA_PLAY " Playback\" option. Try it out!" );
|
|
||||||
ImGui::TextWrapped( "The FrameImage macro is a great way to see what happened in your application at a particular time. Maybe you have a performance problem that only occurs when a certain object is on the screen?" );
|
|
||||||
} };
|
|
||||||
|
|
||||||
AchievementItem* ac_instrumentFramesItems[] = {
|
AchievementItem* ac_instrumentFramesItems[] = {
|
||||||
&ai_frameImages,
|
&ai_frameImages,
|
||||||
nullptr
|
nullptr
|
||||||
};
|
};
|
||||||
|
|
||||||
AchievementItem ai_instrumentFrames = { "instrumentFrames", "Instrumenting frames", [](){
|
AchievementItem ai_instrumentFrames = {
|
||||||
constexpr const char* src = R"(#include "Tracy.hpp"
|
.id = "instrumentFrames",
|
||||||
|
.name = "Instrumenting frames",
|
||||||
void Render()
|
.text = Unpack( TextInstrumentFrames ),
|
||||||
{
|
.items = ac_instrumentFramesItems
|
||||||
// Render the frame
|
};
|
||||||
SwapBuffers();
|
|
||||||
FrameMark;
|
|
||||||
}
|
|
||||||
)";
|
|
||||||
|
|
||||||
static SourceContents sc;
|
|
||||||
sc.Parse( src );
|
|
||||||
|
|
||||||
ImGui::TextWrapped( "In addition to instrumenting functions, you can also instrument frames. This allows you to see how much time is spent in each frame of your application." );
|
|
||||||
ImGui::TextWrapped( "To instrument frames, you need to add the FrameMark macro at the beginning of each frame. This can be done in the main loop of your application, or in a separate function that is called at the beginning of each frame." );
|
|
||||||
ImGui::PushFont( g_fonts.mono, FontNormal );
|
|
||||||
PrintSource( sc.get() );
|
|
||||||
ImGui::PopFont();
|
|
||||||
ImGui::TextWrapped( "When you profile your application, you will see a new frame appear on the timeline each time the FrameMark macro is called. This allows you to see how much time is spent in each frame and how many frames are rendered per second." );
|
|
||||||
ImGui::TextWrapped( "The FrameMark macro is a great way to see at a glance how your application is performing over time. Maybe there are some performance problems that only appear after a few minutes of running the application? A frame graph is drawn at the top of the profiler window where you can see the timing of all frames." );
|
|
||||||
ImGui::TextWrapped( "Note that some applications do not have a frame-based structure, and in such cases, frame instrumentation may not be useful. That's ok." );
|
|
||||||
}, ac_instrumentFramesItems };
|
|
||||||
|
|
||||||
AchievementItem* ac_instrumentationItems[] = { &ai_instrumentationIntro, &ai_instrumentFrames, nullptr };
|
AchievementItem* ac_instrumentationItems[] = { &ai_instrumentationIntro, &ai_instrumentFrames, nullptr };
|
||||||
AchievementCategory ac_instrumentation = { "instrumentation", "Instrumentation", ac_instrumentationItems };
|
AchievementCategory ac_instrumentation = { "instrumentation", "Instrumentation", ac_instrumentationItems };
|
||||||
|
|
||||||
|
|
||||||
AchievementItem ai_loadTrace = { "loadTrace", "Load a trace", [](){
|
AchievementItem ai_loadTrace = {
|
||||||
ImGui::TextWrapped( "You can open a previously saved trace file (or one received from a friend) with the \"" ICON_FA_FOLDER_OPEN " Open saved trace\" button on the welcome screen." );
|
.id = "loadTrace",
|
||||||
} };
|
.name = "Load a trace",
|
||||||
|
.text = Unpack( TextLoadTrace )
|
||||||
|
};
|
||||||
|
|
||||||
AchievementItem ai_saveTrace = { "saveTrace", "Save a trace", [](){
|
AchievementItem ai_saveTrace = {
|
||||||
ImGui::TextWrapped( "Now that you have traced your application (or are in the process of doing so), you can save it to disk for future reference. You can do this by clicking on the " ICON_FA_WIFI " icon in the top left corner of the screen and then clicking on the \"" ICON_FA_FLOPPY_DISK " Save trace\" button." );
|
.id = "saveTrace",
|
||||||
ImGui::TextWrapped( "Keeping old traces on hand can be beneficial, as you can compare the performance of your optimizations with what you had before." );
|
.name = "Save a trace",
|
||||||
ImGui::TextWrapped( "You can also share the trace with your friends or co-workers by sending them the trace file." );
|
.text = Unpack( TextSaveTrace )
|
||||||
ImGui::Spacing();
|
};
|
||||||
tracy::TextColoredUnformatted( 0xFF44FFFF, ICON_FA_TRIANGLE_EXCLAMATION );
|
|
||||||
ImGui::SameLine();
|
|
||||||
ImGui::TextUnformatted( "Warning" );
|
|
||||||
ImGui::SameLine();
|
|
||||||
tracy::TextColoredUnformatted( 0xFF44FFFF, ICON_FA_TRIANGLE_EXCLAMATION );
|
|
||||||
ImGui::TextWrapped( "Trace files can contain sensitive information about your application, such as program code, or even the contents of source files. Be careful when sharing them with others." );
|
|
||||||
} };
|
|
||||||
|
|
||||||
AchievementItem* ac_connectToServerItems[] = {
|
AchievementItem* ac_connectToServerItems[] = {
|
||||||
&ai_saveTrace,
|
&ai_saveTrace,
|
||||||
@@ -152,23 +116,19 @@ AchievementItem* ac_connectToServerUnlock[] = {
|
|||||||
nullptr
|
nullptr
|
||||||
};
|
};
|
||||||
|
|
||||||
AchievementItem ai_connectToServer = { "connectToClient", "First profiling session", [](){
|
AchievementItem ai_connectToServer = {
|
||||||
ImGui::TextWrapped( "Let's start our adventure by instrumenting your application and connecting it to the profiler. Here's a quick refresher:" );
|
.id = "connectToClient",
|
||||||
ImGui::TextWrapped( " 1. Integrate Tracy Profiler into your application. This can be done using CMake, Meson, or simply by adding the source files to your project." );
|
.name = "First profiling session",
|
||||||
ImGui::TextWrapped( " 2. Make sure that TracyClient.cpp (or the Tracy library) is included in your build." );
|
.text = Unpack( TextConnectToClient ),
|
||||||
ImGui::TextWrapped( " 3. Define TRACY_ENABLE in your build configuration, for the whole application. Do not do it in a single source file because it won't work." );
|
.items = ac_connectToServerItems,
|
||||||
ImGui::TextWrapped( " 4. Start your application, and \"" ICON_FA_WIFI " Connect\" to it with the profiler." );
|
.unlocks = ac_connectToServerUnlock
|
||||||
ImGui::TextWrapped( "Please refer to the user manual for more details." );
|
};
|
||||||
if( ImGui::SmallButton( "Download the user manual" ) )
|
|
||||||
{
|
|
||||||
tracy::OpenWebpage( "https://github.com/wolfpld/tracy/releases" );
|
|
||||||
}
|
|
||||||
}, ac_connectToServerItems, ac_connectToServerUnlock };
|
|
||||||
|
|
||||||
AchievementItem ai_globalSettings = { "globalSettings", "Global settings", [](){
|
AchievementItem ai_globalSettings = {
|
||||||
ImGui::TextWrapped( "Tracy has a variety of settings that can be adjusted to suit your needs. These settings can be found by clicking on the " ICON_FA_WRENCH " icon on the welcome screen. This will open the about window, where you can expand the \"" ICON_FA_TOOLBOX " Global settings\" menu." );
|
.id = "globalSettings",
|
||||||
ImGui::TextWrapped( "The settings are saved between sessions, so you only need to set them once." );
|
.name = "Global settings",
|
||||||
} };
|
.text = Unpack( TextGlobalSettings )
|
||||||
|
};
|
||||||
|
|
||||||
AchievementItem* ac_achievementsIntroItems[] = {
|
AchievementItem* ac_achievementsIntroItems[] = {
|
||||||
&ai_connectToServer,
|
&ai_connectToServer,
|
||||||
@@ -176,18 +136,14 @@ AchievementItem* ac_achievementsIntroItems[] = {
|
|||||||
nullptr
|
nullptr
|
||||||
};
|
};
|
||||||
|
|
||||||
AchievementItem ai_achievementsIntro = { "achievementsIntro", "Click here to discover achievements!", [](){
|
AchievementItem ai_achievementsIntro = {
|
||||||
ImGui::TextWrapped( "Clicking on the " ICON_FA_STAR " button opens the Achievements List. Here you can see the tasks to be completed along with a short description of what needs to be done." );
|
.id = "achievementsIntro",
|
||||||
ImGui::TextWrapped( "As you complete each Achievement, new Achievements will appear, so be sure to keep checking the list for new ones!" );
|
.name = "Click here to discover achievements!",
|
||||||
ImGui::TextWrapped( "To make the new things easier to spot, the Achievements List will show a marker next to them. The achievements " ICON_FA_STAR " button will glow yellow when there are new things to see." );
|
.text = Unpack( TextIntro ),
|
||||||
ImGui::TextUnformatted( "New tasks:" );
|
.items = ac_achievementsIntroItems,
|
||||||
ImGui::SameLine();
|
.keepOpen = true,
|
||||||
TextColoredUnformatted( 0xFF4488FF, ICON_FA_CIRCLE_EXCLAMATION );
|
.unlockTime = 1
|
||||||
ImGui::TextUnformatted( "Completed tasks:" );
|
};
|
||||||
ImGui::SameLine();
|
|
||||||
TextColoredUnformatted( 0xFF44FF44, ICON_FA_CIRCLE_CHECK );
|
|
||||||
ImGui::TextWrapped( "Good luck!" );
|
|
||||||
}, ac_achievementsIntroItems, nullptr, true, 1 };
|
|
||||||
|
|
||||||
AchievementItem* ac_firstStepsItems[] = { &ai_achievementsIntro, nullptr };
|
AchievementItem* ac_firstStepsItems[] = { &ai_achievementsIntro, nullptr };
|
||||||
AchievementCategory ac_firstSteps = { "firstSteps", "First steps", ac_firstStepsItems, 1 };
|
AchievementCategory ac_firstSteps = { "firstSteps", "First steps", ac_firstStepsItems, 1 };
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ struct AchievementItem
|
|||||||
{
|
{
|
||||||
const char* id;
|
const char* id;
|
||||||
const char* name;
|
const char* name;
|
||||||
void(*description)();
|
std::string text;
|
||||||
AchievementItem** items;
|
AchievementItem** items;
|
||||||
AchievementItem** unlocks;
|
AchievementItem** unlocks;
|
||||||
bool keepOpen;
|
bool keepOpen;
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
#include "imgui.h"
|
#include "imgui.h"
|
||||||
#include "../Fonts.hpp"
|
#include "../Fonts.hpp"
|
||||||
|
|
||||||
#include "IconsFontAwesome6.h"
|
#include "IconsFontAwesome7.h"
|
||||||
#include "TracyBadVersion.hpp"
|
#include "TracyBadVersion.hpp"
|
||||||
#include "TracyImGui.hpp"
|
#include "TracyImGui.hpp"
|
||||||
#include "TracyWeb.hpp"
|
#include "TracyWeb.hpp"
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
#include "imgui_internal.h"
|
#include "imgui_internal.h"
|
||||||
|
|
||||||
#include "../public/common/TracyForceInline.hpp"
|
#include "../public/common/TracyForceInline.hpp"
|
||||||
#include "IconsFontAwesome6.h"
|
#include "IconsFontAwesome7.h"
|
||||||
#include "TracySourceTokenizer.hpp"
|
#include "TracySourceTokenizer.hpp"
|
||||||
|
|
||||||
ImTextureID GetProfilerIconTexture();
|
ImTextureID GetProfilerIconTexture();
|
||||||
@@ -290,7 +290,7 @@ static constexpr const uint32_t AsmSyntaxColors[] = {
|
|||||||
|
|
||||||
[[maybe_unused]] static tracy_force_inline void TooltipIfHovered( const char* text )
|
[[maybe_unused]] static tracy_force_inline void TooltipIfHovered( const char* text )
|
||||||
{
|
{
|
||||||
if( !ImGui::IsItemHovered() ) return;
|
if( !ImGui::IsItemHovered( ImGuiHoveredFlags_AllowWhenDisabled ) ) return;
|
||||||
ImGui::BeginTooltip();
|
ImGui::BeginTooltip();
|
||||||
ImGui::TextUnformatted( text );
|
ImGui::TextUnformatted( text );
|
||||||
ImGui::EndTooltip();
|
ImGui::EndTooltip();
|
||||||
|
|||||||
@@ -166,6 +166,40 @@ public:
|
|||||||
ImGui::TextUnformatted( ". " );
|
ImGui::TextUnformatted( ". " );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case MD_BLOCK_ADMONITION:
|
||||||
|
{
|
||||||
|
Separate();
|
||||||
|
ImGui::Indent();
|
||||||
|
origin = ImGui::GetCursorScreenPos();
|
||||||
|
auto admonition = ((MD_BLOCK_ADMONITION_DETAIL*)detail);
|
||||||
|
switch( admonition->type.text[0] )
|
||||||
|
{
|
||||||
|
case 'n': // note
|
||||||
|
color = 0xFFEB6F1F;
|
||||||
|
TextColoredUnformatted( color, ICON_FA_CIRCLE_INFO " " );
|
||||||
|
break;
|
||||||
|
case 't': // tip
|
||||||
|
color = 0xFF368623;
|
||||||
|
TextColoredUnformatted( color, ICON_FA_LIGHTBULB " " );
|
||||||
|
break;
|
||||||
|
case 'i': // important
|
||||||
|
color = 0xFFE55789;
|
||||||
|
TextColoredUnformatted( color, ICON_FA_MESSAGE " " );
|
||||||
|
break;
|
||||||
|
case 'w': // warning
|
||||||
|
color = 0xFF036A9E;
|
||||||
|
TextColoredUnformatted( color, ICON_FA_TRIANGLE_EXCLAMATION " " );
|
||||||
|
break;
|
||||||
|
case 'c': // caution
|
||||||
|
color = 0xFF3336DA;
|
||||||
|
TextColoredUnformatted( color, ICON_FA_HAND " " );
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert( false );
|
||||||
|
}
|
||||||
|
Glue();
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -223,6 +257,15 @@ public:
|
|||||||
case MD_BLOCK_FOOTNOTE_DEF:
|
case MD_BLOCK_FOOTNOTE_DEF:
|
||||||
ImGui::PopFont();
|
ImGui::PopFont();
|
||||||
break;
|
break;
|
||||||
|
case MD_BLOCK_ADMONITION:
|
||||||
|
{
|
||||||
|
const auto scale = GetScale();
|
||||||
|
const auto pos = ImGui::GetCursorScreenPos();
|
||||||
|
const auto offset = ImVec2( 8.f * scale, 0 );
|
||||||
|
ImGui::Unindent();
|
||||||
|
ImGui::GetWindowDrawList()->AddLine( origin - offset, pos - offset, color, 2.f * scale );
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -493,6 +536,9 @@ private:
|
|||||||
|
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
|
|
||||||
|
uint32_t color;
|
||||||
|
ImVec2 origin;
|
||||||
|
|
||||||
std::vector<List> lists;
|
std::vector<List> lists;
|
||||||
std::string link;
|
std::string link;
|
||||||
|
|
||||||
@@ -507,7 +553,7 @@ Markdown::Markdown( View* view, Worker* worker )
|
|||||||
, m_worker( worker )
|
, m_worker( worker )
|
||||||
{
|
{
|
||||||
memset( m_parser, 0, sizeof( MD_PARSER ) );
|
memset( m_parser, 0, sizeof( MD_PARSER ) );
|
||||||
m_parser->flags = MD_FLAG_COLLAPSEWHITESPACE | MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_NOHTML | MD_FLAG_TABLES | MD_FLAG_TASKLISTS | MD_FLAG_STRIKETHROUGH | MD_FLAG_FOOTNOTES;
|
m_parser->flags = MD_FLAG_COLLAPSEWHITESPACE | MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_NOHTML | MD_FLAG_TABLES | MD_FLAG_TASKLISTS | MD_FLAG_STRIKETHROUGH | MD_FLAG_FOOTNOTES | MD_FLAG_ADMONITIONS;
|
||||||
m_parser->enter_block = []( MD_BLOCKTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->EnterBlock( type, detail ); };
|
m_parser->enter_block = []( MD_BLOCKTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->EnterBlock( type, detail ); };
|
||||||
m_parser->leave_block = []( MD_BLOCKTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->LeaveBlock( type, detail ); };
|
m_parser->leave_block = []( MD_BLOCKTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->LeaveBlock( type, detail ); };
|
||||||
m_parser->enter_span = []( MD_SPANTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->EnterSpan( type, detail ); };
|
m_parser->enter_span = []( MD_SPANTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->EnterSpan( type, detail ); };
|
||||||
|
|||||||
221
profiler/src/profiler/TracyNameGen.cpp
Normal file
221
profiler/src/profiler/TracyNameGen.cpp
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <random>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "TracyNameGen.hpp"
|
||||||
|
|
||||||
|
namespace tracy
|
||||||
|
{
|
||||||
|
|
||||||
|
struct NameBank
|
||||||
|
{
|
||||||
|
const char* const* adjectives;
|
||||||
|
const char* const* nouns;
|
||||||
|
size_t numAdjectives;
|
||||||
|
size_t numNouns;
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr const char* AnalysisAdjectives[] = {
|
||||||
|
"Granular", "Forensic", "Acute", "Lucid", "Precise",
|
||||||
|
"Deep", "Exact", "Critical", "Analytical", "Transparent",
|
||||||
|
"Subtle", "Sharp", "Rigid", "Focused", "Absolute",
|
||||||
|
"Meticulous", "Spectral", "Diagnostic", "Pervasive", "Introspective",
|
||||||
|
"Systematic", "Optical", "Minute", "Piercing", "Detailed",
|
||||||
|
"Scrutinized", "Clear", "Keen", "Rigorous", "Vast",
|
||||||
|
"Incisive", "Exhaustive", "Lateral", "Prismatic", "Observant"
|
||||||
|
};
|
||||||
|
constexpr const char* AnalysisNouns[] = {
|
||||||
|
"Probe", "Trace", "Lens", "Scope", "Metric",
|
||||||
|
"Insight", "Scan", "Audit", "Point", "Vector",
|
||||||
|
"Signal", "Marker", "Frame", "Detail", "View",
|
||||||
|
"Spectrum", "Snapshot", "Blueprint", "Aperture", "Index",
|
||||||
|
"Radar", "Prism", "Gauge", "Focal", "Pattern",
|
||||||
|
"Echo", "Signature", "Horizon", "Mirror", "Scale",
|
||||||
|
"Telemetry", "Graph", "Stratum", "Artifact", "Aspect"
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr const char* PerformanceAdjectives[] = {
|
||||||
|
"Swift", "Lean", "Kinetic", "Agile", "Hyper",
|
||||||
|
"Rapid", "Fluid", "Peak", "Instant", "Nimble",
|
||||||
|
"Optimal", "Sonic", "Linear", "Warp", "Turbo",
|
||||||
|
"Frictionless", "Seamless", "Electric", "Blazing", "Aerodynamic",
|
||||||
|
"Quantum", "Prompt", "Direct", "Streamlined", "Volatile",
|
||||||
|
"Highgain", "Rapidfire", "Torrential", "Sleek", "Velocity",
|
||||||
|
"Dynamic", "Active", "Persistent", "Lightweight", "Snappy"
|
||||||
|
};
|
||||||
|
constexpr const char* PerformanceNouns[] = {
|
||||||
|
"Pulse", "Flow", "Cycle", "Burst", "Stream",
|
||||||
|
"Tick", "Glide", "Shift", "Velocity", "Spike",
|
||||||
|
"Pace", "Rhythm", "Drive", "Path", "Edge",
|
||||||
|
"Sprint", "Torrent", "Current", "Surge", "Momentum",
|
||||||
|
"Flux", "Wave", "Accelerator", "Spark", "Jet",
|
||||||
|
"Thrust", "Orbit", "Apex", "Bolt", "Phase",
|
||||||
|
"Rush", "Impact", "Frequency", "Lapse", "Kick"
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr const char* CoreAdjectives[] = {
|
||||||
|
"Binary", "Raw", "Atomic", "Static", "Core",
|
||||||
|
"Virtual", "Base", "Solid", "Dense", "Opaque",
|
||||||
|
"Primitive", "Native", "Hard", "Stable", "Immutable",
|
||||||
|
"Monolithic", "Bare", "Rigid", "Concrete", "Fundamental",
|
||||||
|
"Discrete", "Fixed", "Heavy", "Latent", "Symmetric",
|
||||||
|
"Implicit", "Explicit", "Cold", "Basic", "Granite",
|
||||||
|
"Stark", "Brute", "Firm", "Stout", "Coarse"
|
||||||
|
};
|
||||||
|
constexpr const char* CoreNouns[] = {
|
||||||
|
"Stack", "Heap", "Node", "Buffer", "Segment",
|
||||||
|
"Thread", "Kernel", "Block", "Page", "Shell",
|
||||||
|
"Layer", "Bit", "Logic", "Port", "Root",
|
||||||
|
"Register", "Pointer", "Address", "Cache", "Opcode",
|
||||||
|
"Slab", "Pipeline", "Bus", "Socket", "Sector",
|
||||||
|
"Vault", "Anchor", "Pillar", "Base", "Primitive",
|
||||||
|
"Offset", "Handle", "Struct", "Memory", "Word"
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr const char* ModernAdjectives[] = {
|
||||||
|
"Synthetic", "Neural", "Async", "Elastic", "Cloud",
|
||||||
|
"Distributed", "Reactive", "Orbital", "Poly", "Infinite",
|
||||||
|
"Parallel", "Modular", "Virtualized", "Scalable", "Agnostic",
|
||||||
|
"Adaptive", "Hybrid", "Autonomous", "Global", "Synergic",
|
||||||
|
"Omnipresent", "Evolving", "Abstract", "Unified", "Concurrent",
|
||||||
|
"Remote", "Digital", "Cluster", "Ephemeral", "Stateful",
|
||||||
|
"Stateless", "Serverless", "Decoupled", "Fluent", "Native"
|
||||||
|
};
|
||||||
|
constexpr const char* ModernNouns[] = {
|
||||||
|
"Nexus", "Grid", "Matrix", "Vertex", "Sync",
|
||||||
|
"Axiom", "Sphere", "Hub", "Mesh", "Bridge",
|
||||||
|
"Link", "Unit", "Fabric", "Cluster", "Portal",
|
||||||
|
"Ecosystem", "Catalyst", "Interface", "Domain", "Gateway",
|
||||||
|
"Lattice", "Cloud", "Instance", "Schema", "Registry",
|
||||||
|
"Tenant", "Namespace", "Pod", "Stream", "Endpoint",
|
||||||
|
"Payload", "Relay", "Orchestrator", "Broker", "Agent"
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr const char* FailureAdjectives[] = {
|
||||||
|
"Clumsy", "Wobbly", "Confused", "Chaotic", "Sneaky",
|
||||||
|
"Lazy", "Dizzy", "Broken", "Leaky", "Fragile",
|
||||||
|
"Shaky", "Erratic", "Sleepy", "Lost", "Random",
|
||||||
|
"Glitchy", "Unstable", "Paradoxical", "Cluttery", "Hiccupy",
|
||||||
|
"Wonky", "Flaky", "Stubborn", "Moody", "Nervous",
|
||||||
|
"Fumbling", "Drifting", "Tangled", "Blurred", "Absent",
|
||||||
|
"Haphazard", "Spasmodic", "Clunky", "Jittery", "Bewildered"
|
||||||
|
};
|
||||||
|
constexpr const char* FailureNouns[] = {
|
||||||
|
"Crash", "Bug", "Leak", "Hang", "Timeout",
|
||||||
|
"Panic", "Loop", "Spill", "Hiccup", "Glitch",
|
||||||
|
"Wobble", "Tumble", "Void", "Abyss", "Maze",
|
||||||
|
"Knot", "Static", "Noise", "Drift", "Stumble",
|
||||||
|
"Gap", "Fragment", "Shard", "Spark", "Bubble",
|
||||||
|
"Slip", "Trip", "Fall", "Ghost", "Shadow",
|
||||||
|
"Blur", "Overflow", "Sinkhole", "Echo", "Mirage"
|
||||||
|
};
|
||||||
|
|
||||||
|
// Adjective word list of the "Mythic" name bank; paired with MythicNouns in NameBanks.
constexpr const char* MythicAdjectives[] = {
    "Mythic", "Arcane", "Ancient", "Eternal", "Sacred",
    "Divine", "Forgotten", "Elder", "Primordial", "Venerable",
    "Runic", "Prophetic", "Colossal", "Imperial", "Regal",
    "Sovereign", "Mystic", "Occult", "Hidden", "Cryptic",
    "Ethereal", "Celestial", "Gnostic", "Hermetic", "Alchemical",
    "Astral", "Golden", "Iron", "Bronze", "Obsidian",
    "Silver", "Timeless", "Boundless", "Omnipotent", "Everlasting"
};
|
||||||
|
// Noun word list of the "Mythic" name bank; paired with MythicAdjectives in NameBanks.
constexpr const char* MythicNouns[] = {
    "Aegis", "Helios", "Oracle", "Titan", "Rune",
    "Lex", "Codex", "Obelisk", "Monolith", "Temple",
    "Altar", "Scepter", "Crown", "Sigil", "Glyph",
    "Tome", "Relic", "Artifact", "Sanctum", "Citadel",
    "Bastion", "Spire", "Pillar", "Throne", "Vault",
    "Key", "Gate", "Bridge", "Seal", "Pact",
    "Covenant", "Legacy", "Epoch", "Era", "Myth"
};
|
||||||
|
|
||||||
|
// Adjective word list of the "Cosmos" name bank; paired with CosmosNouns in NameBanks.
constexpr const char* CosmosAdjectives[] = {
    "Relativistic", "Baryonic", "Intergalactic", "Event-Horizon", "Singular",
    "Celestial", "Nebular", "Void-Born", "Astral", "Luminous",
    "Spectral", "Ionized", "Gravitational", "Ecliptic", "Zenithal",
    "Stellar", "Cosmological", "Parallactic", "Zero-Point", "Dark-Matter",
    "Radiant", "Orbital", "Supernova", "Hyper-Spatial", "Aetheric",
    "Cold-Void", "Infinite", "Dimensional", "Crystalline", "Tidal",
    "Planetary", "Solar", "Lunar", "Galactic", "Oblique"
};
|
||||||
|
// Noun word list of the "Cosmos" name bank; paired with CosmosAdjectives in NameBanks.
constexpr const char* CosmosNouns[] = {
    "Pulsar", "Quasar", "Singularity", "Void", "Nebula",
    "Horizon", "Apex", "Zenith", "Equinox", "Corona",
    "Aperture", "Axis", "Parallax", "Cluster", "Constellation",
    "Vacuum", "Symmetry", "Continuum", "Flux", "Vortex",
    "Nova", "Eclipse", "Solenoid", "Sphere", "Vector",
    "Siderostat", "Sextant", "Obliquity", "Precession", "Azimuth",
    "Wavelength", "Frequency", "Radiance", "Entropy", "Magnitude"
};
|
||||||
|
|
||||||
|
// Adjective word list of the "Game" name bank; paired with GameNouns in NameBanks.
constexpr const char* GameAdjectives[] = {
    "Frame-Locked", "Pixel-Perfect", "Arcade", "Retro", "Hardcore",
    "Unlocked", "Godlike", "Buffed", "Nerfed", "Overclocked",
    "Clutch", "Lagless", "Sweaty", "Tryhard", "Broken",
    "Turbo", "Min-Max", "Rage-Quit", "No-Scope", "Frame-Perfect",
    "Savescum", "Co-Op", "Modded", "Patched", "Hotfixed",
    "Debugged", "Optimized", "Smoothed", "Playtest", "Sandbox",
    "Scripted", "Speedrun", "Cheat-Code", "Invincible", "Flawless"
};
|
||||||
|
// Noun word list of the "Game" name bank; paired with GameAdjectives in NameBanks.
constexpr const char* GameNouns[] = {
    "Frame", "Tick", "Sprite", "Polygon", "Shader",
    "Texture", "Voxel", "Render", "Hitbox", "Hurtbox",
    "Collision", "Input", "Viewport", "Level", "Checkpoint",
    "Boss", "Loot", "Quest", "Spawn", "Respawn",
    "Grind", "Scroll", "Tilemap", "Backdrop", "Rig",
    "Build", "Frag", "Gib", "Drawcall", "Pass",
    "Batch", "Delta", "Pool", "Arena", "Worker"
};
|
||||||
|
|
||||||
|
constexpr std::array NameBanks = {
|
||||||
|
NameBank { AnalysisAdjectives, AnalysisNouns, sizeof(AnalysisAdjectives) / sizeof(AnalysisAdjectives[0]), sizeof(AnalysisNouns) / sizeof(AnalysisNouns[0]) },
|
||||||
|
NameBank { PerformanceAdjectives, PerformanceNouns, sizeof(PerformanceAdjectives) / sizeof(PerformanceAdjectives[0]), sizeof(PerformanceNouns) / sizeof(PerformanceNouns[0]) },
|
||||||
|
NameBank { CoreAdjectives, CoreNouns, sizeof(CoreAdjectives) / sizeof(CoreAdjectives[0]), sizeof(CoreNouns) / sizeof(CoreNouns[0]) },
|
||||||
|
NameBank { ModernAdjectives, ModernNouns, sizeof(ModernAdjectives) / sizeof(ModernAdjectives[0]), sizeof(ModernNouns) / sizeof(ModernNouns[0]) },
|
||||||
|
NameBank { FailureAdjectives, FailureNouns, sizeof(FailureAdjectives) / sizeof(FailureAdjectives[0]), sizeof(FailureNouns) / sizeof(FailureNouns[0]) },
|
||||||
|
NameBank { MythicAdjectives, MythicNouns, sizeof(MythicAdjectives) / sizeof(MythicAdjectives[0]), sizeof(MythicNouns) / sizeof(MythicNouns[0]) },
|
||||||
|
NameBank { CosmosAdjectives, CosmosNouns, sizeof(CosmosAdjectives) / sizeof(CosmosAdjectives[0]), sizeof(CosmosNouns) / sizeof(CosmosNouns[0]) },
|
||||||
|
NameBank { GameAdjectives, GameNouns, sizeof(GameAdjectives) / sizeof(GameAdjectives[0]), sizeof(GameNouns) / sizeof(GameNouns[0]) },
|
||||||
|
};
|
||||||
|
|
||||||
|
// Word patterns a generated name may follow. Each character stands for one word:
// 'a' = adjective, 'n' = noun (these are the only letters GenerateAbstractName() accepts).
constexpr std::array NameStructure = { "an", "aan", "nn" };
|
||||||
|
|
||||||
|
|
||||||
|
std::string GenerateAbstractName()
|
||||||
|
{
|
||||||
|
std::random_device rd;
|
||||||
|
std::default_random_engine gen( rd() );
|
||||||
|
std::uniform_int_distribution<uint32_t> dist( 0, UINT32_MAX );
|
||||||
|
|
||||||
|
const auto baseBank = NameBanks[dist( gen ) % NameBanks.size()];
|
||||||
|
const char* structure = NameStructure[dist( gen ) % NameStructure.size()];
|
||||||
|
|
||||||
|
std::vector<std::string> parts;
|
||||||
|
while( *structure )
|
||||||
|
{
|
||||||
|
const auto type = *structure++;
|
||||||
|
assert( type == 'a' || type == 'n' );
|
||||||
|
const auto bank = dist( gen ) % 6 == 0 ? NameBanks[dist( gen ) % NameBanks.size()] : baseBank;
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
auto part = std::string( type == 'a' ? bank.adjectives[dist( gen ) % bank.numAdjectives] : bank.nouns[dist( gen ) % bank.numNouns] );
|
||||||
|
if( std::ranges::find( parts, part ) == parts.end() )
|
||||||
|
{
|
||||||
|
parts.emplace_back( std::move( part ) );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string ret = parts[0];
|
||||||
|
for( size_t i=1; i<parts.size(); i++ )
|
||||||
|
{
|
||||||
|
ret += " " + parts[i];
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
13
profiler/src/profiler/TracyNameGen.hpp
Normal file
13
profiler/src/profiler/TracyNameGen.hpp
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
#ifndef __TRACYNAMEGEN_HPP__
#define __TRACYNAMEGEN_HPP__

#include <string>

namespace tracy
{

// Returns a randomly generated, human-readable name made of a few space-separated words.
// A new name is produced on every call.
std::string GenerateAbstractName();

}

#endif
|
||||||
@@ -20,7 +20,7 @@
|
|||||||
#include "tracy_pdqsort.h"
|
#include "tracy_pdqsort.h"
|
||||||
#include "../Fonts.hpp"
|
#include "../Fonts.hpp"
|
||||||
|
|
||||||
#include "IconsFontAwesome6.h"
|
#include "IconsFontAwesome7.h"
|
||||||
|
|
||||||
namespace tracy
|
namespace tracy
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -295,6 +295,7 @@ bool UserData::Load()
|
|||||||
LoadValue( v, "min", a->range.min );
|
LoadValue( v, "min", a->range.min );
|
||||||
LoadValue( v, "max", a->range.max );
|
LoadValue( v, "max", a->range.max );
|
||||||
LoadValue( v, "color", a->color );
|
LoadValue( v, "color", a->color );
|
||||||
|
a->range.active = true;
|
||||||
m_annotations.emplace_back( std::move( a ) );
|
m_annotations.emplace_back( std::move( a ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,7 +26,7 @@
|
|||||||
#include "../Fonts.hpp"
|
#include "../Fonts.hpp"
|
||||||
|
|
||||||
#include "imgui_internal.h"
|
#include "imgui_internal.h"
|
||||||
#include "IconsFontAwesome6.h"
|
#include "IconsFontAwesome7.h"
|
||||||
|
|
||||||
namespace tracy
|
namespace tracy
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -49,7 +49,8 @@ constexpr const char* GpuContextNames[] = {
|
|||||||
"Metal",
|
"Metal",
|
||||||
"Custom",
|
"Custom",
|
||||||
"CUDA",
|
"CUDA",
|
||||||
"Rocprof"
|
"Rocprof",
|
||||||
|
"WebGPU"
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MemoryPage;
|
struct MemoryPage;
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "TracyImGui.hpp"
|
#include "TracyImGui.hpp"
|
||||||
|
#include "TracyNameGen.hpp"
|
||||||
#include "TracyPrint.hpp"
|
#include "TracyPrint.hpp"
|
||||||
#include "TracyView.hpp"
|
#include "TracyView.hpp"
|
||||||
#include "tracy_pdqsort.h"
|
#include "tracy_pdqsort.h"
|
||||||
@@ -10,6 +13,7 @@ namespace tracy
|
|||||||
void View::AddAnnotation( int64_t start, int64_t end )
|
void View::AddAnnotation( int64_t start, int64_t end )
|
||||||
{
|
{
|
||||||
auto ann = std::make_shared<Annotation>();
|
auto ann = std::make_shared<Annotation>();
|
||||||
|
ann->text = GenerateAbstractName();
|
||||||
ann->range.active = true;
|
ann->range.active = true;
|
||||||
ann->range.min = start;
|
ann->range.min = start;
|
||||||
ann->range.max = end;
|
ann->range.max = end;
|
||||||
@@ -52,7 +56,22 @@ void View::DrawSelectedAnnotation()
|
|||||||
char buf[1024];
|
char buf[1024];
|
||||||
buf[descsz] = '\0';
|
buf[descsz] = '\0';
|
||||||
memcpy( buf, desc, descsz );
|
memcpy( buf, desc, descsz );
|
||||||
if( ImGui::InputTextWithHint( "##anndesc", "Describe annotation", buf, 256 ) )
|
|
||||||
|
const char* buttonText = ICON_FA_DICE;
|
||||||
|
auto buttonSize = ImGui::CalcTextSize( buttonText );
|
||||||
|
buttonSize.x += ImGui::GetStyle().FramePadding.x * 2.0f + ImGui::GetStyle().ItemSpacing.x;
|
||||||
|
ImGui::SetNextItemWidth( ImGui::GetContentRegionAvail().x - buttonSize.x );
|
||||||
|
bool changed = ImGui::InputTextWithHint( "##anndesc", "Describe annotation", buf, 256 );
|
||||||
|
ImGui::SameLine();
|
||||||
|
if( ImGui::Button( buttonText ) )
|
||||||
|
{
|
||||||
|
changed = true;
|
||||||
|
const auto name = GenerateAbstractName();
|
||||||
|
const auto len = std::min( sizeof( buf ) - 1, name.size() );
|
||||||
|
memcpy( buf, name.c_str(), len );
|
||||||
|
buf[len] = '\0';
|
||||||
|
}
|
||||||
|
if( changed )
|
||||||
{
|
{
|
||||||
m_selectedAnnotation->text.assign( buf );
|
m_selectedAnnotation->text.assign( buf );
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ void View::DrawManual()
|
|||||||
ImGui::PopStyleColor();
|
ImGui::PopStyleColor();
|
||||||
ImGui::SameLine();
|
ImGui::SameLine();
|
||||||
TextDisabledUnformatted( "This user manual is missing features. See the PDF file for the proper version." );
|
TextDisabledUnformatted( "This user manual is missing features. See the PDF file for the proper version." );
|
||||||
|
ImGui::SameLine();
|
||||||
|
if( ImGui::Button( ICON_FA_BOOK " PDF Manual" ) ) OpenWebpage( "https://github.com/wolfpld/tracy/releases" );
|
||||||
|
|
||||||
ImGui::Separator();
|
ImGui::Separator();
|
||||||
ImGui::BeginChild( "##usermanual" );
|
ImGui::BeginChild( "##usermanual" );
|
||||||
@@ -88,6 +90,7 @@ void View::DrawManual()
|
|||||||
if( ImGui::IsItemClicked() && !ImGui::IsItemToggledOpen() )
|
if( ImGui::IsItemClicked() && !ImGui::IsItemToggledOpen() )
|
||||||
{
|
{
|
||||||
m_activeManualChunk = i;
|
m_activeManualChunk = i;
|
||||||
|
m_manualPositionReset = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while( level-- > 0 ) ImGui::TreePop();
|
while( level-- > 0 ) ImGui::TreePop();
|
||||||
@@ -142,8 +145,8 @@ void View::DrawManual()
|
|||||||
ImGui::Dummy( ImVec2( 0, ImGui::GetTextLineHeight() * 0.25f ) );
|
ImGui::Dummy( ImVec2( 0, ImGui::GetTextLineHeight() * 0.25f ) );
|
||||||
ImGui::PopFont();
|
ImGui::PopFont();
|
||||||
|
|
||||||
const auto separator = chunk.text.find( "-----" );
|
const auto separator = chunk.text.find( "\n-----" );
|
||||||
const auto size = separator == std::string::npos ? chunk.text.size() : separator;
|
const auto size = separator == std::string::npos ? chunk.text.size() : ( separator + 1 );
|
||||||
|
|
||||||
m_markdown.Print( chunk.text.c_str(), size );
|
m_markdown.Print( chunk.text.c_str(), size );
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -299,6 +299,22 @@ void View::DrawTimeline()
|
|||||||
v->range.StartFrame();
|
v->range.StartFrame();
|
||||||
HandleRange( v->range, timespan, ImGui::GetCursorScreenPos(), w );
|
HandleRange( v->range, timespan, ImGui::GetCursorScreenPos(), w );
|
||||||
}
|
}
|
||||||
|
if( IsMouseClicked( 0 ) )
|
||||||
|
{
|
||||||
|
const auto ty = ImGui::GetTextLineHeight();
|
||||||
|
for( auto& ann : m_annotations )
|
||||||
|
{
|
||||||
|
if( ann->range.min >= m_vd.zvEnd || ann->range.max <= m_vd.zvStart ) continue;
|
||||||
|
const auto aMin = ( ann->range.min - m_vd.zvStart ) * pxns;
|
||||||
|
const auto aMax = ( ann->range.max - m_vd.zvStart ) * pxns;
|
||||||
|
if( ImGui::IsMouseHoveringRect( linepos + ImVec2( aMin, lineh - ty * 1.5f ), linepos + ImVec2( aMax, lineh ) ) )
|
||||||
|
{
|
||||||
|
m_selectedAnnotation = ann.get();
|
||||||
|
ConsumeMouseEvents( 0 );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
HandleTimelineMouse( timespan, ImGui::GetCursorScreenPos(), w );
|
HandleTimelineMouse( timespan, ImGui::GetCursorScreenPos(), w );
|
||||||
}
|
}
|
||||||
if( ImGui::IsWindowFocused( ImGuiHoveredFlags_ChildWindows | ImGuiHoveredFlags_AllowWhenBlockedByActiveItem ) )
|
if( ImGui::IsWindowFocused( ImGuiHoveredFlags_ChildWindows | ImGuiHoveredFlags_AllowWhenBlockedByActiveItem ) )
|
||||||
@@ -360,9 +376,8 @@ void View::DrawTimeline()
|
|||||||
bool hover = ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect( wpos, wpos + ImVec2( w, h ) );
|
bool hover = ImGui::IsWindowHovered() && ImGui::IsMouseHoveringRect( wpos, wpos + ImVec2( w, h ) );
|
||||||
draw = ImGui::GetWindowDrawList();
|
draw = ImGui::GetWindowDrawList();
|
||||||
|
|
||||||
|
const auto scale = GetScale();
|
||||||
const auto ty = ImGui::GetTextLineHeight();
|
const auto ty = ImGui::GetTextLineHeight();
|
||||||
const auto to = 9.f;
|
|
||||||
const auto th = ( ty - to ) * sqrt( 3 ) * 0.5;
|
|
||||||
|
|
||||||
if( m_vd.drawGpuZones )
|
if( m_vd.drawGpuZones )
|
||||||
{
|
{
|
||||||
@@ -415,17 +430,24 @@ void View::DrawTimeline()
|
|||||||
|
|
||||||
m_lockHighlight = m_nextLockHighlight;
|
m_lockHighlight = m_nextLockHighlight;
|
||||||
|
|
||||||
|
const auto iconSize = ImGui::CalcTextSize( ICON_FA_NOTE_STICKY );
|
||||||
for( auto& ann : m_annotations )
|
for( auto& ann : m_annotations )
|
||||||
{
|
{
|
||||||
if( ann->range.min < m_vd.zvEnd && ann->range.max > m_vd.zvStart )
|
if( ann->range.min < m_vd.zvEnd && ann->range.max > m_vd.zvStart )
|
||||||
{
|
{
|
||||||
uint32_t c0 = ( ann->color & 0xFFFFFF ) | ( m_selectedAnnotation == ann.get() ? 0x44000000 : 0x22000000 );
|
uint32_t c0 = ( ann->color & 0xFFFFFF ) | ( m_selectedAnnotation == ann.get() ? 0x22000000 : 0x11000000 );
|
||||||
uint32_t c1 = ( ann->color & 0xFFFFFF ) | ( m_selectedAnnotation == ann.get() ? 0x66000000 : 0x44000000 );
|
uint32_t c1 = ( ann->color & 0xFFFFFF ) | ( m_selectedAnnotation == ann.get() ? 0x88000000 : 0x66000000 );
|
||||||
uint32_t c2 = ( ann->color & 0xFFFFFF ) | ( m_selectedAnnotation == ann.get() ? 0xCC000000 : 0xAA000000 );
|
uint32_t c2 = ( ann->color & 0xFFFFFF ) | ( m_selectedAnnotation == ann.get() ? 0xDD000000 : 0xBB000000 );
|
||||||
draw->AddRectFilled( linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns, 0 ), linepos + ImVec2( ( ann->range.max - m_vd.zvStart ) * pxns, lineh ), c0 );
|
|
||||||
DrawLine( draw, linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns + 0.5f, 0.5f ), linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns + 0.5f, lineh + 0.5f ), ann->range.hiMin ? c2 : c1, ann->range.hiMin ? 2 : 1 );
|
const auto aMin = ( ann->range.min - m_vd.zvStart ) * pxns;
|
||||||
DrawLine( draw, linepos + ImVec2( ( ann->range.max - m_vd.zvStart ) * pxns + 0.5f, 0.5f ), linepos + ImVec2( ( ann->range.max - m_vd.zvStart ) * pxns + 0.5f, lineh + 0.5f ), ann->range.hiMax ? c2 : c1, ann->range.hiMax ? 2 : 1 );
|
const auto aMax = ( ann->range.max - m_vd.zvStart ) * pxns;
|
||||||
if( drawMouseLine && ImGui::IsMouseHoveringRect( linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns, 0 ), linepos + ImVec2( ( ann->range.max - m_vd.zvStart ) * pxns, lineh ) ) )
|
|
||||||
|
draw->AddRectFilled( linepos + ImVec2( aMin, 0 ), linepos + ImVec2( aMax, lineh ), c0 );
|
||||||
|
draw->AddRectFilled( linepos + ImVec2( aMin + 1, lineh - ty * 1.5f ), linepos + ImVec2( aMax - 1, lineh ), 0x88000000 );
|
||||||
|
DrawLine( draw, linepos + ImVec2( aMin + 0.5f, 0.5f ), linepos + ImVec2( aMin + 0.5f, lineh + 0.5f ), ann->range.hiMin ? c2 : c1, ann->range.hiMin ? 2 : 1 );
|
||||||
|
DrawLine( draw, linepos + ImVec2( aMax - 0.5f, 0.5f ), linepos + ImVec2( aMax - 0.5f, lineh + 0.5f ), ann->range.hiMax ? c2 : c1, ann->range.hiMax ? 2 : 1 );
|
||||||
|
|
||||||
|
if( drawMouseLine && ImGui::IsMouseHoveringRect( linepos + ImVec2( aMin, 0 ), linepos + ImVec2( aMax, lineh ) ) )
|
||||||
{
|
{
|
||||||
ImGui::BeginTooltip();
|
ImGui::BeginTooltip();
|
||||||
if( ann->text.empty() )
|
if( ann->text.empty() )
|
||||||
@@ -442,27 +464,22 @@ void View::DrawTimeline()
|
|||||||
TextFocused( "Annotation length:", TimeToString( ann->range.max - ann->range.min ) );
|
TextFocused( "Annotation length:", TimeToString( ann->range.max - ann->range.min ) );
|
||||||
ImGui::EndTooltip();
|
ImGui::EndTooltip();
|
||||||
}
|
}
|
||||||
const auto aw = ( ann->range.max - ann->range.min ) * pxns;
|
|
||||||
if( aw > th * 4 )
|
|
||||||
{
|
|
||||||
draw->AddCircleFilled( linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns + th * 2, th * 2 ), th, 0x88AABB22 );
|
|
||||||
draw->AddCircle( linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns + th * 2, th * 2 ), th, 0xAAAABB22 );
|
|
||||||
if( drawMouseLine && IsMouseClicked( 0 ) && ImGui::IsMouseHoveringRect( linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns + th, th ), linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns + th * 3, th * 3 ) ) )
|
|
||||||
{
|
|
||||||
m_selectedAnnotation = ann.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
const auto aw = ( ann->range.max - ann->range.min ) * pxns;
|
||||||
|
if( aw > ty + iconSize.x )
|
||||||
|
{
|
||||||
|
draw->AddText( linepos + ImVec2( aMin + ty * 0.5f, lineh - ty * 1.25f ), ann->color | 0xFF000000, ICON_FA_NOTE_STICKY );
|
||||||
if( !ann->text.empty() )
|
if( !ann->text.empty() )
|
||||||
{
|
{
|
||||||
const auto tw = ImGui::CalcTextSize( ann->text.c_str() ).x;
|
const auto tw = ImGui::CalcTextSize( ann->text.c_str() ).x;
|
||||||
if( aw - th*4 > tw )
|
if( aw > ty + iconSize.x + tw )
|
||||||
{
|
{
|
||||||
draw->AddText( linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns + th * 4, th * 0.5 ), 0xFFFFFFFF, ann->text.c_str() );
|
draw->AddText( linepos + ImVec2( aMin + ty + iconSize.x, lineh - ty * 1.25f ), 0xFFFFFFFF, ann->text.c_str() );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
draw->PushClipRect( linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns, 0 ), linepos + ImVec2( ( ann->range.max - m_vd.zvStart ) * pxns, lineh ), true );
|
draw->PushClipRect( linepos + ImVec2( aMin + 1, lineh - ty * 1.5f ), linepos + ImVec2( aMax - 1, lineh ) );
|
||||||
draw->AddText( linepos + ImVec2( ( ann->range.min - m_vd.zvStart ) * pxns + th * 4, th * 0.5 ), 0xFFFFFFFF, ann->text.c_str() );
|
draw->AddText( linepos + ImVec2( aMin + ty + iconSize.x, lineh - ty * 1.25f ), 0xFFFFFFFF, ann->text.c_str() );
|
||||||
draw->PopClipRect();
|
draw->PopClipRect();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -485,7 +502,6 @@ void View::DrawTimeline()
|
|||||||
draw->AddRect( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x4488DD88 );
|
draw->AddRect( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x4488DD88 );
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto scale = GetScale();
|
|
||||||
if( m_findZone.range.active && ( m_findZone.show || m_showRanges ) )
|
if( m_findZone.range.active && ( m_findZone.show || m_showRanges ) )
|
||||||
{
|
{
|
||||||
const auto px0 = ( m_findZone.range.min - m_vd.zvStart ) * pxns;
|
const auto px0 = ( m_findZone.range.min - m_vd.zvStart ) * pxns;
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
|
||||||
#include "TracyImGui.hpp"
|
#include "TracyImGui.hpp"
|
||||||
|
#include "TracyNameGen.hpp"
|
||||||
#include "TracyPrint.hpp"
|
#include "TracyPrint.hpp"
|
||||||
#include "TracyView.hpp"
|
#include "TracyView.hpp"
|
||||||
#include "tracy_pdqsort.h"
|
#include "tracy_pdqsort.h"
|
||||||
@@ -55,8 +56,22 @@ void View::DrawInfo()
|
|||||||
char buf[256];
|
char buf[256];
|
||||||
buf[descsz] = '\0';
|
buf[descsz] = '\0';
|
||||||
memcpy( buf, desc.c_str(), descsz );
|
memcpy( buf, desc.c_str(), descsz );
|
||||||
ImGui::SetNextItemWidth( -1 );
|
|
||||||
if( ImGui::InputTextWithHint( "##traceDesc", "Enter description of the trace", buf, 256 ) )
|
const char* buttonText = ICON_FA_DICE;
|
||||||
|
auto buttonSize = ImGui::CalcTextSize( buttonText );
|
||||||
|
buttonSize.x += ImGui::GetStyle().FramePadding.x * 2.0f + ImGui::GetStyle().ItemSpacing.x;
|
||||||
|
ImGui::SetNextItemWidth( ImGui::GetContentRegionAvail().x - buttonSize.x );
|
||||||
|
bool changed = ImGui::InputTextWithHint( "##traceDesc", "Enter description of the trace", buf, 256 );
|
||||||
|
ImGui::SameLine();
|
||||||
|
if( ImGui::Button( buttonText ) )
|
||||||
|
{
|
||||||
|
changed = true;
|
||||||
|
const auto name = GenerateAbstractName();
|
||||||
|
const auto len = std::min( sizeof( buf ) - 1, name.size() );
|
||||||
|
memcpy( buf, name.c_str(), len );
|
||||||
|
buf[len] = '\0';
|
||||||
|
}
|
||||||
|
if( changed )
|
||||||
{
|
{
|
||||||
m_userData.SetDescription( buf );
|
m_userData.SetDescription( buf );
|
||||||
if( m_stcb ) UpdateTitle();
|
if( m_stcb ) UpdateTitle();
|
||||||
|
|||||||
@@ -524,7 +524,7 @@ static const char* GetHostInfo()
|
|||||||
auto ptr = buf;
|
auto ptr = buf;
|
||||||
#if defined _WIN32
|
#if defined _WIN32
|
||||||
# if defined TRACY_WIN32_NO_DESKTOP
|
# if defined TRACY_WIN32_NO_DESKTOP
|
||||||
auto GetVersion = &::GetVersionEx;
|
auto GetVersion = &::GetVersionExW;
|
||||||
# else
|
# else
|
||||||
auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" );
|
auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" );
|
||||||
# endif
|
# endif
|
||||||
@@ -1408,9 +1408,30 @@ namespace
|
|||||||
// 1a. But s_queue is needed for initialization of variables in point 2.
|
// 1a. But s_queue is needed for initialization of variables in point 2.
|
||||||
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
|
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
|
||||||
|
|
||||||
|
// A producer token may be created before s_initTime is constructed (the dynamic loader
|
||||||
|
// runs shared object initializers before any of the executable's constructors, and such
|
||||||
|
// an initializer may emit a zone). Remember the time of such an early token creation, so
|
||||||
|
// that the init time can be backdated accordingly and no event timestamp precedes the
|
||||||
|
// trace epoch.
|
||||||
|
static std::atomic<int64_t> s_earlyTokenTime { 0 };
|
||||||
|
static bool s_initTimeConstructed = false;
|
||||||
|
|
||||||
// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread.
|
// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread.
|
||||||
thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue );
|
thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue );
|
||||||
thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) };
|
|
||||||
|
// Obtains the explicit producer of s_queue that belongs to the calling thread's s_token_detail.
//
// When this runs before GetInitTimeImpl() has executed (s_initTimeConstructed is still false),
// the current time is additionally recorded in s_earlyTokenTime, keeping the smallest value seen
// so far. GetInitTimeImpl() later uses that value to backdate the init time.
static moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* CreateProducerToken()
{
    auto ptr = s_queue.get_explicit_producer( s_token_detail );
    if( !s_initTimeConstructed )
    {
        const auto t = Profiler::GetTime();
        // Lock-free "store the minimum". A stored value of 0 means nothing was recorded yet.
        // compare_exchange_weak refreshes `e` on failure, so the condition is re-evaluated
        // against the latest stored value on every retry.
        auto e = s_earlyTokenTime.load( std::memory_order_relaxed );
        while( ( e == 0 || t < e ) && !s_earlyTokenTime.compare_exchange_weak( e, t, std::memory_order_relaxed ) ) {}
    }
    return ptr;
}
|
||||||
|
|
||||||
|
thread_local ProducerWrapper init_order(108) s_token { CreateProducerToken() };
|
||||||
thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() };
|
thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() };
|
||||||
|
|
||||||
# ifdef _MSC_VER
|
# ifdef _MSC_VER
|
||||||
@@ -1419,12 +1440,36 @@ thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThr
|
|||||||
# pragma init_seg( ".CRT$XCB" )
|
# pragma init_seg( ".CRT$XCB" )
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() };
|
static int64_t GetInitTimeImpl()
|
||||||
|
{
|
||||||
|
auto t = SetupHwTimer();
|
||||||
|
const auto e = s_earlyTokenTime.load( std::memory_order_relaxed );
|
||||||
|
if( e != 0 && e < t ) t = e;
|
||||||
|
s_initTimeConstructed = true;
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
static InitTimeWrapper init_order(101) s_initTime { GetInitTimeImpl() };
|
||||||
std::atomic<int> init_order(102) RpInitDone( 0 );
|
std::atomic<int> init_order(102) RpInitDone( 0 );
|
||||||
std::atomic<int> init_order(102) RpInitLock( 0 );
|
std::atomic<int> init_order(102) RpInitLock( 0 );
|
||||||
thread_local bool RpThreadInitDone = false;
|
thread_local bool RpThreadInitDone = false;
|
||||||
thread_local bool RpThreadShutdown = false;
|
thread_local bool RpThreadShutdown = false;
|
||||||
moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
|
moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
|
||||||
|
|
||||||
|
# ifndef _MSC_VER
|
||||||
|
// An instrumented shared object may emit zones from its static initializers, which the
|
||||||
|
// dynamic loader runs before any of the executable's constructors, including the
|
||||||
|
// priority-ordered constructor of s_queue above. The main thread producer token (s_token)
|
||||||
|
// is then lazily created against the zero-initialized queue memory, and the queue
|
||||||
|
// constructor subsequently orphans it, making all zones emitted on the main thread
|
||||||
|
// invisible to the consumer. Re-adopt such a producer here. If no zones were emitted up
|
||||||
|
// to this point, this only triggers construction of s_token, which is a no-op repair.
|
||||||
|
struct EarlyMainThreadTokenRepair
|
||||||
|
{
|
||||||
|
EarlyMainThreadTokenRepair() { if( s_token.ptr ) s_queue.readopt_orphaned_producer( s_token.ptr ); }
|
||||||
|
};
|
||||||
|
static EarlyMainThreadTokenRepair init_order(104) s_earlyMainThreadTokenRepair;
|
||||||
|
# endif
|
||||||
|
|
||||||
std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
|
std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
|
||||||
std::atomic<uint8_t> init_order(104) s_gpuCtxCounter( 0 );
|
std::atomic<uint8_t> init_order(104) s_gpuCtxCounter( 0 );
|
||||||
|
|
||||||
@@ -5167,6 +5212,164 @@ TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_contex
|
|||||||
tracy::Profiler::QueueSerialFinish();
|
tracy::Profiler::QueueSerialFinish();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct __tracy_shared_lockable_context_data {
|
||||||
|
struct __tracy_lockable_context_data m_base;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creates a shared lockable context and announces it to the profiler.
//
// srcloc: source location describing the lock; its address is sent in the announce message.
// Returns a context allocated with tracy_malloc. Release it with
// ___tracy_terminate_shared_lockable_ctx().
TRACY_API struct __tracy_shared_lockable_context_data* ___tracy_announce_shared_lockable_ctx( const struct ___tracy_source_location_data* srcloc )
{
    auto lockdata = static_cast<__tracy_shared_lockable_context_data*>( tracy::tracy_malloc( sizeof( __tracy_shared_lockable_context_data ) ) );
    lockdata->m_base.m_id = tracy::GetLockCounter().fetch_add( 1, std::memory_order_relaxed );
#ifdef TRACY_ON_DEMAND
    // The memory comes from a raw allocation, so the atomics have to be constructed in place.
    new(&lockdata->m_base.m_lockCount) std::atomic<uint32_t>( 0 );
    new(&lockdata->m_base.m_active) std::atomic<bool>( false );
#endif
    assert( lockdata->m_base.m_id != (std::numeric_limits<uint32_t>::max)() );

    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockAnnounce );
    tracy::MemWrite( &item->lockAnnounce.id, lockdata->m_base.m_id );
    tracy::MemWrite( &item->lockAnnounce.time, tracy::Profiler::GetTime() );
    tracy::MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
    tracy::MemWrite( &item->lockAnnounce.type, tracy::LockType::SharedLockable );
#ifdef TRACY_ON_DEMAND
    tracy::GetProfiler().DeferItem( *item );
#endif
    tracy::Profiler::QueueSerialFinish();

    return lockdata;
}
|
||||||
|
|
||||||
|
// Reports termination of a shared lockable and destroys its context.
//
// lockdata: context returned by ___tracy_announce_shared_lockable_ctx(). It is freed here and
// must not be used afterwards.
TRACY_API void ___tracy_terminate_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockTerminate );
    tracy::MemWrite( &item->lockTerminate.id, lockdata->m_base.m_id );
    tracy::MemWrite( &item->lockTerminate.time, tracy::Profiler::GetTime() );
#ifdef TRACY_ON_DEMAND
    tracy::GetProfiler().DeferItem( *item );
#endif
    tracy::Profiler::QueueSerialFinish();

#ifdef TRACY_ON_DEMAND
    // Counterpart of the placement-new construction done when the context was announced.
    lockdata->m_base.m_lockCount.~atomic();
    lockdata->m_base.m_active.~atomic();
#endif
    tracy::tracy_free( lockdata );
}
|
||||||
|
|
||||||
|
// Exclusive-lock "before lock" event for a shared lockable.
// Forwards to ___tracy_before_lock_lockable_ctx() on the embedded base context and returns its result.
TRACY_API int32_t ___tracy_before_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata )
{
    return ___tracy_before_lock_lockable_ctx( &lockdata->m_base );
}
|
||||||
|
|
||||||
|
// Exclusive-lock "after lock" event for a shared lockable.
// Forwards to ___tracy_after_lock_lockable_ctx() on the embedded base context.
TRACY_API void ___tracy_after_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata )
{
    ___tracy_after_lock_lockable_ctx( &lockdata->m_base );
}
|
||||||
|
|
||||||
|
// Exclusive-lock "after unlock" event for a shared lockable.
// Forwards to ___tracy_after_unlock_lockable_ctx() on the embedded base context.
TRACY_API void ___tracy_after_unlock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata )
{
    ___tracy_after_unlock_lockable_ctx( &lockdata->m_base );
}
|
||||||
|
|
||||||
|
// Exclusive-lock "after try_lock" event for a shared lockable.
// Forwards to ___tracy_after_try_lock_lockable_ctx() on the embedded base context.
//
// acquired: passed through unchanged to the base implementation.
TRACY_API void ___tracy_after_try_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, int32_t acquired )
{
    ___tracy_after_try_lock_lockable_ctx( &lockdata->m_base, acquired );
}
|
||||||
|
|
||||||
|
// Shared-lock "before lock" event: queues a LockSharedWait message for the calling thread.
//
// Returns non-zero when the message was queued and zero when it was suppressed. Suppression
// only happens in TRACY_ON_DEMAND builds; without it the message is always queued.
TRACY_API int32_t ___tracy_before_lock_shared_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata )
{
#ifdef TRACY_ON_DEMAND
    bool queue = false;
    const auto locks = lockdata->m_base.m_lockCount.fetch_add( 1, std::memory_order_relaxed );
    const auto active = lockdata->m_base.m_active.load( std::memory_order_relaxed );
    // The connection state is only consulted when no other lock operation was outstanding
    // (locks == 0) or when the lock is already marked active; m_active is then brought in line
    // with the connection state.
    if( locks == 0 || active )
    {
        const bool connected = tracy::GetProfiler().IsConnected();
        if( active != connected ) lockdata->m_base.m_active.store( connected, std::memory_order_relaxed );
        if( connected ) queue = true;
    }
    if( !queue ) return static_cast<int32_t>(false);
#endif

    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockSharedWait );
    tracy::MemWrite( &item->lockWait.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->lockWait.id, lockdata->m_base.m_id );
    tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() );
    tracy::Profiler::QueueSerialFinish();
    return static_cast<int32_t>(true);
}
|
||||||
|
|
||||||
|
// Shared-lock "after lock" event: queues a LockSharedObtain message for the calling thread.
//
// NOTE(review): unlike the before/unlock functions there is no TRACY_ON_DEMAND gating here;
// presumably callers only invoke this when ___tracy_before_lock_shared_shared_lockable_ctx()
// returned non-zero - confirm against the calling macros.
TRACY_API void ___tracy_after_lock_shared_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata )
{
    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockSharedObtain );
    tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->lockObtain.id, lockdata->m_base.m_id );
    tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() );
    tracy::Profiler::QueueSerialFinish();
}
|
||||||
|
|
||||||
|
// Shared-lock "after unlock" event: queues a LockSharedRelease message for the calling thread.
//
// In TRACY_ON_DEMAND builds the outstanding-lock counter is always decremented, but the message
// is only queued while the lock is marked active and the profiler is connected. If the
// connection is gone, the lock is marked inactive and nothing is queued.
TRACY_API void ___tracy_after_unlock_shared_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata )
{
#ifdef TRACY_ON_DEMAND
    lockdata->m_base.m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
    if( !lockdata->m_base.m_active.load( std::memory_order_relaxed ) ) return;
    if( !tracy::GetProfiler().IsConnected() )
    {
        lockdata->m_base.m_active.store( false, std::memory_order_relaxed );
        return;
    }
#endif

    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockSharedRelease );
    tracy::MemWrite( &item->lockReleaseShared.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->lockReleaseShared.id, lockdata->m_base.m_id );
    tracy::MemWrite( &item->lockReleaseShared.time, tracy::Profiler::GetTime() );
    tracy::Profiler::QueueSerialFinish();
}
|
||||||
|
|
||||||
|
// Call after a try_lock_shared() attempt on the lock described by `lockdata`.
//   acquired - non-zero when the shared acquisition succeeded; a failed attempt is not reported.
// On success a LockSharedObtain event is queued, matching the event emitted by
// ___tracy_after_lock_shared_shared_lockable_ctx, so that the acquisition pairs with the
// LockSharedRelease emitted on unlock. (An exclusive LockObtain event would be wrong here.)
TRACY_API void ___tracy_after_try_lock_shared_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, int32_t acquired )
{
    // Nothing happened from the lock's point of view; do not touch the counters either.
    if( !acquired ) return;

#ifdef TRACY_ON_DEMAND
    bool queue = false;
    const auto locks = lockdata->m_base.m_lockCount.fetch_add( 1, std::memory_order_relaxed );
    const auto active = lockdata->m_base.m_active.load( std::memory_order_relaxed );
    if( locks == 0 || active )
    {
        const bool connected = tracy::GetProfiler().IsConnected();
        // Keep the cached "active" flag in sync with the profiler connection state.
        if( active != connected ) lockdata->m_base.m_active.store( connected, std::memory_order_relaxed );
        if( connected ) queue = true;
    }
    if( !queue ) return;
#endif

    auto item = tracy::Profiler::QueueSerial();
    tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockSharedObtain );
    tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() );
    tracy::MemWrite( &item->lockObtain.id, lockdata->m_base.m_id );
    tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() );
    tracy::Profiler::QueueSerialFinish();
}
|
||||||
|
|
||||||
|
// Shared-lock variant of ___tracy_mark_lockable_ctx: forwards the embedded base
// lockable context (lockdata->m_base) and the given source location unchanged.
TRACY_API void ___tracy_mark_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc )
|
||||||
|
{
|
||||||
|
___tracy_mark_lockable_ctx( &lockdata->m_base, srcloc );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shared-lock variant of ___tracy_custom_name_lockable_ctx: forwards the embedded base
// lockable context (lockdata->m_base) together with `name` and `nameSz` unchanged.
TRACY_API void ___tracy_custom_name_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, const char* name, size_t nameSz )
|
||||||
|
{
|
||||||
|
___tracy_custom_name_lockable_ctx( &lockdata->m_base, name, nameSz );
|
||||||
|
}
|
||||||
|
|
||||||
TRACY_API int32_t ___tracy_connected( void )
|
TRACY_API int32_t ___tracy_connected( void )
|
||||||
{
|
{
|
||||||
return static_cast<int32_t>( tracy::GetProfiler().IsConnected() );
|
return static_cast<int32_t>( tracy::GetProfiler().IsConnected() );
|
||||||
|
|||||||
@@ -52,20 +52,8 @@ public:
|
|||||||
RingBuffer( const RingBuffer& ) = delete;
|
RingBuffer( const RingBuffer& ) = delete;
|
||||||
RingBuffer& operator=( const RingBuffer& ) = delete;
|
RingBuffer& operator=( const RingBuffer& ) = delete;
|
||||||
|
|
||||||
RingBuffer( RingBuffer&& other )
|
RingBuffer( RingBuffer&& other ) = delete;
|
||||||
{
|
RingBuffer& operator=( RingBuffer&& other ) = delete;
|
||||||
memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) );
|
|
||||||
m_metadata = nullptr;
|
|
||||||
m_fd = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
RingBuffer& operator=( RingBuffer&& other )
|
|
||||||
{
|
|
||||||
memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) );
|
|
||||||
m_metadata = nullptr;
|
|
||||||
m_fd = 0;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsValid() const { return m_metadata != nullptr; }
|
bool IsValid() const { return m_metadata != nullptr; }
|
||||||
int GetId() const { return m_id; }
|
int GetId() const { return m_id; }
|
||||||
|
|||||||
@@ -105,6 +105,9 @@ uint8_t gpu_context_allocate( ToolData* data )
|
|||||||
tracy::MemWrite( &item->gpuNewContext.context, context_id );
|
tracy::MemWrite( &item->gpuNewContext.context, context_id );
|
||||||
tracy::MemWrite( &item->gpuNewContext.flags, GpuContextFlags( context_flags ) );
|
tracy::MemWrite( &item->gpuNewContext.flags, GpuContextFlags( context_flags ) );
|
||||||
tracy::MemWrite( &item->gpuNewContext.type, tracy::GpuContextType::Rocprof );
|
tracy::MemWrite( &item->gpuNewContext.type, tracy::GpuContextType::Rocprof );
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
GetProfiler().DeferItem( *item );
|
||||||
|
#endif
|
||||||
tracy::Profiler::QueueSerialFinish();
|
tracy::Profiler::QueueSerialFinish();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,6 +124,9 @@ uint8_t gpu_context_allocate( ToolData* data )
|
|||||||
tracy::MemWrite( &item->gpuContextNameFat.context, context_id );
|
tracy::MemWrite( &item->gpuContextNameFat.context, context_id );
|
||||||
tracy::MemWrite( &item->gpuContextNameFat.ptr, uint64_t( cloned_name ) );
|
tracy::MemWrite( &item->gpuContextNameFat.ptr, uint64_t( cloned_name ) );
|
||||||
tracy::MemWrite( &item->gpuContextNameFat.size, uint16_t( name_length ) );
|
tracy::MemWrite( &item->gpuContextNameFat.size, uint16_t( name_length ) );
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
GetProfiler().DeferItem( *item );
|
||||||
|
#endif
|
||||||
tracy::Profiler::QueueSerialFinish();
|
tracy::Profiler::QueueSerialFinish();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -380,8 +386,10 @@ void tool_callback_tracing_callback( rocprofiler_callback_tracing_record_t recor
|
|||||||
{
|
{
|
||||||
assert( callback_data != nullptr );
|
assert( callback_data != nullptr );
|
||||||
ToolData* data = static_cast<ToolData*>( callback_data );
|
ToolData* data = static_cast<ToolData*>( callback_data );
|
||||||
if( !data->init ) return;
|
|
||||||
|
|
||||||
|
// Kernel symbol registrations happen at HIP init time, before any Tracy
|
||||||
|
// client connects (and before data->init is set). Record them regardless
|
||||||
|
// of init state so that kernel names are available when profiling starts.
|
||||||
if( record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
if( record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
||||||
record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER )
|
record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER )
|
||||||
{
|
{
|
||||||
@@ -398,7 +406,13 @@ void tool_callback_tracing_callback( rocprofiler_callback_tracing_record_t recor
|
|||||||
data->client_kernels.erase( sym_data->kernel_id );
|
data->client_kernels.erase( sym_data->kernel_id );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH )
|
|
||||||
|
// Gate dispatch and memory-copy recording on data->init, which is set
|
||||||
|
// once the GPU context is allocated (under TRACY_ON_DEMAND this waits
|
||||||
|
// for a client connection).
|
||||||
|
if( !data->init ) return;
|
||||||
|
|
||||||
|
if( record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH )
|
||||||
{
|
{
|
||||||
auto* rdata = static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>( record.payload );
|
auto* rdata = static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>( record.payload );
|
||||||
if( record.operation == ROCPROFILER_KERNEL_DISPATCH_ENQUEUE )
|
if( record.operation == ROCPROFILER_KERNEL_DISPATCH_ENQUEUE )
|
||||||
|
|||||||
@@ -171,8 +171,8 @@ struct ConcurrentQueueDefaultTraits
|
|||||||
#if defined(malloc) || defined(free)
|
#if defined(malloc) || defined(free)
|
||||||
// Gah, this is 2015, stop defining macros that break standard code already!
|
// Gah, this is 2015, stop defining macros that break standard code already!
|
||||||
// Work around malloc/free being special macros:
|
// Work around malloc/free being special macros:
|
||||||
static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }
|
static inline void* WORKAROUND_malloc(size_t size) { return tracy::tracy_malloc(size); }
|
||||||
static inline void WORKAROUND_free(void* ptr) { return free(ptr); }
|
static inline void WORKAROUND_free(void* ptr) { return tracy::tracy_free(ptr); }
|
||||||
static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }
|
static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }
|
||||||
static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }
|
static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }
|
||||||
#else
|
#else
|
||||||
@@ -1210,6 +1210,21 @@ private:
|
|||||||
return static_cast<ExplicitProducer*>(token.producer);
|
return static_cast<ExplicitProducer*>(token.producer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If a producer token is created before the constructor of a statically allocated
|
||||||
|
// queue runs (which may happen due to the undefined order of static initialization
|
||||||
|
// across module boundaries), the constructor will orphan it by resetting the
|
||||||
|
// producer list. Such a producer is functional, as producer creation works on the
|
||||||
|
// zero-initialized queue memory, but the consumer is not able to see the data it
|
||||||
|
// enqueues. This method links the producer back into the list.
|
||||||
|
// Links `producer` into the producer list unless it is already reachable from
// producerListTail. Returns true when the producer was added, false when it was
// already present in the list.
bool readopt_orphaned_producer(ExplicitProducer* producer)
{
	auto const target = static_cast<ProducerBase*>(producer);

	// Walk the list from the tail looking for the producer.
	auto cursor = producerListTail.load(std::memory_order_relaxed);
	while (cursor != nullptr) {
		if (cursor == target) return false;
		cursor = cursor->next_prod();
	}

	add_producer(target);
	return true;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
|
|||||||
@@ -492,7 +492,8 @@ enum class GpuContextType : uint8_t
|
|||||||
Metal,
|
Metal,
|
||||||
Custom,
|
Custom,
|
||||||
CUDA,
|
CUDA,
|
||||||
Rocprof
|
Rocprof,
|
||||||
|
WebGPU
|
||||||
};
|
};
|
||||||
|
|
||||||
enum GpuContextFlags : uint8_t
|
enum GpuContextFlags : uint8_t
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ TRACY_API void ___tracy_set_thread_name( const char* name );
|
|||||||
typedef const void* TracyCZoneCtx;
|
typedef const void* TracyCZoneCtx;
|
||||||
|
|
||||||
typedef const void* TracyCLockCtx;
|
typedef const void* TracyCLockCtx;
|
||||||
|
typedef const void* TracyCSharedLockCtx;
|
||||||
|
|
||||||
#define TracyCZone(c,x)
|
#define TracyCZone(c,x)
|
||||||
#define TracyCZoneN(c,x,y)
|
#define TracyCZoneN(c,x,y)
|
||||||
@@ -121,6 +122,20 @@ typedef const void* TracyCLockCtx;
|
|||||||
#define TracyCLockMark(l)
|
#define TracyCLockMark(l)
|
||||||
#define TracyCLockCustomName(l,x,y)
|
#define TracyCLockCustomName(l,x,y)
|
||||||
|
|
||||||
|
#define TracyCSharedLockCtx(l)
|
||||||
|
/* No-op when profiling is disabled. The name must match the TracyCSharedLockAnnounce
   macro defined in the enabled branch, otherwise user code fails to build without TRACY_ENABLE. */
#define TracyCSharedLockAnnounce(l)
|
||||||
|
#define TracyCSharedLockTerminate(l)
|
||||||
|
#define TracyCSharedLockBeforeLock(l)
|
||||||
|
#define TracyCSharedLockAfterLock(l)
|
||||||
|
#define TracyCSharedLockAfterUnlock(l)
|
||||||
|
#define TracyCSharedLockAfterTryLock(l,x)
|
||||||
|
#define TracyCSharedLockBeforeSharedLock(l)
|
||||||
|
#define TracyCSharedLockAfterSharedLock(l)
|
||||||
|
#define TracyCSharedLockAfterSharedUnlock(l)
|
||||||
|
#define TracyCSharedLockAfterTrySharedLock(l,x)
|
||||||
|
#define TracyCSharedLockMark(l)
|
||||||
|
#define TracyCSharedLockCustomName(l,x,y)
|
||||||
|
|
||||||
#define TracyCIsConnected 0
|
#define TracyCIsConnected 0
|
||||||
#define TracyCIsStarted 0
|
#define TracyCIsStarted 0
|
||||||
|
|
||||||
@@ -207,12 +222,14 @@ struct ___tracy_gpu_time_sync_data {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct __tracy_lockable_context_data;
|
struct __tracy_lockable_context_data;
|
||||||
|
struct __tracy_shared_lockable_context_data;
|
||||||
|
|
||||||
// Some containers don't support storing const types.
|
// Some containers don't support storing const types.
|
||||||
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
|
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
|
||||||
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
|
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
|
||||||
|
|
||||||
typedef struct __tracy_lockable_context_data* TracyCLockCtx;
|
typedef struct __tracy_lockable_context_data* TracyCLockCtx;
|
||||||
|
typedef struct __tracy_shared_lockable_context_data* TracyCSharedLockCtx;
|
||||||
|
|
||||||
#ifdef TRACY_MANUAL_LIFETIME
|
#ifdef TRACY_MANUAL_LIFETIME
|
||||||
TRACY_API void ___tracy_startup_profiler(void);
|
TRACY_API void ___tracy_startup_profiler(void);
|
||||||
@@ -367,6 +384,20 @@ TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_con
|
|||||||
TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc );
|
TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc );
|
||||||
TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz );
|
TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz );
|
||||||
|
|
||||||
|
TRACY_API struct __tracy_shared_lockable_context_data* ___tracy_announce_shared_lockable_ctx( const struct ___tracy_source_location_data* srcloc );
|
||||||
|
TRACY_API void ___tracy_terminate_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata );
|
||||||
|
TRACY_API int32_t ___tracy_before_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata );
|
||||||
|
TRACY_API void ___tracy_after_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata );
|
||||||
|
TRACY_API void ___tracy_after_unlock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata );
|
||||||
|
TRACY_API void ___tracy_after_try_lock_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, int32_t acquired );
|
||||||
|
TRACY_API int32_t ___tracy_before_lock_shared_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata );
|
||||||
|
TRACY_API void ___tracy_after_lock_shared_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata );
|
||||||
|
TRACY_API void ___tracy_after_unlock_shared_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata );
|
||||||
|
TRACY_API void ___tracy_after_try_lock_shared_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, int32_t acquired );
|
||||||
|
TRACY_API void ___tracy_mark_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc );
|
||||||
|
TRACY_API void ___tracy_custom_name_shared_lockable_ctx( struct __tracy_shared_lockable_context_data* lockdata, const char* name, size_t nameSz );
|
||||||
|
|
||||||
|
|
||||||
#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) );
|
#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) );
|
||||||
#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( lock );
|
#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( lock );
|
||||||
#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock );
|
#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock );
|
||||||
@@ -376,6 +407,19 @@ TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_contex
|
|||||||
#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) );
|
#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) );
|
||||||
#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz );
|
#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz );
|
||||||
|
|
||||||
|
#define TracyCSharedLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_shared_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) );
|
||||||
|
#define TracyCSharedLockTerminate( lock ) ___tracy_terminate_shared_lockable_ctx( lock );
|
||||||
|
#define TracyCSharedLockBeforeLock( lock ) ___tracy_before_lock_shared_lockable_ctx( lock );
|
||||||
|
#define TracyCSharedLockAfterLock( lock ) ___tracy_after_lock_shared_lockable_ctx( lock );
|
||||||
|
#define TracyCSharedLockAfterUnlock( lock ) ___tracy_after_unlock_shared_lockable_ctx( lock );
|
||||||
|
#define TracyCSharedLockAfterTryLock( lock, acquired ) ___tracy_after_try_lock_shared_lockable_ctx( lock, acquired );
|
||||||
|
#define TracyCSharedLockBeforeSharedLock( lock ) ___tracy_before_lock_shared_shared_lockable_ctx( lock );
|
||||||
|
#define TracyCSharedLockAfterSharedLock( lock ) ___tracy_after_lock_shared_shared_lockable_ctx( lock );
|
||||||
|
#define TracyCSharedLockAfterSharedUnlock( lock ) ___tracy_after_unlock_shared_shared_lockable_ctx( lock );
|
||||||
|
#define TracyCSharedLockAfterTrySharedLock( lock, acquired ) ___tracy_after_try_lock_shared_shared_lockable_ctx( lock, acquired );
|
||||||
|
#define TracyCSharedLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_shared_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) );
|
||||||
|
#define TracyCSharedLockCustomName( lock, name, nameSz ) ___tracy_custom_name_shared_lockable_ctx( lock, name, nameSz );
|
||||||
|
|
||||||
#define TracyCIsConnected ___tracy_connected()
|
#define TracyCIsConnected ___tracy_connected()
|
||||||
|
|
||||||
TRACY_API int ___tracy_begin_sampling_profiling( void );
|
TRACY_API int ___tracy_begin_sampling_profiling( void );
|
||||||
|
|||||||
@@ -34,6 +34,9 @@ public:
|
|||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||||
|
# include <chrono>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "Tracy.hpp"
|
#include "Tracy.hpp"
|
||||||
#include "../client/TracyProfiler.hpp"
|
#include "../client/TracyProfiler.hpp"
|
||||||
@@ -106,6 +109,14 @@ public:
|
|||||||
GLint bits;
|
GLint bits;
|
||||||
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
|
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
|
||||||
|
|
||||||
|
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||||
|
// The anchor above is never refreshed; advertise calibration and emit periodic
|
||||||
|
// GpuCalibration events to correct CPU/GPU drift (see Recalibrate). Opt-in,
|
||||||
|
// because Recalibrate() calls glGetInteger64v( GL_TIMESTAMP ), which forces a
|
||||||
|
// CPU/GPU sync.
|
||||||
|
m_prevCalibration = GetHostTimeNs();
|
||||||
|
#endif
|
||||||
|
|
||||||
const float period = 1.f;
|
const float period = 1.f;
|
||||||
const auto thread = GetThreadHandle();
|
const auto thread = GetThreadHandle();
|
||||||
TracyLfqPrepare( QueueType::GpuNewContext );
|
TracyLfqPrepare( QueueType::GpuNewContext );
|
||||||
@@ -114,7 +125,11 @@ public:
|
|||||||
MemWrite( &item->gpuNewContext.thread, thread );
|
MemWrite( &item->gpuNewContext.thread, thread );
|
||||||
MemWrite( &item->gpuNewContext.period, period );
|
MemWrite( &item->gpuNewContext.period, period );
|
||||||
MemWrite( &item->gpuNewContext.context, m_context );
|
MemWrite( &item->gpuNewContext.context, m_context );
|
||||||
|
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||||
|
MemWrite( &item->gpuNewContext.flags, GpuContextFlags( GpuContextCalibration ) );
|
||||||
|
#else
|
||||||
MemWrite( &item->gpuNewContext.flags, GpuContextFlags( 0 ) );
|
MemWrite( &item->gpuNewContext.flags, GpuContextFlags( 0 ) );
|
||||||
|
#endif
|
||||||
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
||||||
|
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
@@ -143,8 +158,6 @@ public:
|
|||||||
{
|
{
|
||||||
ZoneScopedC( Color::Red4 );
|
ZoneScopedC( Color::Red4 );
|
||||||
|
|
||||||
if( m_tail == m_head ) return;
|
|
||||||
|
|
||||||
#ifdef TRACY_ON_DEMAND
|
#ifdef TRACY_ON_DEMAND
|
||||||
if( !GetProfiler().IsConnected() )
|
if( !GetProfiler().IsConnected() )
|
||||||
{
|
{
|
||||||
@@ -153,6 +166,14 @@ public:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||||
|
// Before the drain's early-returns, so it runs even on frames with no
|
||||||
|
// completed queries.
|
||||||
|
Recalibrate();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if( m_tail == m_head ) return;
|
||||||
|
|
||||||
while( m_tail != m_head )
|
while( m_tail != m_head )
|
||||||
{
|
{
|
||||||
GLint available;
|
GLint available;
|
||||||
@@ -173,6 +194,38 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||||
|
// Monotonic host ns for the inter-calibration interval (cpuDelta), kept
|
||||||
|
// separate from Profiler::GetTime() as in the D3D12/Vulkan backends.
|
||||||
|
// Returns the std::chrono::steady_clock reading, expressed as nanoseconds since the clock's epoch.
static tracy_force_inline int64_t GetHostTimeNs()
{
    const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>( sinceEpoch ).count();
}
|
||||||
|
|
||||||
|
// OpenGL has no atomic CPU+GPU timestamp query, so sample back-to-back; the
|
||||||
|
// gap is negligible against the recalibration interval below. Note this forces
|
||||||
|
// a CPU/GPU sync, which is why the whole path is opt-in (TRACY_OPENGL_AUTO_CALIBRATION).
|
||||||
|
// Emits a GpuCalibration event when at least one second of host time has passed since
// the previous calibration (m_prevCalibration); otherwise returns without touching GL.
// The GPU timestamp is read first and the profiler CPU time immediately afterwards.
tracy_force_inline void Recalibrate()
{
    constexpr int64_t MinIntervalNs = 1000ll * 1000 * 1000;   // throttle: ~once per second

    const int64_t now = GetHostTimeNs();
    const int64_t elapsed = now - m_prevCalibration;
    if( elapsed < MinIntervalNs ) return;

    int64_t gpuTimestamp;
    glGetInteger64v( GL_TIMESTAMP, &gpuTimestamp );
    const int64_t cpuTimestamp = Profiler::GetTime();
    m_prevCalibration = now;

    TracyLfqPrepare( QueueType::GpuCalibration );
    MemWrite( &item->gpuCalibration.gpuTime, gpuTimestamp );
    MemWrite( &item->gpuCalibration.cpuTime, cpuTimestamp );
    MemWrite( &item->gpuCalibration.cpuDelta, elapsed );
    MemWrite( &item->gpuCalibration.context, m_context );
    TracyLfqCommit;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
tracy_force_inline unsigned int NextQueryId()
|
tracy_force_inline unsigned int NextQueryId()
|
||||||
{
|
{
|
||||||
const auto id = m_head;
|
const auto id = m_head;
|
||||||
@@ -196,6 +249,10 @@ private:
|
|||||||
|
|
||||||
unsigned int m_head;
|
unsigned int m_head;
|
||||||
unsigned int m_tail;
|
unsigned int m_tail;
|
||||||
|
|
||||||
|
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||||
|
int64_t m_prevCalibration; // host-ns timestamp of the last emitted calibration
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
class GpuCtxScope
|
class GpuCtxScope
|
||||||
|
|||||||
968
public/tracy/TracyWebGPU.hpp
Normal file
968
public/tracy/TracyWebGPU.hpp
Normal file
@@ -0,0 +1,968 @@
|
|||||||
|
#ifndef __TRACYWEBGPU_HPP__
|
||||||
|
#define __TRACYWEBGPU_HPP__
|
||||||
|
|
||||||
|
// WebGPU, unlike other graphics APIs, has many annoying restrictions that complicate
|
||||||
|
// the design of the Tracy WebGPU back-end:
|
||||||
|
// - there's no CPU/GPU clock calibration API
|
||||||
|
// - submitting GPU commands that touch a buffer that the host is mapping is not permitted
|
||||||
|
// - resolving timestamps require destination offsets aligned to 256 bytes
|
||||||
|
// - timestamps are only available at pass granularity (implementations may need to emulate this)
|
||||||
|
// - spec mandates timestamps to be in nanoseconds (implementations may need to emulate this)
|
||||||
|
|
||||||
|
#ifndef TRACY_ENABLE
|
||||||
|
|
||||||
|
#define TracyWebGPUSetupDeviceDescriptor(deviceDescriptor)
|
||||||
|
|
||||||
|
#define TracyWebGPUContext(instance, device, queue) nullptr
|
||||||
|
#define TracyWebGPUDestroy(ctx)
|
||||||
|
#define TracyWebGPUContextName(ctx, name, size)
|
||||||
|
|
||||||
|
#define TracyWebGPUZone(ctx, encoder, passDesc, name)
|
||||||
|
#define TracyWebGPUZoneC(ctx, encoder, passDesc, name, color)
|
||||||
|
#define TracyWebGPUNamedZone(ctx, varname, encoder, passDesc, name, active)
|
||||||
|
#define TracyWebGPUNamedZoneC(ctx, varname, encoder, passDesc, name, color, active)
|
||||||
|
#define TracyWebGPUZoneTransient(ctx, varname, encoder, passDesc, name, active)
|
||||||
|
|
||||||
|
#define TracyWebGPUZoneS(ctx, encoder, passDesc, name, depth)
|
||||||
|
#define TracyWebGPUZoneCS(ctx, encoder, passDesc, name, color, depth)
|
||||||
|
#define TracyWebGPUNamedZoneS(ctx, varname, encoder, passDesc, name, depth, active)
|
||||||
|
#define TracyWebGPUNamedZoneCS(ctx, varname, encoder, passDesc, name, color, depth, active)
|
||||||
|
#define TracyWebGPUZoneTransientS(ctx, varname, encoder, passDesc, name, depth, active)
|
||||||
|
|
||||||
|
#define TracyWebGPUCollect(ctx)
|
||||||
|
|
||||||
|
namespace tracy
|
||||||
|
{
|
||||||
|
class WebGPUZoneScope {};
|
||||||
|
}
|
||||||
|
|
||||||
|
using TracyWebGPUCtx = void*;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#include "Tracy.hpp"
|
||||||
|
#include "../client/TracyProfiler.hpp"
|
||||||
|
#include "../client/TracyCallstack.hpp"
|
||||||
|
#include "../common/TracyAlign.hpp"
|
||||||
|
#include "../common/TracyAlloc.hpp"
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <mutex>
|
||||||
|
#include <vector>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cassert>
|
||||||
|
#include <chrono>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#include <webgpu/webgpu.h>
|
||||||
|
|
||||||
|
// piggy-back on WGPU_DAWN_TOGGLES_DESCRIPTOR_INIT to detect Dawn header
|
||||||
|
#ifdef WGPU_DAWN_TOGGLES_DESCRIPTOR_INIT
|
||||||
|
#define TRACY_WEBGPU_DAWN_NATIVE (1)
|
||||||
|
#include <dawn/native/DawnNative.h>
|
||||||
|
#else
|
||||||
|
#define TRACY_WEBGPU_WGPU_NATIVE (1)
|
||||||
|
#include <webgpu/wgpu.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef TRACY_WEBGPU_DEBUG_LEVEL
|
||||||
|
#define TRACY_WEBGPU_DEBUG_LEVEL (0)
|
||||||
|
#endif//TRACY_WEBGPU_DEBUG_LEVEL
|
||||||
|
|
||||||
|
#if TRACY_WEBGPU_DEBUG_LEVEL
|
||||||
|
#define TracyWebGPUDebug(...) __VA_ARGS__;
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
extern "C" int32_t IsDebuggerPresent(void);
|
||||||
|
#define TracyWebGPUBreak() if (IsDebuggerPresent()) __debugbreak()
|
||||||
|
#else
|
||||||
|
#define TracyWebGPUBreak() ((void)0)
|
||||||
|
#endif
|
||||||
|
#define TracyWebGPUAssert(predicate, ...) if (predicate) {} else { __VA_ARGS__; TracyWebGPUBreak(); }
|
||||||
|
#else
|
||||||
|
#define TracyWebGPUDebug(...)
|
||||||
|
#define TracyWebGPUBreak()
|
||||||
|
#define TracyWebGPUAssert(predicate, ...) assert(predicate);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define TracyWebGPULog(severity, msg) fprintf(stdout, "%s", msg), tracy::Profiler::LogString( tracy::MessageSourceType::Tracy, tracy::MessageSeverity::severity, tracy::Color::Red4, 0, msg );
|
||||||
|
#define TracyWebGPUPanic(msg, ...) do { TracyWebGPULog(Error, msg); TracyWebGPUAssert(false && "TracyWebGPU: " msg); __VA_ARGS__; } while(false);
|
||||||
|
|
||||||
|
namespace tracy
|
||||||
|
{
|
||||||
|
|
||||||
|
class WebGPUQueueCtx
|
||||||
|
{
|
||||||
|
friend class WebGPUZoneScope;
|
||||||
|
|
||||||
|
uint8_t m_contextId = 255; // 255 represents "invalid id"
|
||||||
|
|
||||||
|
std::mutex m_collectionMutex;
|
||||||
|
|
||||||
|
WGPUInstance m_instance = nullptr;
|
||||||
|
WGPUDevice m_device = nullptr;
|
||||||
|
WGPUQueue m_queue = nullptr;
|
||||||
|
|
||||||
|
struct ReadbackStage
|
||||||
|
{
|
||||||
|
WGPUBuffer buffer = nullptr;
|
||||||
|
std::atomic<uint64_t> copiedUpto {0};
|
||||||
|
std::atomic<WGPUMapAsyncStatus> mapStatus = {};
|
||||||
|
WGPUFuture pendingFuture = {};
|
||||||
|
};
|
||||||
|
static_assert(std::atomic<WGPUMapAsyncStatus>::is_always_lock_free, "WGPUMapAsyncStatus must be lock-free atomic");
|
||||||
|
|
||||||
|
WGPUQuerySet m_querySet = nullptr;
|
||||||
|
WGPUBuffer m_resolveBuffer = nullptr;
|
||||||
|
ReadbackStage m_readbackReel [3];
|
||||||
|
std::atomic<int> m_writeIdx {0};
|
||||||
|
|
||||||
|
using atomic_counter = std::atomic<uint64_t>;
|
||||||
|
atomic_counter m_queryCounter = 0;
|
||||||
|
atomic_counter m_previousCheckpoint = 0;
|
||||||
|
|
||||||
|
uint32_t m_queryLimit = 0;
|
||||||
|
|
||||||
|
std::vector<uint64_t> m_shadowBuffer;
|
||||||
|
|
||||||
|
using WallTime = std::chrono::steady_clock::time_point;
|
||||||
|
static tracy_force_inline auto GetWallTime() { return WallTime::clock::now(); }
|
||||||
|
static tracy_force_inline auto Milliseconds(int value) { return std::chrono::milliseconds(value); }
|
||||||
|
|
||||||
|
// Registers a work-done callback on `queue` and pumps `instance` events until the
// callback fires or 2000 ms have elapsed. Returns true when the callback fired in time.
// NOTE(review): the callback writes to a local variable; if the wait times out and the
// callback is delivered by a later wgpuInstanceProcessEvents call, that write would
// target a dead stack slot - confirm whether this can happen.
static bool WaitQueueIdle(WGPUQueue queue, WGPUInstance instance)
{
    bool finished = false;

    WGPUQueueWorkDoneCallbackInfo callbackInfo = {};
    callbackInfo.mode = WGPUCallbackMode_AllowProcessEvents;
    callbackInfo.userdata1 = &finished;
    callbackInfo.callback = [](WGPUQueueWorkDoneStatus, WGPUStringView, void* flag, void*) {
        *static_cast<bool*>(flag) = true;
    };
    wgpuQueueOnSubmittedWorkDone(queue, callbackInfo);

    const auto giveUpAt = GetWallTime() + Milliseconds(2000);
    while (!finished)
    {
        if (!(GetWallTime() < giveUpAt)) break;
        wgpuInstanceProcessEvents(instance);
    }
    return finished;
}
|
||||||
|
|
||||||
|
// Requests a read mapping of the first two uint64_t values of `buffer` and pumps
// `instance` events until the map callback reports a status or 2000 ms have elapsed.
// Returns the mapped range on success, nullptr on failure or timeout.
// NOTE(review): the callback writes to a local variable; if the wait times out and the
// callback is delivered later, that write would target a dead stack slot - confirm.
static const uint64_t* MapBufferSync(WGPUBuffer buffer, WGPUInstance instance)
{
    const size_t mapOffset = 0;
    const size_t mapSize = 2 * sizeof(uint64_t);

    WGPUMapAsyncStatus mapStatus = {};

    WGPUBufferMapCallbackInfo callbackInfo = {};
    callbackInfo.mode = WGPUCallbackMode_AllowProcessEvents;
    callbackInfo.userdata1 = &mapStatus;
    callbackInfo.callback = [](WGPUMapAsyncStatus status, WGPUStringView, void* out, void*) {
        *static_cast<WGPUMapAsyncStatus*>(out) = status;
    };
    wgpuBufferMapAsync(buffer, WGPUMapMode_Read, mapOffset, mapSize, callbackInfo);

    const auto giveUpAt = GetWallTime() + Milliseconds(2000);
    while (mapStatus == 0)
    {
        if (!(GetWallTime() < giveUpAt)) break;
        wgpuInstanceProcessEvents(instance);
    }

    if (mapStatus != WGPUMapAsyncStatus_Success) return nullptr;
    return static_cast<const uint64_t*>(wgpuBufferGetConstMappedRange(buffer, mapOffset, mapSize));
}
|
||||||
|
|
||||||
|
// Accumulated state of the CPU/GPU clock calibration.
//
// Each accepted sample feeds two incremental linear regressions that map
// CPU ticks and wall-clock nanoseconds to GPU ticks. A third regression tracks
// the width of the CPU sampling interval and is used to reject outliers.
struct Calibration {
    // Initialized to the largest int64_t value; not referenced inside this struct.
    int64_t minCpuRange = ~uint64_t(0) >> 1;

    // Incremental (one sample at a time) linear regression of y over x.
    // All accumulators are integers, so the running means are updated with
    // truncating integer division.
    struct Regression
    {
        int64_t n = 0;          // number of samples seen
        int64_t mean_x = 0;     // running mean of x
        int64_t mean_y = 0;     // running mean of y
        int64_t S_xx = 0;       // running sum of dx * (x - mean_x)
        int64_t S_xy = 0;       // running sum of dx * (y - mean_y)

        // Folds one (x, y) sample into the running means and sums.
        void Update(int64_t x, int64_t y)
        {
            n += 1;
            int64_t dx = x - mean_x;
            int64_t dy = y - mean_y;
            mean_x += dx / n;
            mean_y += dy / n;
            // note: these use the *updated* means
            S_xx += dx * (x - mean_x);
            S_xy += dx * (y - mean_y);
        }

        // Slope of the fitted line. Not finite while S_xx is zero.
        double Slope() const { return double(S_xy) / S_xx; }

        // Value of the fitted line at x == 0.
        double Intercept() const { return mean_y - Slope() * mean_x; }
    };

    Regression cpuToGpuModel;   // cpu-ticks to gpu-ticks
    Regression cpuRangeModel;   // cpu-tick interval uncertainty
    Regression wallToGpuModel;  // nanoseconds to gpu-ticks

    // Returns a (cpuTime, gpuTime) pair to be used as the correlation anchor.
    void GetReferenceTime(uint64_t& cpuTime, uint64_t& gpuTime) const
    {
        // the mean belongs to the regression line
        cpuTime = cpuToGpuModel.mean_x;
        gpuTime = cpuToGpuModel.mean_y;
    }

    // Estimated GPU tick period: inverse of the wall-to-gpu slope.
    double Period() const { return 1.0 / wallToGpuModel.Slope(); }  // ns/tick

    // Outlier test on x against the samples already folded into `r`.
    // Accepts x when fewer than two samples exist, when x is not above the
    // mean, when the sample variance is zero, or when the squared z-score of x
    // does not exceed threshold^2.
    bool AcceptX(const Regression& r, int64_t x, double threshold = 3.0) const {
        if (r.n < 2) return true;
        auto dx = x - r.mean_x;
        if (dx <= 0) return true;   // always accept "tighter" outliers
        double variance = double(r.S_xx) / (r.n - 1);
        if (variance == 0.0) return true;
        // Square in floating point: the integer product dx*dx overflows for
        // large deviations, which made huge outliers pass the test.
        const double deviation = static_cast<double>(dx);
        double zz = (deviation * deviation) / variance;
        return zz <= (threshold*threshold);
    }

    // Feeds one calibration sample: the wall-clock and CPU-tick readings taken
    // right before (0) and right after (1) the GPU work, plus the GPU timestamp.
    // Returns false if the sample was rejected because its CPU interval is an
    // outlier; true if it was folded into the models.
    bool Update(WallTime twall0, WallTime twall1, uint64_t tcpu0, uint64_t tcpu1, uint64_t tgpu)
    {
        using namespace std::chrono;
        int64_t cpuRange = tcpu1 - tcpu0;
        // the interval width is recorded before it is tested
        cpuRangeModel.Update(cpuRange, 0);
        if (!AcceptX(cpuRangeModel, cpuRange, 1.0)) return false;
        // Process sample:
        int64_t tcpu = tcpu0 + (tcpu1 - tcpu0) / 2;     // mid-point
        int64_t twall = duration_cast<nanoseconds>(
            (twall0 + (twall1 - twall0) / 2)            // mid-point
            .time_since_epoch()
        ).count();
        // incremental regression:
        cpuToGpuModel.Update(tcpu, tgpu);
        wallToGpuModel.Update(twall, tgpu);
        TracyWebGPUDebug( fprintf(stderr, "----- (sample accepted! wall = %lld | cpu = %lld | gpu = %lld | period = %f)\n", twall, tcpu, tgpu, Period()) );
        return true;
    }
} m_calibration;
|
||||||
|
|
||||||
|
// Commits a serial-queue item that was obtained from the profiler.
// In TRACY_ON_DEMAND builds the item is additionally passed to
// Profiler::DeferItem() before the serial queue is finished.
tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item)
{
#ifdef TRACY_ON_DEMAND
    auto& profiler = GetProfiler();
    profiler.DeferItem(*item);
#endif
    Profiler::QueueSerialFinish();
}
|
||||||
|
|
||||||
|
bool CalibrateClocks(uint64_t& outCpuTime, uint64_t& outGpuTime, double& period)
|
||||||
|
{
|
||||||
|
// WebGPU does not have any clock calibration API.
|
||||||
|
// This routine attempts to estimates a reasonable (cpuTime, gpuTime) correlation
|
||||||
|
// by sampling CPU and GPU timestamps around a "synchronous" draw call.
|
||||||
|
// Several samples are taken to tighten the estimation.
|
||||||
|
|
||||||
|
ZoneScoped;
|
||||||
|
|
||||||
|
WGPUShaderSourceWGSL wgslSrc = {};
|
||||||
|
wgslSrc.chain.sType = WGPUSType_ShaderSourceWGSL;
|
||||||
|
wgslSrc.code =
|
||||||
|
{
|
||||||
|
R"(
|
||||||
|
@vertex fn vs(@builtin(vertex_index) i: u32) -> @builtin(position) vec4f {
|
||||||
|
var p = array(vec4f(-1,-1,.5,1), vec4f(3,-1,.5,1), vec4f(-1,3,.5,1));
|
||||||
|
return p[i];
|
||||||
|
}
|
||||||
|
@fragment fn fs() -> @location(0) vec4f { return vec4f(0.0); }
|
||||||
|
)",
|
||||||
|
WGPU_STRLEN
|
||||||
|
};
|
||||||
|
WGPUShaderModuleDescriptor smDesc = {};
|
||||||
|
smDesc.nextInChain = reinterpret_cast<WGPUChainedStruct*>(&wgslSrc);
|
||||||
|
WGPUShaderModule calibShader = wgpuDeviceCreateShaderModule(m_device, &smDesc);
|
||||||
|
if (!calibShader) { TracyWebGPUPanic("Failed to create calibration shader.", return false); }
|
||||||
|
|
||||||
|
WGPUTextureDescriptor texDesc = {};
|
||||||
|
texDesc.usage = WGPUTextureUsage_RenderAttachment;
|
||||||
|
texDesc.dimension = WGPUTextureDimension_2D;
|
||||||
|
texDesc.size = { 1, 1, 1 };
|
||||||
|
texDesc.format = WGPUTextureFormat_BGRA8Unorm;
|
||||||
|
texDesc.mipLevelCount = 1;
|
||||||
|
texDesc.sampleCount = 1;
|
||||||
|
WGPUTexture tex = wgpuDeviceCreateTexture(m_device, &texDesc);
|
||||||
|
if (!tex) { wgpuShaderModuleRelease(calibShader); TracyWebGPUPanic("Failed to create calibration scratch texture.", return false); }
|
||||||
|
WGPUTextureView texView = wgpuTextureCreateView(tex, nullptr);
|
||||||
|
if (!texView) { wgpuTextureRelease(tex); wgpuShaderModuleRelease(calibShader); TracyWebGPUPanic("Failed to create calibration scratch texture view.", return false); }
|
||||||
|
|
||||||
|
WGPUColorTargetState colorTarget = {};
|
||||||
|
colorTarget.format = WGPUTextureFormat_BGRA8Unorm;
|
||||||
|
colorTarget.writeMask = WGPUColorWriteMask_All;
|
||||||
|
WGPUFragmentState fragState = {};
|
||||||
|
fragState.module = calibShader;
|
||||||
|
fragState.entryPoint = { "fs", WGPU_STRLEN };
|
||||||
|
fragState.targetCount = 1;
|
||||||
|
fragState.targets = &colorTarget;
|
||||||
|
WGPURenderPipelineDescriptor pipeDesc = {};
|
||||||
|
pipeDesc.vertex.module = calibShader;
|
||||||
|
pipeDesc.vertex.entryPoint = { "vs", WGPU_STRLEN };
|
||||||
|
pipeDesc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
|
||||||
|
pipeDesc.multisample.count = 1;
|
||||||
|
pipeDesc.fragment = &fragState;
|
||||||
|
WGPURenderPipeline calibPipeline = wgpuDeviceCreateRenderPipeline(m_device, &pipeDesc);
|
||||||
|
if (!calibPipeline) { wgpuTextureViewRelease(texView); wgpuTextureRelease(tex); wgpuShaderModuleRelease(calibShader); TracyWebGPUPanic("Failed to create calibration pipeline.", return false); }
|
||||||
|
|
||||||
|
uint32_t queryId = 0;
|
||||||
|
WGPUPassTimestampWrites anchorTs = {};
|
||||||
|
anchorTs.querySet = m_querySet;
|
||||||
|
anchorTs.beginningOfPassWriteIndex = queryId;
|
||||||
|
anchorTs.endOfPassWriteIndex = queryId+1;
|
||||||
|
|
||||||
|
WGPURenderPassColorAttachment att = {};
|
||||||
|
att.view = texView;
|
||||||
|
att.loadOp = WGPULoadOp_Clear;
|
||||||
|
att.storeOp = WGPUStoreOp_Store;
|
||||||
|
att.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
|
||||||
|
|
||||||
|
WGPURenderPassDescriptor passDesc = {};
|
||||||
|
passDesc.colorAttachmentCount = 1;
|
||||||
|
passDesc.colorAttachments = &att;
|
||||||
|
passDesc.timestampWrites = &anchorTs;
|
||||||
|
|
||||||
|
// calibration loop
|
||||||
|
const auto deadline = GetWallTime() + Milliseconds(100);
|
||||||
|
for (int i = 0; i < 1000; ++i)
|
||||||
|
{
|
||||||
|
// loop until time budget (100ms) allows, but ensure at least 5 iterations
|
||||||
|
if ((GetWallTime() >= deadline) && (i > 5))
|
||||||
|
break;
|
||||||
|
|
||||||
|
WGPUCommandEncoder enc = wgpuDeviceCreateCommandEncoder(m_device, nullptr);
|
||||||
|
if (!enc) { TracyWebGPUPanic("Failed to create command encoder for time calibration.", return false); }
|
||||||
|
|
||||||
|
WGPURenderPassEncoder pass = wgpuCommandEncoderBeginRenderPass(enc, &passDesc);
|
||||||
|
wgpuRenderPassEncoderSetPipeline(pass, calibPipeline);
|
||||||
|
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
|
||||||
|
wgpuRenderPassEncoderEnd(pass);
|
||||||
|
wgpuRenderPassEncoderRelease(pass);
|
||||||
|
|
||||||
|
WGPUBuffer readBackBuffer = m_readbackReel[0].buffer;
|
||||||
|
uint32_t byteOffset = queryId * sizeof(uint64_t);
|
||||||
|
uint32_t sizeInBytes = 2 * sizeof(uint64_t);
|
||||||
|
wgpuCommandEncoderResolveQuerySet(enc, m_querySet, queryId, 2, m_resolveBuffer, byteOffset);
|
||||||
|
wgpuCommandEncoderCopyBufferToBuffer(enc, m_resolveBuffer, byteOffset, readBackBuffer, byteOffset, sizeInBytes);
|
||||||
|
|
||||||
|
WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(enc, nullptr);
|
||||||
|
wgpuCommandEncoderRelease(enc);
|
||||||
|
if (!cmd) { TracyWebGPUPanic("Failed to finish calibration command encoder.", return false); }
|
||||||
|
|
||||||
|
WaitQueueIdle(m_queue, m_instance);
|
||||||
|
int64_t cpu [2] = {};
|
||||||
|
int64_t gpu [2] = {};
|
||||||
|
WallTime wall [2] = {};
|
||||||
|
cpu[0] = Profiler::GetTime();
|
||||||
|
wall[0] = GetWallTime();
|
||||||
|
wgpuQueueSubmit(m_queue, 1, &cmd);
|
||||||
|
wgpuCommandBufferRelease(cmd);
|
||||||
|
WaitQueueIdle(m_queue, m_instance);
|
||||||
|
wall[1] = GetWallTime();
|
||||||
|
cpu[1] = Profiler::GetTime();
|
||||||
|
auto gpuTimestamps = MapBufferSync(readBackBuffer, m_instance);
|
||||||
|
TracyWebGPUAssert(gpuTimestamps != nullptr);
|
||||||
|
gpu[0] = gpuTimestamps[0];
|
||||||
|
gpu[1] = gpuTimestamps[1];
|
||||||
|
wgpuBufferUnmap(readBackBuffer);
|
||||||
|
TracyWebGPUDebug(
|
||||||
|
fprintf(stdout, "[%03d] CalibrateClocks() [CPU] %16lld | %16lld | /// %lld\n", i, cpu[0], cpu[1], cpu[1]-cpu[0]);
|
||||||
|
fprintf(stdout, "----------------------- [GPU] %16llu | %16llu | /// %lld\n", gpu[0], gpu[1], gpu[1]-gpu[0]);
|
||||||
|
uint64_t cpuTimeRef, gpuTimeRef;
|
||||||
|
m_calibration.GetReferenceTime(cpuTimeRef, gpuTimeRef);
|
||||||
|
if (gpu[0] < gpuTimeRef)
|
||||||
|
fprintf(stdout, "!!!!! CalibrateClocks() -> WARNING!!! going backwards!\n%llu\n%llu\n%lld\n", gpuTimeRef, gpu[0], gpu[0] - gpuTimeRef);
|
||||||
|
);
|
||||||
|
|
||||||
|
// skip first sample since it is quite jittery (lazy intialization of WebGPU objects)
|
||||||
|
if (i == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
m_calibration.Update(wall[0], wall[1], cpu[0], cpu[1], gpu[0]);
|
||||||
|
};
|
||||||
|
|
||||||
|
TracyWebGPUDebug(
|
||||||
|
fprintf(stdout, "##### CalibrateClocks() WALL = %lld | CPU = %lld | GPU = %lld | period = %f\n",
|
||||||
|
m_calibration.wallToGpuModel.mean_x,
|
||||||
|
m_calibration.cpuToGpuModel.mean_x,
|
||||||
|
m_calibration.cpuToGpuModel.mean_y,
|
||||||
|
m_calibration.Period());
|
||||||
|
);
|
||||||
|
|
||||||
|
wgpuRenderPipelineRelease(calibPipeline);
|
||||||
|
wgpuShaderModuleRelease(calibShader);
|
||||||
|
wgpuTextureViewRelease(texView);
|
||||||
|
wgpuTextureRelease(tex);
|
||||||
|
|
||||||
|
m_calibration.GetReferenceTime(outCpuTime, outGpuTime);
|
||||||
|
period = m_calibration.Period();
|
||||||
|
// assume 1 ns/tick if the period estimation is close enough to 1
|
||||||
|
if (std::abs(period - 1.0) < 0.001)
|
||||||
|
period = 1.0;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
class Requirements
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
# if (TRACY_WEBGPU_DAWN_NATIVE)
|
||||||
|
WGPUDawnTogglesDescriptor dawnTogglesDesc = {};
|
||||||
|
static constexpr int NumExtras = 0;
|
||||||
|
# elif (TRACY_WEBGPU_WGPU_NATIVE)
|
||||||
|
static constexpr int NumExtras = 1;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
public:
|
||||||
|
static constexpr int NumFeatures = 1 + NumExtras;
|
||||||
|
WGPUFeatureName features [NumFeatures] = {};
|
||||||
|
WGPUChainedStruct* togglesDesc = nullptr;
|
||||||
|
|
||||||
|
Requirements()
|
||||||
|
{
|
||||||
|
this->features[0] = WGPUFeatureName_TimestampQuery;
|
||||||
|
# if (TRACY_WEBGPU_WGPU_NATIVE)
|
||||||
|
this->features[1] = (WGPUFeatureName)WGPUNativeFeature_TimestampQueryInsideEncoders;
|
||||||
|
# endif
|
||||||
|
# if (TRACY_WEBGPU_DAWN_NATIVE)
|
||||||
|
static const char* dawnDisabledToggles[] = { "timestamp_quantization" };
|
||||||
|
static const char* dawnEnabledToggles[] = { "disable_timestamp_query_conversion" };
|
||||||
|
this->dawnTogglesDesc.chain.sType = WGPUSType_DawnTogglesDescriptor;
|
||||||
|
this->dawnTogglesDesc.disabledToggles = dawnDisabledToggles;
|
||||||
|
this->dawnTogglesDesc.disabledToggleCount = 1;
|
||||||
|
this->dawnTogglesDesc.enabledToggles = dawnEnabledToggles;
|
||||||
|
this->dawnTogglesDesc.enabledToggleCount = 1;
|
||||||
|
this->togglesDesc = reinterpret_cast<WGPUChainedStruct*>(&this->dawnTogglesDesc);
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool VerifyDevice(WGPUDevice device)
|
||||||
|
{
|
||||||
|
if (device == nullptr)
|
||||||
|
return false;
|
||||||
|
if (wgpuDeviceHasFeature(device, WGPUFeatureName_TimestampQuery) == WGPU_FALSE)
|
||||||
|
return false;
|
||||||
|
# if (TRACY_WEBGPU_DAWN_NATIVE)
|
||||||
|
bool hasDisableConversion = false, hasQuantization = false;
|
||||||
|
for (const char* t : ::dawn::native::GetTogglesUsed(device))
|
||||||
|
{
|
||||||
|
if (strcmp(t, "disable_timestamp_query_conversion") == 0)
|
||||||
|
hasDisableConversion = true;
|
||||||
|
if (strcmp(t, "timestamp_quantization") == 0)
|
||||||
|
hasQuantization = true;
|
||||||
|
}
|
||||||
|
return hasDisableConversion && !hasQuantization;
|
||||||
|
# elif (TRACY_WEBGPU_WGPU_NATIVE)
|
||||||
|
if (wgpuDeviceHasFeature(device, (WGPUFeatureName)WGPUNativeFeature_TimestampQueryInsideEncoders) == WGPU_FALSE)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
# endif
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ApplyToDeviceDescriptor(WGPUDeviceDescriptor& deviceDescriptor)
|
||||||
|
{
|
||||||
|
size_t userCount = deviceDescriptor.requiredFeatureCount;
|
||||||
|
size_t totalCount = userCount + NumFeatures;
|
||||||
|
// NOTE: this allocation will leak...
|
||||||
|
auto* mergedFeatures = static_cast<WGPUFeatureName*>(tracy_malloc(totalCount * sizeof(WGPUFeatureName)));
|
||||||
|
if (userCount > 0 && deviceDescriptor.requiredFeatures)
|
||||||
|
memcpy(mergedFeatures, deviceDescriptor.requiredFeatures, userCount * sizeof(WGPUFeatureName));
|
||||||
|
memcpy(mergedFeatures + userCount, features, NumFeatures * sizeof(WGPUFeatureName));
|
||||||
|
deviceDescriptor.requiredFeatures = mergedFeatures;
|
||||||
|
deviceDescriptor.requiredFeatureCount = totalCount;
|
||||||
|
|
||||||
|
if (togglesDesc)
|
||||||
|
{
|
||||||
|
togglesDesc->next = deviceDescriptor.nextInChain;
|
||||||
|
deviceDescriptor.nextInChain = togglesDesc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sets up GPU profiling for the given instance/device/queue:
// verifies the device, takes references on the three handles, creates the
// timestamp query set plus the resolve and readback buffers, calibrates the
// CPU/GPU clocks and finally announces the new GPU context to the profiler.
// On any failure the constructor reports it through TracyWebGPUPanic and
// returns early, before the context is announced.
WebGPUQueueCtx(WGPUInstance instance, WGPUDevice device, WGPUQueue queue)
{
    ZoneScopedC(Color::Red4);

    const bool deviceIsSuitable = Requirements::VerifyDevice(device);
    if (!deviceIsSuitable)
    {
        TracyWebGPUPanic("GPU profiling disabled because the device did not enable the necessary features.", return);
    }

    // Keep the handles alive for as long as this context exists.
    TracyWebGPUAssert(instance);
    wgpuInstanceAddRef(instance);
    m_instance = instance;

    TracyWebGPUAssert(device);
    wgpuDeviceAddRef(device);
    m_device = device;

    TracyWebGPUAssert(queue);
    wgpuQueueAddRef(queue);
    m_queue = queue;

    // Setup Query Set: must have even size since queries are issued in pairs.
    // (The WebGPU spec mandates 4096, with no way to query the device limit.)
    // If creation fails, retry with half the size, down to 128 queries.
    WGPUQuerySetDescriptor qsDesc = {};
    qsDesc.type = WGPUQueryType_Timestamp;
    qsDesc.count = 4096;
    do
    {
        m_querySet = wgpuDeviceCreateQuerySet(m_device, &qsDesc);
        if (m_querySet) break;
        qsDesc.count /= 2;
    }
    while (qsDesc.count >= 128);
    if (m_querySet == nullptr)
    {
        TracyWebGPUPanic("Failed to create timestamp query set.", return);
    }
    m_queryLimit = qsDesc.count;

    // One 64-bit timestamp per query, for the resolve and each readback buffer.
    const uint64_t timestampBytes = static_cast<uint64_t>(m_queryLimit) * sizeof(uint64_t);

    WGPUBufferDescriptor resolveDesc = {};
    resolveDesc.usage = WGPUBufferUsage_QueryResolve | WGPUBufferUsage_CopySrc;
    resolveDesc.size = timestampBytes;
    m_resolveBuffer = wgpuDeviceCreateBuffer(m_device, &resolveDesc);
    if (!m_resolveBuffer)
    {
        TracyWebGPUPanic("Failed to create timestamp resolve buffer.", return);
    }

    WGPUBufferDescriptor readbackDesc = {};
    readbackDesc.usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead;
    readbackDesc.size = timestampBytes;
    for (auto& stage : m_readbackReel)
    {
        stage.buffer = wgpuDeviceCreateBuffer(m_device, &readbackDesc);
        stage.copiedUpto = 0;
        if (!stage.buffer) { TracyWebGPUPanic("Failed to create timestamp readback buffer.", return); }
    }

    uint64_t cpuTimestamp = 0;
    uint64_t gpuTimestamp = 0;
    double period = 0.0;    // in nanoseconds per gpu-tick
    const bool calibrated = CalibrateClocks(cpuTimestamp, gpuTimestamp, period);
    if (!calibrated)
    {
        TracyWebGPUPanic("Failed to calibrate CPU/GPU clocks.", return);
    }

    TracyWebGPUDebug( fprintf(stdout, "[WebGPUQueueCtx] cpuTimestamp: %llu | gpuTimestamp: %llu | period: %f\n", cpuTimestamp, gpuTimestamp, period) );

    // Every shadow slot starts out at the calibration GPU timestamp.
    m_shadowBuffer.resize(m_queryLimit, gpuTimestamp);

    // All setup completed: register the context.
    m_contextId = GetGpuCtxCounter().fetch_add(1);
    ZoneValue(m_contextId);

    auto* newContextItem = Profiler::QueueSerial();
    MemWrite(&newContextItem->hdr.type, QueueType::GpuNewContext);
    MemWrite(&newContextItem->gpuNewContext.cpuTime, static_cast<int64_t>(cpuTimestamp));
    MemWrite(&newContextItem->gpuNewContext.gpuTime, static_cast<int64_t>(gpuTimestamp));
    MemWrite(&newContextItem->gpuNewContext.thread, static_cast<uint32_t>(0));
    MemWrite(&newContextItem->gpuNewContext.period, static_cast<float>(period));
    MemWrite(&newContextItem->gpuNewContext.context, static_cast<uint8_t>(GetId()));
    MemWrite(&newContextItem->gpuNewContext.flags, GpuContextFlags(0));    // no calibration available
    MemWrite(&newContextItem->gpuNewContext.type, GpuContextType::WebGPU);
    SubmitQueueItem(newContextItem);
}
|
||||||
|
|
||||||
|
// Runs one last Collect() and then releases every WebGPU object held by the
// context, resetting each handle to null.
~WebGPUQueueCtx()
{
    // TODO: a few problems to address later during this final Collect():
    // 1. ensure "partial" query batches are collected
    // 2. ensure all readback stages are collected and empty
    // 3. ensure readback buffers are not mapped before deleting them
    Collect();

    for (auto& stage : m_readbackReel)
    {
        if (!stage.buffer) continue;
        wgpuBufferRelease(stage.buffer);
        stage.buffer = nullptr;
    }

    if (m_resolveBuffer)
    {
        wgpuBufferRelease(m_resolveBuffer);
        m_resolveBuffer = nullptr;
    }
    if (m_querySet)
    {
        wgpuQuerySetRelease(m_querySet);
        m_querySet = nullptr;
    }
    if (m_queue)
    {
        wgpuQueueRelease(m_queue);
        m_queue = nullptr;
    }
    if (m_device)
    {
        wgpuDeviceRelease(m_device);
        m_device = nullptr;
    }
    if (m_instance)
    {
        wgpuInstanceRelease(m_instance);
        m_instance = nullptr;
    }
}
|
||||||
|
|
||||||
|
// Returns the identifier stored in m_contextId for this GPU context.
tracy_force_inline uint8_t GetId() const
{
    const uint8_t contextId = m_contextId;
    return contextId;
}
|
||||||
|
|
||||||
|
void Name(const char* name, uint16_t len)
|
||||||
|
{
|
||||||
|
auto ptr = (char*)tracy_malloc(len);
|
||||||
|
memcpy(ptr, name, len);
|
||||||
|
|
||||||
|
auto item = Profiler::QueueSerial();
|
||||||
|
MemWrite(&item->hdr.type, QueueType::GpuContextName);
|
||||||
|
MemWrite(&item->gpuContextNameFat.context, GetId());
|
||||||
|
MemWrite(&item->gpuContextNameFat.ptr, (uint64_t)ptr);
|
||||||
|
MemWrite(&item->gpuContextNameFat.size, len);
|
||||||
|
SubmitQueueItem(item);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Collect(bool webgpuProcessEvents=false)
|
||||||
|
{
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
if (!GetProfiler().IsConnected()) return;
|
||||||
|
#endif
|
||||||
|
if (!m_collectionMutex.try_lock()) return;
|
||||||
|
std::unique_lock<std::mutex> lock(m_collectionMutex, std::adopt_lock);
|
||||||
|
|
||||||
|
ZoneScopedC(Color::Red4);
|
||||||
|
|
||||||
|
if (Distance(m_previousCheckpoint, m_queryCounter) <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Current Readback "Reel" Stages:
|
||||||
|
const int state = m_writeIdx;
|
||||||
|
const int fillingIdx = (state + 0) % 3; // this is where instrumentation is pushing new queries
|
||||||
|
const int pendingIdx = (state + 1) % 3; // instrumentation is done here; ready to be collected
|
||||||
|
const int collectIdx = (state + 2) % 3; // this is where queries are being collected right now
|
||||||
|
|
||||||
|
// Ensure readback buffer has been mapped to the host
|
||||||
|
auto& collectStage = m_readbackReel[collectIdx];
|
||||||
|
if (collectStage.pendingFuture.id != 0)
|
||||||
|
{
|
||||||
|
if (webgpuProcessEvents)
|
||||||
|
wgpuInstanceProcessEvents(m_instance);
|
||||||
|
if (collectStage.mapStatus == WGPUMapAsyncStatus{})
|
||||||
|
return; // callback hasn't fired yet
|
||||||
|
collectStage.pendingFuture = {};
|
||||||
|
if (collectStage.mapStatus != WGPUMapAsyncStatus_Success)
|
||||||
|
TracyWebGPUPanic("Colect(): unable to map readback buffer.", return);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (collectStage.mapStatus == WGPUMapAsyncStatus_Success)
|
||||||
|
{
|
||||||
|
const uint64_t* ts = static_cast<const uint64_t*>(
|
||||||
|
wgpuBufferGetConstMappedRange(collectStage.buffer, 0,
|
||||||
|
static_cast<uint64_t>(m_queryLimit) * sizeof(uint64_t)));
|
||||||
|
if (ts)
|
||||||
|
{
|
||||||
|
uint64_t ticket = m_previousCheckpoint;
|
||||||
|
const uint64_t end = collectStage.copiedUpto;
|
||||||
|
TracyWebGPUDebug( fprintf(stdout, "[TWG] Collect [%d] (%llu, %llu)\n", collectIdx, ticket, end) );
|
||||||
|
for (; Distance(ticket, end) > 0; ticket += 2)
|
||||||
|
{
|
||||||
|
const uint32_t slotB = RingIndex(ticket);
|
||||||
|
const uint32_t slotE = slotB + 1;
|
||||||
|
TracyWebGPUDebug(
|
||||||
|
fprintf(stderr,
|
||||||
|
"[TWG] slot B=%4u E=%4u ts[B]=%llu ts[E]=%llu shadow[E]=%llu ts-diff=%lld shadow-diff=%lld\n",
|
||||||
|
slotB, slotE,
|
||||||
|
ts[slotB], ts[slotE], m_shadowBuffer[slotE],
|
||||||
|
Distance(ts[slotB], ts[slotE]),
|
||||||
|
Distance(m_shadowBuffer[slotE], ts[slotE]));
|
||||||
|
);
|
||||||
|
if (Distance(m_shadowBuffer[slotE], ts[slotE]) <= 0)
|
||||||
|
break; // GPU hasn't written this timestamp yet; retry next Collect()
|
||||||
|
EmitGpuTime(ts[slotB], slotB);
|
||||||
|
EmitGpuTime(ts[slotE], slotE);
|
||||||
|
}
|
||||||
|
m_previousCheckpoint = ticket;
|
||||||
|
|
||||||
|
if (Distance(ticket, end) > 0)
|
||||||
|
return; // still unresolved queries in this buffer; come back next Collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
// All queries resolved (or getMappedRange failed): unmap and fall through to rotate.
|
||||||
|
wgpuBufferUnmap(collectStage.buffer);
|
||||||
|
collectStage.mapStatus = {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// At this point, all queries in the collect buffer have been processed.
|
||||||
|
// (it's now tie to "rotate" the buffers around...)
|
||||||
|
|
||||||
|
// Has any ResolveQueryBatch call landed in this reel stage since it was last recycled?
|
||||||
|
// (Are there any queries to resolve and collect at all?)
|
||||||
|
if (m_readbackReel[fillingIdx].copiedUpto <= m_previousCheckpoint)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Rotate/Cycle the Readback Pipeline State:
|
||||||
|
// the buffer that was just collected shall now be used for instrumentation
|
||||||
|
collectStage.copiedUpto = m_previousCheckpoint.load();
|
||||||
|
m_writeIdx = collectIdx; // atomically commit the pipeline rotation
|
||||||
|
|
||||||
|
auto& nextToCollect = m_readbackReel[pendingIdx];
|
||||||
|
WGPUBufferMapCallbackInfo cbInfo = {};
|
||||||
|
cbInfo.mode = WGPUCallbackMode_AllowProcessEvents;
|
||||||
|
cbInfo.callback = [](WGPUMapAsyncStatus status, WGPUStringView, void* userData, void*)
|
||||||
|
{
|
||||||
|
auto* stage = static_cast<ReadbackStage*>(userData);
|
||||||
|
stage->mapStatus = status;
|
||||||
|
};
|
||||||
|
cbInfo.userdata1 = &nextToCollect;
|
||||||
|
nextToCollect.pendingFuture = wgpuBufferMapAsync(
|
||||||
|
nextToCollect.buffer, WGPUMapMode_Read, 0,
|
||||||
|
static_cast<uint64_t>(m_queryLimit) * sizeof(uint64_t), cbInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void EmitGpuTime(uint64_t gpuTimestamp, uint32_t queryId)
|
||||||
|
{
|
||||||
|
auto* item = Profiler::QueueSerial();
|
||||||
|
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||||
|
MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(gpuTimestamp));
|
||||||
|
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||||
|
MemWrite(&item->gpuTime.context, GetId());
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
m_shadowBuffer[queryId] = gpuTimestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Number of slots in the query ring (the value of m_queryLimit).
tracy_force_inline uint32_t RingCapacity() const
{
    return m_queryLimit;
}
|
||||||
|
|
||||||
|
// Maps a monotonically growing ticket number onto a slot of the query ring.
tracy_force_inline uint32_t RingIndex(uint64_t t) const
{
    const uint64_t capacity = RingCapacity();
    const uint64_t slot = t % capacity;
    return static_cast<uint32_t>(slot);
}
|
||||||
|
|
||||||
|
// Signed distance from `begin` to `end`: positive when `end` is ahead of
// `begin`, negative when it is behind. The subtraction is done on unsigned
// values, so it is well defined even when `end` is numerically smaller.
tracy_force_inline static int64_t Distance(uint64_t begin, uint64_t end)
{
    const uint64_t delta = end - begin;
    return static_cast<int64_t>(delta);
}
|
||||||
|
|
||||||
|
// Reserves a pair of consecutive query tickets and returns the first one.
// If the reserved ticket is a full ring (or more) ahead of the last collected
// checkpoint, a warning is logged and Collect() is invoked before returning.
tracy_force_inline uint64_t NextQueryId()
{
    const uint64_t ticket = m_queryCounter.fetch_add(2, std::memory_order_relaxed);

    const int64_t ahead = Distance(m_previousCheckpoint, ticket);
    const int64_t capacity = static_cast<int64_t>(RingCapacity());
    const bool ringIsFull = ahead >= capacity;
    if (ringIsFull)
    {
        TracyWebGPULog(Warning, "Too many pending GPU queries: stalling!");
        Collect();
    }
    return ticket;
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class WebGPUZoneScope
|
||||||
|
{
|
||||||
|
const bool m_active;
|
||||||
|
WebGPUQueueCtx* m_ctx = nullptr;
|
||||||
|
WGPUCommandEncoder m_encoder = nullptr;
|
||||||
|
uint64_t m_rawTicket = 0;
|
||||||
|
uint32_t m_queryId = 0;
|
||||||
|
|
||||||
|
WGPUPassTimestampWrites m_timestampWrites = {};
|
||||||
|
|
||||||
|
void ResolveQueryBatch(uint32_t queryBatchStartId)
|
||||||
|
{
|
||||||
|
// 32 queries = 32 * 8 bytes = 256 bytes
|
||||||
|
TracyWebGPUAssert(queryBatchStartId % 32 == 0, return);
|
||||||
|
queryBatchStartId = m_ctx->RingIndex(queryBatchStartId);
|
||||||
|
|
||||||
|
const uint64_t blockOffset = static_cast<uint64_t>(queryBatchStartId) * sizeof(uint64_t);
|
||||||
|
wgpuCommandEncoderResolveQuerySet(
|
||||||
|
m_encoder,
|
||||||
|
m_ctx->m_querySet,
|
||||||
|
queryBatchStartId, 32,
|
||||||
|
m_ctx->m_resolveBuffer,
|
||||||
|
blockOffset // MUST be a multiple of (aligned to) 256...
|
||||||
|
);
|
||||||
|
|
||||||
|
auto& stage = m_ctx->m_readbackReel[m_ctx->m_writeIdx];
|
||||||
|
auto readbackBuffer = stage.buffer;
|
||||||
|
wgpuCommandEncoderCopyBufferToBuffer(
|
||||||
|
m_encoder,
|
||||||
|
m_ctx->m_resolveBuffer,
|
||||||
|
blockOffset,
|
||||||
|
readbackBuffer,
|
||||||
|
blockOffset,
|
||||||
|
32 * sizeof(uint64_t)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Advance this stage's high-water mark to cover the block just encoded.
|
||||||
|
// TODO: maybe we can use fetch_add to increment the atomic and not need
|
||||||
|
// to keep track of the raw ticket; Collect would need to derive the raw
|
||||||
|
// end ticket number.
|
||||||
|
const uint64_t blockEnd = m_rawTicket;
|
||||||
|
uint64_t prev = stage.copiedUpto;
|
||||||
|
while ((WebGPUQueueCtx::Distance(prev, blockEnd) > 0) &&
|
||||||
|
!stage.copiedUpto.compare_exchange_weak(prev, blockEnd)) {}
|
||||||
|
TracyWebGPUDebug( fprintf(stdout, "[TWG] WebGPUZoneScope [%d] (%d,%d)\n", (int)m_ctx->m_writeIdx, queryBatchStartId, queryBatchStartId+32) );
|
||||||
|
}
|
||||||
|
|
||||||
|
tracy_force_inline void WriteQueueItem(const SourceLocationData* srcLocation, int32_t callstackDepth, uint32_t sourceLine, const char* sourceFile, size_t sourceFileLen, const char* functionName, size_t functionNameLen, const char* zoneName, size_t zoneNameLen)
|
||||||
|
{
|
||||||
|
if (!m_active) return;
|
||||||
|
|
||||||
|
const bool captureCallstack = callstackDepth > 0 && has_callstack();
|
||||||
|
const bool transientZone = srcLocation == nullptr;
|
||||||
|
uint64_t srcLocationAddr = reinterpret_cast<uint64_t>(srcLocation);
|
||||||
|
|
||||||
|
QueueItem* item = nullptr;
|
||||||
|
QueueType itemType;
|
||||||
|
if (transientZone)
|
||||||
|
{
|
||||||
|
srcLocationAddr = Profiler::AllocSourceLocation(sourceLine, sourceFile, sourceFileLen, functionName, functionNameLen, zoneName, zoneNameLen);
|
||||||
|
if (captureCallstack)
|
||||||
|
{
|
||||||
|
item = Profiler::QueueSerialCallstack(Callstack(callstackDepth));
|
||||||
|
itemType = QueueType::GpuZoneBeginAllocSrcLocCallstackSerial;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
item = Profiler::QueueSerial();
|
||||||
|
itemType = QueueType::GpuZoneBeginAllocSrcLocSerial;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (captureCallstack)
|
||||||
|
{
|
||||||
|
item = Profiler::QueueSerialCallstack(Callstack(callstackDepth));
|
||||||
|
itemType = QueueType::GpuZoneBeginCallstackSerial;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
item = Profiler::QueueSerial();
|
||||||
|
itemType = QueueType::GpuZoneBeginSerial;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MemWrite(&item->hdr.type, itemType);
|
||||||
|
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||||
|
MemWrite(&item->gpuZoneBegin.srcloc, srcLocationAddr);
|
||||||
|
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||||
|
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||||
|
MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId());
|
||||||
|
Profiler::QueueSerialFinish();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fills in m_timestampWrites and assigns its address to passDesc.timestampWrites.
|
||||||
|
// Works with both WGPURenderPassDescriptor and WGPUComputePassDescriptor.
|
||||||
|
template<typename PassDescriptor>
|
||||||
|
tracy_force_inline void InitBase(WebGPUQueueCtx* ctx, WGPUCommandEncoder encoder, PassDescriptor& passDesc)
|
||||||
|
{
|
||||||
|
m_ctx = ctx;
|
||||||
|
m_encoder = encoder;
|
||||||
|
|
||||||
|
m_rawTicket = m_ctx->NextQueryId();
|
||||||
|
m_queryId = m_ctx->RingIndex(m_rawTicket);
|
||||||
|
|
||||||
|
m_timestampWrites.querySet = m_ctx->m_querySet;
|
||||||
|
m_timestampWrites.beginningOfPassWriteIndex = m_queryId;
|
||||||
|
m_timestampWrites.endOfPassWriteIndex = m_queryId + 1;
|
||||||
|
passDesc.timestampWrites = &m_timestampWrites;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
template<typename PassDescriptor>
|
||||||
|
tracy_force_inline WebGPUZoneScope(WebGPUQueueCtx* ctx, WGPUCommandEncoder encoder, PassDescriptor& passDesc, const SourceLocationData* srcLocation, bool active)
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
: m_active(active && GetProfiler().IsConnected())
|
||||||
|
#else
|
||||||
|
: m_active(active)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (!m_active || !ctx) return;
|
||||||
|
InitBase(ctx, encoder, passDesc);
|
||||||
|
WriteQueueItem(srcLocation, 0, 0, nullptr, 0, nullptr, 0, nullptr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename PassDescriptor>
|
||||||
|
tracy_force_inline WebGPUZoneScope(WebGPUQueueCtx* ctx, WGPUCommandEncoder encoder, PassDescriptor& passDesc, const SourceLocationData* srcLocation, int32_t depth, bool active)
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
: m_active(active && GetProfiler().IsConnected())
|
||||||
|
#else
|
||||||
|
: m_active(active)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (!m_active || !ctx) return;
|
||||||
|
InitBase(ctx, encoder, passDesc);
|
||||||
|
WriteQueueItem(srcLocation, depth, 0, nullptr, 0, nullptr, 0, nullptr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename PassDescriptor>
|
||||||
|
tracy_force_inline WebGPUZoneScope(WebGPUQueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, WGPUCommandEncoder encoder, PassDescriptor& passDesc, bool active)
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
: m_active(active && GetProfiler().IsConnected())
|
||||||
|
#else
|
||||||
|
: m_active(active)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (!m_active || !ctx) return;
|
||||||
|
InitBase(ctx, encoder, passDesc);
|
||||||
|
WriteQueueItem(nullptr, 0, line, source, sourceSz, function, functionSz, name, nameSz);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename PassDescriptor>
|
||||||
|
tracy_force_inline WebGPUZoneScope(WebGPUQueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, WGPUCommandEncoder encoder, PassDescriptor& passDesc, int32_t depth, bool active)
|
||||||
|
#ifdef TRACY_ON_DEMAND
|
||||||
|
: m_active(active && GetProfiler().IsConnected())
|
||||||
|
#else
|
||||||
|
: m_active(active)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (!m_active || !ctx) return;
|
||||||
|
InitBase(ctx, encoder, passDesc);
|
||||||
|
WriteQueueItem(nullptr, depth, line, source, sourceSz, function, functionSz, name, nameSz);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Closes the GPU zone. Does nothing when the zone is inactive or has no
// context attached.
//
// Emits a GpuZoneEndSerial item on the serial queue carrying the CPU time, the
// calling thread, the context id, and the end-of-pass query slot
// (m_queryId + 1, truncated to 16 bits).
tracy_force_inline ~WebGPUZoneScope()
{
    if (m_active && m_ctx)
    {
        auto* endItem = Profiler::QueueSerial();
        MemWrite(&endItem->hdr.type, QueueType::GpuZoneEndSerial);
        MemWrite(&endItem->gpuZoneEnd.cpuTime, Profiler::GetTime());
        MemWrite(&endItem->gpuZoneEnd.thread, GetThreadHandle());
        MemWrite(&endItem->gpuZoneEnd.queryId, static_cast<uint16_t>(m_queryId + 1));
        MemWrite(&endItem->gpuZoneEnd.context, m_ctx->GetId());
        Profiler::QueueSerialFinish();

        // Every time the begin slot lands on a multiple of 32, ask for the
        // batch starting 32 slots earlier to be resolved.
        // NOTE(review): for m_queryId == 0 the argument is m_queryId-32; whether
        // ResolveQueryBatch copes with that wrap is not visible here — confirm.
        const bool onBatchBoundary = (m_queryId % 32 == 0);
        if (onBatchBoundary)
        {
            ResolveQueryBatch(m_queryId-32);
        }
    }
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Tears down a context produced by CreateWebGPUContext(): runs the destructor
// explicitly and then releases the storage with tracy_free(). A null `ctx` is
// accepted and ignored.
static inline void DestroyWebGPUContext(WebGPUQueueCtx* ctx)
{
    if (ctx != nullptr)
    {
        ctx->~WebGPUQueueCtx();
        tracy_free(ctx);
    }
}
|
||||||
|
|
||||||
|
// Allocates storage with tracy_malloc() and placement-constructs a
// WebGPUQueueCtx from the given instance, device and queue.
//
// Returns the new context, or nullptr when the constructed context reports
// id 255; in that case the context is destroyed again before returning.
// NOTE(review): 255 presumably marks "no context id available" — confirm
// against WebGPUQueueCtx.
static inline WebGPUQueueCtx* CreateWebGPUContext(WGPUInstance instance, WGPUDevice device, WGPUQueue queue)
{
    auto* storage = static_cast<WebGPUQueueCtx*>(tracy_malloc(sizeof(WebGPUQueueCtx)));
    new (storage) WebGPUQueueCtx{ instance, device, queue };

    if (storage->GetId() != 255)
    {
        return storage;
    }

    DestroyWebGPUContext(storage);
    return nullptr;
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef TracyWebGPUPanic
|
||||||
|
#undef TracyWebGPULog
|
||||||
|
#undef TracyWebGPUAssert
|
||||||
|
#undef TracyWebGPUBreak
|
||||||
|
#undef TracyWebGPUDebug
|
||||||
|
#undef TRACY_WEBGPU_DEBUG_LEVEL
|
||||||
|
|
||||||
|
using TracyWebGPUCtx = tracy::WebGPUQueueCtx*;
|
||||||
|
|
||||||
|
#define TracyWebGPUSetupDeviceDescriptor(deviceDescriptor) tracy::WebGPUQueueCtx::Requirements TracyConcat(__tracy_wgpu_setup_, TracyLine); TracyConcat(__tracy_wgpu_setup_, TracyLine).ApplyToDeviceDescriptor(deviceDescriptor)
|
||||||
|
|
||||||
|
#define TracyWebGPUContext(instance, device, queue) tracy::CreateWebGPUContext(instance, device, queue);
|
||||||
|
#define TracyWebGPUDestroy(ctx) tracy::DestroyWebGPUContext(ctx);
|
||||||
|
#define TracyWebGPUContextName(ctx, name, size) if (ctx) ctx->Name(name, size);
|
||||||
|
|
||||||
|
#define TracyWebGPUUnnamedZone ___tracy_gpu_webgpu_zone
|
||||||
|
#define TracyWebGPUSrcLocSymbol TracyConcat(__tracy_webgpu_source_location,TracyLine)
|
||||||
|
#define TracyWebGPUSrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyWebGPUSrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color };
|
||||||
|
|
||||||
|
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||||
|
# define TracyWebGPUZone(ctx, encoder, passDesc, name) TracyWebGPUNamedZoneS(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, TRACY_CALLSTACK, true)
|
||||||
|
# define TracyWebGPUZoneC(ctx, encoder, passDesc, name, color) TracyWebGPUNamedZoneCS(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, color, TRACY_CALLSTACK, true)
|
||||||
|
# define TracyWebGPUNamedZone(ctx, varname, encoder, passDesc, name, active) TracyWebGPUSrcLocObject(name, 0); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, TRACY_CALLSTACK, active };
|
||||||
|
# define TracyWebGPUNamedZoneC(ctx, varname, encoder, passDesc, name, color, active) TracyWebGPUSrcLocObject(name, color); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, TRACY_CALLSTACK, active };
|
||||||
|
# define TracyWebGPUZoneTransient(ctx, varname, encoder, passDesc, name, active) TracyWebGPUZoneTransientS(ctx, varname, encoder, passDesc, name, TRACY_CALLSTACK, active)
|
||||||
|
#else
|
||||||
|
# define TracyWebGPUZone(ctx, encoder, passDesc, name) TracyWebGPUNamedZone(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, true)
|
||||||
|
# define TracyWebGPUZoneC(ctx, encoder, passDesc, name, color) TracyWebGPUNamedZoneC(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, color, true)
|
||||||
|
# define TracyWebGPUNamedZone(ctx, varname, encoder, passDesc, name, active) TracyWebGPUSrcLocObject(name, 0); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, active };
|
||||||
|
# define TracyWebGPUNamedZoneC(ctx, varname, encoder, passDesc, name, color, active) TracyWebGPUSrcLocObject(name, color); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, active };
|
||||||
|
# define TracyWebGPUZoneTransient(ctx, varname, encoder, passDesc, name, active) tracy::WebGPUZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), encoder, passDesc, active };
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef TRACY_HAS_CALLSTACK
|
||||||
|
# define TracyWebGPUZoneS(ctx, encoder, passDesc, name, depth) TracyWebGPUNamedZoneS(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, depth, true)
|
||||||
|
# define TracyWebGPUZoneCS(ctx, encoder, passDesc, name, color, depth) TracyWebGPUNamedZoneCS(ctx, TracyWebGPUUnnamedZone, encoder, passDesc, name, color, depth, true)
|
||||||
|
# define TracyWebGPUNamedZoneS(ctx, varname, encoder, passDesc, name, depth, active) TracyWebGPUSrcLocObject(name, 0); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, depth, active };
|
||||||
|
# define TracyWebGPUNamedZoneCS(ctx, varname, encoder, passDesc, name, color, depth, active) TracyWebGPUSrcLocObject(name, color); tracy::WebGPUZoneScope varname{ ctx, encoder, passDesc, &TracyWebGPUSrcLocSymbol, depth, active };
|
||||||
|
# define TracyWebGPUZoneTransientS(ctx, varname, encoder, passDesc, name, depth, active) tracy::WebGPUZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), encoder, passDesc, depth, active };
|
||||||
|
#else
|
||||||
|
# define TracyWebGPUZoneS(ctx, encoder, passDesc, name, depth) TracyWebGPUZone(ctx, encoder, passDesc, name)
|
||||||
|
# define TracyWebGPUZoneCS(ctx, encoder, passDesc, name, color, depth) TracyWebGPUZoneC(ctx, encoder, passDesc, name, color)
|
||||||
|
# define TracyWebGPUNamedZoneS(ctx, varname, encoder, passDesc, name, depth, active) TracyWebGPUNamedZone(ctx, varname, encoder, passDesc, name, active)
|
||||||
|
# define TracyWebGPUNamedZoneCS(ctx, varname, encoder, passDesc, name, color, depth, active) TracyWebGPUNamedZoneC(ctx, varname, encoder, passDesc, name, color, active)
|
||||||
|
# define TracyWebGPUZoneTransientS(ctx, varname, encoder, passDesc, name, depth, active) TracyWebGPUZoneTransient(ctx, varname, encoder, passDesc, name, active)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define TracyWebGPUCollect(ctx) if (ctx) ctx->Collect();
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -1033,14 +1033,15 @@ PYBIND11_MODULE( TracyServerBindings, m )
|
|||||||
// --- GPU contexts ---
|
// --- GPU contexts ---
|
||||||
.def( "get_gpu_contexts", []( const Worker& w ) {
|
.def( "get_gpu_contexts", []( const Worker& w ) {
|
||||||
static const char* gpuTypeStr[] = {
|
static const char* gpuTypeStr[] = {
|
||||||
"Invalid", "OpenGL", "Vulkan", "OpenCL", "Direct3D12", "Direct3D11", "Metal", "Custom", "CUDA", "Rocprof" };
|
"Invalid", "OpenGL", "Vulkan", "OpenCL", "Direct3D12", "Direct3D11", "Metal", "Custom", "CUDA", "Rocprof", "WebGPU" };
|
||||||
|
static size_t numTypes = sizeof(gpuTypeStr) / sizeof(gpuTypeStr[0]);
|
||||||
std::vector<GpuContextSummary> result;
|
std::vector<GpuContextSummary> result;
|
||||||
for( const auto* ctx : w.GetGpuData() )
|
for( const auto* ctx : w.GetGpuData() )
|
||||||
{
|
{
|
||||||
if( !ctx ) continue;
|
if( !ctx ) continue;
|
||||||
const std::string name = ctx->name.Active() ? w.GetString( ctx->name ) : "";
|
const std::string name = ctx->name.Active() ? w.GetString( ctx->name ) : "";
|
||||||
const uint8_t typeIdx = (uint8_t)ctx->type;
|
const uint8_t typeIdx = (uint8_t)ctx->type;
|
||||||
const char* typeStr = typeIdx < 10 ? gpuTypeStr[typeIdx] : "Unknown";
|
const char* typeStr = typeIdx < numTypes ? gpuTypeStr[typeIdx] : "Unknown";
|
||||||
result.push_back( GpuContextSummary{
|
result.push_back( GpuContextSummary{
|
||||||
name, ctx->count, std::string( typeStr ), ctx->thread } );
|
name, ctx->count, std::string( typeStr ), ctx->thread } );
|
||||||
}
|
}
|
||||||
|
|||||||
63
tests/rocprof/repro/on_demand/CMakeLists.txt
Normal file
63
tests/rocprof/repro/on_demand/CMakeLists.txt
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.21)
|
||||||
|
project(RocprofOnDemandReproTests LANGUAGES CXX)
|
||||||
|
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
|
||||||
|
set(TRACY_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../.."
|
||||||
|
CACHE PATH "Root of the Tracy repository")
|
||||||
|
set(TRACY_PUBLIC "${TRACY_PATH}/public")
|
||||||
|
|
||||||
|
set(ROCM_PATH "/opt/rocm" CACHE PATH "Root of the ROCm installation")
|
||||||
|
|
||||||
|
# Locate hipcc so the HIP language can be enabled.
|
||||||
|
if(NOT DEFINED CMAKE_HIP_COMPILER)
|
||||||
|
find_program(CMAKE_HIP_COMPILER hipcc HINTS "${ROCM_PATH}/bin")
|
||||||
|
endif()
|
||||||
|
enable_language(HIP)
|
||||||
|
|
||||||
|
find_package(Threads REQUIRED)
|
||||||
|
|
||||||
|
# rocprofiler-sdk: no packaged CMake config is consulted; this is a bare library
|
||||||
|
# search under ROCM_PATH (mirrors the original Makefile's -lrocprofiler-sdk).
|
||||||
|
find_library(ROCPROFILER_SDK_LIB rocprofiler-sdk HINTS "${ROCM_PATH}/lib")
|
||||||
|
if(NOT ROCPROFILER_SDK_LIB)
|
||||||
|
message(FATAL_ERROR "Could not find librocprofiler-sdk under ${ROCM_PATH}/lib")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Defines shared by the Tracy client and the reproducer. On-demand mode is
|
||||||
|
# the point of this repro: profiling starts when a client connects.
|
||||||
|
set(REPRO_DEFINES TRACY_ENABLE TRACY_ON_DEMAND TRACY_ROCPROF __HIP_PLATFORM_AMD__)
|
||||||
|
|
||||||
|
# Tracy client (built with the rocprof backend enabled).
|
||||||
|
add_library(TracyClient STATIC ${TRACY_PUBLIC}/TracyClient.cpp)
|
||||||
|
target_include_directories(TracyClient PUBLIC ${TRACY_PUBLIC} "${ROCM_PATH}/include")
|
||||||
|
target_compile_definitions(TracyClient PUBLIC ${REPRO_DEFINES})
|
||||||
|
target_link_libraries(TracyClient PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
|
||||||
|
|
||||||
|
# repro: minimal HIP program that emits GPU zones via the rocprof backend.
|
||||||
|
add_executable(repro repro.cpp)
|
||||||
|
set_source_files_properties(repro.cpp PROPERTIES LANGUAGE HIP)
|
||||||
|
target_include_directories(repro PRIVATE "${ROCM_PATH}/include")
|
||||||
|
target_compile_definitions(repro PRIVATE ${REPRO_DEFINES})
|
||||||
|
target_link_libraries(repro PRIVATE TracyClient ${ROCPROFILER_SDK_LIB})
|
||||||
|
|
||||||
|
# check_gpu_ctx_name: loads a .tracy capture and verifies the GPU context
|
||||||
|
# name was deferred to (and received by) a late-connecting client. Links the
|
||||||
|
# Tracy server library, assembled the same way tracy-capture does it
|
||||||
|
# (cmake/server.cmake + vendor.cmake). Off by default since it pulls in the
|
||||||
|
# full server build and its vendored dependencies.
|
||||||
|
option(BUILD_CHECK_TOOL "Build the check_gpu_ctx_name verification helper" OFF)
|
||||||
|
if(BUILD_CHECK_TOOL)
|
||||||
|
set(NO_STATISTICS ON)
|
||||||
|
include(${TRACY_PATH}/cmake/vendor.cmake)
|
||||||
|
include(${TRACY_PATH}/cmake/server.cmake)
|
||||||
|
add_executable(check_gpu_ctx_name check_gpu_ctx_name.cpp)
|
||||||
|
target_compile_features(check_gpu_ctx_name PRIVATE cxx_std_20)
|
||||||
|
target_include_directories(check_gpu_ctx_name PRIVATE ${TRACY_PATH})
|
||||||
|
target_link_libraries(check_gpu_ctx_name PRIVATE TracyServer)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# ctest integration. To run the binaries via ctest:
|
||||||
|
# ctest --test-dir <cmake-build-dir> -R repro
|
||||||
|
enable_testing()
|
||||||
|
add_test(NAME repro COMMAND repro)
|
||||||
74
tests/rocprof/repro/on_demand/README.md
Normal file
74
tests/rocprof/repro/on_demand/README.md
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
# Rocprofiler On-Demand Profiling Repro
|
||||||
|
|
||||||
|
Demonstrates that unpatched Tracy crashes when a profiler connects to a
|
||||||
|
HIP application built with `TRACY_ON_DEMAND` and `TRACY_ROCPROF`.
|
||||||
|
|
||||||
|
## Root cause
|
||||||
|
|
||||||
|
Three bugs in `TracyRocprof.cpp` break on-demand profiling:
|
||||||
|
|
||||||
|
1. **GpuNewContext not deferred.** `gpu_context_allocate()` writes a
|
||||||
|
`GpuNewContext` queue item but does not call `DeferItem()`. When a
|
||||||
|
Tracy client connects late, the context creation message is never
|
||||||
|
replayed. The server then receives `GpuZoneBegin` events for a
|
||||||
|
context it has never seen, triggering:
|
||||||
|
|
||||||
|
Assertion `ctx' failed in ProcessGpuZoneBeginImplCommon
|
||||||
|
|
||||||
|
2. **GpuContextName not deferred.** Same function writes the context
|
||||||
|
name ("rocprofv3") without calling `DeferItem()`. Even after fixing
|
||||||
|
bug 1, a late-connecting client sees the GPU context but it appears
|
||||||
|
unnamed in the profiler. Use `check_gpu_ctx_name` to verify.
|
||||||
|
|
||||||
|
3. **Kernel symbols dropped before init.** The `data->init` guard at the
|
||||||
|
top of `tool_callback_tracing_callback()` blocks all callbacks before
|
||||||
|
the GPU context is allocated. Kernel symbol registrations
|
||||||
|
(`CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER`) happen at HIP init
|
||||||
|
time — before `data->init` is true — so they are silently dropped.
|
||||||
|
Even if the crash is worked around, kernel names would be missing.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- AMD GPU with working ROCm driver
|
||||||
|
- `librocprofiler-sdk.so` available (typically at `/opt/rocm/lib/`)
|
||||||
|
- `/opt/rocm/bin/hipcc`
|
||||||
|
|
||||||
|
## Build and run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cmake -B build -DCMAKE_BUILD_TYPE=Release
|
||||||
|
cmake --build build
|
||||||
|
./build/repro &
|
||||||
|
tracy-capture -o repro.tracy -s 5
|
||||||
|
```
|
||||||
|
|
||||||
|
If ROCm is not under `/opt/rocm`, pass `-DROCM_PATH=/path/to/rocm`.
|
||||||
|
|
||||||
|
The reproducer is also registered as a ctest target:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ctest --test-dir build -R repro
|
||||||
|
```
|
||||||
|
|
||||||
|
## Verifying the context name
|
||||||
|
|
||||||
|
`check_gpu_ctx_name` loads a `.tracy` file and prints the GPU context
|
||||||
|
names. It links the Tracy server library, so it is built only on request:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cmake -B build -DBUILD_CHECK_TOOL=ON
|
||||||
|
cmake --build build --target check_gpu_ctx_name
|
||||||
|
./build/check_gpu_ctx_name repro.tracy
|
||||||
|
# Expected (patched): "GPU context 0: rocprofv3"
|
||||||
|
# Expected (unpatched): "GPU context 0: (unnamed)"
|
||||||
|
```
|
||||||
|
|
||||||
|
Exit codes: 0 = all contexts named, 1 = usage/load error or no GPU contexts found, 2 = unnamed context found.
|
||||||
|
|
||||||
|
## What to expect
|
||||||
|
|
||||||
|
| Tracy version | Result |
|
||||||
|
|---|---|
|
||||||
|
| Unpatched | `tracy-capture` crashes: `Assertion 'ctx' failed` |
|
||||||
|
| Patched (GpuNewContext only) | Capture succeeds but GPU context is unnamed |
|
||||||
|
| Fully patched | Capture succeeds with ~50 GPU zones and context named "rocprofv3" |
|
||||||
65
tests/rocprof/repro/on_demand/check_gpu_ctx_name.cpp
Normal file
65
tests/rocprof/repro/on_demand/check_gpu_ctx_name.cpp
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
// Loads a .tracy file and prints the GPU context names.
|
||||||
|
// Used to verify that on-demand profiling correctly defers the
|
||||||
|
// GpuContextName message so late-connecting clients see the name.
|
||||||
|
//
|
||||||
|
// Usage: ./check_gpu_ctx_name trace.tracy
|
||||||
|
// Expected output: "GPU context 0: rocprofv3"
|
||||||
|
// If name is missing: "GPU context 0: (unnamed)"
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include "server/TracyFileRead.hpp"
|
||||||
|
#include "server/TracyWorker.hpp"
|
||||||
|
|
||||||
|
int main( int argc, char** argv )
|
||||||
|
{
|
||||||
|
if( argc != 2 )
|
||||||
|
{
|
||||||
|
fprintf( stderr, "Usage: %s <trace.tracy>\n", argv[0] );
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
auto f = std::unique_ptr<tracy::FileRead>( tracy::FileRead::Open( argv[1] ) );
|
||||||
|
if( !f )
|
||||||
|
{
|
||||||
|
fprintf( stderr, "Cannot open %s\n", argv[1] );
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
tracy::Worker worker( *f, tracy::EventType::None, false );
|
||||||
|
|
||||||
|
const auto& gpuData = worker.GetGpuData();
|
||||||
|
if( gpuData.empty() )
|
||||||
|
{
|
||||||
|
printf( "No GPU contexts found.\n" );
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool all_named = true;
|
||||||
|
for( size_t i = 0; i < gpuData.size(); i++ )
|
||||||
|
{
|
||||||
|
const auto& ctx = gpuData[i];
|
||||||
|
if( ctx->name.Active() )
|
||||||
|
{
|
||||||
|
const char* name = worker.GetString( ctx->name );
|
||||||
|
bool has_name = name && name[0] != '\0';
|
||||||
|
printf( "GPU context %zu: %s\n", i, has_name ? name : "(unnamed)" );
|
||||||
|
if( !has_name ) all_named = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf( "GPU context %zu: (unnamed)\n", i );
|
||||||
|
all_named = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return all_named ? 0 : 2;
|
||||||
|
}
|
||||||
|
catch( const std::exception& e )
|
||||||
|
{
|
||||||
|
fprintf( stderr, "Error: %s\n", e.what() );
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
85
tests/rocprof/repro/on_demand/repro.cpp
Normal file
85
tests/rocprof/repro/on_demand/repro.cpp
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
// Reproduces the rocprofiler on-demand profiling crash.
|
||||||
|
//
|
||||||
|
// When Tracy is built with TRACY_ON_DEMAND, a late-connecting profiler
|
||||||
|
// (tracy-capture / GUI) triggers an assertion failure in the server:
|
||||||
|
//
|
||||||
|
// Assertion `ctx' failed in ProcessGpuZoneBeginImplCommon
|
||||||
|
//
|
||||||
|
// Root cause: gpu_context_allocate() writes a GpuNewContext queue item
|
||||||
|
// but does not call DeferItem(), so the context is never replayed to a
|
||||||
|
// late-connecting client. The client then receives GpuZoneBegin events
|
||||||
|
// for a context it has never seen.
|
||||||
|
//
|
||||||
|
// A secondary issue: tool_callback_tracing_callback() guards ALL
|
||||||
|
// callbacks on data->init, which is only set after the calibration
|
||||||
|
// thread allocates the GPU context. Kernel symbol registrations
|
||||||
|
// (CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) happen at HIP init time,
|
||||||
|
// before data->init is true, so they are silently dropped. This causes
|
||||||
|
// kernel names to be missing in the profiler.
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
// cmake -B build && cmake --build build && cd build
|
||||||
|
// ./repro &
|
||||||
|
// tracy-capture -o repro.tracy -s 5
|
||||||
|
//
|
||||||
|
// Expected (unpatched): tracy-capture crashes with assertion failure
|
||||||
|
// Expected (patched): capture succeeds with GPU zones showing kernel names
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
#include "tracy/Tracy.hpp"
|
||||||
|
|
||||||
|
// Element-wise vector addition kernel: each thread computes its global index
// from block and thread coordinates and, if that index is below `n`, writes
// c[idx] = a[idx] + b[idx]. Threads past the end do nothing.
__global__ void vectorAdd( const float* a, const float* b, float* c, int n )
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if( idx >= n ) return;
    c[idx] = a[idx] + b[idx];
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
printf( "Rocprofiler on-demand repro — waiting for profiler to connect...\n" );
|
||||||
|
fflush( stdout );
|
||||||
|
|
||||||
|
constexpr int N = 1024;
|
||||||
|
float h_a[N], h_b[N], h_c[N];
|
||||||
|
float *d_a, *d_b, *d_c;
|
||||||
|
|
||||||
|
for( int i = 0; i < N; i++ )
|
||||||
|
{
|
||||||
|
h_a[i] = float( i );
|
||||||
|
h_b[i] = float( i * 2 );
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)hipMalloc( &d_a, N * sizeof( float ) );
|
||||||
|
(void)hipMalloc( &d_b, N * sizeof( float ) );
|
||||||
|
(void)hipMalloc( &d_c, N * sizeof( float ) );
|
||||||
|
|
||||||
|
(void)hipMemcpy( d_a, h_a, N * sizeof( float ), hipMemcpyHostToDevice );
|
||||||
|
(void)hipMemcpy( d_b, h_b, N * sizeof( float ), hipMemcpyHostToDevice );
|
||||||
|
|
||||||
|
// Run many iterations so tracy-capture has time to connect.
|
||||||
|
// With 100ms sleep per iteration this runs for ~10 seconds.
|
||||||
|
for( int iter = 0; iter < 100; iter++ )
|
||||||
|
{
|
||||||
|
ZoneScopedN( "iteration" );
|
||||||
|
vectorAdd<<<( N + 255 ) / 256, 256>>>( d_a, d_b, d_c, N );
|
||||||
|
(void)hipDeviceSynchronize();
|
||||||
|
usleep( 100000 );
|
||||||
|
FrameMark;
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)hipMemcpy( h_c, d_c, N * sizeof( float ), hipMemcpyDeviceToHost );
|
||||||
|
|
||||||
|
bool ok = true;
|
||||||
|
for( int i = 0; i < N; i++ )
|
||||||
|
{
|
||||||
|
if( h_c[i] != h_a[i] + h_b[i] ) { ok = false; break; }
|
||||||
|
}
|
||||||
|
printf( "Result: %s\n", ok ? "PASS" : "FAIL" );
|
||||||
|
|
||||||
|
(void)hipFree( d_a );
|
||||||
|
(void)hipFree( d_b );
|
||||||
|
(void)hipFree( d_c );
|
||||||
|
return ok ? 0 : 1;
|
||||||
|
}
|
||||||
@@ -11,6 +11,22 @@
|
|||||||
|
|
||||||
#include "OfflineSymbolResolver.h"
|
#include "OfflineSymbolResolver.h"
|
||||||
|
|
||||||
|
bool ResolveSymbols( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||||
|
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||||
|
SymbolEntryList& resolvedEntries )
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
// On Windows the default (no custom tool given) is the DbgHelp backend.
|
||||||
|
if( addr2lineToolPath.empty() )
|
||||||
|
{
|
||||||
|
return ResolveSymbolsDbgHelp( imagePath, inputEntryList, resolvedEntries );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
// Everywhere else, and whenever a custom tool is given, use the addr2line-compatible backend.
|
||||||
|
// An empty path lets that backend fall back to the 'addr2line' found in PATH.
|
||||||
|
return ResolveSymbolsAddr2Line( addr2lineToolPath, addr2lineArgs, imagePath, inputEntryList, resolvedEntries );
|
||||||
|
}
|
||||||
|
|
||||||
bool ApplyPathSubstitutions( std::string& path, const PathSubstitutionList& pathSubstitutionlist )
|
bool ApplyPathSubstitutions( std::string& path, const PathSubstitutionList& pathSubstitutionlist )
|
||||||
{
|
{
|
||||||
for( const auto& substitution : pathSubstitutionlist )
|
for( const auto& substitution : pathSubstitutionlist )
|
||||||
@@ -31,7 +47,35 @@ tracy::StringIdx AddSymbolString( tracy::Worker& worker, const std::string& str
|
|||||||
return tracy::StringIdx( location.idx );
|
return tracy::StringIdx( location.idx );
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstitutionlist, bool verbose )
|
// Puts every callstack frame of the trace back into the unresolved state:
// name becomes "[unresolved]", file becomes "[unknown]" and line becomes 0.
// Map entries without frame data are skipped. Progress and the number of
// frames touched are reported on stdout.
void ResetSymbols( tracy::Worker& worker )
{
    std::cout << "Resetting callstack frame symbols to the unresolved state..." << std::endl;

    // Register the placeholder strings once and reuse their indices.
    const tracy::StringIdx placeholderName = AddSymbolString( worker, "[unresolved]" );
    const tracy::StringIdx placeholderFile = AddSymbolString( worker, "[unknown]" );

    uint64_t resetCount = 0;
    for( auto&& mapEntry : worker.GetCallstackFrameMap() )
    {
        if( !mapEntry.second ) continue;

        tracy::CallstackFrameData& frames = *mapEntry.second;
        for( uint8_t idx = 0; idx < frames.size; ++idx )
        {
            tracy::CallstackFrame& current = frames.data[idx];
            current.name = placeholderName;
            current.file = placeholderFile;
            current.line = 0;
        }
        resetCount += frames.size;
    }

    std::cout << "Reset " << resetCount << " callstack frames." << std::endl;
}
|
||||||
|
|
||||||
|
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstitutionlist,
|
||||||
|
const std::string& addr2lineToolPath, const std::string& addr2lineArgs, bool verbose )
|
||||||
{
|
{
|
||||||
uint64_t callstackFrameCount = worker.GetCallstackFrameCount();
|
uint64_t callstackFrameCount = worker.GetCallstackFrameCount();
|
||||||
std::string relativeSoNameMatch = "[unresolved]";
|
std::string relativeSoNameMatch = "[unresolved]";
|
||||||
@@ -91,7 +135,7 @@ bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& p
|
|||||||
}
|
}
|
||||||
|
|
||||||
SymbolEntryList resolvedEntries;
|
SymbolEntryList resolvedEntries;
|
||||||
ResolveSymbols( imagePath, entries, resolvedEntries );
|
ResolveSymbols( addr2lineToolPath, addr2lineArgs, imagePath, entries, resolvedEntries );
|
||||||
|
|
||||||
if( resolvedEntries.size() != entries.size() )
|
if( resolvedEntries.size() != entries.size() )
|
||||||
{
|
{
|
||||||
@@ -131,7 +175,8 @@ bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& p
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings, bool verbose )
|
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings,
|
||||||
|
const std::string& addr2lineToolPath, const std::string& addr2lineArgs, bool verbose )
|
||||||
{
|
{
|
||||||
std::cout << "Resolving and patching symbols..." << std::endl;
|
std::cout << "Resolving and patching symbols..." << std::endl;
|
||||||
|
|
||||||
@@ -160,7 +205,7 @@ void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !PatchSymbolsWithRegex(worker, pathSubstitutionList, verbose) )
|
if ( !PatchSymbolsWithRegex(worker, pathSubstitutionList, addr2lineToolPath, addr2lineArgs, verbose) )
|
||||||
{
|
{
|
||||||
std::cerr << "Failed to patch symbols" << std::endl;
|
std::cerr << "Failed to patch symbols" << std::endl;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,12 +29,41 @@ struct SymbolEntry
|
|||||||
|
|
||||||
using SymbolEntryList = std::vector<SymbolEntry>;
|
using SymbolEntryList = std::vector<SymbolEntry>;
|
||||||
|
|
||||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
// Dispatches to the appropriate backend depending on the platform and whether a custom
|
||||||
|
// addr2line-compatible tool was specified. When addr2lineToolPath is non-empty, the tool at
|
||||||
|
// that path is invoked (on any platform); otherwise the platform default is used (DbgHelp on
|
||||||
|
// Windows, the 'addr2line' found in PATH elsewhere). addr2lineArgs are extra arguments passed
|
||||||
|
// verbatim to the addr2line-compatible tool (e.g. "--relative-address").
|
||||||
|
bool ResolveSymbols( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||||
|
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||||
SymbolEntryList& resolvedEntries );
|
SymbolEntryList& resolvedEntries );
|
||||||
|
|
||||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings, bool verbose = false );
|
// Backend invoking an addr2line-compatible tool. Available on all platforms. An empty
|
||||||
|
// addr2lineToolPath falls back to the 'addr2line' found in PATH. addr2lineArgs are inserted
|
||||||
|
// verbatim into the tool's command line.
|
||||||
|
bool ResolveSymbolsAddr2Line( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||||
|
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||||
|
SymbolEntryList& resolvedEntries );
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
// Backend using the Windows DbgHelp library.
|
||||||
|
bool ResolveSymbolsDbgHelp( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||||
|
SymbolEntryList& resolvedEntries );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Resets all callstack frame symbols back to the unresolved state ("[unresolved]" / "[unknown]"),
|
||||||
|
// so a subsequent PatchSymbols pass re-resolves every frame. This is useful to chain several
|
||||||
|
// resolution passes with different path substitutions. Only meaningful for traces captured with
|
||||||
|
// TRACY_SYMBOL_OFFLINE_RESOLVE, where each frame's symAddr holds the image-relative offset.
|
||||||
|
void ResetSymbols( tracy::Worker& worker );
|
||||||
|
|
||||||
|
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings,
|
||||||
|
const std::string& addr2lineToolPath = std::string(),
|
||||||
|
const std::string& addr2lineArgs = std::string(), bool verbose = false );
|
||||||
|
|
||||||
using PathSubstitutionList = std::vector<std::pair<std::regex, std::string> >;
|
using PathSubstitutionList = std::vector<std::pair<std::regex, std::string> >;
|
||||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstituionlist, bool verbose = false );
|
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstituionlist,
|
||||||
|
const std::string& addr2lineToolPath = std::string(),
|
||||||
|
const std::string& addr2lineArgs = std::string(), bool verbose = false );
|
||||||
|
|
||||||
#endif // __SYMBOLRESOLVER_HPP__
|
#endif // __SYMBOLRESOLVER_HPP__
|
||||||
@@ -1,5 +1,3 @@
|
|||||||
#ifndef _WIN32
|
|
||||||
|
|
||||||
#include "OfflineSymbolResolver.h"
|
#include "OfflineSymbolResolver.h"
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
@@ -10,6 +8,11 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define popen _popen
|
||||||
|
# define pclose _pclose
|
||||||
|
#endif
|
||||||
|
|
||||||
std::string ExecShellCommand( const char* cmd )
|
std::string ExecShellCommand( const char* cmd )
|
||||||
{
|
{
|
||||||
std::array<char, 128> buffer;
|
std::array<char, 128> buffer;
|
||||||
@@ -29,23 +32,66 @@ std::string ExecShellCommand( const char* cmd )
|
|||||||
class SymbolResolver
|
class SymbolResolver
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
SymbolResolver()
|
SymbolResolver( const std::string& addr2lineToolPath, const std::string& addr2lineArgs )
|
||||||
{
|
{
|
||||||
|
// Extra arguments are inserted verbatim into the tool invocation. Tracy records frame
|
||||||
|
// offsets as RVAs; for images with a non-zero preferred image base (PE, Mach-O) the user
|
||||||
|
// can pass "--relative-address" here so llvm-addr2line / llvm-symbolizer add the base back.
|
||||||
|
if( !addr2lineArgs.empty() )
|
||||||
|
{
|
||||||
|
m_addr2LineArgs = " " + addr2lineArgs;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !addr2lineToolPath.empty() )
|
||||||
|
{
|
||||||
|
// If the value looks like a path (not a bare command name resolved via PATH), verify
|
||||||
|
// it exists so a wrong path fails with an actionable error instead of a cryptic shell one.
|
||||||
|
const bool looksLikePath = addr2lineToolPath.find( '/' ) != std::string::npos ||
|
||||||
|
addr2lineToolPath.find( '\\' ) != std::string::npos;
|
||||||
|
if( looksLikePath && !std::ifstream( addr2lineToolPath ).good() )
|
||||||
|
{
|
||||||
|
std::cerr << "Specified symbol resolution tool not found: '" << addr2lineToolPath
|
||||||
|
<< "' (check the path passed to the '-a' option)" << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A user-provided path may contain spaces or other shell-special characters.
|
||||||
|
escapeShellParam( addr2lineToolPath, m_addr2LinePath );
|
||||||
|
std::cout << "Using user-specified symbol resolution tool: '" << addr2lineToolPath.c_str() << "'" << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
std::cerr << "No symbol resolution tool specified (use the '-a' option to provide one)" << std::endl;
|
||||||
|
#else
|
||||||
std::stringstream result( ExecShellCommand("which addr2line") );
|
std::stringstream result( ExecShellCommand("which addr2line") );
|
||||||
std::getline(result, m_addr2LinePath);
|
std::getline(result, m_addr2LinePath);
|
||||||
|
|
||||||
if( !m_addr2LinePath.length() )
|
if( !m_addr2LinePath.length() )
|
||||||
{
|
{
|
||||||
std::cerr << "'addr2line' was not found in the system, please installed it" << std::endl;
|
std::cerr << "'addr2line' was not found in the system, please install it" << std::endl;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::cout << "Using 'addr2line' found at: '" << m_addr2LinePath.c_str() << "'" << std::endl;
|
std::cout << "Using 'addr2line' found at: '" << m_addr2LinePath.c_str() << "'" << std::endl;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void escapeShellParam(std::string const& s, std::string& out)
|
static void escapeShellParam(std::string const& s, std::string& out)
|
||||||
{
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
// cmd.exe / the CRT command parser do not understand POSIX backslash escapes, and
|
||||||
|
// backslashes are path separators on Windows. Wrap the parameter in double quotes
|
||||||
|
// (which handles spaces) and drop any embedded quotes, which cannot appear in a path.
|
||||||
|
out.reserve( s.size() + 2 );
|
||||||
|
out.push_back( '"' );
|
||||||
|
for( char c : s )
|
||||||
|
{
|
||||||
|
if( c != '"' ) out.push_back( c );
|
||||||
|
}
|
||||||
|
out.push_back( '"' );
|
||||||
|
#else
|
||||||
out.reserve( s.size() + 2 );
|
out.reserve( s.size() + 2 );
|
||||||
out.push_back( '"' );
|
out.push_back( '"' );
|
||||||
for( unsigned char c : s )
|
for( unsigned char c : s )
|
||||||
@@ -73,34 +119,51 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
out.push_back( '"' );
|
out.push_back( '"' );
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||||
SymbolEntryList& resolvedEntries )
|
SymbolEntryList& resolvedEntries )
|
||||||
{
|
{
|
||||||
if( !m_addr2LinePath.length() ) return false;
|
if( !m_addr2LinePath.length() ) return false;
|
||||||
|
|
||||||
std:: string escapedPath;
|
std:: string escapedPath;
|
||||||
escapeShellParam( imagePath, escapedPath );
|
escapeShellParam( imagePath, escapedPath );
|
||||||
|
|
||||||
|
// Command-line length limits: cmd.exe (used by _popen on Windows) allows ~8191 characters;
|
||||||
|
// a single POSIX 'sh -c' argument is capped by MAX_ARG_STRLEN (128 KiB on Linux).
|
||||||
|
// 8000 stays under all of these, so a single conservative budget works on every platform.
|
||||||
|
const size_t maxCmdLength = 8000;
|
||||||
|
|
||||||
size_t entryIdx = 0;
|
size_t entryIdx = 0;
|
||||||
while( entryIdx < inputEntryList.size() )
|
while( entryIdx < inputEntryList.size() )
|
||||||
{
|
{
|
||||||
const size_t startIdx = entryIdx;
|
const size_t startIdx = entryIdx;
|
||||||
const size_t batchEndIdx = std::min( inputEntryList.size(), startIdx + (size_t)1024 );
|
|
||||||
|
|
||||||
printf( "Resolving symbols [%zu-%zu]\n", startIdx, batchEndIdx );
|
// generate a single addr2line cmd line for as many addresses as fit the length budget
|
||||||
|
|
||||||
// generate a single addr2line cmd line for all addresses in one invocation
|
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << m_addr2LinePath << " -C -f -e " << escapedPath << " -a ";
|
ss << m_addr2LinePath << " -C -f" << m_addr2LineArgs << " -e " << escapedPath << " -a ";
|
||||||
for( ; entryIdx < batchEndIdx; entryIdx++ )
|
while( entryIdx < inputEntryList.size() )
|
||||||
{
|
{
|
||||||
const FrameEntry& entry = inputEntryList[entryIdx];
|
const FrameEntry& entry = inputEntryList[entryIdx];
|
||||||
ss << " 0x" << std::hex << entry.symbolOffset;
|
ss << " 0x" << std::hex << entry.symbolOffset;
|
||||||
|
entryIdx++;
|
||||||
|
// always include at least one address, then stop once near the length limit
|
||||||
|
if( static_cast<size_t>( ss.tellp() ) >= maxCmdLength ) break;
|
||||||
}
|
}
|
||||||
|
const size_t batchEndIdx = entryIdx;
|
||||||
|
|
||||||
std::string resultStr = ExecShellCommand( ss.str().c_str() );
|
printf( "Resolving symbols [%zu-%zu]\n", startIdx, batchEndIdx );
|
||||||
|
|
||||||
|
std::string cmd = ss.str();
|
||||||
|
#ifdef _WIN32
|
||||||
|
// _popen runs the command through 'cmd.exe /c', which strips the outermost pair of
|
||||||
|
// quotes. Wrap the whole command so the quoting around the (possibly spaced) tool
|
||||||
|
// and image paths survives.
|
||||||
|
cmd = "\"" + cmd + "\"";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
std::string resultStr = ExecShellCommand( cmd.c_str() );
|
||||||
std::stringstream result( resultStr );
|
std::stringstream result( resultStr );
|
||||||
|
|
||||||
//printf("executing: '%s' got '%s'\n", ss.str().c_str(), result.str().c_str());
|
//printf("executing: '%s' got '%s'\n", ss.str().c_str(), result.str().c_str());
|
||||||
@@ -147,13 +210,13 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
std::string m_addr2LinePath;
|
std::string m_addr2LinePath;
|
||||||
|
std::string m_addr2LineArgs;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
bool ResolveSymbolsAddr2Line( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||||
SymbolEntryList& resolvedEntries )
|
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||||
|
SymbolEntryList& resolvedEntries )
|
||||||
{
|
{
|
||||||
static SymbolResolver symbolResolver;
|
static SymbolResolver symbolResolver( addr2lineToolPath, addr2lineArgs );
|
||||||
return symbolResolver.ResolveSymbols( imagePath, inputEntryList, resolvedEntries );
|
return symbolResolver.ResolveSymbols( imagePath, inputEntryList, resolvedEntries );
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // #ifndef _WIN32
|
|
||||||
|
|||||||
@@ -122,8 +122,8 @@ private:
|
|||||||
|
|
||||||
char SymbolResolver::s_symbolResolutionBuffer[symbolResolutionBufferSize];
|
char SymbolResolver::s_symbolResolutionBuffer[symbolResolutionBufferSize];
|
||||||
|
|
||||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
bool ResolveSymbolsDbgHelp( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||||
SymbolEntryList& resolvedEntries )
|
SymbolEntryList& resolvedEntries )
|
||||||
{
|
{
|
||||||
static SymbolResolver resolver;
|
static SymbolResolver resolver;
|
||||||
return resolver.ResolveSymbolsForModule( imagePath, inputEntryList, resolvedEntries );
|
return resolver.ResolveSymbolsForModule( imagePath, inputEntryList, resolvedEntries );
|
||||||
|
|||||||
@@ -38,7 +38,12 @@ void Usage()
|
|||||||
printf( " c: context switches, s: sampling data, C: symbol code, S: source cache\n" );
|
printf( " c: context switches, s: sampling data, C: symbol code, S: source cache\n" );
|
||||||
printf( " -c: scan for source files missing in cache and add if found\n" );
|
printf( " -c: scan for source files missing in cache and add if found\n" );
|
||||||
printf( " -r: resolve symbols and patch callstack frames\n");
|
printf( " -r: resolve symbols and patch callstack frames\n");
|
||||||
|
printf( " -R: reset all callstack frame symbols to unresolved (e.g. to re-run resolution)\n");
|
||||||
printf( " -p: substitute symbol resolution path with an alternative: \"REGEX_MATCH;REPLACEMENT\"\n");
|
printf( " -p: substitute symbol resolution path with an alternative: \"REGEX_MATCH;REPLACEMENT\"\n");
|
||||||
|
printf( " -a: path to a custom addr2line-compatible tool to use for symbol resolution\n");
|
||||||
|
printf( " -A: extra arguments passed verbatim to the symbol resolution tool,\n");
|
||||||
|
printf( " e.g. \"--relative-address\" for llvm-addr2line on PE/Mach-O images\n");
|
||||||
|
printf( " -v: verbose output while resolving symbols\n");
|
||||||
printf( " -j: number of threads to use for compression (-1 to use all cores)\n" );
|
printf( " -j: number of threads to use for compression (-1 to use all cores)\n" );
|
||||||
|
|
||||||
exit( 1 );
|
exit( 1 );
|
||||||
@@ -61,10 +66,14 @@ int main( int argc, char** argv )
|
|||||||
bool buildDict = false;
|
bool buildDict = false;
|
||||||
bool cacheSource = false;
|
bool cacheSource = false;
|
||||||
bool resolveSymbols = false;
|
bool resolveSymbols = false;
|
||||||
|
bool resetSymbols = false;
|
||||||
std::vector<std::string> pathSubstitutions;
|
std::vector<std::string> pathSubstitutions;
|
||||||
|
std::string addr2lineToolPath;
|
||||||
|
std::string addr2lineArgs;
|
||||||
|
bool verboseSymbols = false;
|
||||||
|
|
||||||
int c;
|
int c;
|
||||||
while( ( c = getopt( argc, argv, "4hez:ds:crp:j:" ) ) != -1 )
|
while( ( c = getopt( argc, argv, "4hez:ds:crRp:a:A:vj:" ) ) != -1 )
|
||||||
{
|
{
|
||||||
switch( c )
|
switch( c )
|
||||||
{
|
{
|
||||||
@@ -137,9 +146,21 @@ int main( int argc, char** argv )
|
|||||||
case 'r':
|
case 'r':
|
||||||
resolveSymbols = true;
|
resolveSymbols = true;
|
||||||
break;
|
break;
|
||||||
|
case 'R':
|
||||||
|
resetSymbols = true;
|
||||||
|
break;
|
||||||
case 'p':
|
case 'p':
|
||||||
pathSubstitutions.push_back(optarg);
|
pathSubstitutions.push_back(optarg);
|
||||||
break;
|
break;
|
||||||
|
case 'a':
|
||||||
|
addr2lineToolPath = optarg;
|
||||||
|
break;
|
||||||
|
case 'A':
|
||||||
|
addr2lineArgs = optarg;
|
||||||
|
break;
|
||||||
|
case 'v':
|
||||||
|
verboseSymbols = true;
|
||||||
|
break;
|
||||||
case 'j':
|
case 'j':
|
||||||
streams = atoi( optarg );
|
streams = atoi( optarg );
|
||||||
break;
|
break;
|
||||||
@@ -171,7 +192,7 @@ int main( int argc, char** argv )
|
|||||||
{
|
{
|
||||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||||
const bool allowBgThreads = false;
|
const bool allowBgThreads = false;
|
||||||
const bool allowStringModification = resolveSymbols;
|
const bool allowStringModification = resolveSymbols || resetSymbols;
|
||||||
tracy::Worker worker( *f, (tracy::EventType::Type)events, allowBgThreads, allowStringModification );
|
tracy::Worker worker( *f, (tracy::EventType::Type)events, allowBgThreads, allowStringModification );
|
||||||
|
|
||||||
#ifndef TRACY_NO_STATISTICS
|
#ifndef TRACY_NO_STATISTICS
|
||||||
@@ -181,7 +202,8 @@ int main( int argc, char** argv )
|
|||||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
if( cacheSource ) worker.CacheSourceFiles();
|
if( cacheSource ) worker.CacheSourceFiles();
|
||||||
if( resolveSymbols ) PatchSymbols( worker, pathSubstitutions );
|
if( resetSymbols ) ResetSymbols( worker );
|
||||||
|
if( resolveSymbols ) PatchSymbols( worker, pathSubstitutions, addr2lineToolPath, addr2lineArgs, verboseSymbols );
|
||||||
|
|
||||||
auto w = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, clev, zstdLevel, streams ) );
|
auto w = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, clev, zstdLevel, streams ) );
|
||||||
if( !w )
|
if( !w )
|
||||||
|
|||||||
Reference in New Issue
Block a user