Iterate ranges array when drawing ranges on timeline.

Do not include alpha in color entries in ranges array.
Move ranges setup to an appropriate file.
2026-06-23 23:58:56 +00:00 · 2026-06-23 23:45:41 +02:00 · 2026-06-23 23:45:11 +02:00 · 2026-06-23 23:29:33 +02:00 · 2026-06-23 23:07:27 +02:00 · 2026-06-23 23:06:02 +02:00
336 changed files with 261942 additions and 227041 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -1,18 +1,35 @@
 # Empirical format config, based on observed style guide
 # Use this only as an help to fit the surrounding code style - don't reformat whole files at once
 ---
-BasedOnStyle: LLVM
-AllowShortIfStatementsOnASingleLine: WithoutElse
+BasedOnStyle: Microsoft
+AllowShortIfStatementsOnASingleLine: AllIfsAndElse
 AllowShortLoopsOnASingleLine: true
-AlwaysBreakTemplateDeclarations: Yes
-BreakBeforeBraces: Allman
+AllowShortCaseLabelsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: All
+# AllowShortEnumsOnASingleLine: true # Broken for some reason, even in the latest versions of clang-format... So don't use it, or it may change formatting in the future.
+AllowShortLambdasOnASingleLine: All
 BreakConstructorInitializers: BeforeComma
 BreakStringLiterals: false
-ColumnLimit: 120
+SpaceAfterTemplateKeyword: false
+AlwaysBreakTemplateDeclarations: Yes
+# Allman seems to break lambda formatting for some reason with `ColumnLimit: 0`. See https://github.com/llvm/llvm-project/issues/50275
+# Even though it is supposed to have been fixed, the issue still remains in 20.1.8 (and is very much present in 18.x, which is the one shipped by VS2022 and the VSCode clangd as of 2025-07-27).
+# Things work fine with `BasedOnStyle: Microsoft` so use that instead
+#BreakBeforeBraces: Allman 
+ColumnLimit: 0
+# We'd like to use LeftWithLastLine but it's only available in >=19.x
+#AlignEscapedNewlines: LeftWithLastLine
+AlignEscapedNewlines: Left
 FixNamespaceComments: false
 IndentPPDirectives: AfterHash
+IndentAccessModifiers: false
+AccessModifierOffset: -4
+LambdaBodyIndentation: OuterScope
+PPIndentWidth: 2
 IndentWidth: 4
 PointerAlignment: Left
 SpaceBeforeParens: Never
 SpacesInParentheses: true
 TabWidth: 4
+AlignTrailingComments:
+  Kind: Leave
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -20,18 +20,23 @@ Checks:
 	-google-readability-namespace-comments,
 	-misc-confusable-identifiers,
 	-misc-no-recursion,
+	-misc-use-anonymous-namespace,
+	-misc-use-internal-linkage,
 	-modernize-avoid-c-arrays,
 	-modernize-deprecated-headers,
 	-modernize-use-default-member-init,
+	-modernize-use-designated-initializers,
 	-modernize-use-trailing-return-type,
 	-performance-no-int-to-ptr,
 	-readability-braces-around-statements,
 	-readability-else-after-return,
 	-readability-function-cognitive-complexity,
+	-readability-function-size,
 	-readability-identifier-length,
 	-readability-implicit-bool-conversion,
 	-readability-isolate-declaration,
 	-readability-magic-numbers,
+	-readability-math-missing-parentheses,
 	-readability-qualified-auto,
 	-readability-uppercase-literal-suffix
 '
--- a/.github/actions/test-tracy/action.yml
+++ b/.github/actions/test-tracy/action.yml
@@ -0,0 +1,35 @@
+name: 'Test Tracy'
+description: 'Build the Tracy test application with various cmake flag combinations'
+
+inputs:
+  extra_cmake_flags:
+    description: 'Additional cmake flags appended to each configure command (e.g. cross-compilation flags)'
+    required: false
+    default: ''
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Test application
+      shell: bash
+      run: |
+        # test compilation with different flags
+        # we clean the build folder to reset cached variables between runs
+        cmake -B tests/tracy/build -S tests/tracy -DCMAKE_BUILD_TYPE=Release ${{ inputs.extra_cmake_flags }}
+        cmake --build tests/tracy/build --parallel
+        cmake -E rm -rf tests/tracy/build
+
+        # same with TRACY_ON_DEMAND
+        cmake -B tests/tracy/build -S tests/tracy -DCMAKE_BUILD_TYPE=Release -DTRACY_ON_DEMAND=ON ${{ inputs.extra_cmake_flags }}
+        cmake --build tests/tracy/build --parallel
+        cmake -E rm -rf tests/tracy/build
+
+        # same with TRACY_DELAYED_INIT and TRACY_MANUAL_LIFETIME
+        cmake -B tests/tracy/build -S tests/tracy -DCMAKE_BUILD_TYPE=Release -DTRACY_DELAYED_INIT=ON -DTRACY_MANUAL_LIFETIME=ON ${{ inputs.extra_cmake_flags }}
+        cmake --build tests/tracy/build --parallel
+        cmake -E rm -rf tests/tracy/build
+
+        # same with TRACY_DEMANGLE
+        cmake -B tests/tracy/build -S tests/tracy -DCMAKE_BUILD_TYPE=Release -DTRACY_DEMANGLE=ON ${{ inputs.extra_cmake_flags }}
+        cmake --build tests/tracy/build --parallel
+        cmake -E rm -rf tests/tracy/build
--- a/.github/workflows/emscripten.yml
+++ b/.github/workflows/emscripten.yml
@@ -5,24 +5,31 @@ on:
    branches: [ master ]
  pull_request:
    branches: [ master ]
+  workflow_dispatch:

 env:
  CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm-cache

 jobs:
-  build:
+  build-emscripten:
    runs-on: ubuntu-latest
    container: archlinux:base-devel
    steps:
    - name: Install dependencies
-      run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed cmake git unzip python ninja zstd
+      run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed cmake git unzip python ninja zstd nodejs
    - name: Setup emscripten
-      uses: mymindstorm/setup-emsdk@v14
+      uses: emscripten-core/setup-emsdk@v16
      with:
-        version: 3.1.67
+        version: 5.0.7
    - name: Trust git repo
      run: git config --global --add safe.directory '*'
    - uses: actions/checkout@v4
+    - name: Cache CPM packages
+      uses: actions/cache@v4
+      with:
+        path: ${{ env.CPM_SOURCE_CACHE }}
+        key: ${{ runner.os }}-cpm-${{ hashFiles('**/vendor.cmake', '**/CMakeLists.txt') }}
+        restore-keys: ${{ runner.os }}-cpm-
    - name: Profiler GUI
      run: |
        cmake -G Ninja -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=MinSizeRel -DGIT_REV=${{ github.sha }} -DCMAKE_TOOLCHAIN_FILE=${{env.EMSDK}}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake
@@ -48,7 +55,7 @@ jobs:
        path: bin
  deploy:
    runs-on: ubuntu-latest
-    needs: build
+    needs: build-emscripten
    if: github.ref == 'refs/heads/master'
    steps:
    - uses: actions/download-artifact@v4
--- a/.github/workflows/latex.yml
+++ b/.github/workflows/latex.yml
@@ -5,9 +5,11 @@ on:
    branches: [ master ]
  pull_request:
    branches: [ master ]
+  workflow_dispatch:
+  workflow_call:

 jobs:
-  build:
+  build-manual:

    runs-on: ubuntu-latest

--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -5,64 +5,68 @@ on:
    branches: [ master ]
  pull_request:
    branches: [ master ]
+  workflow_dispatch:

 env:
  CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm-cache

 jobs:
-  build:
+  build-linux:
    runs-on: ubuntu-latest
    container: archlinux:base-devel
    steps:
    - name: Install dependencies
-      run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
+      run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs lua
    - name: Trust git repo
      run: git config --global --add safe.directory '*'
    - uses: actions/checkout@v4
+    - name: Cache CPM packages
+      uses: actions/cache@v4
+      with:
+        path: ${{ env.CPM_SOURCE_CACHE }}
+        key: ${{ runner.os }}-cpm-${{ hashFiles('**/vendor.cmake', '**/CMakeLists.txt') }}
+        restore-keys: ${{ runner.os }}-cpm-
    - name: Profiler GUI
      run: |
        cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
-        cmake --build profiler/build --parallel
+        if [ "${ACT:-}" != "true" ] && [ "${FORGEJO_ACTIONS:-}" != "true" ]; then
+          cmake --build profiler/build
+        else
+          cmake --build profiler/build --parallel 2
+        fi
    - name: Update utility
      run: |
-        cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
+        cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
        cmake --build update/build --parallel
    - name: Capture utility
      run: |
-        cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
+        cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
        cmake --build capture/build --parallel
    - name: Csvexport utility
      run: |
-        cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
+        cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
        cmake --build csvexport/build --parallel
    - name: Import utilities
      run: |
-        cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release
+        cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
        cmake --build import/build --parallel
-    - name: Library
-      run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
-    - name: Test application
+    - name: Merge utility
      run: |
-        # test compilation with different flags
-        # we clean the build folder to reset cached variables between runs
-        cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release
-        cmake --build test/build --parallel
-        rm -rf test/build
-
-        # same with TRACY_ON_DEMAND
-        cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_ON_DEMAND=ON .
-        cmake --build test/build --parallel
-        rm -rf test/build
-
-        # same with TRACY_DELAYED_INIT TRACY_MANUAL_LIFETIME
-        cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DELAYED_INIT=ON -DTRACY_MANUAL_LIFETIME=ON .
-        cmake --build test/build --parallel
-        rm -rf test/build
-
-        # same with TRACY_DEMANGLE
-        cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DEMANGLE=ON .
-        cmake --build test/build --parallel
-        rm -rf test/build
+        cmake -B merge/build -S merge -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
+        cmake --build merge/build --parallel
+    - name: Library (cmake)
+      run: |
+        cmake -B build -DCMAKE_BUILD_TYPE=Release -DTRACY_ENABLE=ON
+        cmake --build build
+        cmake --install build
+      env:
+        CMAKE_INSTALL_PREFIX: ${{ github.workspace }}/bin
+    - name: Library (meson)
+      run: |
+        meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib -Dtracy_enable=true build-meson
+        meson compile -C build-meson
+    - name: Test application
+      uses: ./.github/actions/test-tracy
    - name: Find Artifacts
      id: find_artifacts
      run: |
@@ -73,6 +77,7 @@ jobs:
        cp csvexport/build/tracy-csvexport bin
        cp import/build/tracy-import-chrome bin
        cp import/build/tracy-import-fuchsia bin
+        cp merge/build/tracy-merge bin
        strip bin/tracy-*
    - uses: actions/upload-artifact@v4
      with:
--- a/.github/workflows/macos.yml
+++ b/.github/workflows/macos.yml
@@ -0,0 +1,69 @@
+name: macos
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+  workflow_dispatch:
+
+env:
+  CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm-cache
+
+jobs:
+  build-macos:
+    runs-on: macos-15
+    steps:
+    - uses: actions/checkout@v4
+    - name: Cache CPM packages
+      uses: actions/cache@v4
+      with:
+        path: ${{ env.CPM_SOURCE_CACHE }}
+        key: ${{ runner.os }}-cpm-${{ hashFiles('**/vendor.cmake', '**/CMakeLists.txt') }}
+        restore-keys: ${{ runner.os }}-cpm-
+    - name: Install dependencies
+      run: brew install pkg-config glfw meson
+    - name: Trust git repo
+      run: git config --global --add safe.directory '*'
+    - name: Build profiler
+      run: |
+        cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
+        cmake --build profiler/build --parallel 2 --config Release
+    - name: Build update
+      run: |
+        cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
+        cmake --build update/build --parallel --config Release
+    - name: Build capture
+      run: |
+        cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
+        cmake --build capture/build --parallel --config Release
+    - name: Build csvexport
+      run: |
+        cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
+        cmake --build csvexport/build --parallel --config Release
+    - name: Build import
+      run: |
+        cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
+        cmake --build import/build --parallel --config Release
+    - name: Build merge
+      run: |
+        cmake -B merge/build -S merge -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
+        cmake --build merge/build --parallel --config Release
+    - name: Build library
+      run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib -Dtracy_enable=true build && meson compile -C build && meson install -C build
+    - name: Test application
+      uses: ./.github/actions/test-tracy
+    - name: Package artifacts
+      run: |
+        mkdir -p bin
+        cp profiler/build/tracy-profiler bin
+        cp update/build/tracy-update bin
+        cp capture/build/tracy-capture bin
+        cp csvexport/build/tracy-csvexport bin
+        cp import/build/tracy-import-chrome bin
+        cp import/build/tracy-import-fuchsia bin
+        cp merge/build/tracy-merge bin
+    - uses: actions/upload-artifact@v4
+      with:
+        name: macos
+        path: bin
--- a/.github/workflows/mingw.yml
+++ b/.github/workflows/mingw.yml
@@ -0,0 +1,61 @@
+name: build-mingw
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+  workflow_dispatch:
+
+env:
+  CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm-cache
+
+jobs:
+  build-mingw:
+    runs-on: ubuntu-latest
+    container: archlinux:base-devel
+    steps:
+    - name: Install dependencies
+      run: |
+        pacman -Syu --noconfirm
+        pacman -S --noconfirm --needed mingw-w64-gcc cmake git nodejs meson
+    - name: Trust git repo
+      run: git config --global --add safe.directory '*'
+    - uses: actions/checkout@v4
+    - name: Cache CPM packages
+      uses: actions/cache@v4
+      with:
+        path: ${{ env.CPM_SOURCE_CACHE }}
+        key: ${{ runner.os }}-cpm-${{ hashFiles('**/vendor.cmake', '**/CMakeLists.txt') }}
+        restore-keys: ${{ runner.os }}-cpm-
+    - name: Build TracyClient
+      run: |
+        cmake -B build -DCMAKE_BUILD_TYPE=Release -DTRACY_ENABLE=ON \
+          -DCMAKE_SYSTEM_NAME=Windows \
+          -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc \
+          -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++
+        cmake --build build
+    - name: Library (meson)
+      run: |
+        cat > mingw-cross.txt << 'EOF'
+        [binaries]
+        c = '/usr/bin/x86_64-w64-mingw32-gcc'
+        cpp = '/usr/bin/x86_64-w64-mingw32-g++'
+        ar = '/usr/bin/x86_64-w64-mingw32-ar'
+        strip = '/usr/bin/x86_64-w64-mingw32-strip'
+
+        [host_machine]
+        system = 'windows'
+        cpu_family = 'x86_64'
+        cpu = 'x86_64'
+        endian = 'little'
+        EOF
+        meson setup build-meson --cross-file mingw-cross.txt -Ddefault_library=static -Dtracy_enable=true
+        meson compile -C build-meson
+    - name: Test application
+      uses: ./.github/actions/test-tracy
+      with:
+        extra_cmake_flags: >-
+          -DCMAKE_SYSTEM_NAME=Windows
+          -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc
+          -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -0,0 +1,29 @@
+name: release
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  release-build-windows:
+    uses: ./.github/workflows/windows.yml
+
+  release-build-manual:
+    uses: ./.github/workflows/latex.yml
+
+  attach-to-release:
+    needs: [release-build-windows, release-build-manual]
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/download-artifact@v4
+    - name: Create versioned zip
+      run: |
+        VERSION="${{ github.event.release.tag_name }}"
+        VERSION_NO_V="${VERSION#v}"
+        cd windows
+        zip -r ../windows-$VERSION_NO_V.zip .
+    - uses: softprops/action-gh-release@v2
+      with:
+        files: |
+          windows-*.zip
+          manual/tracy.pdf
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -1,74 +1,61 @@
-name: build
+name: windows

 on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]
+  workflow_dispatch:
+  workflow_call:

 env:
  CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm-cache

 jobs:
-  build:
-    strategy:
-      matrix:
-        os: [ windows-latest, macos-15 ]
-
-    runs-on: ${{ matrix.os }}
-    continue-on-error: true
+  build-windows:
+    runs-on: windows-latest
    steps:
    - uses: actions/checkout@v4
-    - if: startsWith(matrix.os, 'windows')
-      uses: microsoft/setup-msbuild@v2
-    - if: startsWith(matrix.os, 'windows')
-      uses: actions/setup-python@v2
+    - name: Cache CPM packages
+      uses: actions/cache@v4
+      with:
+        path: ${{ env.CPM_SOURCE_CACHE }}
+        key: ${{ runner.os }}-cpm-${{ hashFiles('**/vendor.cmake', '**/CMakeLists.txt') }}
+        restore-keys: ${{ runner.os }}-cpm-
+    - uses: microsoft/setup-msbuild@v2
+    - uses: actions/setup-python@v2
      with:
        python-version: '3.x'
-    - if: startsWith(matrix.os, 'windows')
-      run: pip install meson ninja
-    - if: startsWith(matrix.os, 'macos')
-      name: Install macos dependencies
-      run: brew install pkg-config glfw meson
+    - run: pip install meson ninja
    - name: Trust git repo
      run: git config --global --add safe.directory '*'
-    - name: Profiler GUI
+    - name: Build profiler
      run: |
        cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
-        cmake --build profiler/build --parallel --config Release
-    - name: Update utility
+        cmake --build profiler/build --parallel 2 --config Release
+    - name: Build update
      run: |
-        cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
+        cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
        cmake --build update/build --parallel --config Release
-    - name: Capture utility
+    - name: Build capture
      run: |
-        cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
+        cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
        cmake --build capture/build --parallel --config Release
-    - name: Csvexport utility
+    - name: Build csvexport
      run: |
-        cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
+        cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
        cmake --build csvexport/build --parallel --config Release
-    - name: Import utilities
+    - name: Build import
      run: |
-        cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release
+        cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
        cmake --build import/build --parallel --config Release
-    - if: ${{ !startsWith(matrix.os, 'windows') }}
-      name: Library
-      run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
-    - if: ${{ !startsWith(matrix.os, 'windows') }}
-      name: Find Artifacts
-      id: find_artifacts
+    - name: Build merge
      run: |
-        mkdir -p bin
-        cp profiler/build/tracy-profiler bin
-        cp update/build/tracy-update bin
-        cp capture/build/tracy-capture bin
-        cp csvexport/build/tracy-csvexport bin
-        cp import/build/tracy-import-chrome bin
-        cp import/build/tracy-import-fuchsia bin
-    - if: startsWith(matrix.os, 'windows')
-      name: Find Artifacts
-      id: find_artifacts_windows
+        cmake -B merge/build -S merge -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
+        cmake --build merge/build --parallel --config Release
+    - name: Test application
+      uses: ./.github/actions/test-tracy
+    - name: Package artifacts
      run: |
        mkdir bin
        copy profiler\build\Release\tracy-profiler.exe bin
@@ -77,7 +64,8 @@ jobs:
        copy csvexport\build\Release\tracy-csvexport.exe bin
        copy import\build\Release\tracy-import-chrome.exe bin
        copy import\build\Release\tracy-import-fuchsia.exe bin
+        copy merge\build\Release\tracy-merge.exe bin
    - uses: actions/upload-artifact@v4
      with:
-        name: ${{ matrix.os }}
+        name: windows
        path: bin
--- a/.gitignore
+++ b/.gitignore
@@ -30,7 +30,12 @@ profiler/build/win32/Tracy.aps
 extra/vswhere.exe
 extra/tracy-build
 .cache
+.uv-cache/
+.venv/
 compile_commands.json
 profiler/build/wasm/Tracy-release.*
 profiler/build/wasm/Tracy-debug.*
 profiler/build/wasm/embed.tracy
+examples/ToyPathTracer/Windows/TestCpu
+examples/ToyPathTracer/Windows/x64
+*.user
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,12 +1,12 @@
 {
-    "cmake.configureOnOpen": true,
    "cmake.sourceDirectory": [
        "${workspaceFolder}/profiler",
        "${workspaceFolder}/capture",
        "${workspaceFolder}/csvexport",
        "${workspaceFolder}/import",
+        "${workspaceFolder}/merge",
        "${workspaceFolder}/update",
-        "${workspaceFolder}/test",
+        "${workspaceFolder}/tests/tracy",
        "${workspaceFolder}",
    ],
    "cmake.buildDirectory": "${sourceDirectory}/build",
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.10)
+cmake_minimum_required(VERSION 3.13)

 # Run version helper script
 include(cmake/version.cmake)
@@ -33,6 +33,7 @@ else()
 endif()

 find_package(Threads REQUIRED)
+find_package(rocprofiler-sdk PATHS "/opt/rocm/lib/cmake")

 set(TRACY_PUBLIC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/public)

@@ -56,6 +57,10 @@ target_link_libraries(
        Threads::Threads
        ${CMAKE_DL_LIBS}
 )
+if(rocprofiler-sdk_FOUND)
+    target_compile_definitions(TracyClient PUBLIC TRACY_ROCPROF)
+    target_link_libraries(TracyClient PUBLIC rocprofiler-sdk::rocprofiler-sdk)
+endif()

 if(TRACY_Fortran)
    add_library(TracyClientF90 ${TRACY_VISIBILITY} "${TRACY_PUBLIC_DIR}/TracyClient.F90")
@@ -73,7 +78,8 @@ endif()

 # Public dependency on some libraries required when using Mingw
 if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} MATCHES "GNU|Clang")
-    target_link_libraries(TracyClient PUBLIC ws2_32 dbghelp)
+    target_link_libraries(TracyClient PUBLIC ws2_32 dbghelp secur32)
+    target_compile_definitions(TracyClient PUBLIC WINVER=0x0A00 _WIN32_WINNT=0x0A00)
 endif()

 if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
@@ -100,53 +106,58 @@ if(TRACY_Fortran)
    add_library(Tracy::TracyClient_Fortran ALIAS TracyClientF90)
 endif()

-macro(set_option option help value)
-    option(${option} ${help} ${value})
-    if(${option})
-        message(STATUS "${option}: ON")
-        target_compile_definitions(TracyClient PUBLIC ${option})
-    else()
-        message(STATUS "${option}: OFF")
-    endif()
-endmacro()
+include(cmake/options.cmake)

-set_option(TRACY_ENABLE "Enable profiling" ON)
-set_option(TRACY_ON_DEMAND "On-demand profiling" OFF)
-set_option(TRACY_CALLSTACK "Enforce callstack collection for tracy regions" OFF)
-set_option(TRACY_NO_CALLSTACK "Disable all callstack related functionality" OFF)
-set_option(TRACY_NO_CALLSTACK_INLINES "Disables the inline functions in callstacks" OFF)
-set_option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" OFF)
-set_option(TRACY_NO_BROADCAST "Disable client discovery by broadcast to local network" OFF)
-set_option(TRACY_ONLY_IPV4 "Tracy will only accept connections on IPv4 addresses (disable IPv6)" OFF)
-set_option(TRACY_NO_CODE_TRANSFER "Disable collection of source code" OFF)
-set_option(TRACY_NO_CONTEXT_SWITCH "Disable capture of context switches" OFF)
-set_option(TRACY_NO_EXIT "Client executable does not exit until all profile data is sent to server" OFF)
-set_option(TRACY_NO_SAMPLING "Disable call stack sampling" OFF)
-set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
-set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
-set_option(TRACY_NO_FRAME_IMAGE  "Disable the frame image support and its thread" OFF)
-set_option(TRACY_NO_SYSTEM_TRACING  "Disable systrace sampling" OFF)
-set_option(TRACY_PATCHABLE_NOPSLEDS  "Enable nopsleds for efficient patching by system-level tools (e.g. rr)" OFF)
-set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
-set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
-set_option(TRACY_FIBERS "Enable fibers support" OFF)
-set_option(TRACY_NO_CRASH_HANDLER "Disable crash handling" OFF)
-set_option(TRACY_TIMER_FALLBACK "Use lower resolution timers" OFF)
-set_option(TRACY_LIBUNWIND_BACKTRACE "Use libunwind backtracing where supported" OFF)
-set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resolution, only resolve the image path and offset to enable offline symbol resolution" OFF)
-set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution resolution after the first symbol resolve operation" OFF)
-set_option(TRACY_DEBUGINFOD "Enable debuginfod support" OFF)
+set_option(TRACY_ENABLE "Enable profiling" OFF TracyClient)
+set_option(TRACY_ON_DEMAND "On-demand profiling" OFF TracyClient)
+set_option_value(TRACY_CALLSTACK "Override the callstack collection depth for tracy zones" "" TracyClient)
+set_option_value_as_string(TRACY_PLATFORM_HEADER "Path to a header providing TRACY_HAS_CUSTOM_* hooks for an unsupported platform" "" TracyClient)
+set_option(TRACY_NO_CALLSTACK "Disable all callstack related functionality" OFF TracyClient)
+set_option(TRACY_NO_CALLSTACK_INLINES "Disables the inline functions in callstacks" OFF TracyClient)
+set_option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" OFF TracyClient)
+set_option(TRACY_NO_BROADCAST "Disable client discovery by broadcast to local network" OFF TracyClient)
+set_option(TRACY_ONLY_IPV4 "Tracy will only accept connections on IPv4 addresses (disable IPv6)" OFF TracyClient)
+set_option(TRACY_NO_CODE_TRANSFER "Disable collection of source code" OFF TracyClient)
+set_option(TRACY_NO_CONTEXT_SWITCH "Disable capture of context switches" OFF TracyClient)
+set_option(TRACY_NO_EXIT "Client executable does not exit until all profile data is sent to server" OFF TracyClient)
+set_option(TRACY_NO_SAMPLING "Disable call stack sampling" OFF TracyClient)
+set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF TracyClient)
+set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF TracyClient)
+set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF TracyClient)
+set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF TracyClient)
+set_option(TRACY_PATCHABLE_NOPSLEDS "Enable nopsleds for efficient patching by system-level tools (e.g. rr)" OFF TracyClient)
+set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF TracyClient)
+set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF TracyClient)
+set_option(TRACY_FIBERS "Enable fibers support" OFF TracyClient)
+set_option(TRACY_NO_CRASH_HANDLER "Disable crash handling" OFF TracyClient)
+set_option(TRACY_TIMER_FALLBACK "Use lower resolution timers" OFF TracyClient)
+set_option(TRACY_DISALLOW_HW_TIMER "Disallow hardware timer (may be useful on VMs). Requires TRACY_TIMER_FALLBACK=ON" OFF TracyClient)
+set_option(TRACY_LIBUNWIND_BACKTRACE "Use libunwind backtracing where supported" OFF TracyClient)
+set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resolution, only resolve the image path and offset to enable offline symbol resolution" OFF TracyClient)
+set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution resolution after the first symbol resolve operation" OFF TracyClient)
+set_option(TRACY_DEBUGINFOD "Enable debuginfod support" OFF TracyClient)
+set_option(TRACY_IGNORE_MEMORY_FAULTS "Ignore instrumentation errors from memory free events that do not have a matching allocation" OFF TracyClient)
+set_option(TRACY_OPENGL_AUTO_CALIBRATION "Periodically recalibrate OpenGL GPU/CPU clock drift (forces a CPU/GPU sync each time)" OFF TracyClient)

 # advanced
-set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF)
+set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF TracyClient)
 mark_as_advanced(TRACY_VERBOSE)
-set_option(TRACY_DEMANGLE "[advanced] Don't use default demangling function - You'll need to provide your own" OFF)
+set_option(TRACY_NO_INTERNAL_MESSAGE "[advanced] Prevent the profiler from logging messages" OFF TracyClient)
+mark_as_advanced(TRACY_NO_INTERNAL_MESSAGE)
+set_option(TRACY_DEMANGLE "[advanced] Don't use default demangling function - You'll need to provide your own" OFF TracyClient)
 mark_as_advanced(TRACY_DEMANGLE)
+if(rocprofiler-sdk_FOUND)
+    set_option(TRACY_ROCPROF_CALIBRATION "[advanced] Use continuous calibration of the Rocprof GPU time." OFF TracyClient)
+    mark_as_advanced(TRACY_ROCPROF_CALIBRATION)
+endif()

 # handle incompatible combinations
 if(TRACY_MANUAL_LIFETIME AND NOT TRACY_DELAYED_INIT)
    message(FATAL_ERROR "TRACY_MANUAL_LIFETIME can not be activated with disabled TRACY_DELAYED_INIT")
 endif()
+if(TRACY_DISALLOW_HW_TIMER AND NOT TRACY_TIMER_FALLBACK)
+    message(FATAL_ERROR "TRACY_DISALLOW_HW_TIMER can not be activated with disabled TRACY_TIMER_FALLBACK")
+endif()

 if(NOT TRACY_STATIC)
    target_compile_definitions(TracyClient PRIVATE TRACY_EXPORTS)
@@ -186,6 +197,7 @@ set(client_includes
    ${TRACY_PUBLIC_DIR}/client/TracyDxt1.hpp
    ${TRACY_PUBLIC_DIR}/client/TracyFastVector.hpp
    ${TRACY_PUBLIC_DIR}/client/TracyLock.hpp
+    ${TRACY_PUBLIC_DIR}/client/TracyMangle.hpp
    ${TRACY_PUBLIC_DIR}/client/TracyProfiler.hpp
    ${TRACY_PUBLIC_DIR}/client/TracyRingBuffer.hpp
    ${TRACY_PUBLIC_DIR}/client/TracyScoped.hpp
@@ -209,21 +221,22 @@ set(common_includes
    ${TRACY_PUBLIC_DIR}/common/TracySocket.hpp
    ${TRACY_PUBLIC_DIR}/common/TracyStackFrames.hpp
    ${TRACY_PUBLIC_DIR}/common/TracySystem.hpp
-    ${TRACY_PUBLIC_DIR}/common/TracyUwp.hpp
+    ${TRACY_PUBLIC_DIR}/common/TracyTaggedUserlandAddress.hpp
+    ${TRACY_PUBLIC_DIR}/common/TracyWinFamily.hpp
    ${TRACY_PUBLIC_DIR}/common/TracyYield.hpp)

 install(TARGETS TracyClient
        EXPORT TracyConfig
-        RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR}
-        LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-        ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR}/$<IF:$<CONFIG:Release>,,$<CONFIG>>
+        LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}/$<IF:$<CONFIG:Release>,,$<CONFIG>>
+        ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}/$<IF:$<CONFIG:Release>,,$<CONFIG>>
        COMPONENT lib)
 if(TRACY_Fortran)
    install(TARGETS TracyClientF90
            EXPORT TracyConfig
-            RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR}
-            LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-            ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+            RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR}/$<IF:$<CONFIG:Release>,,$<CONFIG>>
+            LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}/$<IF:$<CONFIG:Release>,,$<CONFIG>>
+            ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}/$<IF:$<CONFIG:Release>,,$<CONFIG>>
            COMPONENT lib)
 endif()
 # Export targets to build tree root
@@ -271,3 +284,7 @@ if(TRACY_CLIENT_PYTHON)

    add_subdirectory(python)
 endif()
+
+if(PROJECT_IS_TOP_LEVEL)
+    set(CMAKE_COLOR_DIAGNOSTICS ON)
+endif()
--- a/2
+++ b/2
@@ -1,7 +1,7 @@
 Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
 3-clause BSD license.

-Copyright (c) 2017-2025, Bartosz Taudul <wolf@nereid.pl>
+Copyright (c) 2017-2026, Bartosz Taudul <wolf@nereid.pl>
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
--- a/278
+++ b/278
@@ -2,6 +2,284 @@ Note: There is no guarantee that version mismatched client and server will
 be able to talk with each other. Network protocol breakages won't be listed
 here.

+vx.xx.x (2026-xx-xx)
+--------------------
+
+- API break: removed "secure" variants of memory alloc and free macros. The
+  secure code path is now always enabled. Migrate by removing "Secure" from
+  the macros you use, e.g. TracySecureAlloc(...) -> TracyAlloc(...).
+- Added tracy-capture-daemon for automated multi-client trace capture.
+- Added tracy-merge utility for combining multiple trace files into one.
+- Added support for Windows on ARM64 with MSVC.
+- Added support for WebGPU.
+- Trace-specific settings storage has been completely overhauled. It is now
+  possible to make the settings sidecar file public, saved next to the trace
+  file.
+- External frames are now omitted in the single-line call stack list visible
+  in messages list, or in memory allocation info window.
+- External frames are now hidden by default in various contexts where they
+  were previously enabled:
+  - Flame graph window.
+  - Call stack window.
+  - Statistics window (sampling mode).
+- External frames are now dimmed out in call stacks in various parts of UI.
+- Single-line call stacks now have ellipsis at the end, if there are frames
+  remaining.
+- System tracing on Windows has been refactored to be more robust.
+- Tracing on Arm macOS will now have more precise timer readings.
+- Extended CUDA support to track some previously missing memory operations.
+- Added support for setting message's source and severity, through the
+  TracyLogString macro.
+- The "zone trace" list in zone information window has been removed. It was
+  never convenient to use properly. It was replaced by "parent zones" list,
+  which is basically a less convoluted equivalent.
+- Added inline call stack list to the zone information window.
+  - The "call stack" button for opening the call stack in a separate window
+    is still there.
+- Call stacks are now also displayed in zone tooltips (single-line).
+- Implemented heuristic reconstruction of zone call stacks for zones that
+  were captured without call stacks.
+  - Requires sampling to be enabled, and at least one sample in the zone
+    extent.
+  - Since this is a heuristic, the result can be wrong.
+  - The reconstructed call stack can be displayed in the zone tooltip and in
+    the inline call stack view in zone information window.
+  - Reconstructed call stacks are indicated with the "magic wand" icon.
+- Various LLM integration improvements.
+  - The protocol has been updated to use model templates. As a result, tools
+    are now specified in a common way and the reasoning is performed in a
+    separate content stream.
+  - Several new tools were added, which in concert enable the assistant to
+    answer very general questions, such as "how to optimize this program?".
+  - Smaller models are now viable to use. Models as small as 4B parameters do
+    now work really well. You can run such models on virtually all hardware.
+  - Added horizontal scroll bars to code segments.
+  - LLM thinking regions are now hidden by default.
+    - The assistant may notify the user about its current findings, then
+      resume thinking, after which it may give a more complete answer. In
+      such cases, the initial part of the reply will be faded out.
+  - Sampled execution costs are now included in assembly attachments.
+  - Source code retrieval now has an optional line context parameter.
+  - Added ability to search the code for keywords.
+  - Calls in assembly attachments are now annotated with function names.
+  - Wikipedia search will now return 10 results, not only the top one.
+  - Brave search engine is now available as an alternative web search option.
+  - Added emoji font.
+  - Maximum tool reply size has been tweaked to better work with larger
+    contexts.
+  - Tool reply size limit is now configurable in LLM settings.
+  - Tool reply eviction logic for context management has been adjusted to
+    better work with larger contexts. Additional logic was added to prefer
+    eviction of old responses.
+  - Certain LLM actions want to run in a fast mode, with reasoning disabled.
+    In most scenarios the default chat model will have to do here. If you
+    have the memory to spare, you can optionally load two models at once,
+    setting the "fast" model to a smaller and much quicker one.
+  - Chat topic description is now provided, based on the first user question.
+  - Each assistant reply is now labeled with used model and reply time.
+  - Follow-up questions can be automatically suggested.
+- Expanded LLM attachments.
+  - You can now attach complete symbol assembly.
+  - Entry call stacks can be now attached (previously it was only regular
+    call stacks).
+  - Crash call stack attachments are now annotated with crash info.
+  - Source code can be attached (also with execution costs in symbol view).
+  - Zone histogram data can be attached for analysis.
+- Markdown renderer improvements.
+  - Tables are now properly rendered.
+  - Tasklist rendering has been implemented.
+  - Strikethrough is now supported.
+  - Clickable links are now underlined.
+- Tweaked high-resolution scroll handling on Wayland.
+- Touchpad gestures on the timeline now either scroll or zoom, but not both
+  simultaneously.
+- Full user name is now stored in trace info.
+- External functions can be filtered out in the sampling statistics view.
+- Tweaked external paths heuristics.
+  - Check for both 64-bit and 32-bit versions of Program Files directory.
+  - Hidden unix files and directories are now also considered external. For
+    example: $(HOME)/.cache/cpm/somelib/file.h.
+- Call stack window can now provide LLM summaries.
+  - These summaries can be performed automatically. Enable in LLM settings.
+- The capture utility is now displaying query backlog, just like the profiler
+  GUI.
+- Lua source locations that are script code will now have newlines removed.
+  This is a capture-time change, so previously captured broken Lua source
+  locations won't be fixed.
+- Call stack window will now display notification if viewing a crash call
+  stack.
+- Removal of Tracy crash handler stack from the reported crash call stack
+  should now work again on Linux.
+- In disassembly line view, source file names are now displayed instead of
+  "unknown", in case the source line number is not known.
+- Trace host info is now properly formatted.
+- It is now possible to sort the order of threads on the timeline ("visible
+  threads" in trace settings).
+- Added clipboard support to emscripten backend.
+- Added TRACY_DISALLOW_HW_TIMER define for virtualized environments and WSL2,
+  which may not have reliable access to hardware timer registers. Falls back
+  to standard library timer with reduced resolution.
+- Fixed DPI scaling on macOS.
+- Thread names are no longer truncated to 15 characters on Apple.
+- Executable path is now inspected when looking for PDB files on Windows.
+- Added ___tracy_get_time() C API as an equivalent for Profiler::GetTime().
+- D3D12 instrumentation improvements.
+- CUDA instrumentation improvements.
+- Properly set API visibility attributes on MSVC + clang.
+- Fixed regression in data sorting algorithm that could cause broken (going
+  back in time) plots. A retroactive fix is included for previously broken
+  traces.
+- All tools provided by the project now report the version number and git sha
+  revision on the command line help output.
+- Microarchitectural data has been updated to include the latest uops.info
+  measurements.
+- Added validation check for SymSrv.dll on Windows.
+- Various CMake options are now available to control optional build settings:
+  - NO_LTO disables link-time optimization.
+  - NO_MOLD_LINKER disables use of the mold linker.
+  - NO_CCACHE disables use of ccache (compiler cache).
+- The Tracy library now has TRACY_ENABLED unset by default in the CMake and
+  Meson default configurations. This now matches what the documentation was
+  always saying. Some build setups may need updating.
+- Adjust to max sampling rate on Linux.
+- Further improvements to tracefs mount path discovery robustness.
+- Macro mismatch detection between Tracy configuration and client code.
+  - Tracy client build settings change the ABI of the library.
+  - Mismatched versions will break at linking.
+  - As a reminder, Tracy *always* required using the same set of compilation
+    options for the entire program.
+- Message windows will now properly show full message in a tooltip for
+  multi-line messages.
+- Greatly improved the in-profiler user manual.
+  - There is now a chapter tree and the manual contents are displayed section
+    by section.
+  - Links to chapters are now properly working.
+  - The "bclogo" blocks are now correctly processed and displayed as proper
+    admonitions.
+  - The font awesome icons now show as in the rest of the UI.
+  - Footnotes are now rendered as proper footnotes.
+  - Tables are now rendered as intended.
+  - LaTeX math is now converted to readable form.
+  - Added a button to download the full PDF manual to the user manual window.
+- Call stack window will now show the thread the viewed call stack
+  originates from (if possible).
+- "Visible threads" checkboxes in messages, flame graph and wait stacks
+  windows are now displayed in multiple columns, and the maximum number of
+  visible rows is limited, with fallback to scrollable view.
+- Improved child call distribution list in the symbol view window.
+  - The visible area can be now resized horizontally.
+  - The list is now displayed as a table with resizable columns, etc.
+  - Child calls time percentage is now shown as a percentage of calls (as
+    it was before), and also as a percentage of total symbol time.
+- Child calls time is now also displayed as a percentage.
+- Prototype implementation of system tracing on Apple devices.
+- Local (inline) call stack printouts were added to tooltips in statistics
+  window, in sampling mode.
+- Ironed out some code corners to make integration of closed gaming console
+  platforms easier. Added support for custom platform headers.
+- Bottom and top sample trees (in wait stacks, or in entry call stacks)
+  now display aggregation counts if "group by function name" is enabled.
+- HW sample view in symbol view is now disabled by default.
+- The profiler can no longer be built with the statistics disabled.
+- Fixed NVCC builds.
+- Fixed possible lockups in Vulkan timer calibration loop.
+- The flame graph view now supports zooming in and panning with the mouse.
+- General application crash information polish in the profiler UI.
+- The achievements system has been converted to use markdown renderer.
+- Offline symbol resolution with the update utility now supports custom
+  addr2line-compatible tools via -a and -A command line parameters.
+  Additionally, it is now possible to reset all call stack frame symbols to
+  unresolved with the -R parameter.
+- Periodic recalibration of the clock drift in OpenGL contexts can be enabled
+  with the TRACY_OPENGL_AUTO_CALIBRATION compilation define. Note that this
+  requires a full CPU/GPU sync on each calibration event. These events will
+  not fire more often than once every second.
+- Added missing C API for shared locks.
+- Implemented semi-unique, nonsense random name generator.
+  - Can be used to set a trace description.
+  - Will be used to provide default description for newly added annotations.
+- Polished look and feel of annotation regions on the timeline.
+
+
+v0.13.1 (2025-12-11)
+--------------------
+
+- Fixed parsing of extended model and family of x86 CPUID.
+- Fixed memory corruption when a "long" user name was used on Android.
+- Fixed wrong function signature when TRACY_DEBUGINFOD was enabled.
+- Mount list is now read using proper API instead of processing /proc/mounts.
+- Fixed shadow warning suppression not being enabled on gcc.
+- Silently ignore lost ETW Vsync events instead of asserting.
+- Worked around a few cases where old macOS machines do not support C++20
+  properly. Thanks Tim Apple!
+- Added truncated mean parameter to csvexport.
+- Added experimental viewer for the user manual.
+- Memory free faults can be now ignored with the TRACY_IGNORE_MEMORY_FAULTS
+  option.
+- Fixed race condition during profiler shutdown.
+
+
+v0.13.0 (2025-11-11)
+--------------------
+
+- Added optional LLM integration.
+  - Can be completely disabled in options.
+  - Requires you to provide a local LLM service.
+  - Can be used to retrieve information from the user manual.
+  - Can answer queries about application call stacks, assembly code, other
+    general questions.
+  - Will refer to network resources to obtain information.
+  - The required setup is detailed in the user manual.
+- Added support for Microsoft Game Development Kit (GDK).
+- Added support for ROCm / Rocprof.
+- Default values for certain settings can be now saved in the options
+  window.
+- The display height of any timeline thread can be limited with a thread
+  cropper widget at the left border of the screen.
+- System tracing is now stopped when the profiled program wants to exit.
+- System tracing can be now enabled and disabled by the profiled program.
+- Added support for host query reset when collecting Vulkan traces.
+- The find zone statistics now also show P99 and P99.9.
+- Timeline for a thread will no longer hide if there are no zones to show,
+  but samples are visible.
+- Fixed problems with Wayland integration.
+  - Proper order of operations is now ensured during initialization.
+  - The window size calculations for fractional scaling are now done
+    correctly.
+- The Linux tracefs mount path is now properly detected, instead of relying
+  on a hardcoded value.
+- Fixed LockMark macro expansion.
+- Fixed invalid reported fiber enter time.
+- Properly handle fiber enter and leave events in the on demand mode.
+- Removed calibration of queue delay time. It served no real purpose.
+- Various improvements have been made to speed up symbol and executable
+  image queries.
+- Exposed internal mutex variable in Lockable and SharedLockable.
+- Fixed problems with Linux systems that do not use glibc.
+- Fixed edge case that could corrupt rpmalloc state in the profiled
+  application.
+- Extended ZoneNameF macro with compiler checks for proper printf args.
+- Warnings about variable redefinition by nested zone macros are now
+  suppressed by default. The old behavior can be restored by adding the
+  TRACY_ALLOW_SHADOW_WARNING define during compilation of your program.
+- Fixed window icon and dock integration on macOS.
+- Fixed edge case with symbols thread not behaving as expected when on
+  demand mode was used and a rapid reconnection was made.
+- Properly defer GPU context events in serial C API.
+
+
+v0.12.2 (2025-06-25)
+--------------------
+
+- Fixed builds made out of git checkout directory.
+- Added range limits for flame graph.
+- Fixed wayland include paths for distros that use non-standard package
+  layouts.
+- Worked around MinGW build problems. Safe symbol retrieval is not available
+  on this platform.
+- Fixed Lua bindings when TRACY_NO_CALLSTACK is defined.
+
+
 v0.12.1 (2025-06-07)
 --------------------

--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@

 ### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.

-Tracy supports profiling CPU (Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/tealsnow/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
+Tracy supports profiling CPU (Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/tealsnow/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphics/compute APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA, WebGPU.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.

 - [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
 - [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
--- a/capture/CMakeLists.txt
+++ b/capture/CMakeLists.txt
@@ -1,7 +1,6 @@
 cmake_minimum_required(VERSION 3.16)

-option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
-option(NO_STATISTICS "Disable calculation of statistics" ON)
+set(NO_STATISTICS ON)

 include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

@@ -16,13 +15,22 @@ project(
 include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
 include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
 include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/../cmake/GitRef.cmake)

 set(PROGRAM_FILES
    src/capture.cpp
+    src/CaptureOutput.cpp
 )

 add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
+add_git_ref(${PROJECT_NAME})
 target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
 set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})

-install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
+install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+add_executable(tracy-capture-daemon src/capturedaemon.cpp src/CaptureOutput.cpp ${COMMON_FILES} ${SERVER_FILES})
+add_git_ref(tracy-capture-daemon)
+target_link_libraries(tracy-capture-daemon PRIVATE TracyServer TracyGetOpt)
+
+install(TARGETS tracy-capture-daemon DESTINATION ${CMAKE_INSTALL_BINDIR})
--- a/capture/src/CaptureOutput.cpp
+++ b/capture/src/CaptureOutput.cpp
@@ -0,0 +1,202 @@
+#ifdef _WIN32
+#  include <io.h>
+#  include <windows.h>
+#else
+#  include <unistd.h>
+#endif
+
+#include <atomic>
+#include <chrono>
+#include <cstdarg>
+#include <cstdio>
+#include <cstring>
+#include <inttypes.h>
+#include <thread>
+
+#include "CaptureOutput.hpp"
+#include "../../public/common/TracyProtocol.hpp"
+#include "../../public/common/TracyStackFrames.hpp"
+#include "../../server/TracyMemory.hpp"
+#include "../../server/TracyPrint.hpp"
+#include "../../server/TracyWorker.hpp"
+
+static bool s_isTerminal = false;
+
+// Records in s_isTerminal whether stdout refers to a terminal.
+void InitTerminalDetection()
+{
+#ifndef _WIN32
+    s_isTerminal = isatty( fileno( stdout ) );
+#else
+    s_isTerminal = _isatty( fileno( stdout ) );
+#endif
+}
+
+// Returns the flag recorded by InitTerminalDetection(). The flag starts out
+// as false, so this reports "not a terminal" until detection has run.
+bool IsTerminal() { return s_isTerminal; }
+
+// Prints printf-style formatted text to stdout. When stdout is a terminal
+// (see IsTerminal()), the text is preceded by `ansiEscape` and followed by
+// ANSI_RESET; otherwise only the formatted text is written.
+//
+// The text is written directly with vfprintf() instead of going through a
+// fixed-size buffer, so long strings (e.g. long symbol names) are not
+// truncated when colors are enabled.
+void AnsiPrintf( const char* ansiEscape, const char* format, ... )
+{
+    const bool colorize = IsTerminal();
+    if( colorize ) fputs( ansiEscape, stdout );
+
+    va_list args;
+    va_start( args, format );
+    vfprintf( stdout, format, args );
+    va_end( args );
+
+    if( colorize ) fputs( ANSI_RESET, stdout );
+}
+
+// Polls `worker` every 100 ms until it has data. Returns 0 on success, or
+// 1 / 2 / 3 after reporting a protocol mismatch / unavailable client / dropped handshake.
+int WaitForConnection( tracy::Worker& worker )
+{
+    while( !worker.HasData() )
+    {
+        switch( worker.GetHandshakeStatus() )
+        {
+        case tracy::HandshakeProtocolMismatch:
+            printf( "\nThe client you are trying to connect to uses incompatible protocol version.\nMake sure you are using the same Tracy version on both client and server.\n" );
+            return 1;
+        case tracy::HandshakeNotAvailable:
+            printf( "\nThe client you are trying to connect to is no longer able to send profiling data,\nbecause another server was already connected to it.\nYou can do the following:\n\n  1. Restart the client application.\n  2. Rebuild the client application with on-demand mode enabled.\n" );
+            return 2;
+        case tracy::HandshakeDropped:
+            printf( "\nThe client you are trying to connect to has disconnected during the initial\nconnection handshake. Please check your network configuration.\n" );
+            return 3;
+        default:
+            break;
+        }
+        std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
+    }
+    return 0;
+}
+
+// Reports a failure recorded by `worker`, if its failure type is not None:
+// the failure string, the context message (when non-empty) and, when a
+// callstack id is set, one line per frame (non-last frames prefixed "inl.").
+void PrintWorkerFailure( tracy::Worker& worker )
+{
+    const auto& failureType = worker.GetFailureType();
+    if( failureType == tracy::Worker::Failure::None ) return;
+
+    AnsiPrintf( ANSI_RED ANSI_BOLD, "\nInstrumentation failure: %s", tracy::Worker::GetFailureString( failureType ) );
+    auto& failureData = worker.GetFailureData();
+    if( !failureData.message.empty() ) printf( "\nContext: %s", failureData.message.c_str() );
+    if( failureData.callstack == 0 ) return;
+
+    AnsiPrintf( ANSI_BOLD, "\nFailure callstack:\n" );
+    auto& callstack = worker.GetCallstack( failureData.callstack );
+    int frameIdx = 0;
+    for( auto& entry : callstack )
+    {
+        auto frameData = worker.GetCallstackFrame( entry );
+        if( !frameData )
+        {
+            // No frame data available for this entry: print the address only.
+            printf( "%3i. %p\n", frameIdx++, (void*)worker.GetCanonicalPointer( entry ) );
+            continue;
+        }
+
+        const auto frameCount = frameData->size;
+        for( uint8_t f = 0; f < frameCount; f++ )
+        {
+            const auto& frame = frameData->data[f];
+            const bool isLast = f == frameCount - 1;
+            auto name = worker.GetString( frame.name );
+
+            // While nothing has been numbered yet, skip non-last frames whose
+            // name is listed in tracy::s_tracyStackFrames.
+            if( frameIdx == 0 && !isLast )
+            {
+                auto known = tracy::s_tracyStackFrames;
+                bool isListed = false;
+                do
+                {
+                    isListed = strcmp( name, *known ) == 0;
+                }
+                while( !isListed && *++known );
+                if( isListed ) continue;
+            }
+
+            // Only the last frame of an entry consumes an index.
+            if( isLast )
+            {
+                printf( "%3i. ", frameIdx++ );
+            }
+            else
+            {
+                AnsiPrintf( ANSI_BLACK ANSI_BOLD, "inl. " );
+            }
+            AnsiPrintf( ANSI_CYAN, "%s  ", name );
+            auto file = worker.GetString( frame.file );
+            if( frame.line != 0 )
+            {
+                AnsiPrintf( ANSI_YELLOW, "(%s:%" PRIu32 ")", file, frame.line );
+            }
+            else
+            {
+                AnsiPrintf( ANSI_YELLOW, "(%s)", file );
+            }
+
+            if( frameData->imageName.Active() )
+            {
+                AnsiPrintf( ANSI_MAGENTA, " %s\n", worker.GetString( frameData->imageName ) );
+            }
+            else
+            {
+                printf( "\n" );
+            }
+        }
+    }
+}
+
+// Rewrites the current terminal line with capture status: transfer speed,
+// compression ratio, data transferred, tracy::memUsage (plus `memoryLimit`
+// when positive), last time minus `firstTime`, and the send queue size.
+void PrintCaptureProgress( tracy::Worker& worker, int64_t firstTime, int64_t memoryLimit )
+{
+    if( !IsTerminal() ) return;   // no progress output unless stdout is a terminal
+
+    auto& mbpsLock = worker.GetMbpsDataLock();
+    mbpsLock.lock();
+    const auto speed = worker.GetMbpsData().back();
+    const auto ratio = worker.GetCompRatio();
+    const auto transferred = worker.GetDataTransferred();
+    const auto backlog = worker.GetSendQueueSize();
+    mbpsLock.unlock();
+
+    // Speeds below 0.1 Mbps are displayed in Kbps instead.
+    const bool useKbps = speed < 0.1f;
+    const char* speedUnit = useKbps ? "Kbps" : "Mbps";
+    const float speedScale = useKbps ? 1000.f : 1.f;
+    AnsiPrintf( ANSI_ERASE_LINE ANSI_CYAN ANSI_BOLD, "\r%7.2f %s", speed * speedScale, speedUnit );
+    printf( " /" );
+    AnsiPrintf( ANSI_CYAN ANSI_BOLD, "%5.1f%%", ratio * 100.f );
+    printf( " =" );
+    AnsiPrintf( ANSI_YELLOW ANSI_BOLD, "%7.2f Mbps", speed / ratio );
+    printf( " | " );
+    AnsiPrintf( ANSI_YELLOW, "Tx: " );
+    AnsiPrintf( ANSI_GREEN, "%s", tracy::MemSizeToString( transferred ) );
+    printf( " | " );
+    AnsiPrintf( ANSI_RED ANSI_BOLD, "%s", tracy::MemSizeToString( tracy::memUsage.load( std::memory_order_relaxed ) ) );
+    if( memoryLimit > 0 )
+    {
+        printf( " / " );
+        AnsiPrintf( ANSI_BLUE ANSI_BOLD, "%s", tracy::MemSizeToString( memoryLimit ) );
+    }
+    printf( " | " );
+    AnsiPrintf( ANSI_RED, "%s", tracy::TimeToString( worker.GetLastTime() - firstTime ) );
+    printf( " | " );
+    AnsiPrintf( ANSI_RED ANSI_BOLD, "%s query backlog", tracy::RealToString( backlog ) );
+    fflush( stdout );
+}
--- a/capture/src/CaptureOutput.hpp
+++ b/capture/src/CaptureOutput.hpp
@@ -0,0 +1,33 @@
+#ifndef __CAPTUREOUTPUT_HPP__
+#define __CAPTUREOUTPUT_HPP__
+
+#include <stdint.h>
+
+#define ANSI_RESET "\033[0m"
+#define ANSI_BOLD "\033[1m"
+#define ANSI_BLACK "\033[30m"
+#define ANSI_RED "\033[31m"
+#define ANSI_GREEN "\033[32m"
+#define ANSI_YELLOW "\033[33m"
+#define ANSI_BLUE "\033[34m"
+#define ANSI_MAGENTA "\033[35m"
+#define ANSI_CYAN "\033[36m"
+#define ANSI_ERASE_LINE "\033[2K"
+
+namespace tracy { class Worker; }
+
+void InitTerminalDetection();
+bool IsTerminal();
+
+#ifdef __GNUC__
+[[gnu::format( __printf__, 2, 3 )]]
+#endif
+void AnsiPrintf( const char* ansiEscape, const char* format, ... );
+
+int WaitForConnection( tracy::Worker& worker );
+
+void PrintWorkerFailure( tracy::Worker& worker );
+
+void PrintCaptureProgress( tracy::Worker& worker, int64_t firstTime, int64_t memoryLimit );
+
+#endif
--- a/capture/src/capture.cpp
+++ b/capture/src/capture.cpp
@@ -1,6 +1,5 @@
 #ifdef _WIN32
 #  include <windows.h>
-#  include <io.h>
 #else
 #  include <unistd.h>
 #endif
@@ -8,21 +7,20 @@
 #include <atomic>
 #include <chrono>
 #include <inttypes.h>
-#include <mutex>
 #include <signal.h>
-#include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/stat.h>

-#include "../../public/common/TracyProtocol.hpp"
-#include "../../public/common/TracyStackFrames.hpp"
 #include "../../server/TracyFileWrite.hpp"
-#include "../../server/TracyMemory.hpp"
 #include "../../server/TracyPrint.hpp"
 #include "../../server/TracySysUtil.hpp"
 #include "../../server/TracyWorker.hpp"
+#include "../../public/common/TracyVersion.hpp"
+#include "GitRef.hpp"
+
+#include "CaptureOutput.hpp"

 #ifdef _WIN32
 #  include "../../getopt/getopt.h"
@@ -38,60 +36,12 @@ static std::atomic<bool> s_disconnect { false };

 void SigInt( int )
 {
-    // Relaxed order is closest to a traditional `volatile` write.
-    // We don't need stronger ordering since this signal handler doesn't do
-    // anything else that would need to be ordered relatively to this.
    s_disconnect.store(true, std::memory_order_relaxed);
 }

-static bool s_isStdoutATerminal = false;
-
-void InitIsStdoutATerminal() {
-#ifdef _WIN32
-    s_isStdoutATerminal = _isatty( fileno( stdout ) );
-#else
-    s_isStdoutATerminal = isatty( fileno( stdout ) );
-#endif
-}
-
-bool IsStdoutATerminal() { return s_isStdoutATerminal; }
-
-#define ANSI_RESET "\033[0m"
-#define ANSI_BOLD "\033[1m"
-#define ANSI_BLACK "\033[30m"
-#define ANSI_RED "\033[31m"
-#define ANSI_GREEN "\033[32m"
-#define ANSI_YELLOW "\033[33m"
-#define ANSI_BLUE "\033[34m"
-#define ANSI_MAGENTA "\033[35m"
-#define ANSI_CYAN "\033[36m"
-#define ANSI_ERASE_LINE "\033[2K"
-
-// Like printf, but if stdout is a terminal, prepends the output with
-// the given `ansiEscape` and appends ANSI_RESET.
-void AnsiPrintf( const char* ansiEscape, const char* format, ... ) {
-    if( IsStdoutATerminal() )
-    {
-        // Prepend ansiEscape and append ANSI_RESET.
-        char buf[256];
-        va_list args;
-        va_start( args, format );
-        vsnprintf( buf, sizeof buf, format, args );
-        va_end( args );
-        printf( "%s%s" ANSI_RESET, ansiEscape, buf );
-    }
-    else
-    {
-        // Just a normal printf.
-        va_list args;
-        va_start( args, format );
-        vfprintf( stdout, format, args );
-        va_end( args );
-    }
-}
-
 [[noreturn]] void Usage()
 {
+    printf( "tracy-capture %i.%i.%i / %s\n\n", tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch, tracy::GitRef );
    printf( "Usage: capture -o output.tracy [-a address] [-p port] [-f] [-s seconds] [-m memlimit]\n" );
    exit( 1 );
 }
@@ -106,7 +56,7 @@ int main( int argc, char** argv )
    }
 #endif

-    InitIsStdoutATerminal();
+    InitTerminalDetection();

    bool overwrite = false;
    const char* address = "127.0.0.1";
@@ -165,27 +115,9 @@ int main( int argc, char** argv )
    printf( "Connecting to %s:%i...", address, port );
    fflush( stdout );
    tracy::Worker worker( address, port, memoryLimit );
-    while( !worker.HasData() )
-    {
-        const auto handshake = worker.GetHandshakeStatus();
-        if( handshake == tracy::HandshakeProtocolMismatch )
-        {
-            printf( "\nThe client you are trying to connect to uses incompatible protocol version.\nMake sure you are using the same Tracy version on both client and server.\n" );
-            return 1;
-        }
-        if( handshake == tracy::HandshakeNotAvailable )
-        {
-            printf( "\nThe client you are trying to connect to is no longer able to sent profiling data,\nbecause another server was already connected to it.\nYou can do the following:\n\n  1. Restart the client application.\n  2. Rebuild the client application with on-demand mode enabled.\n" );
-            return 2;
-        }
-        if( handshake == tracy::HandshakeDropped )
-        {
-            printf( "\nThe client you are trying to connect to has disconnected during the initial\nconnection handshake. Please check your network configuration.\n" );
-            return 3;
-        }
-        std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
-    }
-    printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );
+    int result = WaitForConnection( worker );
+    if( result != 0 ) return result;
+    printf( "\nTimer resolution: %s\n", tracy::TimeToString( worker.GetResolution() ) );

 #ifdef _WIN32
    signal( SIGINT, SigInt );
@@ -197,59 +129,18 @@ int main( int argc, char** argv )
 #endif

    const auto firstTime = worker.GetFirstTime();
-    auto& lock = worker.GetMbpsDataLock();

    const auto t0 = std::chrono::high_resolution_clock::now();
    while( worker.IsConnected() )
    {
-        // Relaxed order is sufficient here because `s_disconnect` is only ever
-        // set by this thread or by the SigInt handler, and that handler does
-        // nothing else than storing `s_disconnect`.
        if( s_disconnect.load( std::memory_order_relaxed ) )
        {
            worker.Disconnect();
-            // Relaxed order is sufficient because only this thread ever reads
-            // this value.
            s_disconnect.store(false, std::memory_order_relaxed );
            break;
        }

-        lock.lock();
-        const auto mbps = worker.GetMbpsData().back();
-        const auto compRatio = worker.GetCompRatio();
-        const auto netTotal = worker.GetDataTransferred();
-        lock.unlock();
-
-        // Output progress info only if destination is a TTY to avoid bloating
-        // log files (so this is not just about usage of ANSI color codes).
-        if( IsStdoutATerminal() )
-        {
-            const char* unit = "Mbps";
-            float unitsPerMbps = 1.f;
-            if( mbps < 0.1f )
-            {
-                unit = "Kbps";
-                unitsPerMbps = 1000.f;
-            }
-            AnsiPrintf( ANSI_ERASE_LINE ANSI_CYAN ANSI_BOLD, "\r%7.2f %s", mbps * unitsPerMbps, unit );
-            printf( " /");
-            AnsiPrintf( ANSI_CYAN ANSI_BOLD, "%5.1f%%", compRatio * 100.f );
-            printf( " =");
-            AnsiPrintf( ANSI_YELLOW ANSI_BOLD, "%7.2f Mbps", mbps / compRatio );
-            printf( " | ");
-            AnsiPrintf( ANSI_YELLOW, "Tx: ");
-            AnsiPrintf( ANSI_GREEN, "%s", tracy::MemSizeToString( netTotal ) );
-            printf( " | ");
-            AnsiPrintf( ANSI_RED ANSI_BOLD, "%s", tracy::MemSizeToString( tracy::memUsage.load( std::memory_order_relaxed ) ) );
-            if( memoryLimit > 0 )
-            {
-                printf( " / " );
-                AnsiPrintf( ANSI_BLUE ANSI_BOLD, "%s", tracy::MemSizeToString( memoryLimit ) );
-            }
-            printf( " | ");
-            AnsiPrintf( ANSI_RED, "%s", tracy::TimeToString( worker.GetLastTime() - firstTime ) );
-            fflush( stdout );
-        }
+        PrintCaptureProgress( worker, firstTime, memoryLimit );

        std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
        if( seconds != -1 )
@@ -257,90 +148,13 @@ int main( int argc, char** argv )
            const auto dur = std::chrono::high_resolution_clock::now() - t0;
            if( std::chrono::duration_cast<std::chrono::seconds>(dur).count() >= seconds )
            {
-                // Relaxed order is sufficient because only this thread ever reads
-                // this value.
                s_disconnect.store(true, std::memory_order_relaxed );
            }
        }
    }
    const auto t1 = std::chrono::high_resolution_clock::now();

-    const auto& failure = worker.GetFailureType();
-    if( failure != tracy::Worker::Failure::None )
-    {
-        AnsiPrintf( ANSI_RED ANSI_BOLD, "\nInstrumentation failure: %s", tracy::Worker::GetFailureString( failure ) );
-        auto& fd = worker.GetFailureData();
-        if( !fd.message.empty() )
-        {
-            printf( "\nContext: %s", fd.message.c_str() );
-        }
-        if( fd.callstack != 0 )
-        {
-            AnsiPrintf( ANSI_BOLD, "\nFailure callstack:\n" );
-            auto& cs = worker.GetCallstack( fd.callstack );
-            int fidx = 0;
-            for( auto& entry : cs )
-            {
-                auto frameData = worker.GetCallstackFrame( entry );
-                if( !frameData )
-                {
-                    printf( "%3i. %p\n", fidx++, (void*)worker.GetCanonicalPointer( entry ) );
-                }
-                else
-                {
-                    const auto fsz = frameData->size;
-                    for( uint8_t f=0; f<fsz; f++ )
-                    {
-                        const auto& frame = frameData->data[f];
-                        auto txt = worker.GetString( frame.name );
-
-                        if( fidx == 0 && f != fsz-1 )
-                        {
-                            auto test = tracy::s_tracyStackFrames;
-                            bool match = false;
-                            do
-                            {
-                                if( strcmp( txt, *test ) == 0 )
-                                {
-                                    match = true;
-                                    break;
-                                }
-                            }
-                            while( *++test );
-                            if( match ) continue;
-                        }
-
-                        if( f == fsz-1 )
-                        {
-                            printf( "%3i. ", fidx++ );
-                        }
-                        else
-                        {
-                            AnsiPrintf( ANSI_BLACK ANSI_BOLD, "inl. " );
-                        }
-                        AnsiPrintf( ANSI_CYAN, "%s  ", txt );
-                        txt = worker.GetString( frame.file );
-                        if( frame.line == 0 )
-                        {
-                            AnsiPrintf( ANSI_YELLOW, "(%s)", txt );
-                        }
-                        else
-                        {
-                            AnsiPrintf( ANSI_YELLOW, "(%s:%" PRIu32 ")", txt, frame.line );
-                        }
-                        if( frameData->imageName.Active() )
-                        {
-                            AnsiPrintf( ANSI_MAGENTA, " %s\n", worker.GetString( frameData->imageName ) );
-                        }
-                        else
-                        {
-                            printf( "\n" );
-                        }
-                    }
-                }
-            }
-        }
-    }
+    PrintWorkerFailure( worker );

    printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
        worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() - firstTime ), tracy::RealToString( worker.GetZoneCount() ),
--- a/capture/src/capturedaemon.cpp
+++ b/capture/src/capturedaemon.cpp
@@ -0,0 +1,438 @@
+#ifdef _WIN32
+#  include <windows.h>
+#else
+#  include <unistd.h>
+#endif
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <filesystem>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <signal.h>
+#include <string>
+#include <system_error>
+#include <thread>
+#include <unordered_set>
+#include <vector>
+
+#include "../../getopt/getopt.h"
+#include "../../public/common/TracySocket.hpp"
+#include "../../public/common/TracyVersion.hpp"
+#include "../../server/TracyBroadcast.hpp"
+#include "../../server/TracyFileWrite.hpp"
+#include "../../server/TracyMemory.hpp"
+#include "../../server/TracyPrint.hpp"
+#include "../../server/TracySysUtil.hpp"
+#include "../../server/TracyWorker.hpp"
+#include "GitRef.hpp"
+
+#include "CaptureOutput.hpp"
+
+// Set to true by the SIGINT handler; the main loop polls it to leave the listen loop.
+static std::atomic<bool> g_shutdown{false};
+// Held around every access to g_clients in this file.
+static std::mutex g_clientsMutex;
+// UDP port the daemon listens on for client broadcasts (-p / --port).
+static uint16_t g_listenPort = 8086;
+// When non-empty, only clients whose program name contains this substring are captured (--filter-name).
+static std::string g_filterName;
+// When greater than zero, only clients announcing this data port are captured (--filter-port).
+static int g_filterPort = 0;
+// Memory limit handed to each tracy::Worker (-m / --memory); -1 is the value used when no limit was given.
+static int64_t g_memoryLimit = -1;
+
+// SIGINT handler: only raises the shutdown flag; the main loop does the actual shutdown work.
+void SigInt( int )
+{
+    g_shutdown.store( true, std::memory_order_relaxed );
+}
+
+// Latest statistics sampled by a capture thread and read by the display code.
+// Each field is an independent atomic, so a reader may see values from different samples.
+struct ClientStats
+{
+    // Most recent entry of the worker's Mbps data.
+    std::atomic<float> mbps{0};
+    // Value of Worker::GetDataTransferred() at the last sample.
+    std::atomic<int64_t> txBytes{0};
+    // Copy of tracy::memUsage at the last sample.
+    // NOTE(review): tracy::memUsage looks process-wide rather than per client - confirm.
+    std::atomic<int64_t> memUsage{0};
+    // Worker::GetFirstTime() captured once the connection is established; -1 until then.
+    std::atomic<int64_t> firstTime{-1};
+};
+
+// Everything the daemon tracks about one discovered client.
+// The identity fields are filled in by main() before the capture thread starts;
+// the atomics are shared between that thread and the display/summary code.
+struct ClientSession
+{
+    // Key of this session in g_clients ("<pid>_<address>_<port>").
+    std::string id;
+    // Program name taken from the client's broadcast message.
+    std::string programName;
+    // Textual address the broadcast was received from.
+    std::string address;
+    // Data port announced by the client. Initialized so that a default-constructed
+    // session never carries an indeterminate value.
+    uint16_t port{0};
+    // Path of the trace file this session writes.
+    std::string outputFile;
+    // Thread running CaptureThread for this session; joined by main() during shutdown.
+    std::thread thread;
+    // Cleared to ask the capture thread to stop; also cleared by the thread itself when it ends.
+    std::atomic<bool> active{true};
+    // Set by the capture thread once it has finished (trace written or connection failed).
+    std::atomic<bool> finished{false};
+    ClientStats stats;
+    // Size reported by the file writer after saving; stays 0 if nothing was written.
+    std::atomic<uint64_t> fileSize{0};
+};
+
+// All sessions created so far, keyed by ClientSession::id. Entries are owned raw pointers that
+// main() deletes at exit. Access is guarded by g_clientsMutex.
+static std::map<std::string, ClientSession*> g_clients;
+// Output paths already handed out by GenerateOutputFilename(), so two sessions never share a file.
+// Not guarded by g_clientsMutex; GenerateOutputFilename() is the only code that touches it.
+static std::unordered_set<std::string> g_outputFiles;
+
+// Prints the version banner and the command-line help to stdout, then terminates the process
+// with exit code 1. Used both for -h/--help and for invalid invocations.
+[[noreturn]] void Usage()
+{
+    printf( "tracy-capture-daemon %i.%i.%i / %s\n\n", tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch, tracy::GitRef );
+    printf( "Usage: tracy-capture-daemon -o <output_dir> [options]\n\n" );
+    printf( "Options:\n" );
+    printf( "  -o, --output <dir>       Output directory (required)\n" );
+    printf( "  -p, --port <port>        UDP listen port (default: 8086)\n" );
+    printf( "  -m, --memory <limit>     Memory limit per client as %% of system RAM\n" );
+    printf( "  --filter-name <pattern>  Only capture clients matching program name\n" );
+    printf( "  --filter-port <port>     Only capture clients with specific data port\n" );
+    printf( "  -h, --help               Show this help\n" );
+    printf( "  -V, --version            Show version information\n" );
+    exit( 1 );
+}
+
+// Turns `name` into a token that is safe to embed in a file name.
+// ASCII letters, digits, '_' and '-' are kept, spaces and tabs become '_', and every other
+// character is dropped. Returns "unknown" when nothing survives.
+std::string SanitizeName( const std::string& name )
+{
+    const auto isKept = []( char ch ) {
+        const bool lower = ch >= 'a' && ch <= 'z';
+        const bool upper = ch >= 'A' && ch <= 'Z';
+        const bool digit = ch >= '0' && ch <= '9';
+        return lower || upper || digit || ch == '_' || ch == '-';
+    };
+
+    std::string sanitized;
+    for( const char ch : name )
+    {
+        if( isKept( ch ) )
+        {
+            sanitized.push_back( ch );
+        }
+        else if( ch == ' ' || ch == '\t' )
+        {
+            sanitized.push_back( '_' );
+        }
+    }
+    return sanitized.empty() ? std::string( "unknown" ) : sanitized;
+}
+
+// Picks a trace file path inside `outputDir` that is neither reserved by an earlier call nor
+// present on disk, reserves it in g_outputFiles and returns it.
+// The name is "<sanitized program>_<address>_<port>.tracy"; on a clash "_1", "_2", ... is
+// appended before the extension until a free name is found.
+std::string GenerateOutputFilename( const std::string& outputDir, const std::string& programName, const std::string& address, uint16_t port )
+{
+    const std::string stem = outputDir + "/" + SanitizeName( programName ) + "_" + address + "_" + std::to_string( port );
+    const auto isTaken = []( const std::string& p ) {
+        return g_outputFiles.count( p ) != 0 || std::filesystem::exists( p );
+    };
+
+    std::string fullPath = stem + ".tracy";
+    for( int suffix = 1; isTaken( fullPath ); suffix++ )
+    {
+        fullPath = stem + "_" + std::to_string( suffix ) + ".tracy";
+    }
+
+    g_outputFiles.insert( fullPath );
+    return fullPath;
+}
+
+// Returns true when the broadcasting client passes the configured filters:
+// its program name contains g_filterName (if a name filter is set) and its listen port equals
+// g_filterPort (if a port filter greater than zero is set).
+bool MatchesFilters( const tracy::BroadcastMessage& msg )
+{
+    const bool nameOk = g_filterName.empty() || strstr( msg.programName, g_filterName.c_str() ) != nullptr;
+    if( !nameOk ) return false;
+
+    const bool portOk = g_filterPort <= 0 || msg.listenPort == g_filterPort;
+    return portOk;
+}
+
+// Thread entry point for one client: connects a tracy::Worker to `address`:`port`, publishes
+// transfer statistics into `session->stats` every 100 ms until the session is asked to stop or
+// the client disconnects, then writes the collected trace to `outputFile`.
+//
+//   session     - shared state for this client; `active` is the stop request, `finished` is
+//                 set when this function is done, `fileSize` receives the written size.
+//   address     - client address passed to tracy::Worker.
+//   port        - client data port passed to tracy::Worker.
+//   memoryLimit - passed to tracy::Worker unchanged.
+//   outputFile  - path of the trace file to create.
+void CaptureThread( ClientSession* session, const std::string& address, uint16_t port, int64_t memoryLimit, const std::string& outputFile )
+{
+    printf( "Connecting to %s:%u...\n", address.c_str(), port );
+    fflush( stdout );
+
+    tracy::Worker worker( address.c_str(), port, memoryLimit );
+
+    int result = WaitForConnection( worker );
+    if( result != 0 )
+    {
+        // Connection never came up: nothing to save, just mark the session as done.
+        session->active = false;
+        session->finished = true;
+        return;
+    }
+
+    printf( "Connected to %s (%s:%u)\n", session->programName.c_str(), address.c_str(), port );
+
+    int64_t firstTime = worker.GetFirstTime();
+    session->stats.firstTime = firstTime;
+
+    while( session->active && worker.IsConnected() )
+    {
+        float mbps;
+        int64_t txTotal;
+        {
+            // Scoped guard instead of a manual lock()/unlock() pair, so the lock cannot stay held.
+            std::lock_guard guard( worker.GetMbpsDataLock() );
+            mbps = worker.GetMbpsData().back();
+            txTotal = worker.GetDataTransferred();
+        }
+
+        session->stats.mbps = mbps;
+        session->stats.txBytes = txTotal;
+        session->stats.memUsage = tracy::memUsage.load( std::memory_order_relaxed );
+
+        std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
+    }
+
+    // The loop also ends on a stop request while the client is still connected. Disconnect
+    // before saving, as capture.cpp does when it is interrupted, so the trace is not written
+    // from a worker that is still receiving data.
+    if( worker.IsConnected() ) worker.Disconnect();
+
+    printf( "\nSaving %s...", outputFile.c_str() );
+    fflush( stdout );
+
+    auto file = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( outputFile.c_str(), tracy::FileCompression::Zstd, 3, 4 ) );
+    if( file )
+    {
+        worker.Write( *file, false );
+        file->Finish();
+        auto stats = file->GetCompressionStatistics();
+        session->fileSize = stats.second;
+        AnsiPrintf( ANSI_GREEN ANSI_BOLD, " done!\n" );
+    }
+    else
+    {
+        AnsiPrintf( ANSI_RED ANSI_BOLD, " failed!\n" );
+    }
+
+    // `finished` is published before `active` is cleared so the display never shows a
+    // completed session as "connecting...".
+    session->finished = true;
+    session->active = false;
+}
+
+// Redraws the live status screen: a header with the client count and listen endpoint, one line
+// per known session, and a totals line. Does nothing when output is not a terminal.
+//
+//   listenAddr - address text shown in the header.
+void RefreshDisplay( const std::string& listenAddr )
+{
+    if( !IsTerminal() ) return;
+
+    // Move the cursor home and clear the screen.
+    printf( "\033[H\033[J" );
+
+    float totalMbps = 0;
+    int64_t totalTx = 0;
+
+    {
+        // One lock for header and list, so the count in the header always matches the rows below.
+        std::lock_guard<std::mutex> lock( g_clientsMutex );
+        const size_t clientCount = g_clients.size();
+
+        printf( "[%zu client%s] Listening on %s:%u... Press Ctrl+C to stop\n\n", clientCount, clientCount == 1 ? "" : "s", listenAddr.c_str(), g_listenPort );
+
+        int idx = 1;
+        for( auto& [id, session] : g_clients )
+        {
+            printf( "  [%d] %s @ %s:%u    ", idx, session->programName.c_str(), session->address.c_str(), session->port );
+
+            if( session->finished )
+            {
+                printf( "finished (" );
+                printf( "%s", tracy::MemSizeToString( session->fileSize.load() ) );
+                printf( ")" );
+            }
+            else if( session->active )
+            {
+                // NOTE(review): `active` starts out true, so a session that is still connecting
+                // is shown here with zeroed statistics rather than as "connecting...".
+                const float mbps = session->stats.mbps.load();
+                const int64_t tx = session->stats.txBytes.load();
+                const int64_t mem = session->stats.memUsage.load();
+
+                printf( "%.1f Mbps | %s | %s", mbps, tracy::MemSizeToString( tx ), tracy::MemSizeToString( mem ) );
+
+                totalMbps += mbps;
+                totalTx += tx;
+            }
+            else
+            {
+                printf( "connecting..." );
+            }
+            printf( "\n" );
+            idx++;
+        }
+    }
+
+    // Every session samples the same tracy::memUsage counter, so adding the per-session copies
+    // would multiply the real figure by the number of active clients. Read it once instead.
+    const int64_t totalMem = tracy::memUsage.load( std::memory_order_relaxed );
+
+    printf( "\nTotal: %.1f Mbps | %s | Mem: %s", totalMbps, tracy::MemSizeToString( totalTx ), tracy::MemSizeToString( totalMem ) );
+    fflush( stdout );
+}
+
+// Prints the end-of-run report: one line per session with its output path and written size,
+// followed by the number of trace files actually written and their combined size.
+void PrintSummary()
+{
+    printf( "\n\n=== Capture Summary ===\n" );
+
+    std::lock_guard<std::mutex> lock( g_clientsMutex );
+    int idx = 1;
+    int64_t totalSize = 0;
+    size_t savedFiles = 0;
+
+    for( auto& [id, session] : g_clients )
+    {
+        const int64_t size = session->fileSize.load();
+        totalSize += size;
+        // fileSize is only set after a trace was written; sessions that failed to connect or
+        // to open their output file keep 0 and must not be counted as files.
+        if( size > 0 ) savedFiles++;
+        printf( "  [%d] %s @ %s:%u -> %s (%s)\n", idx++, session->programName.c_str(), session->address.c_str(), session->port, session->outputFile.c_str(), tracy::MemSizeToString( size ) );
+    }
+
+    printf( "\nTotal: %zu files, %s\n", savedFiles, tracy::MemSizeToString( totalSize ) );
+}
+
+// Entry point of the capture daemon.
+// Parses the command line, listens for client UDP broadcasts, starts one CaptureThread for each
+// newly announced client that passes the filters and refreshes the status display. On Ctrl+C it
+// asks every session to stop, waits for the capture threads, and prints a summary.
+// Returns 0 on a normal shutdown and 1 when the output directory cannot be created or the
+// listen socket cannot be opened. Invalid options end the process through Usage().
+int main( int argc, char** argv )
+{
+#ifdef _WIN32
+    if( !AttachConsole( ATTACH_PARENT_PROCESS ) )
+    {
+        AllocConsole();
+        SetConsoleMode( GetStdHandle( STD_OUTPUT_HANDLE ), 0x07 );
+    }
+#endif
+
+    std::string outputDir;
+
+    static struct option longOptions[] = {
+        { "output", required_argument, nullptr, 'o' },
+        { "port", required_argument, nullptr, 'p' },
+        { "memory", required_argument, nullptr, 'm' },
+        { "filter-name", required_argument, nullptr, 1 },
+        { "filter-port", required_argument, nullptr, 2 },
+        { "help", no_argument, nullptr, 'h' },
+        { "version", no_argument, nullptr, 'V' },
+        { nullptr, 0, nullptr, 0 }
+    };
+
+    // Parses a port number. Returns -1 unless the whole string is a decimal number in
+    // [minValue, 65535], so bad input is reported instead of being silently truncated.
+    const auto parsePort = []( const char* text, long minValue ) -> int {
+        char* end = nullptr;
+        const long value = strtol( text, &end, 10 );
+        if( end == text || *end != '\0' || value < minValue || value > 65535 ) return -1;
+        return static_cast<int>( value );
+    };
+
+    int c;
+    while( ( c = getopt_long( argc, argv, "o:p:m:hV", longOptions, nullptr ) ) != -1 )
+    {
+        switch( c )
+        {
+        case 'o':
+            outputDir = optarg;
+            break;
+        case 'p':
+        {
+            const int port = parsePort( optarg, 1 );
+            if( port < 0 )
+            {
+                fprintf( stderr, "Error: Invalid listen port '%s'\n\n", optarg );
+                Usage();
+            }
+            g_listenPort = static_cast<uint16_t>( port );
+            break;
+        }
+        case 'm':
+            g_memoryLimit = std::clamp( atoll( optarg ), 1ll, 999ll ) * tracy::GetPhysicalMemorySize() / 100;
+            break;
+        case 1:
+            g_filterName = optarg;
+            break;
+        case 2:
+        {
+            // 0 stays accepted here: it means "no port filter".
+            const int port = parsePort( optarg, 0 );
+            if( port < 0 )
+            {
+                fprintf( stderr, "Error: Invalid filter port '%s'\n\n", optarg );
+                Usage();
+            }
+            g_filterPort = port;
+            break;
+        }
+        case 'h':
+            Usage();
+            break;
+        case 'V':
+            printf( "tracy-capture-daemon %i.%i.%i / %s\n", tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch, tracy::GitRef );
+            exit( 0 );
+        default:
+            Usage();
+            break;
+        }
+    }
+
+    if( outputDir.empty() )
+    {
+        fprintf( stderr, "Error: Output directory is required (-o)\n\n" );
+        Usage();
+    }
+
+    // Use the non-throwing overload: an unwritable or invalid path becomes an error message
+    // instead of an uncaught std::filesystem::filesystem_error.
+    std::error_code dirError;
+    std::filesystem::create_directories( outputDir, dirError );
+    if( dirError )
+    {
+        fprintf( stderr, "Error: Failed to create output directory %s: %s\n", outputDir.c_str(), dirError.message().c_str() );
+        return 1;
+    }
+
+    InitTerminalDetection();
+
+#ifdef _WIN32
+    signal( SIGINT, SigInt );
+#else
+    struct sigaction sigint;
+    memset( &sigint, 0, sizeof( sigint ) );
+    sigint.sa_handler = SigInt;
+    sigaction( SIGINT, &sigint, nullptr );
+#endif
+
+    tracy::UdpListen udpSocket;
+    if( !udpSocket.Listen( g_listenPort ) )
+    {
+        fprintf( stderr, "Error: Failed to listen on port %u\n", g_listenPort );
+        return 1;
+    }
+
+    printf( "Listening on 0.0.0.0:%u... Press Ctrl+C to stop\n", g_listenPort );
+    printf( "Output directory: %s\n", outputDir.c_str() );
+
+    const std::string listenAddr = "0.0.0.0";
+    auto lastDisplay = std::chrono::steady_clock::now();
+
+    while( !g_shutdown )
+    {
+        tracy::IpAddress clientAddr;
+        size_t len = 0;
+        const char* msg = udpSocket.Read( len, clientAddr, 100 );
+
+        if( msg )
+        {
+            auto parsed = tracy::ParseBroadcastMessage( msg, len );
+            if( parsed )
+            {
+                // A client is identified by pid, source address and data port.
+                std::string clientId = std::to_string( parsed->pid ) + "_" + clientAddr.GetText() + "_" + std::to_string( parsed->listenPort );
+
+                bool isNew = false;
+                {
+                    std::lock_guard<std::mutex> lock( g_clientsMutex );
+                    isNew = g_clients.find( clientId ) == g_clients.end();
+                }
+
+                if( isNew && MatchesFilters( *parsed ) )
+                {
+                    std::string addressStr = clientAddr.GetText();
+                    std::string outputFile = GenerateOutputFilename( outputDir, parsed->programName, addressStr, parsed->listenPort );
+
+                    auto session = new ClientSession();
+                    session->id = clientId;
+                    session->programName = parsed->programName;
+                    session->address = addressStr;
+                    session->port = parsed->listenPort;
+                    session->outputFile = outputFile;
+                    session->active = true;
+
+                    {
+                        std::lock_guard<std::mutex> lock( g_clientsMutex );
+                        g_clients[clientId] = session;
+                    }
+
+                    session->thread = std::thread( CaptureThread, session, addressStr, parsed->listenPort, g_memoryLimit, outputFile );
+                }
+            }
+        }
+
+        // Redraw at most every 100 ms, independent of how often broadcasts arrive.
+        auto now = std::chrono::steady_clock::now();
+        if( std::chrono::duration_cast<std::chrono::milliseconds>( now - lastDisplay ).count() >= 100 )
+        {
+            RefreshDisplay( listenAddr );
+            lastDisplay = now;
+        }
+    }
+
+    printf( "\n\nShutting down... waiting for %zu client(s) to finish\n", g_clients.size() );
+
+    {
+        std::lock_guard<std::mutex> lock( g_clientsMutex );
+        // Ask every session to stop first so the traces are written in parallel...
+        for( auto& [id, session] : g_clients )
+        {
+            session->active = false;
+        }
+        // ...then wait for each capture thread to finish saving.
+        for( auto& [id, session] : g_clients )
+        {
+            if( session->thread.joinable() )
+            {
+                session->thread.join();
+            }
+        }
+    }
+
+    PrintSummary();
+
+    {
+        std::lock_guard<std::mutex> lock( g_clientsMutex );
+        for( auto& [id, session] : g_clients )
+        {
+            delete session;
+        }
+        g_clients.clear();
+    }
+
+    return 0;
+}
--- a/cmake/GitRef.cmake
+++ b/cmake/GitRef.cmake
@@ -0,0 +1,37 @@
+# add_git_ref(<target>)
+#
+# Makes a generated GitRef.hpp (defining tracy::GitRef) available to <target> and adds
+# CMAKE_CURRENT_BINARY_DIR to the target's private include directories.
+# The revision described is GIT_REV, which defaults to "HEAD" when not defined by the caller.
+# The generator (custom target or custom command) is created only by the first call; later
+# calls just attach further targets to it.
+function(add_git_ref target)
+    if(NOT DEFINED GIT_REV)
+        set(GIT_REV "HEAD")
+    endif()
+
+    # Global properties remember across calls whether the generator exists and whether git was found.
+    get_property(_git_ref_created GLOBAL PROPERTY _GIT_REF_CREATED)
+    if(NOT _git_ref_created)
+        set_property(GLOBAL PROPERTY _GIT_REF_CREATED TRUE)
+        find_package(Git)
+        set_property(GLOBAL PROPERTY _GIT_FOUND "${Git_FOUND}")
+        if(Git_FOUND)
+            # The header is written to a temporary file and only copied over GitRef.hpp when
+            # its content differs. If the git command fails, "unknown" is written instead.
+            add_custom_target(git-ref
+                COMMAND ${CMAKE_COMMAND} -E echo "#pragma once" > GitRef.hpp.tmp
+                COMMAND ${GIT_EXECUTABLE} -C ${CMAKE_CURRENT_SOURCE_DIR} log -1 "--format=namespace tracy { static inline const char* GitRef = %x22%h%x22; }" ${GIT_REV} >> GitRef.hpp.tmp || echo "namespace tracy { static inline const char* GitRef = \"unknown\"; }" >> GitRef.hpp.tmp
+                COMMAND ${CMAKE_COMMAND} -E copy_if_different GitRef.hpp.tmp GitRef.hpp
+                BYPRODUCTS GitRef.hpp GitRef.hpp.tmp
+                VERBATIM
+            )
+        else()
+            # Without git the header is produced once with a fixed "unknown" reference.
+            message(WARNING "git not found, using 'unknown' as git ref.")
+            add_custom_command(
+                OUTPUT GitRef.hpp
+                COMMAND ${CMAKE_COMMAND} -E echo "#pragma once" > GitRef.hpp
+                COMMAND ${CMAKE_COMMAND} -E echo "namespace tracy { static inline const char* GitRef = \"unknown\"; }" >> GitRef.hpp
+                VERBATIM
+            )
+        endif()
+    endif()
+
+    target_include_directories(${target} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+    get_property(_git_found GLOBAL PROPERTY _GIT_FOUND)
+    if(_git_found)
+        # Regenerate the header before <target> is built.
+        add_dependencies(${target} git-ref)
+    else()
+        # Listing the header as a source ties the custom command above to <target>.
+        target_sources(${target} PUBLIC GitRef.hpp)
+    endif()
+endfunction()
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -1,8 +1,15 @@
+include(${CMAKE_CURRENT_LIST_DIR}/options.cmake)
+
+set_option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
+set_option(NO_LTO "Disable interprocedural optimization (LTO)" OFF)
+set_option(NO_MOLD_LINKER "Disable mold linker (use default linker)" OFF)
+set_option(NO_CCACHE "Disable ccache acceleration" OFF)
+
 if (NOT NO_ISA_EXTENSIONS)
    include(CheckCXXCompilerFlag)
    if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
        CHECK_CXX_COMPILER_FLAG("-mcpu=native" COMPILER_SUPPORTS_MCPU_NATIVE)
-        if(COMPILER_SUPPORTS_MARCH_NATIVE)
+        if(COMPILER_SUPPORTS_MCPU_NATIVE)
            add_compile_options(-mcpu=native)
        endif()
    else()
@@ -24,24 +31,33 @@ endif()

 if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
    if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "15")
+          message(FATAL_ERROR "Apple Clang 15 or newer is required.")
+        elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "16")
+          # AppleClang 15 has issues with to_chars in <chrono> if target is too old
+          add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-mmacosx-version-min=13.3>)
+        endif()
        add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fexperimental-library>)
    endif()
 endif()

 if(WIN32)
    add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR)
-    add_compile_options(/MP)
+    # /MP is MSVC-specific for multi-processor compilation
+    if(MSVC)
+        add_compile_options(/MP)
+    endif()
 endif()

 if(EMSCRIPTEN)
    add_compile_options(-pthread -DIMGUI_IMPL_OPENGL_ES2)
 endif()

-if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN)
+if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN AND NOT NO_LTO)
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
 endif()

-if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT NO_MOLD_LINKER)
    find_program(MOLD_LINKER mold)
    if(MOLD_LINKER)
        set(CMAKE_LINKER_TYPE "MOLD")
@@ -51,10 +67,12 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_SYSTEM_NAME STREQUAL "Linux"
    endif()
 endif()

-find_program(CCACHE ccache)
-if(CCACHE)
-    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
-    set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) 
+if(NOT NO_CCACHE)
+    find_program(CCACHE ccache)
+    if(CCACHE)
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+        set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+    endif()
 endif()

 file(GENERATE OUTPUT .gitignore CONTENT "*")
--- a/cmake/imgui-loader.patch
+++ b/cmake/imgui-loader.patch
@@ -1,8 +1,8 @@
-diff --git a/backends/imgui_impl_opengl3_loader.h b/backends/imgui_impl_opengl3_loader.h
-index d6ffa5a2d..e48372c64 100644
--- a/backends/imgui_impl_opengl3_loader.h
-+++ b/backends/imgui_impl_opengl3_loader.h
-@@ -179,6 +179,7 @@ typedef khronos_uint8_t GLubyte;
+diff --git i/backends/imgui_impl_opengl3_loader.h w/backends/imgui_impl_opengl3_loader.h
+index 4ca0536..a1ff572 100644
+--- i/backends/imgui_impl_opengl3_loader.h
+++ w/backends/imgui_impl_opengl3_loader.h
+@@ -180,6 +180,7 @@ typedef khronos_uint8_t GLubyte;
 #define GL_VERSION                        0x1F02
 #define GL_EXTENSIONS                     0x1F03
 #define GL_LINEAR                         0x2601
@@ -10,7 +10,7 @@ index d6ffa5a2d..e48372c64 100644
 #define GL_TEXTURE_MAG_FILTER             0x2800
 #define GL_TEXTURE_MIN_FILTER             0x2801
 #define GL_TEXTURE_WRAP_S                 0x2802
-@@ -241,8 +242,10 @@ GLAPI void APIENTRY glGenTextures (GLsizei n, GLuint *textures);
+@@ -244,8 +245,10 @@ GLAPI void APIENTRY glGenTextures (GLsizei n, GLuint *textures);
 #define GL_TEXTURE0                       0x84C0
 #define GL_ACTIVE_TEXTURE                 0x84E0
 typedef void (APIENTRYP PFNGLACTIVETEXTUREPROC) (GLenum texture);
@@ -21,16 +21,16 @@ index d6ffa5a2d..e48372c64 100644
 #endif
 #endif /* GL_VERSION_1_3 */
 #ifndef GL_VERSION_1_4
-@@ -478,7 +481,7 @@ GL3W_API GL3WglProc imgl3wGetProcAddress(const char *proc);
+@@ -481,7 +484,7 @@ GL3W_API GL3WglProc imgl3wGetProcAddress(const char *proc);
 
 /* gl3w internal state */
 union ImGL3WProcs {
-    GL3WglProc ptr[59];
-+    GL3WglProc ptr[60];
+-    GL3WglProc ptr[63];
+    GL3WglProc ptr[64];
     struct {
         PFNGLACTIVETEXTUREPROC            ActiveTexture;
         PFNGLATTACHSHADERPROC             AttachShader;
-@@ -494,6 +497,7 @@ union ImGL3WProcs {
+@@ -497,6 +500,7 @@ union ImGL3WProcs {
         PFNGLCLEARPROC                    Clear;
         PFNGLCLEARCOLORPROC               ClearColor;
         PFNGLCOMPILESHADERPROC            CompileShader;
@@ -38,7 +38,7 @@ index d6ffa5a2d..e48372c64 100644
         PFNGLCREATEPROGRAMPROC            CreateProgram;
         PFNGLCREATESHADERPROC             CreateShader;
         PFNGLDELETEBUFFERSPROC            DeleteBuffers;
-@@ -559,6 +563,7 @@ GL3W_API extern union ImGL3WProcs imgl3wProcs;
+@@ -563,6 +567,7 @@ GL3W_API extern union ImGL3WProcs imgl3wProcs;
 #define glClear                           imgl3wProcs.gl.Clear
 #define glClearColor                      imgl3wProcs.gl.ClearColor
 #define glCompileShader                   imgl3wProcs.gl.CompileShader
@@ -46,7 +46,7 @@ index d6ffa5a2d..e48372c64 100644
 #define glCreateProgram                   imgl3wProcs.gl.CreateProgram
 #define glCreateShader                    imgl3wProcs.gl.CreateShader
 #define glDeleteBuffers                   imgl3wProcs.gl.DeleteBuffers
-@@ -854,6 +859,7 @@ static const char *proc_names[] = {
+@@ -859,6 +864,7 @@ static const char *proc_names[] = {
     "glClear",
     "glClearColor",
     "glCompileShader",
--- a/cmake/imgui-no-samplers.patch
+++ b/cmake/imgui-no-samplers.patch
@@ -0,0 +1,13 @@
+diff --git a/backends/imgui_impl_opengl3.cpp b/backends/imgui_impl_opengl3.cpp
+index a9e32b7ac..2cdbc4812 100644
+--- a/backends/imgui_impl_opengl3.cpp
+++ b/backends/imgui_impl_opengl3.cpp
+@@ -1069,7 +1069,7 @@ bool    ImGui_ImplOpenGL3_Init(const char* glsl_version)
+     bd->HasPolygonMode = (!bd->GlProfileIsES2 && !bd->GlProfileIsES3);
+ #endif
+ #ifdef IMGUI_IMPL_OPENGL_MAY_HAVE_BIND_SAMPLER
+-    bd->HasBindSampler = (bd->GlVersion >= 330 || bd->GlProfileIsES3);
+    //bd->HasBindSampler = (bd->GlVersion >= 330 || bd->GlProfileIsES3);
+ #endif
+     bd->HasClipOrigin = (bd->GlVersion >= 450);
+ #ifdef IMGUI_IMPL_OPENGL_HAS_EXTENSIONS
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@@ -0,0 +1,48 @@
+# Reusable option macros for Tracy CMake projects
+#
+# Usage:
+#   set_option(OPTION_NAME "Help text" ON/OFF [TARGET])     - for boolean options
+#   set_option_value(VAR_NAME "Help text" "value" [TARGET]) - for value options (CACHE STRING)
+#   set_option_value_as_string(VAR_NAME "Help text" "value" [TARGET]) - for value options as C string literals
+#
+# [TARGET] is optional and specifies a target to which the option will 
+# be added as a compile definition (e.g., -DOPTION_NAME or -DVAR_NAME=value).
+
+# Boolean option (ON/OFF).
+#   option - name of the cache option to declare
+#   help   - help text shown for the option
+#   value  - default (ON/OFF) used when the option is not already set
+#   [4th]  - optional target; when the option is ON, <option> is added to it as a PUBLIC
+#            compile definition
+# The resulting state is reported with message(STATUS).
+macro(set_option option help value)
+    option(${option} ${help} ${value})
+    if(${option})
+        message(STATUS "${option}: ON")
+        if(${ARGC} GREATER 3)
+            target_compile_definitions(${ARGV3} PUBLIC ${option})
+        endif()
+    else()
+        message(STATUS "${option}: OFF")
+    endif()
+endmacro()
+
+# Value option (string/number).
+#   var     - name of the CACHE STRING variable to declare
+#   help    - help text stored with the cache entry
+#   default - value used when the variable is not already in the cache
+#   [4th]   - optional target; receives <var>=<value> as a PUBLIC compile definition
+# Note: the value is tested with if(), so an empty value or one CMake treats as false
+# (e.g. 0, OFF, NO, FALSE) is reported as "(not set)" and no definition is added.
+macro(set_option_value var help default)
+    set(${var} ${default} CACHE STRING "${help}")
+    if(${var})
+        message(STATUS "${var}: ${${var}}")
+        if(${ARGC} GREATER 3)
+            target_compile_definitions(${ARGV3} PUBLIC ${var}=${${var}})
+        endif()
+    else()
+        message(STATUS "${var}: (not set)")
+    endif()
+endmacro()
+
+# Value option embedded as a C string literal (VAR="value").
+#   var     - name of the CACHE STRING variable to declare
+#   help    - help text stored with the cache entry
+#   default - value used when the variable is not already in the cache
+#   [4th]   - optional target; receives <var>="<value>" (with the quotes) as a PUBLIC
+#             compile definition
+# Note: the value is tested with if(), so an empty value or one CMake treats as false
+# (e.g. 0, OFF, NO, FALSE) is reported as "(not set)" and no definition is added.
+macro(set_option_value_as_string var help default)
+    set(${var} ${default} CACHE STRING "${help}")
+    if(${var})
+        message(STATUS "${var}: ${${var}}")
+        if(${ARGC} GREATER 3)
+            target_compile_definitions(${ARGV3} PUBLIC "${var}=\"${${var}}\"")
+        endif()
+    else()
+        message(STATUS "${var}: (not set)")
+    endif()
+endmacro()
--- a/cmake/ppqsort-semaphore.patch
+++ b/cmake/ppqsort-semaphore.patch
@@ -0,0 +1,14 @@
+diff --git i/include/ppqsort/parallel/cpp/thread_pool.h w/include/ppqsort/parallel/cpp/thread_pool.h
+--- i/include/ppqsort/parallel/cpp/thread_pool.h
+++ w/include/ppqsort/parallel/cpp/thread_pool.h
+@@ -134,7 +134,9 @@ namespace ppqsort::impl::cpp {
+             alignas(parameters::cacheline_size) std::atomic<std::size_t> pending_tasks_{0};
+             alignas(parameters::cacheline_size) std::atomic<std::size_t> total_tasks_{0};
+             alignas(parameters::cacheline_size) std::atomic<bool> to_stop_{false};
+-            std::binary_semaphore threads_done_semaphore_{0};   // used to wait for all tasks to finish
+            // counting_semaphore: multiple workers may concurrently observe total_tasks_ == 0
+            // and call release(); a binary_semaphore would assert when the count exceeds 1.
+            std::counting_semaphore<> threads_done_semaphore_{0};
+             std::mutex mtx_priority_;
+             bool stopped = false;
+     };
--- a/cmake/server.cmake
+++ b/cmake/server.cmake
@@ -14,6 +14,7 @@ list(TRANSFORM TRACY_COMMON_SOURCES PREPEND "${TRACY_COMMON_DIR}/")
 set(TRACY_SERVER_DIR ${CMAKE_CURRENT_LIST_DIR}/../server)

 set(TRACY_SERVER_SOURCES
+    TracyBroadcast.cpp
    TracyMemory.cpp
    TracyMmap.cpp
    TracyPrint.cpp
--- a/cmake/tidy-cmake.patch
+++ b/cmake/tidy-cmake.patch
@@ -0,0 +1,39 @@
+diff --git i/CMakeLists.txt w/CMakeLists.txt
+index 8efec25..c1d101e 100644
+--- i/CMakeLists.txt
+++ w/CMakeLists.txt
+@@ -17,7 +17,7 @@
+ # @date    Consult git log.
+ ##############################################################################
+ 
+-cmake_minimum_required (VERSION 2.8.12)
+cmake_minimum_required (VERSION 3.10)
+ 
+ set(LIB_NAME tidy)
+ set(LIBTIDY_DESCRIPTION "${LIB_NAME} - HTML syntax checker")
+@@ -528,6 +528,7 @@ if (UNIX AND SUPPORT_CONSOLE_APP)
+  
+         # Run the built EXE to generate xml output .
+         add_custom_command(
+            POST_BUILD
+             TARGET man
+             COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} -xml-help > ${TIDYHELP}
+             COMMENT "Generate ${TIDYHELP}"
+@@ -536,6 +537,7 @@ if (UNIX AND SUPPORT_CONSOLE_APP)
+ 
+         # Run the built EXE to generate more xml output.
+         add_custom_command(
+            POST_BUILD
+             TARGET man
+             COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} -xml-config > ${TIDYCONFIG}
+             COMMENT "Generate ${TIDYCONFIG}"
+@@ -544,8 +546,8 @@ if (UNIX AND SUPPORT_CONSOLE_APP)
+ 
+         # Run xsltproc to generate the install files.
+         add_custom_command(
+            POST_BUILD
+             TARGET man
+-            DEPENDS ${TIDYHELP}
+             COMMAND xsltproc ARGS ${TIDY1XSL} ${TIDYHELP} > ${CMAKE_CURRENT_BINARY_DIR}/${TIDY_MANFILE}
+             COMMENT "Generate ${TIDY_MANFILE}"
+             VERBATIM
--- a/cmake/vendor.cmake
+++ b/cmake/vendor.cmake
@@ -11,6 +11,8 @@ include(${CMAKE_CURRENT_LIST_DIR}/CPM.cmake)
 option(DOWNLOAD_CAPSTONE "Force download capstone" ON)
 option(DOWNLOAD_GLFW "Force download glfw" OFF)
 option(DOWNLOAD_FREETYPE "Force download freetype" OFF)
+option(DOWNLOAD_LIBCURL "Force download libcURL" OFF)
+option(DOWNLOAD_PUGIXML "Force download pugixml" OFF)

 # capstone

@@ -24,10 +26,11 @@ else()
    CPMAddPackage(
        NAME capstone
        GITHUB_REPOSITORY capstone-engine/capstone
-        GIT_TAG 6.0.0-Alpha1
+        GIT_TAG 6.0.0-Alpha9
        OPTIONS
            "CAPSTONE_X86_ATT_DISABLE ON"
            "CAPSTONE_ALPHA_SUPPORT OFF"
+            "CAPSTONE_ARC_SUPPORT OFF"
            "CAPSTONE_HPPA_SUPPORT OFF"
            "CAPSTONE_LOONGARCH_SUPPORT OFF"
            "CAPSTONE_M680X_SUPPORT OFF"
@@ -52,7 +55,7 @@ else()
    )
    add_library(TracyCapstone INTERFACE)
    target_include_directories(TracyCapstone INTERFACE ${capstone_SOURCE_DIR}/include/capstone)
-    target_link_libraries(TracyCapstone INTERFACE capstone)
+    target_link_libraries(TracyCapstone INTERFACE capstone_static)
 endif()

 # GLFW
@@ -91,7 +94,7 @@ else()
    CPMAddPackage(
        NAME freetype
        GITHUB_REPOSITORY freetype/freetype
-        GIT_TAG VER-2-13-3
+        GIT_TAG VER-2-14-3
        OPTIONS
            "FT_DISABLE_HARFBUZZ ON"
            "FT_WITH_HARFBUZZ OFF"
@@ -134,11 +137,12 @@ target_include_directories(TracyGetOpt PUBLIC ${GETOPT_DIR})
 CPMAddPackage(
    NAME ImGui
    GITHUB_REPOSITORY ocornut/imgui
-    GIT_TAG v1.91.9b-docking
+    GIT_TAG v1.92.8-docking
    DOWNLOAD_ONLY TRUE
    PATCHES
        "${CMAKE_CURRENT_LIST_DIR}/imgui-emscripten.patch"
        "${CMAKE_CURRENT_LIST_DIR}/imgui-loader.patch"
+        "${CMAKE_CURRENT_LIST_DIR}/imgui-no-samplers.patch"
 )

 set(IMGUI_SOURCES
@@ -157,9 +161,16 @@ add_library(TracyImGui STATIC EXCLUDE_FROM_ALL ${IMGUI_SOURCES})
 target_include_directories(TracyImGui PUBLIC ${ImGui_SOURCE_DIR})
 target_link_libraries(TracyImGui PUBLIC TracyFreetype)
 target_compile_definitions(TracyImGui PRIVATE "IMGUI_ENABLE_FREETYPE")
+target_compile_definitions(TracyImGui PUBLIC "IMGUI_USE_WCHAR32")
+#target_compile_definitions(TracyImGui PUBLIC "IMGUI_DISABLE_OBSOLETE_FUNCTIONS")
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND LEGACY)
+    find_package(X11 REQUIRED)
+    target_link_libraries(TracyImGui PUBLIC ${X11_LIBRARIES})
+endif()

 if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
-    target_compile_definitions(TracyImGui PRIVATE "IMGUI_DISABLE_DEBUG_TOOLS")
+    target_compile_definitions(TracyImGui PRIVATE "IMGUI_DISABLE_DEBUG_TOOLS" "IMGUI_DISABLE_DEMO_WINDOWS")
 endif()

 # NFD
@@ -174,7 +185,7 @@ if(NOT NO_FILESELECTOR AND NOT EMSCRIPTEN)
    CPMAddPackage(
        NAME nfd
        GITHUB_REPOSITORY btzy/nativefiledialog-extended
-        GIT_TAG v1.2.1
+        GIT_TAG v1.3.0
        EXCLUDE_FROM_ALL TRUE
        OPTIONS
            "NFD_PORTAL ${NFD_PORTAL}"
@@ -186,8 +197,111 @@ endif()
 CPMAddPackage(
    NAME PPQSort
    GITHUB_REPOSITORY GabTux/PPQSort
-    VERSION 1.0.5
+    VERSION 1.0.6
    PATCHES
        "${CMAKE_CURRENT_LIST_DIR}/ppqsort-nodebug.patch"
+        "${CMAKE_CURRENT_LIST_DIR}/ppqsort-semaphore.patch"
    EXCLUDE_FROM_ALL TRUE
 )
+
+# json
+
+CPMAddPackage(
+    NAME json
+    GITHUB_REPOSITORY nlohmann/json
+    GIT_TAG v3.12.0
+    EXCLUDE_FROM_ALL TRUE
+)
+
+# md4c
+
+CPMAddPackage(
+    NAME md4c
+    GITHUB_REPOSITORY mity/md4c
+    GIT_TAG 755ce49acdc7cd682d4502b4796db5ed6a1230fb
+    OPTIONS
+        "BUILD_SHARED_LIBS OFF"
+    EXCLUDE_FROM_ALL TRUE
+)
+
+if(NOT EMSCRIPTEN)
+
+    # base64
+
+    set(BUILD_SHARED_LIBS_SAVE ${BUILD_SHARED_LIBS})
+    set(BUILD_SHARED_LIBS OFF)
+    CPMAddPackage(
+        NAME base64
+        GITHUB_REPOSITORY aklomp/base64
+        GIT_TAG v0.5.2
+        OPTIONS
+            "BASE64_BUILD_CLI OFF"
+            "BASE64_WITH_OpenMP OFF"
+        EXCLUDE_FROM_ALL TRUE
+    )
+    set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_SAVE})
+
+    # tidy
+
+    CPMAddPackage(
+        NAME tidy
+        GITHUB_REPOSITORY htacg/tidy-html5
+        GIT_TAG 5.8.0
+        PATCHES
+            "${CMAKE_CURRENT_LIST_DIR}/tidy-cmake.patch"
+        EXCLUDE_FROM_ALL TRUE
+    )
+
+    # usearch
+
+    CPMAddPackage(
+        NAME usearch
+        GITHUB_REPOSITORY unum-cloud/usearch
+        GIT_TAG v2.25.2
+        EXCLUDE_FROM_ALL TRUE
+    )
+
+    # pugixml
+
+    pkg_check_modules(PUGIXML pugixml)
+    if (PUGIXML_FOUND AND NOT DOWNLOAD_PUGIXML)
+        add_library(TracyPugixml INTERFACE)
+        target_include_directories(TracyPugixml INTERFACE ${PUGIXML_INCLUDE_DIRS})
+        target_link_libraries(TracyPugixml INTERFACE ${PUGIXML_LINK_LIBRARIES})
+    else()
+        CPMAddPackage(
+            NAME pugixml
+            GITHUB_REPOSITORY zeux/pugixml
+            GIT_TAG v1.16
+            EXCLUDE_FROM_ALL TRUE
+        )
+        add_library(TracyPugixml INTERFACE)
+        target_link_libraries(TracyPugixml INTERFACE pugixml)
+    endif()
+
+    # libcurl
+
+    pkg_check_modules(LIBCURL libcurl>=7.87.0)
+    if (LIBCURL_FOUND AND NOT DOWNLOAD_LIBCURL)
+        add_library(TracyLibcurl INTERFACE)
+        target_include_directories(TracyLibcurl INTERFACE ${LIBCURL_INCLUDE_DIRS})
+        target_link_libraries(TracyLibcurl INTERFACE ${LIBCURL_LINK_LIBRARIES})
+    else()
+        CPMAddPackage(
+            NAME libcurl
+            GITHUB_REPOSITORY curl/curl
+            GIT_TAG curl-8_20_0
+            OPTIONS
+                "BUILD_STATIC_LIBS ON"
+                "BUILD_SHARED_LIBS OFF"
+                "HTTP_ONLY ON"
+                "CURL_ZSTD OFF"
+                "CURL_USE_LIBPSL OFF"
+            EXCLUDE_FROM_ALL TRUE
+        )
+        add_library(TracyLibcurl INTERFACE)
+        target_link_libraries(TracyLibcurl INTERFACE libcurl_static)
+        target_include_directories(TracyLibcurl INTERFACE ${libcurl_SOURCE_DIR}/include)
+    endif()
+
+endif()
--- a/csvexport/CMakeLists.txt
+++ b/csvexport/CMakeLists.txt
@@ -1,7 +1,5 @@
 cmake_minimum_required(VERSION 3.16)

-option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
-
 set(NO_STATISTICS OFF)

 include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
@@ -17,12 +15,14 @@ project(
 include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
 include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
 include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)
+include(${CMAKE_CURRENT_LIST_DIR}/../cmake/GitRef.cmake)

 set(PROGRAM_FILES
    src/csvexport.cpp
 )

 add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
+add_git_ref(${PROJECT_NAME})
 target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
 set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})

--- a/csvexport/src/csvexport.cpp
+++ b/csvexport/src/csvexport.cpp
@@ -16,22 +16,27 @@
 #include "../../server/TracyFileRead.hpp"
 #include "../../server/TracyWorker.hpp"
 #include "../../getopt/getopt.h"
+#include "../../public/common/TracyVersion.hpp"
+#include "GitRef.hpp"

 void print_usage_exit(int e)
 {
+    fprintf(stderr, "tracy-csvexport %i.%i.%i / %s\n\n", tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch, tracy::GitRef);
    fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, "  extract [OPTION...] <trace file>\n");
    fprintf(stderr, "\n");
-    fprintf(stderr, "  -h, --help        Print usage\n");
-    fprintf(stderr, "  -f, --filter arg  Filter zone names (default: "")\n");
-    fprintf(stderr, "  -s, --sep arg     CSV separator (default: ,)\n");
-    fprintf(stderr, "  -c, --case        Case sensitive filtering\n");
-    fprintf(stderr, "  -e, --self        Get self times\n");
-    fprintf(stderr, "  -u, --unwrap      Report each cpu zone event\n");
-    fprintf(stderr, "  -g, --gpu         Report each gpu zone event\n" );
-    fprintf(stderr, "  -m, --messages    Report only messages\n");
-    fprintf(stderr, "  -p, --plot        Report plot data (only with -u)\n");
+    fprintf(stderr, "  -h, --help               Print usage\n");
+    fprintf(stderr, "  -V, --version            Show version information\n");
+    fprintf(stderr, "  -f, --filter arg         Filter zone names (default: \"\")\n"); // quotes escaped: a bare "" only splices two literals and prints nothing
+    fprintf(stderr, "  -s, --sep arg            CSV separator (default: ,)\n");
+    fprintf(stderr, "  -c, --case               Case sensitive filtering\n");
+    fprintf(stderr, "  -e, --self               Get self times\n");
+    fprintf(stderr, "  -u, --unwrap             Report each cpu zone event\n");
+    fprintf(stderr, "  -g, --gpu                Report each gpu zone event\n" );
+    fprintf(stderr, "  -m, --messages           Report only messages\n");
+    fprintf(stderr, "  -p, --plot               Report plot data (only with -u)\n");
+    fprintf(stderr, "  -t, --truncated_mean arg Report truncated mean (arg is the percentile. Default is 90)\n");

    exit(e);
 }
@@ -46,6 +51,7 @@ struct Args {
    bool show_gpu;
    bool unwrapMessages;
    bool plot;
+    int truncated_mean_percentile;
 };

 Args parse_args(int argc, char** argv)
@@ -55,10 +61,11 @@ Args parse_args(int argc, char** argv)
        print_usage_exit(1);
    }

-    Args args = { "", ",", "", false, false, false, false, false, false };
+    Args args = { "", ",", "", false, false, false, false, false, false, 0};

    struct option long_opts[] = {
        { "help", no_argument, NULL, 'h' },
+        { "version", no_argument, NULL, 'V' },
        { "filter", optional_argument, NULL, 'f' },
        { "sep", optional_argument, NULL, 's' },
        { "case", no_argument, NULL, 'c' },
@@ -67,17 +74,21 @@ Args parse_args(int argc, char** argv)
        { "gpu", no_argument, NULL, 'g' },
        { "messages", no_argument, NULL, 'm' },
        { "plot", no_argument, NULL, 'p' },
+        { "truncated_mean", optional_argument, NULL, 't' },
        { NULL, 0, NULL, 0 }
    };

    int c;
-    while ((c = getopt_long(argc, argv, "hf:s:ceugmp", long_opts, NULL)) != -1)
+    while ((c = getopt_long(argc, argv, "hf:s:ceugmpVt::", long_opts, NULL)) != -1) // "t::": -t takes an optional attached value (-t95), matching --truncated_mean[=N]
    {
        switch (c)
        {
        case 'h':
            print_usage_exit(0);
            break;
+        case 'V':
+            printf( "tracy-csvexport %i.%i.%i / %s\n", tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch, tracy::GitRef );
+            exit( 0 );
        case 'f':
            args.filter = optarg;
            break;
@@ -102,6 +113,9 @@ Args parse_args(int argc, char** argv)
        case 'p':
            args.plot = true;
            break;
+        case 't':
+            args.truncated_mean_percentile = std::clamp<int>(optarg ? std::atoi(optarg) : 90, 1, 99);
+            break;
        default:
            print_usage_exit(1);
            break;
@@ -163,6 +177,53 @@ std::string join(const T& v, const char* sep) {
    return s.str();
 }

+// Sorts `data` in place and returns {percentile value, truncated mean}. `p` is the percentile as a
+// fraction in [0, 1]; the percentile is linearly interpolated between the two closest ranks and
+// rounded to the nearest integer. The truncated mean averages every sample <= the (unrounded)
+// percentile value, using integer division. An empty `data` yields {0, 0}.
+std::pair<int64_t, int64_t> percentile_and_truncated_mean(std::vector<int64_t>& data, const double p)
+{
+    assert(p >= 0.0 && p <= 1.0);
+
+    if (data.empty()) {
+        return {0, 0};
+    }
+
+    std::sort(data.begin(), data.end());
+
+    // Fractional rank of the requested percentile within the sorted samples.
+    const std::size_t n = data.size();
+    const double idx = p * (static_cast<double>(n) - 1.0);
+    const std::size_t idxLow = static_cast<std::size_t>(std::floor(idx));
+    const std::size_t idxHigh = std::min(idxLow + 1, n - 1);
+    const double frac = idx - static_cast<double>(idxLow);
+
+    // Interpolated percentile value.
+    const double low = static_cast<double>(data[idxLow]);
+    const double high = static_cast<double>(data[idxHigh]);
+    const double pval_double = low + (high - low) * frac;
+    const int64_t pval_int = static_cast<int64_t>(std::llround(pval_double));
+
+    // Accumulate all values <= pval_double; data is sorted, so stop at the first larger one.
+    int64_t sum = 0;
+    int64_t count = 0;
+    for (const int64_t sample : data) {
+        if (!(static_cast<double>(sample) <= pval_double)) break;
+        sum += sample;
+        ++count;
+    }
+
+    if (count == 0) {
+        // should not happen for p in [0,1] unless data empty, but keep defensive behaviour
+        return {pval_int, 0};
+    }
+
+    // count is signed on purpose: dividing int64_t by std::size_t would convert a negative sum
+    // to unsigned and produce a huge bogus mean.
+    return {pval_int, sum / count};
+}
+
+
 // From TracyView.cpp
 int64_t GetZoneChildTimeFast(
    const tracy::Worker& worker,
@@ -353,6 +414,12 @@ int main(int argc, char** argv)
            "name", "src_file", "src_line", "total_ns", "total_perc",
            "counts", "mean_ns", "min_ns", "max_ns", "std_ns"
        };
+
+        if(args.truncated_mean_percentile)
+        {
+            columns.push_back("percentile_ns");
+            columns.push_back("truncated_mean_ns");
+        }
    }
    std::string header = join(columns, args.separator);
    printf("%s\n", header.data());
@@ -404,10 +471,11 @@ int main(int argc, char** argv)
            values[3] = std::to_string(time);
            values[4] = std::to_string(100. * time / last_time);

-            values[5] = std::to_string(zone_data.zones.size());
+            const auto sz = zone_data.zones.size();
+            values[5] = std::to_string(sz);
+
+            const auto avg = time / sz;

-            const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
-                / zone_data.zones.size();
            values[6] = std::to_string(avg);

            const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
@@ -415,7 +483,6 @@ int main(int argc, char** argv)
            values[7] = std::to_string(tmin);
            values[8] = std::to_string(tmax);

-            const auto sz = zone_data.zones.size();
            const auto ss = zone_data.sumSq
                - 2. * zone_data.total * avg
                + avg * avg * sz;
@@ -424,6 +491,24 @@ int main(int argc, char** argv)
                std = sqrt(ss / (sz - 1));
            values[9] = std::to_string(std);

+            if(args.truncated_mean_percentile)
+            {
+                std::vector<int64_t> samples;
+                samples.reserve( zone_data.zones.size() );
+                for(const auto& zone_thread_data : zone_data.zones)
+                {
+                    const auto zone_event = zone_thread_data.Zone();
+                    auto timespan = zone_event->End() - zone_event->Start();
+                    if(args.self_time)
+                        timespan -= GetZoneChildTimeFast( worker, *zone_event );
+                    samples.push_back( timespan );
+                }
+
+                std::pair<int64_t, int64_t> pN = percentile_and_truncated_mean(samples, args.truncated_mean_percentile / 100.0);
+                values[10] = std::to_string(pN.first);
+                values[11] = std::to_string(pN.second);
+            }
+
            std::string row = join(values, args.separator);
            printf("%s\n", row.data());
        }
--- a/examples/CustomPlatform/CustomPlatform.cpp
+++ b/examples/CustomPlatform/CustomPlatform.cpp
@@ -0,0 +1,57 @@
+// Template implementations of the tracy::Platform* hooks. Pair with the
+// platform header (see CustomPlatform.h) and link this into your final
+// binary.
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "CustomPlatform.h"
+
+namespace tracy
+{
+
+uint32_t PlatformGetThreadId()
+{
+    return 0; // stub: reports id 0 for every caller; replace with the platform's real thread id
+}
+
+void PlatformGetHostname( char* buf, size_t size )
+{
+    if( size == 0 ) return; // no room at all, not even for the terminator
+    static const char fallback[] = "(?)";
+    // Copy as much of the placeholder as fits while reserving one byte for the NUL.
+    const size_t room = size - 1;
+    const size_t len = room < sizeof( fallback ) - 1 ? room : sizeof( fallback ) - 1;
+    memcpy( buf, fallback, len );
+    buf[len] = '\0';
+}
+
+const char* PlatformGetUserLogin()
+{
+    return "(?)";
+}
+
+const char* PlatformGetUserFullName()
+{
+    return nullptr;
+}
+
+bool PlatformSafeMemcpy( void* dst, const void* src, size_t size )
+{
+    // Stub: report failure so Tracy skips the snapshot. Real impls use SEH
+    // on Win32, pipe(2) on POSIX, or an equivalent probe-and-copy primitive.
+    (void)dst; (void)src; (void)size;
+    return false;
+}
+
+// Stubs forward to the C runtime. Swap in the allocator you actually want.
+
+void* PlatformMalloc( size_t size )                { return malloc( size ); }
+void  PlatformFree( void* ptr )                    { free( ptr ); }
+void* PlatformRealloc( void* ptr, size_t size )    { return realloc( ptr, size ); }
+
+void PlatformAllocatorInit()         {}
+void PlatformAllocatorThreadInit()   {}
+void PlatformAllocatorFinalize()     {}
+void PlatformAllocatorThreadFinalize(){}
+
+}
--- a/examples/CustomPlatform/CustomPlatform.h
+++ b/examples/CustomPlatform/CustomPlatform.h
@@ -0,0 +1,73 @@
+// Template platform header for unsupported targets.
+//
+// Copy into your project, fill in the sections you need, and point Tracy at
+// it via -DTRACY_PLATFORM_HEADER="\"my_platform.h\"". Provide the
+// implementations in any TU linked into your final binary (see
+// CustomPlatform.cpp).
+//
+// Use this only for the TRACY_HAS_CUSTOM_* hooks and matching Platform*
+// declarations — don't set unrelated TRACY_* options here. Some are checked
+// before this header is included, so the result would depend on which TU
+// consulted them; set those at the build system level instead.
+//
+// For platform-specific features without a custom hook (call stacks,
+// context switches, crash handling, system tracing, etc.), disable them at
+// the build system level with the matching TRACY_NO_* macro.
+
+#ifndef __MY_TRACY_PLATFORM_H__
+#define __MY_TRACY_PLATFORM_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace tracy
+{
+
+// --- Thread id --------------------------------------------------------------
+//
+// Required if the defaults in TracySystem.cpp do not match your platform.
+// Note pthread_self() is NOT suitable, it returns a library handle, not a kernel id.
+//#define TRACY_HAS_CUSTOM_THREAD_ID
+uint32_t PlatformGetThreadId();
+
+
+// --- User info --------------------------------------------------------------
+//
+// Identifies the machine and user in the trace header. Return placeholder
+// strings (e.g. "(?)") from any of these if your platform has no equivalent
+// notion.
+//#define TRACY_HAS_CUSTOM_USER_INFO
+void        PlatformGetHostname( char* buf, size_t size );
+const char* PlatformGetUserLogin();
+const char* PlatformGetUserFullName();
+
+
+// --- Safe memory copy -------------------------------------------------------
+//
+// Tracy uses this to snapshot potentially-unmapped memory during sampling.
+// Must not crash on unreadable input — return false instead. Plain memcpy()
+// is NOT a valid implementation.
+//#define TRACY_HAS_CUSTOM_SAFE_COPY
+bool PlatformSafeMemcpy( void* dst, const void* src, size_t size );
+
+
+// --- Allocator --------------------------------------------------------------
+//
+// Replaces Tracy's internal allocator. Drop in the system allocator, an
+// in-house one, or any third-party allocator you like. Malloc/Free/Realloc
+// must be thread-safe; ThreadInit is an optional prime, not a precondition.
+// Finalize must also tear down the calling thread's per-thread state, the
+// way rpmalloc_finalize() does — Tracy does not call ThreadFinalize for the
+// shutdown thread before Finalize.
+//#define TRACY_HAS_CUSTOM_ALLOCATOR
+void* PlatformMalloc( size_t size );
+void  PlatformFree( void* ptr );
+void* PlatformRealloc( void* ptr, size_t size );
+void  PlatformAllocatorInit();
+void  PlatformAllocatorThreadInit();
+void  PlatformAllocatorFinalize();
+void  PlatformAllocatorThreadFinalize();
+
+}
+
+#endif
--- a/examples/ToyPathTracer/Source/MathSimd.h
+++ b/examples/ToyPathTracer/Source/MathSimd.h
@@ -8,7 +8,7 @@

 #define kSimdWidth 4

-#if !defined(__arm__) && !defined(__arm64__) && !defined(__EMSCRIPTEN__)
+#if !defined(__arm__) && !defined(__arm64__) && !defined(__EMSCRIPTEN__) && !defined(_M_ARM64)

 // ---- SSE implementation

@@ -141,8 +141,14 @@ VM_INLINE float hmin(float4 v)
 // Returns a 4-bit code where bit0..bit3 is X..W
 VM_INLINE unsigned mask(float4 v)
 {
+#if defined(_M_ARM64)
+    static const uint32_t values[4] = { 1u, 2u, 4u, 8u };
+    const uint32x4_t movemask = vld1q_u32( values );
+    const uint32x4_t highbit = vdupq_n_u32( 0x80000000u );
+#else
    static const uint32x4_t movemask = { 1, 2, 4, 8 };
    static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+#endif
    uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
    uint32x4_t t1 = vtstq_u32(t0, highbit);
    uint32x4_t t2 = vandq_u32(t1, movemask);
--- a/examples/ToyPathTracer/Source/Maths.h
+++ b/examples/ToyPathTracer/Source/Maths.h
@@ -12,7 +12,7 @@
 #if DO_FLOAT3_WITH_SIMD


-#if !defined(__arm__) && !defined(__arm64__)
+#if !defined(__arm__) && !defined(__arm64__) && !defined(_M_ARM64)

 // ---- SSE implementation

@@ -223,8 +223,14 @@ VM_INLINE float3 cross(float3 a, float3 b)
 // Returns a 3-bit code where bit0..bit2 is X..Z
 VM_INLINE unsigned mask(float3 v)
 {
+#if defined(_M_ARM64)
+    static const uint32_t values[4] = { 1u, 2u, 4u, 8u };
+    const uint32x4_t movemask = vld1q_u32( values );
+    const uint32x4_t highbit = vdupq_n_u32( 0x80000000u );
+#else 
    static const uint32x4_t movemask = { 1, 2, 4, 8 };
    static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+#endif
    uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
    uint32x4_t t1 = vtstq_u32(t0, highbit);
    uint32x4_t t2 = vandq_u32(t1, movemask);
--- a/examples/ToyPathTracer/Source/enkiTS/TaskScheduler.cpp
+++ b/examples/ToyPathTracer/Source/enkiTS/TaskScheduler.cpp
@@ -72,7 +72,11 @@ namespace
 		#if defined _M_IX86  || defined _M_X64
 			#pragma intrinsic(_mm_pause)
 			inline void Pause() { _mm_pause(); }
-		#endif
+        #elif defined(_M_ARM64)
+			inline void Pause() { __yield(); }
+        #else
+            inline void Pause() { /* No ops*/ }
+        #endif
 	#elif defined __i386__ || defined __x86_64__
 		inline void Pause() { __asm__ __volatile__("pause;"); }
 	#else
--- a/examples/cuda/README.md
+++ b/examples/cuda/README.md
--- a/examples/cuda/graph/CMakeLists.txt
+++ b/examples/cuda/graph/CMakeLists.txt
@@ -0,0 +1,39 @@
+cmake_minimum_required(VERSION 3.18)
+project(CUDAGraphDemo LANGUAGES CXX CUDA)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CUDA_STANDARD 17)
+
+if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
+    set(CMAKE_CUDA_ARCHITECTURES native)
+endif()
+
+set(TRACY_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../.."
+    CACHE PATH "Root of the Tracy repository")
+set(TRACY_PUBLIC "${TRACY_PATH}/public")
+
+find_package(CUDAToolkit REQUIRED)
+find_package(Threads REQUIRED)
+
+# cuda-graph-demo.cu embeds Tracy via #include <TracyClient.cpp> (unity build),
+# so no separate TracyClient library is needed — just expose the public headers.
+add_executable(cuda-graph-demo cuda-graph-demo.cu)
+target_include_directories(cuda-graph-demo PRIVATE ${TRACY_PUBLIC})
+target_link_libraries(cuda-graph-demo PRIVATE
+    CUDA::cupti CUDA::cuda_driver Threads::Threads ${CMAKE_DL_LIBS})
+
+# ctest-related integration below
+# to run the binaries via ctest:
+# ctest --test-dir <cmake-build-dir> -R <binary-name> -C <build-config>
+
+enable_testing()
+add_test(NAME cuda-graph-demo COMMAND cuda-graph-demo)
+
+# On Windows, CUPTI's DLL must be on PATH at runtime.
+if(WIN32)
+    set(_cupti_dir "$<TARGET_FILE_DIR:CUDA::cupti>")
+    set_target_properties(cuda-graph-demo PROPERTIES
+        VS_DEBUGGER_ENVIRONMENT "PATH=${_cupti_dir};$ENV{PATH}")
+    set_tests_properties(cuda-graph-demo PROPERTIES
+        ENVIRONMENT "PATH=${_cupti_dir};$ENV{PATH}")
+endif()
--- a/examples/cuda/graph/build.sh
+++ b/examples/cuda/graph/build.sh
@@ -0,0 +1,11 @@
+TRACY_PATH="${TRACY_PATH:-../../..}"                        # Tracy repository root; defaults to the checkout containing this example (run from this directory)
+CUDA_TOOLKIT_PATH="${CUDA_TOOLKIT_PATH:-/usr/local/cuda}"  # export either variable beforehand to override the default
+CUDA_CUPTI_PATH=${CUDA_TOOLKIT_PATH}/extras/CUPTI
+
+# pass -v to nvcc for verbose build information
+nvcc -O2 -std=c++17 cuda-graph-demo.cu \
+     -o cuda-graph-demo \
+     -I "${TRACY_PATH}/public" \
+     -I "${CUDA_CUPTI_PATH}/include" -I "${CUDA_TOOLKIT_PATH}/include" \
+     -L "${CUDA_CUPTI_PATH}/lib64"   -L "${CUDA_TOOLKIT_PATH}/lib64" \
+     -lcupti -lcuda
--- a/examples/cuda/graph/cuda-graph-demo.cu
+++ b/examples/cuda/graph/cuda-graph-demo.cu
@@ -0,0 +1,146 @@
+#include <cuda_runtime.h>
+
+// WARN: for simplicity, we enable and "embed" the Tracy client directly into the code
+#define TRACY_ENABLE
+#include <TracyClient.cpp>
+
+#include <tracy/Tracy.hpp>
+#include <tracy/TracyCUDA.hpp>
+
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#define CUDA_CHECK(call)                                                          \
+    do {                                                                          \
+        cudaError_t err__ = (call);                                               \
+        if (err__ != cudaSuccess) {                                               \
+            std::fprintf(stderr, "CUDA error %s at %s:%d: %s\n",                  \
+                         cudaGetErrorName(err__), __FILE__, __LINE__,             \
+                         cudaGetErrorString(err__));                              \
+            std::exit(EXIT_FAILURE);                                              \
+        }                                                                         \
+    } while (0)
+
+__global__ void saxpy(float a, const float* x, float* y, int n)
+{
+    int i = blockIdx.x * blockDim.x + threadIdx.x; // flat index of this thread across the 1-D launch
+    if (i < n) y[i] = a * x[i] + y[i];             // in-place y = a*x + y; the guard skips threads past n
+}
+
+int main()
+{
+    // CUPTI-backed Tracy context. Auto-captures all CUDA activity from the
+    // point StartProfiling() is called until StopProfiling(). The background
+    // collector thread flushes activity into Tracy; the explicit Collect()
+    // calls below just force a flush at known phase boundaries.
+    auto* cudaCtx = TracyCUDAContext();
+    {
+        constexpr char ctxName[] = "CUDA Graph Demo";
+        TracyCUDAContextName(cudaCtx, ctxName, sizeof(ctxName) - 1);
+    }
+    TracyCUDAStartProfiling(cudaCtx);
+
+    constexpr int N = 1 << 16;              // small N => kernel is short => launch overhead dominates
+    constexpr int KERNELS_PER_GRAPH = 32;   // chain length captured into the graph
+    constexpr int OUTER_ITERS = 2000;       // how many times we replay the chain
+
+    // allocate device buffers
+    float *dX = nullptr, *dY = nullptr;
+    CUDA_CHECK(cudaMalloc(&dX, N * sizeof(float)));
+    CUDA_CHECK(cudaMalloc(&dY, N * sizeof(float)));
+
+    std::vector<float> hX(N, 1.0f);
+    CUDA_CHECK(cudaMemcpy(dX, hX.data(), N * sizeof(float), cudaMemcpyHostToDevice));
+
+    cudaStream_t stream = nullptr;
+    CUDA_CHECK(cudaStreamCreate(&stream));
+
+    const dim3 block(256);
+    const dim3 grid((N + block.x - 1) / block.x);
+
+    cudaEvent_t evStart, evStop;
+    CUDA_CHECK(cudaEventCreate(&evStart));
+    CUDA_CHECK(cudaEventCreate(&evStop));
+
+    // warm-up (so first-launch lazy-init and/or JIT doesn't bias the measurement)
+    saxpy<<<grid, block, 0, stream>>>(0.0f, dX, dY, N);
+    CUDA_CHECK(cudaStreamSynchronize(stream));
+
+    // baseline: launch each kernel directly on the stream
+    float msStream = 0.0f;
+    {
+        ZoneScopedN("stream-launches");
+        CUDA_CHECK(cudaMemsetAsync(dY, 0, N * sizeof(float), stream));
+        CUDA_CHECK(cudaEventRecord(evStart, stream));
+        for (int outer = 0; outer < OUTER_ITERS; ++outer) {
+            for (int k = 0; k < KERNELS_PER_GRAPH; ++k) {
+                saxpy<<<grid, block, 0, stream>>>(1.0e-6f, dX, dY, N);
+            }
+        }
+        CUDA_CHECK(cudaEventRecord(evStop, stream));
+        CUDA_CHECK(cudaEventSynchronize(evStop));
+        CUDA_CHECK(cudaEventElapsedTime(&msStream, evStart, evStop));
+        TracyCUDACollect(cudaCtx);
+    }
+
+    // capture: record the same kernel chain into a graph
+    cudaGraph_t     graph     = nullptr;
+    cudaGraphExec_t graphExec = nullptr;
+    {
+        ZoneScopedN("graph-capture");
+        // cudaStreamCaptureModeRelaxed allows the calling thread to perform
+        // unrelated CUDA work during capture; ThreadLocal is stricter if you need
+        // isolation. Most short, single-stream captures work fine in either mode.
+        CUDA_CHECK(cudaStreamBeginCapture(stream, cudaStreamCaptureModeRelaxed));
+        for (int k = 0; k < KERNELS_PER_GRAPH; ++k) {
+            saxpy<<<grid, block, 0, stream>>>(1.0e-6f, dX, dY, N);
+        }
+        CUDA_CHECK(cudaStreamEndCapture(stream, &graph));
+
+        // Instantiate once -> reusable executable graph.
+        CUDA_CHECK(cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+
+        // The template graph isn't needed once instantiated.
+        CUDA_CHECK(cudaGraphDestroy(graph));
+    }
+
+    // replay: launch the instantiated graph OUTER_ITERS times
+    float msGraph = 0.0f;
+    {
+        ZoneScopedN("graph-launches");
+        CUDA_CHECK(cudaMemsetAsync(dY, 0, N * sizeof(float), stream));
+        CUDA_CHECK(cudaEventRecord(evStart, stream));
+        for (int outer = 0; outer < OUTER_ITERS; ++outer) {
+            CUDA_CHECK(cudaGraphLaunch(graphExec, stream));
+        }
+        CUDA_CHECK(cudaEventRecord(evStop, stream));
+        CUDA_CHECK(cudaEventSynchronize(evStop));
+        CUDA_CHECK(cudaEventElapsedTime(&msGraph, evStart, evStop));
+        TracyCUDACollect(cudaCtx);
+    }
+
+    // sanity check: y[i] = OUTER_ITERS * KERNELS_PER_GRAPH * 1e-6 * x[i]
+    std::vector<float> hY(N);
+    CUDA_CHECK(cudaMemcpy(hY.data(), dY, N * sizeof(float), cudaMemcpyDeviceToHost));
+    const float expected = float(OUTER_ITERS) * float(KERNELS_PER_GRAPH) * 1.0e-6f;
+
+    std::printf("Stream launches: %8.3f ms  (%d kernels)\n",
+                msStream, OUTER_ITERS * KERNELS_PER_GRAPH);
+    std::printf("Graph  launches: %8.3f ms  (%d graph launches x %d kernels)\n",
+                msGraph, OUTER_ITERS, KERNELS_PER_GRAPH);
+    std::printf("Speedup        : %8.2fx\n", msStream / msGraph);
+    std::printf("hY[0] = %.6e  (expected %.6e)\n", hY[0], expected);
+
+    // shutdown
+    CUDA_CHECK(cudaGraphExecDestroy(graphExec));
+    CUDA_CHECK(cudaEventDestroy(evStart));
+    CUDA_CHECK(cudaEventDestroy(evStop));
+    CUDA_CHECK(cudaStreamDestroy(stream));
+    CUDA_CHECK(cudaFree(dX));
+    CUDA_CHECK(cudaFree(dY));
+
+    TracyCUDAStopProfiling(cudaCtx);
+    TracyCUDAContextDestroy(cudaCtx);
+    return 0;
+}
--- a/examples/dyna/CMakeLists.txt
+++ b/examples/dyna/CMakeLists.txt
@@ -0,0 +1,63 @@
+cmake_minimum_required(VERSION 3.29)
+project(dyna LANGUAGES C CXX)
+
+option(TRACY_ENABLE "Enable Tracy" ON)  # defined before the repository root is added below
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(CMAKE_COLOR_DIAGNOSTICS ON)
+
+include(cmake/CPM.cmake)  # downloads CPM and provides CPMAddPackage
+
+CPMAddPackage(
+  NAME glad
+  VERSION 2.0.8
+  GIT_REPOSITORY https://github.com/Dav1dde/glad.git
+  GIT_TAG glad2  # NOTE(review): looks like a branch name rather than the 2.0.8 tag - confirm this is intended
+)
+
+add_subdirectory(${glad_SOURCE_DIR}/cmake ${CMAKE_CURRENT_BINARY_DIR}/glad)  # provides glad_add_library
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../.. client/)  # repository root; presumably defines Tracy::TracyClient
+
+glad_add_library(glad_gl_core_33 STATIC API gl:core=3.3)  # OpenGL 3.3 core loader, matches "#version 330 core" shaders
+
+find_package(SDL3 REQUIRED)
+find_package(SDL3_image REQUIRED)
+
+add_executable(dyna
+  src/main.cpp
+  src/datapath.cpp
+  src/timer.cpp
+  src/gfx.cpp
+  src/texture.cpp
+  src/entity.cpp
+  src/world.cpp
+  src/map.cpp
+  src/player.cpp
+  src/monster.cpp
+  src/bomb.cpp
+  src/bonus.cpp
+  src/game.cpp
+)
+
+target_link_libraries(dyna
+  PRIVATE
+    glad_gl_core_33
+    SDL3::SDL3
+    SDL3_image::SDL3_image
+    Tracy::TracyClient
+)
+
+target_include_directories(dyna PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
+
+# Mirror the data/ tree next to the executable so the game finds its assets
+# when launched from the build directory (paths are resolved via SDL_GetBasePath).
+add_custom_command(TARGET dyna POST_BUILD
+  COMMAND ${CMAKE_COMMAND} -E copy_directory
+          ${CMAKE_CURRENT_SOURCE_DIR}/data
+          $<TARGET_FILE_DIR:dyna>/data
+  COMMENT "Copying data/ next to dyna executable"
+)
+
+file(GENERATE OUTPUT .gitignore CONTENT "*")  # writes a .gitignore containing "*"; NOTE(review): relative OUTPUT should resolve to the build dir (CMP0070) - confirm
--- a/examples/dyna/LICENSE
+++ b/examples/dyna/LICENSE
@@ -0,0 +1,7 @@
+Dyna.net copyright 2005 by Bartosz Taudul and Ralf Wrześniewski.
+
+This program (including source code and the asset it uses) is NOT licensed
+for any use other than being an example of how to integrate Tracy Profiler.
+
+The license terms written in other parts of this repository DO NOT apply
+here.
--- a/examples/dyna/cmake/CPM.cmake
+++ b/examples/dyna/cmake/CPM.cmake
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: MIT
+#
+# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors
+
+set(CPM_DOWNLOAD_VERSION 0.42.3)
+set(CPM_HASH_SUM "a609e875fd532b067174250f6abbc3dac22fe2d64869783fb1e80bda1625c844")
+
+if(CPM_SOURCE_CACHE)
+  set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+elseif(DEFINED ENV{CPM_SOURCE_CACHE})
+  set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+else()
+  set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+endif()
+
+# Expand relative path. This is important if the provided path contains a tilde (~)
+get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
+
+file(DOWNLOAD
+     https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
+     ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
+)
+
+include(${CPM_DOWNLOAD_LOCATION})
--- a/examples/dyna/data/gfx/Bomb.png
+++ b/examples/dyna/data/gfx/Bomb.png
--- a/examples/dyna/data/gfx/Player.png
+++ b/examples/dyna/data/gfx/Player.png
--- a/examples/dyna/data/gfx/bonusy.png
+++ b/examples/dyna/data/gfx/bonusy.png
--- a/examples/dyna/data/gfx/crate.png
+++ b/examples/dyna/data/gfx/crate.png
--- a/examples/dyna/data/gfx/menu.png
+++ b/examples/dyna/data/gfx/menu.png
--- a/examples/dyna/data/gfx/monster1.png
+++ b/examples/dyna/data/gfx/monster1.png
--- a/examples/dyna/data/gfx/monster2.png
+++ b/examples/dyna/data/gfx/monster2.png
--- a/examples/dyna/data/gfx/monster3.png
+++ b/examples/dyna/data/gfx/monster3.png
--- a/examples/dyna/data/gfx/portal.png
+++ b/examples/dyna/data/gfx/portal.png
--- a/examples/dyna/data/gfx/sand.png
+++ b/examples/dyna/data/gfx/sand.png
--- a/examples/dyna/data/gfx/wall.png
+++ b/examples/dyna/data/gfx/wall.png
--- a/examples/dyna/data/levels/1
+++ b/examples/dyna/data/levels/1
@@ -0,0 +1,12 @@
+10 1 0 0
+@............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
--- a/examples/dyna/data/levels/2
+++ b/examples/dyna/data/levels/2
@@ -0,0 +1,12 @@
+20 4 0 0
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#@#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
--- a/examples/dyna/data/levels/3
+++ b/examples/dyna/data/levels/3
@@ -0,0 +1,12 @@
+40 3 2 0
+@............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
--- a/examples/dyna/data/levels/4
+++ b/examples/dyna/data/levels/4
@@ -0,0 +1,12 @@
+40 3 3 0
+@............
+.###.#.#.###.
+.............
+.#.#.#.#.#.#.
+.............
+.###.#.#.###.
+.............
+.#.#.#.#.#.#.
+.............
+.###.#.#.###.
+.............
--- a/examples/dyna/data/levels/5
+++ b/examples/dyna/data/levels/5
@@ -0,0 +1,12 @@
+40 2 4 1
+@............
+.###.#.#.###.
+.#.........#.
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.#.........#.
+.###.#.#.###.
+.............
--- a/examples/dyna/data/levels/6
+++ b/examples/dyna/data/levels/6
@@ -0,0 +1,12 @@
+50 2 2 3
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#.
+.....#.#.....
+.#.###.###.#.
+.............
+.#.###.###.#.
+.....#.#.....
+.#.#.#.#.#.#.
+.............
+.#.#.#.#.#.#@
--- a/examples/dyna/data/levels/7
+++ b/examples/dyna/data/levels/7
@@ -0,0 +1,12 @@
+60 3 3 3
+@............
+.#.#.###.#.#.
+.............
+.###.#.#.###.
+.............
+.#.#.###.#.#.
+.............
+.###.#.#.###.
+.............
+.#.#.###.#.#.
+.............
--- a/examples/dyna/data/levels/8
+++ b/examples/dyna/data/levels/8
@@ -0,0 +1,12 @@
+60 5 3 3
+@............
+.#.#.#.#.#.#.
+.............
+.#...#.#...#.
+.............
+.#.#.#.#.#.#.
+.............
+.#...#.#...#.
+.............
+.#.#.#.#.#.#.
+.............
--- a/examples/dyna/data/levels/9
+++ b/examples/dyna/data/levels/9
@@ -0,0 +1,12 @@
+90 5 5 5
+@............
+.#.........#.
+.............
+.............
+.............
+.............
+.............
+.............
+.............
+.#.........#.
+.............
--- a/examples/dyna/data/levels/menu
+++ b/examples/dyna/data/levels/menu
@@ -0,0 +1,12 @@
+30 4 4 4
+@............
+.#.#.#.#.#.#.
+.............
+.#...#.#...#.
+.............
+.#.#.#.#.#.#.
+.............
+.#...#.#...#.
+.............
+.#.#.#.#.#.#.
+.............
--- a/examples/dyna/src/bomb.cpp
+++ b/examples/dyna/src/bomb.cpp
@@ -0,0 +1,143 @@
+#include "bomb.hpp"
+
+#include "gfx.hpp"
+#include "map.hpp"
+#include "texture.hpp"
+#include "timer.hpp"
+#include "world.hpp"
+
+#include <tracy/Tracy.hpp>
+
+namespace dyna
+{
+
+// Places a bomb on grid tile (x_, y_). It begins with 9 steps left, which
+// draw() turns into the fade-in frame index (9 - left).
+Bomb::Bomb( int x_, int y_ )
+    : x{ x_ }
+    , y{ y_ }
+    , left{ 9 }
+{
+}
+
+// Draws the bomb sprite on its tile. Nothing is drawn while exploding.
+void Bomb::draw()
+{
+    ZoneScoped;
+
+    switch( stage )
+    {
+    case Stage::exploding:
+        return;
+
+    case Stage::appear:
+        // left counts down from 9, so the frame index counts up from 0.
+        Textures::bomb_appear.bind( 9 - left );
+        break;
+
+    default:
+    {
+        // Fraction of the countdown already elapsed, scaled to 8 frames.
+        const float elapsed = ( time - left ) / static_cast<float>( time );
+        int frame = static_cast<int>( elapsed * 8 );
+        // Bump the frame on every other 100-unit slice of the timestamp.
+        const bool even_slice = Timer::get_timestamp() / 100 % 2 == 0;
+        if( even_slice )
+            frame++;
+        Textures::bomb.bind( frame );
+        break;
+    }
+    }
+
+    Gfx::draw_square( x, y );
+}
+
+// Advances the bomb's state machine. Timer::delta is accumulated into
+// `delta` and consumed in slices of 10:
+//   appear    -> counts `left` down, then switches to ticking (left = time)
+//   ticking   -> counts `left` down, then explode()
+//   exploding -> counts `left` down, then die()
+void Bomb::tick( World& world )
+{
+    ZoneScoped;
+    delta += Timer::delta;
+
+    while( delta > 10 )
+    {
+        delta -= 10;
+
+        if( stage == Stage::appear )
+        {
+            if( left > 0 )
+            {
+                // A fade-in step consumes a second slice, i.e. 20 units of
+                // delta per step versus 10 per step in the later stages.
+                delta -= 10;
+                left--;
+            }
+            else
+            {
+                stage = Stage::ticking;
+                left = time;
+            }
+        }
+        else if( left > 0 )
+        {
+            left--;
+        }
+        else if( stage == Stage::ticking )
+        {
+            explode( world );
+        }
+        else
+        {
+            // The bomb is finished. Stop here instead of calling die() again
+            // for every remaining slice of accumulated time.
+            die( world );
+            return;
+        }
+    }
+}
+
+// Switches to the exploding stage (200 steps) and paints the cross-shaped
+// blast onto the map. Every tile painted along an arm is remembered in
+// `etiles` so die() can turn it back into floor.
+void Bomb::explode( World& world )
+{
+    ZoneScoped;
+    stage = Stage::exploding;
+    left = 200;
+
+    Map& map = world.map();
+    map.at( x, y ) = Field::explosion( Field::ExplosionType::center );
+
+    // One entry per arm of the cross: step direction, the tile used along the
+    // arm, and the tile used for its end.
+    struct Arm
+    {
+        int dx, dy;
+        Field::ExplosionType body, cap;
+    };
+    const Arm arms[4] = {
+        { -1, 0, Field::ExplosionType::horizontal, Field::ExplosionType::left },
+        { 1, 0, Field::ExplosionType::horizontal, Field::ExplosionType::right },
+        { 0, -1, Field::ExplosionType::vertical, Field::ExplosionType::up },
+        { 0, 1, Field::ExplosionType::vertical, Field::ExplosionType::down },
+    };
+
+    for( const Arm& arm : arms )
+    {
+        for( int step = 1; step <= maxrange; ++step )
+        {
+            const int tx = x + arm.dx * step;
+            const int ty = y + arm.dy * step;
+
+            // Stop at the edge of the map.
+            const bool inside = tx >= 0 && tx < map.getx() && ty >= 0 && ty < map.gety();
+            if( !inside )
+                break;
+
+            // Stop at anything the blast cannot pass or destroy.
+            const Destruction destr = map.at( tx, ty ).destructible();
+            if( destr == Destruction::none )
+                break;
+
+            etiles.emplace_back( tx, ty );
+
+            if( map.at( tx, ty ).kind == Field::Kind::crate )
+                world.crates_left--;
+
+            // The arm ends at full range or on a tile that is destroyed once.
+            const bool arm_ends = step == maxrange || destr == Destruction::single;
+            map.at( tx, ty ) = Field::explosion( arm_ends ? arm.cap : arm.body );
+            if( arm_ends )
+                break;
+        }
+    }
+}
+
+// Marks the bomb dead and reverts its own tile plus every tile recorded by
+// explode() back to floor.
+void Bomb::die( World& world )
+{
+    ZoneScoped;
+    Map& map = world.map();
+
+    map.at( x, y ) = Field::floor();
+    for( const std::pair<int, int>& tile : etiles )
+    {
+        map.at( tile.first, tile.second ) = Field::floor();
+    }
+
+    dead = true;
+}
+
+}
--- a/examples/dyna/src/bomb.hpp
+++ b/examples/dyna/src/bomb.hpp
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <utility>
+#include <vector>
+
+namespace dyna
+{
+
+class World;
+
+// A bomb on the grid: fades in, counts down, then paints a cross-shaped
+// explosion onto the map and clears it again. Ported from bomb.cs.
+class Bomb
+{
+public:
+    Bomb( int x, int y );   // x, y: grid tile the bomb is placed on
+
+    void draw();                 // binds the frame for the current stage and draws the tile; draws nothing while exploding
+    void tick( World& world );   // consumes Timer::delta and advances the stage machine
+
+    bool is_dead() const { return dead; }   // true once die() has run
+
+private:
+    void explode( World& world );   // paints the blast onto the map and fills etiles
+    void die( World& world );       // reverts the painted tiles to floor and sets dead
+
+    enum class Stage
+    {
+        appear,     // fading in
+        ticking,    // counting down to the explosion
+        exploding   // blast is on the map
+    };
+
+    int x, y;                 // grid coordinates
+    Stage stage = Stage::appear;
+    int left;                 // steps remaining in the current stage (set to 9 by the constructor)
+    int delta = 0;            // time accumulated from Timer::delta, consumed by tick() in slices of 10
+    static constexpr int time = 150;     // value `left` is reset to when the ticking stage starts
+    static constexpr int maxrange = 1;   // length of each explosion arm, in tiles
+    std::vector<std::pair<int, int>> etiles;   // tiles to revert to floor
+    bool dead = false;
+};
+
+}
--- a/examples/dyna/src/bonus.cpp
+++ b/examples/dyna/src/bonus.cpp
@@ -0,0 +1,58 @@
+#include "bonus.hpp"
+
+#include "gfx.hpp"
+#include "texture.hpp"
+#include "timer.hpp"
+
+#include <tracy/Tracy.hpp>
+
+namespace dyna
+{
+
+// Creates the portal on grid tile (gx, gy) in the appear action with 79
+// steps left before tick() switches it to wait.
+Vortex::Vortex( int gx, int gy )
+{
+    // Kept in grid units: draw() passes x/y straight to draw_square.
+    x = gx;
+    y = gy;
+    left = 79;
+    set_action( Action::appear );
+}
+
+// Draws the portal. The animation frame is derived from the time elapsed
+// since the current action started, one frame per 40 timestamp units.
+void Vortex::draw()
+{
+    ZoneScoped;
+    const auto elapsed = Timer::get_timestamp() - action_start;
+    const int frame = static_cast<int>( elapsed / 40 );
+
+    // Only appear and wait bind a texture; any other action draws with
+    // whatever texture is currently selected.
+    if( action == Action::appear )
+    {
+        Textures::vortex_appear.bind( frame );
+    }
+    else if( action == Action::wait )
+    {
+        Textures::vortex.bind( frame );
+    }
+
+    Gfx::draw_square( x, y );
+}
+
+// Consumes accumulated time in slices of 10: counts `left` down and, once it
+// reaches zero, moves from the appear action to wait.
+void Vortex::tick( World& )
+{
+    ZoneScoped;
+
+    for( delta += Timer::delta; delta > 10; delta -= 10 )
+    {
+        if( left > 0 )
+        {
+            --left;
+            continue;
+        }
+
+        if( action == Action::appear )
+            set_action( Action::wait );
+    }
+}
+
+void Vortex::die( World& ) {}   // no-op: the portal does nothing when die() is called
+
+}
--- a/examples/dyna/src/bonus.hpp
+++ b/examples/dyna/src/bonus.hpp
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "entity.hpp"
+
+namespace dyna
+{
+
+// The level-exit portal. Unlike the other entities its coordinates are stored in
+// grid units (it draws via draw_square), matching bonus.cs.
+class Vortex : public Entity
+{
+public:
+    Vortex( int gx, int gy );   // gx, gy: grid tile of the portal
+
+    void draw() override;                 // binds the appear/wait animation frame and draws the tile
+    void tick( World& world ) override;   // counts `left` down, then switches appear -> wait
+    void die( World& world ) override;    // empty implementation
+};
+
+}
--- a/examples/dyna/src/datapath.cpp
+++ b/examples/dyna/src/datapath.cpp
@@ -0,0 +1,25 @@
+#include "datapath.hpp"
+
+#include <SDL3/SDL.h>
+
+#include <tracy/Tracy.hpp>
+
+namespace dyna
+{
+
+// Returns `rel` prefixed with the directory reported by SDL_GetBasePath.
+std::string data_path( const std::string& rel )
+{
+    ZoneScoped;
+    ZoneText( rel.c_str(), rel.size() );
+
+    // Looked up once and cached for the program's lifetime. The pointer SDL
+    // returns is owned by SDL; a null result becomes an empty prefix.
+    static const std::string base = []
+    {
+        if( const char* dir = SDL_GetBasePath() )
+            return std::string( dir );
+        return std::string( "" );
+    }();
+
+    return base + rel;
+}
+
+}
--- a/examples/dyna/src/datapath.hpp
+++ b/examples/dyna/src/datapath.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <string>
+
+namespace dyna
+{
+
+// Resolve a path relative to the directory containing the executable, so the
+// game finds its data files regardless of the current working directory (e.g.
+// when launched from the build tree). The data/ tree is copied next to the
+// binary at build time; see CMakeLists.txt.
+std::string data_path( const std::string& rel );
+
+}
--- a/examples/dyna/src/entity.cpp
+++ b/examples/dyna/src/entity.cpp
@@ -0,0 +1,39 @@
+#include "entity.hpp"
+
+#include "map.hpp"
+#include "timer.hpp"
+
+namespace dyna
+{
+
+// Switches to action `a` and records when it started, so animations can be
+// timed from action_start.
+void Entity::set_action( Action a )
+{
+    action_start = Timer::get_timestamp();
+    action = a;
+}
+
+// Reports whether the entity may step one tile in direction `a`.
+//
+// x/y are pixel coordinates (64 per tile). The neighbouring tile in the
+// requested direction must exist and must not be solid. Actions other than
+// up/down/left/right are always allowed.
+//
+// All four guards are expressed in tile indices. The previous up/left guards
+// tested the pixel coordinate (`y > 0`), which let a position inside the first
+// row or column (1..63) index tile -1. For tile-aligned positions the result
+// is unchanged.
+bool Entity::can_move( Action a, const Map& map ) const
+{
+    const int tx = x / 64;
+    const int ty = y / 64;
+
+    switch( a )
+    {
+    case Action::up:
+        return ty > 0 && !map.at( tx, ty - 1 ).solid();
+    case Action::down:
+        return ty < map.gety() - 1 && !map.at( tx, ty + 1 ).solid();
+    case Action::left:
+        return tx > 0 && !map.at( tx - 1, ty ).solid();
+    case Action::right:
+        return tx < map.getx() - 1 && !map.at( tx + 1, ty ).solid();
+    default:
+        return true;
+    }
+}
+
+// True when the tile under the entity's centre (pixel position + 32, divided
+// by the 64-pixel tile size) currently holds an explosion.
+bool Entity::killed( const Map& map ) const
+{
+    const int centre_tx = ( x + 32 ) / 64;
+    const int centre_ty = ( y + 32 ) / 64;
+    const auto under = map.at( centre_tx, centre_ty ).kind;
+    return under == Field::Kind::explosion;
+}
+
+}
--- a/examples/dyna/src/entity.hpp
+++ b/examples/dyna/src/entity.hpp
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <cstdint>
+
+namespace dyna
+{
+
+class Map;
+class World;
+
+// Movement/state verbs shared by the player and monsters. In the C# source this
+// lived as Entity.Action; promoted to namespace scope so Game can refer to it.
+enum class Action
+{
+    wait,         // default action; Game sends it when the held arrow key is released
+    up,           // the four directions are the ones Entity::can_move checks
+    down,
+    left,
+    right,
+    death,
+    place_bomb,
+    appear        // used by Vortex while it fades in
+};
+
+// Base for everything that moves on the grid. Coordinates are in pixels
+// (64 per tile) and laid out top-left origin, matching entity.cs.
+class Entity
+{
+public:
+    virtual ~Entity() = default;
+
+    virtual void set_action( Action a );   // stores the action and the timestamp it started at
+
+    int getx() const { return x; }
+    int gety() const { return y; }
+
+    virtual void draw() = 0;
+    virtual void tick( World& world ) = 0;
+    virtual void die( World& world ) = 0;
+
+protected:
+    bool can_move( Action a, const Map& map ) const;   // is the neighbouring tile in direction `a` inside the map and not solid?
+    virtual bool killed( const Map& map ) const;       // is the tile under the entity's centre an explosion?
+
+    int x = 0, y = 0;                // position; pixels by default (Vortex stores grid units instead)
+    std::int64_t action_start = 0;   // Timer::get_timestamp() at the last set_action call
+    int delta = 0;                   // time accumulator for subclasses' tick()
+    Action action = Action::wait;
+    int left = 0;                    // step countdown used by subclasses
+};
+
+}
--- a/examples/dyna/src/game.cpp
+++ b/examples/dyna/src/game.cpp
@@ -0,0 +1,210 @@
+#include "game.hpp"
+
+#include "datapath.hpp"
+#include "gfx.hpp"
+#include "map.hpp"
+#include "player.hpp"
+#include "timer.hpp"
+#include "world.hpp"
+
+#include <SDL3/SDL.h>
+#include <tracy/Tracy.hpp>
+
+#include <string>
+
+namespace dyna
+{
+
+namespace Game
+{
+
+namespace
+{
+
+// Scope guard for a Tracy section: entered on construction, left on
+// destruction. Leave()/Enter() allow the section to be suspended and resumed
+// by hand (menu_loop does this around a running game).
+//
+// An index of 0 means "not currently inside a section".
+struct TracySection
+{
+    explicit TracySection( const char* name ) { Enter( name ); }
+    ~TracySection() { Leave(); }
+
+    // The guard owns the section index; a copy would leave the same section
+    // twice.
+    TracySection( const TracySection& ) = delete;
+    TracySection& operator=( const TracySection& ) = delete;
+
+    // Enters a section called `name`. If the guard is still inside a section,
+    // that one is left first so its index is not lost.
+    void Enter( const char* name )
+    {
+        Leave();
+        idx = TracySectionEnter( "%s", name );
+    }
+
+    // Leaves the current section, if any. Safe to call repeatedly.
+    void Leave()
+    {
+        if( idx > 0 )
+        {
+            TracySectionLeave( idx );
+            idx = 0;
+        }
+    }
+
+private:
+    uint32_t idx = 0;
+};
+
+SDL_Keycode key = 0;   // most recently pressed movement key
+bool help = false;
+
+// Runs a single level until it ends. The return value tells the caller
+// whether the whole application should quit (true: the window was closed) or
+// whether only the level is over (false: escape, death, or exit reached).
+bool level_loop( World& world )
+{
+    const std::string section_name = std::string( "Level " ) + world.name();
+    TracySection section( section_name.c_str() );
+
+    Player* p = world.player();
+
+    // Remember which arrow key is driving the player and start moving.
+    const auto steer = [p]( SDL_Keycode pressed, Action dir )
+    {
+        key = pressed;
+        p->move( dir );
+    };
+    const auto is_arrow = []( SDL_Keycode k )
+    {
+        return k == SDLK_LEFT || k == SDLK_RIGHT || k == SDLK_UP || k == SDLK_DOWN;
+    };
+
+    for( ;; )
+    {
+        SDL_Event ev;
+        while( SDL_PollEvent( &ev ) )
+        {
+            if( ev.type == SDL_EVENT_QUIT )
+                return true;
+
+            if( ev.type == SDL_EVENT_KEY_DOWN && !ev.key.repeat )
+            {
+                switch( ev.key.key )
+                {
+                case SDLK_ESCAPE:
+                    world.killed = true;
+                    return false;
+                case SDLK_LEFT:
+                    steer( SDLK_LEFT, Action::left );
+                    break;
+                case SDLK_RIGHT:
+                    steer( SDLK_RIGHT, Action::right );
+                    break;
+                case SDLK_UP:
+                    steer( SDLK_UP, Action::up );
+                    break;
+                case SDLK_DOWN:
+                    steer( SDLK_DOWN, Action::down );
+                    break;
+                case SDLK_SPACE:
+                {
+                    // Drop the bomb on the tile under the player's centre.
+                    const int tx = ( p->getx() + 32 ) / 64;
+                    const int ty = ( p->gety() + 32 ) / 64;
+                    world.map().place_bomb( tx, ty );
+                    break;
+                }
+                default:
+                    break;
+                }
+            }
+            else if( ev.type == SDL_EVENT_KEY_UP )
+            {
+                // Releasing an arrow stops the player only if that arrow is
+                // the most recently pressed one.
+                const SDL_Keycode released = ev.key.key;
+                if( is_arrow( released ) && released == key )
+                    p->move( Action::wait );
+            }
+        }
+
+        Gfx::clear();
+
+        Timer::tick();
+
+        world.tick();
+        world.draw();
+
+        Gfx::swap();
+
+        const bool level_over = world.killed || world.next_level;
+        if( level_over )
+            return false;
+    }
+}
+
+// Plays levels 1 through 9 in order. Returns true if the application should
+// quit, false when control should go back to the menu.
+bool new_game()
+{
+    TracySection section( "In-game" );
+
+    for( int level = 1; level < 10; ++level )
+    {
+        const std::string level_file = data_path( "data/levels/" + std::to_string( level ) );
+        World world( level_file, true );
+
+        const bool quit_requested = level_loop( world );
+        if( quit_requested )
+            return true;   // window closed
+
+        if( world.killed )
+            return false;   // died or escaped to the menu
+    }
+
+    return false;   // cleared the last level
+}
+
+} // namespace
+
+// Main menu: animates the menu level in the background and waits for input.
+// Space starts a game, H toggles the help screen, Escape or closing the
+// window returns to the caller.
+void menu_loop()
+{
+    constexpr const char* sectionName = "Main menu";
+    TracySection section( sectionName );
+
+    World world( data_path( "data/levels/menu" ), false );
+
+    for( ;; )
+    {
+        SDL_Event ev;
+        while( SDL_PollEvent( &ev ) )
+        {
+            if( ev.type == SDL_EVENT_QUIT )
+                return;
+
+            const bool fresh_press = ev.type == SDL_EVENT_KEY_DOWN && !ev.key.repeat;
+            if( !fresh_press )
+                continue;
+
+            const SDL_Keycode pressed = ev.key.key;
+            if( pressed == SDLK_ESCAPE )
+            {
+                return;
+            }
+            else if( pressed == SDLK_SPACE )
+            {
+                // The menu section is suspended while a game is running.
+                section.Leave();
+                if( new_game() )
+                    return;   // window closed during play
+                section.Enter( sectionName );
+            }
+            else if( pressed == SDLK_H )
+            {
+                help = !help;
+            }
+        }
+
+        Gfx::clear();
+
+        Timer::tick();
+        world.tick();
+        world.draw();
+
+        if( !help )
+            Gfx::show_menu();
+        else
+            Gfx::show_help();
+
+        Gfx::swap();
+    }
+}
+
+} // namespace Game
+
+}
--- a/examples/dyna/src/game.hpp
+++ b/examples/dyna/src/game.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+namespace dyna
+{
+
+// Top-level game flow, ported from game.cs. The C# original kept the running
+// game's state (player, map, win/lose flags) in static fields; that state now
+// lives in a World object owned by the loops below, so nothing leaks out here.
+namespace Game
+{
+void menu_loop();
+}
+
+}
--- a/examples/dyna/src/gfx.cpp
+++ b/examples/dyna/src/gfx.cpp
@@ -0,0 +1,517 @@
+#include "gfx.hpp"
+
+#include "texture.hpp"
+#include "timer.hpp"
+
+#include <SDL3/SDL.h>
+#include <tracy/Tracy.hpp>
+
+#include <cassert>
+#include <cstdio>
+#include <vector>
+
+namespace dyna
+{
+
+namespace
+{
+
+SDL_Window* g_window = nullptr;
+SDL_GLContext g_gl_context = nullptr;
+
+GLuint g_program = 0;
+GLuint g_vao = 0;
+GLuint g_vbo = 0;
+
+// Current draw state, applied to every quad appended to the batch.
+GLuint g_current_tex = 0;
+int g_current_layer = 0;
+float g_alpha = 1.0f;
+
+// One vertex of the streaming batch: screen position, atlas-array texcoord,
+// the array layer to sample and a per-vertex alpha multiplier.
+struct GlVert
+{
+    float px, py, tx, ty, layer, a;   // six floats; init_quad_vao reads them at byte offsets 0, 8, 16 and 20
+};
+
+// A run of consecutive vertices that share one texture, drawn in a single call.
+struct DrawCmd
+{
+    GLuint tex;      // texture bound to GL_TEXTURE_2D_ARRAY for this run
+    GLsizei count;   // number of vertices in the run (draw_quad adds 6 per quad)
+};
+
+std::vector<GlVert> g_verts;
+std::vector<DrawCmd> g_cmds;
+
+const char* VERT_SRC = R"(
+#version 330 core
+uniform mat4 uProjection;
+layout(location = 0) in vec2 aPosition;
+layout(location = 1) in vec2 aTexCoord;
+layout(location = 2) in float aLayer;
+layout(location = 3) in float aAlpha;
+out vec3 vTexCoord;
+out float vAlpha;
+void main() {
+    gl_Position = uProjection * vec4(aPosition, 0.0, 1.0);
+    vTexCoord = vec3(aTexCoord, aLayer);
+    vAlpha = aAlpha;
+}
+)";
+
+const char* FRAG_SRC = R"(
+#version 330 core
+uniform sampler2DArray uTexture;
+in vec3 vTexCoord;
+in float vAlpha;
+out vec4 fragColor;
+void main() {
+    fragColor = texture(uTexture, vTexCoord) * vec4(1.0, 1.0, 1.0, vAlpha);
+}
+)";
+
+// Compiles `src` as a shader of the given type. Returns the shader object, or
+// 0 after printing the info log to stderr if compilation failed.
+GLuint compile_shader( GLenum type, const char* src )
+{
+    ZoneScoped;
+    const GLuint shader = glCreateShader( type );
+    glShaderSource( shader, 1, &src, nullptr );
+    glCompileShader( shader );
+
+    GLint compiled = 0;
+    glGetShaderiv( shader, GL_COMPILE_STATUS, &compiled );
+    if( compiled )
+        return shader;
+
+    // Failure path: report the log and release the shader object.
+    char log[512];
+    glGetShaderInfoLog( shader, 512, nullptr, log );
+    std::fprintf( stderr, "Shader compile error: %s\n", log );
+    glDeleteShader( shader );
+    return 0;
+}
+
+// Builds the sprite program into g_program and uploads its constant uniforms
+// (projection matrix, texture unit 0). Returns false if compiling or linking
+// fails; on a link failure g_program is reset to 0.
+bool init_shaders()
+{
+    ZoneScoped;
+    const GLuint vert = compile_shader( GL_VERTEX_SHADER, VERT_SRC );
+    if( vert == 0 )
+        return false;
+
+    const GLuint frag = compile_shader( GL_FRAGMENT_SHADER, FRAG_SRC );
+    if( frag == 0 )
+    {
+        glDeleteShader( vert );
+        return false;
+    }
+
+    g_program = glCreateProgram();
+    glAttachShader( g_program, vert );
+    glAttachShader( g_program, frag );
+    glLinkProgram( g_program );
+    // The shader objects are released right after the link call.
+    glDeleteShader( vert );
+    glDeleteShader( frag );
+
+    GLint linked = 0;
+    glGetProgramiv( g_program, GL_LINK_STATUS, &linked );
+    if( !linked )
+    {
+        char log[512];
+        glGetProgramInfoLog( g_program, 512, nullptr, log );
+        std::fprintf( stderr, "Program link error: %s\n", log );
+        glDeleteProgram( g_program );
+        g_program = 0;
+        return false;
+    }
+
+    // Bottom-left origin orthographic projection, matching the original
+    // gluOrtho2D(0, w, 0, h) so the ported draw code carries over verbatim.
+    const float left = 0.0f;
+    const float right = static_cast<float>( Gfx::w );
+    const float bottom = 0.0f;
+    const float top = static_cast<float>( Gfx::h );
+
+    const float scale_x = 2.0f / ( right - left );
+    const float scale_y = 2.0f / ( top - bottom );
+    const float shift_x = -( right + left ) / ( right - left );
+    const float shift_y = -( top + bottom ) / ( top - bottom );
+
+    // Passed with transpose = GL_FALSE, so each row below is one column.
+    const float proj[16] = {
+        scale_x, 0.0f, 0.0f, 0.0f,
+        0.0f, scale_y, 0.0f, 0.0f,
+        0.0f, 0.0f, -1.0f, 0.0f,
+        shift_x, shift_y, 0.0f, 1.0f };
+
+    glUseProgram( g_program );
+    const GLint projection_loc = glGetUniformLocation( g_program, "uProjection" );
+    glUniformMatrix4fv( projection_loc, 1, GL_FALSE, proj );
+    const GLint texture_loc = glGetUniformLocation( g_program, "uTexture" );
+    glUniform1i( texture_loc, 0 );
+    glUseProgram( 0 );
+    return true;
+}
+
+// Creates the VAO/VBO pair used by the batch and describes the GlVert layout
+// to OpenGL: position (2 floats), texcoord (2 floats), layer (1), alpha (1).
+void init_quad_vao()
+{
+    ZoneScoped;
+    glGenVertexArrays( 1, &g_vao );
+    glGenBuffers( 1, &g_vbo );
+
+    glBindVertexArray( g_vao );
+    glBindBuffer( GL_ARRAY_BUFFER, g_vbo );
+
+    const GLsizei stride = sizeof( GlVert );
+
+    // Enables attribute `index` as `components` floats starting at the
+    // `first_float`-th float of a vertex.
+    const auto float_attrib = [stride]( GLuint index, GLint components, unsigned first_float )
+    {
+        glEnableVertexAttribArray( index );
+        glVertexAttribPointer( index, components, GL_FLOAT, GL_FALSE, stride,
+                               reinterpret_cast<const void*>( sizeof( float ) * first_float ) );
+    };
+
+    float_attrib( 0, 2, 0 );   // px, py
+    float_attrib( 1, 2, 2 );   // tx, ty
+    float_attrib( 2, 1, 4 );   // layer
+    float_attrib( 3, 1, 5 );   // a
+
+    glBindVertexArray( 0 );
+    glBindBuffer( GL_ARRAY_BUFFER, 0 );
+}
+
+// Uploads the accumulated vertices and issues one draw call per DrawCmd, in
+// submission order, then empties the batch. Does nothing when the batch is
+// empty.
+void flush_batch()
+{
+    ZoneScoped;
+    if( g_verts.empty() )
+        return;
+
+    const GLsizeiptr upload_bytes = static_cast<GLsizeiptr>( g_verts.size() * sizeof( GlVert ) );
+    glBindBuffer( GL_ARRAY_BUFFER, g_vbo );
+    glBufferData( GL_ARRAY_BUFFER, upload_bytes, g_verts.data(), GL_STREAM_DRAW );
+
+    glUseProgram( g_program );
+    glBindVertexArray( g_vao );
+
+    // Each command covers the vertices directly after the previous one.
+    GLint first_vertex = 0;
+    for( auto it = g_cmds.begin(); it != g_cmds.end(); ++it )
+    {
+        glBindTexture( GL_TEXTURE_2D_ARRAY, it->tex );
+        glDrawArrays( GL_TRIANGLES, first_vertex, it->count );
+        first_vertex += it->count;
+    }
+
+    glBindVertexArray( 0 );
+    glUseProgram( 0 );
+    glBindBuffer( GL_ARRAY_BUFFER, 0 );
+
+    g_cmds.clear();
+    g_verts.clear();
+}
+
+// Frame image capture, following the OpenGL example in the Tracy manual. The
+// backbuffer is downscaled on the GPU to a small fixed size and read back
+// asynchronously, so a screenshot can be attached to every frame without
+// stalling the CPU on the GPU. Several buffer sets are cycled because rendering
+// runs a few frames ahead of the GPU.
+// Half the render resolution, preserving its aspect ratio; both dimensions
+// stay divisible by 4 as FrameImage requires.
+constexpr int FI_W = Gfx::w / 2;
+constexpr int FI_H = Gfx::h / 2;
+constexpr int FI_COUNT = 4;
+
+GLuint g_fi_texture[FI_COUNT];
+GLuint g_fi_framebuffer[FI_COUNT];
+GLuint g_fi_pbo[FI_COUNT];
+GLsync g_fi_fence[FI_COUNT] = {};
+int g_fi_idx = 0;
+std::vector<int> g_fi_queue;
+
+// Allocates FI_COUNT capture sets. Each set is an FI_W x FI_H RGBA texture, a
+// framebuffer with that texture as colour attachment, and a pixel-pack
+// buffer large enough for one read-back of it.
+void init_frame_images()
+{
+    ZoneScoped;
+    glGenTextures( FI_COUNT, g_fi_texture );
+    glGenFramebuffers( FI_COUNT, g_fi_framebuffer );
+    glGenBuffers( FI_COUNT, g_fi_pbo );
+
+    const GLsizeiptr readback_bytes = FI_W * FI_H * 4;   // 4 bytes per RGBA pixel
+
+    for( int set = 0; set < FI_COUNT; ++set )
+    {
+        const GLuint texture = g_fi_texture[set];
+
+        glBindTexture( GL_TEXTURE_2D, texture );
+        glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
+        glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
+        glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, FI_W, FI_H, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr );
+
+        glBindFramebuffer( GL_FRAMEBUFFER, g_fi_framebuffer[set] );
+        glFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture, 0 );
+
+        glBindBuffer( GL_PIXEL_PACK_BUFFER, g_fi_pbo[set] );
+        glBufferData( GL_PIXEL_PACK_BUFFER, readback_bytes, nullptr, GL_STREAM_READ );
+    }
+
+    // Leave the default framebuffer and no pack buffer bound.
+    glBindFramebuffer( GL_FRAMEBUFFER, 0 );
+    glBindBuffer( GL_PIXEL_PACK_BUFFER, 0 );
+}
+
+// Releases everything init_frame_images and capture_frame_image created.
+//
+// Captures that are still listed in g_fi_queue each own a fence that has not
+// been deleted yet (capture_frame_image only deletes a fence when it
+// dequeues the capture), so those fences are deleted here before the
+// textures, framebuffers and buffers go away. The queue and the ring cursor
+// are reset so no stale state remains.
+void shutdown_frame_images()
+{
+    ZoneScoped;
+    for( const int idx : g_fi_queue )
+    {
+        glDeleteSync( g_fi_fence[idx] );
+        g_fi_fence[idx] = nullptr;
+    }
+    g_fi_queue.clear();
+    g_fi_idx = 0;
+
+    glDeleteTextures( FI_COUNT, g_fi_texture );
+    glDeleteFramebuffers( FI_COUNT, g_fi_framebuffer );
+    glDeleteBuffers( FI_COUNT, g_fi_pbo );
+}
+
+// Send any captures the GPU has already finished, then queue a capture of the
+// frame just rendered. Call after the batch is flushed but before swapping.
+void capture_frame_image()
+{
+    ZoneScoped;
+
+    // Hand finished captures from earlier frames to the profiler. The queue
+    // size is the number of frames we are still ahead of the GPU, which is the
+    // frame lag Tracy needs as the FrameImage offset.
+    while( !g_fi_queue.empty() )
+    {
+        const int idx = g_fi_queue.front();   // oldest pending capture
+        if( glClientWaitSync( g_fi_fence[idx], 0, 0 ) == GL_TIMEOUT_EXPIRED ) break;   // zero timeout: poll only; stop at the first unfinished capture
+        glDeleteSync( g_fi_fence[idx] );
+        glBindBuffer( GL_PIXEL_PACK_BUFFER, g_fi_pbo[idx] );
+        void* ptr = glMapBufferRange( GL_PIXEL_PACK_BUFFER, 0, FI_W * FI_H * 4, GL_MAP_READ_BIT );   // NOTE(review): ptr is used without a null check
+        FrameImage( ptr, FI_W, FI_H, g_fi_queue.size(), true );   // queue size is read before this entry is erased
+        glUnmapBuffer( GL_PIXEL_PACK_BUFFER );
+        g_fi_queue.erase( g_fi_queue.begin() );
+    }
+
+    // Downscale the current backbuffer into the next buffer set and start an
+    // asynchronous read-back, signalled by a fence.
+    assert( g_fi_queue.empty() || g_fi_queue.front() != g_fi_idx );  // buffer overrun: the set about to be reused is still pending
+    glBindFramebuffer( GL_DRAW_FRAMEBUFFER, g_fi_framebuffer[g_fi_idx] );
+    glBlitFramebuffer( 0, 0, Gfx::w, Gfx::h, 0, 0, FI_W, FI_H, GL_COLOR_BUFFER_BIT, GL_LINEAR );   // full render size -> FI_W x FI_H
+    glBindFramebuffer( GL_DRAW_FRAMEBUFFER, 0 );
+    glBindFramebuffer( GL_READ_FRAMEBUFFER, g_fi_framebuffer[g_fi_idx] );
+    glBindBuffer( GL_PIXEL_PACK_BUFFER, g_fi_pbo[g_fi_idx] );
+    glReadPixels( 0, 0, FI_W, FI_H, GL_RGBA, GL_UNSIGNED_BYTE, nullptr );   // nullptr = offset 0 into the bound pack buffer
+    glBindFramebuffer( GL_READ_FRAMEBUFFER, 0 );
+    g_fi_fence[g_fi_idx] = glFenceSync( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 );
+    g_fi_queue.emplace_back( g_fi_idx );
+    g_fi_idx = ( g_fi_idx + 1 ) % FI_COUNT;   // advance the ring cursor
+}
+
+} // namespace
+
+namespace Render
+{
+
+// Sets up the renderer: shaders first, then the batch VAO and the frame-image
+// capture buffers. Returns false (and sets up nothing further) if the
+// shaders could not be built.
+bool init()
+{
+    ZoneScoped;
+    const bool shaders_ready = init_shaders();
+    if( shaders_ready )
+    {
+        init_quad_vao();
+        init_frame_images();
+    }
+    return shaders_ready;
+}
+
+// Tears down what init() created and zeroes the handles. Handles that are
+// already 0 are skipped.
+void shutdown()
+{
+    ZoneScoped;
+    shutdown_frame_images();
+
+    if( g_vbo != 0 )
+    {
+        glDeleteBuffers( 1, &g_vbo );
+    }
+    if( g_vao != 0 )
+    {
+        glDeleteVertexArrays( 1, &g_vao );
+    }
+    if( g_program != 0 )
+    {
+        glDeleteProgram( g_program );
+    }
+
+    g_program = 0;
+    g_vao = 0;
+    g_vbo = 0;
+}
+
+// Selects the texture array and layer stamped onto every quad appended to
+// the batch from now on. No GL call is made here.
+void use_texture( GLuint tex, int layer )
+{
+    g_current_layer = layer;
+    g_current_tex = tex;
+}
+
+GLuint make_texture( int w, int h, int layers, const void* rgba )   // creates a w x h x layers RGBA 2D-array texture from `rgba` and returns its name
+{
+    ZoneScoped;
+    GLuint tex = 0;
+    glGenTextures( 1, &tex );
+    glBindTexture( GL_TEXTURE_2D_ARRAY, tex );   // stays bound on return
+    glTexParameteri( GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
+    glTexParameteri( GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
+    glTexParameteri( GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
+    glTexParameteri( GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
+    glPixelStorei( GL_UNPACK_ALIGNMENT, 1 );   // global pixel-store state; not restored afterwards
+    glTexImage3D( GL_TEXTURE_2D_ARRAY, 0, GL_RGBA, w, h, layers, 0, GL_RGBA, GL_UNSIGNED_BYTE, rgba );   // presumably rgba holds `layers` consecutive w*h images - confirm against caller
+    return tex;
+}
+
+} // namespace Render
+
+namespace Gfx
+{
+
+void clear()   // start of a frame: clears the colour buffer
+{
+    glClear( GL_COLOR_BUFFER_BIT );
+}
+
+void swap()   // end of a frame: draw the batch, queue a frame capture, present, mark the frame for Tracy
+{
+    ZoneScoped;
+    flush_batch();            // must come first so the capture below sees the finished frame
+    capture_frame_image();    // reads the backbuffer, so it runs before the swap
+    SDL_GL_SwapWindow( g_window );
+    FrameMark;
+}
+
+void alpha( float a )   // sets the alpha multiplier stored in every vertex appended by later draw_quad calls
+{
+    g_alpha = a;
+}
+
+// Appends one textured quad to the batch using the current texture, layer
+// and alpha. `corners` holds the four corners in order around the quad.
+void draw_quad( const Vertex corners[4] )
+{
+    ZoneScoped;
+    // The quad is split into triangles (0,1,2) and (0,2,3). Vertices are
+    // appended in submission order so painter ordering (and the transient
+    // per-monster alpha) is preserved by the batch.
+    const int order[6] = { 0, 1, 2, 0, 2, 3 };
+    for( int k = 0; k < 6; ++k )
+    {
+        const Vertex& corner = corners[order[k]];
+        g_verts.push_back( { corner.x, corner.y, corner.u, corner.v,
+                             static_cast<float>( g_current_layer ), g_alpha } );
+    }
+
+    // Extend the last draw command when it uses the same texture, otherwise
+    // start a new one.
+    const bool starts_new_run = g_cmds.empty() || g_cmds.back().tex != g_current_tex;
+    if( starts_new_run )
+        g_cmds.push_back( { g_current_tex, 6 } );
+    else
+        g_cmds.back().count += 6;
+}
+
+// Draw a 64x64 quad whose top-left corner is at pixel (x, y), with y measured
+// from the top of the screen. The y coordinate is flipped against the screen
+// height `h` before the quad is handed to draw_quad, and the quad samples the
+// whole [0,1]x[0,1] texture range.
+void draw_sprite( int x, int y )
+{
+    ZoneScoped;
+
+    const float screen_h = static_cast<float>( h );
+    const float left = static_cast<float>( x );
+    const float right = left + 64.0f;
+    const float from_top = static_cast<float>( y );
+    const float top = screen_h - from_top;
+    const float bottom = screen_h - ( from_top + 64.0f );
+
+    const Vertex quad[4] = {
+        { left, top, 0.0f, 0.0f },
+        { right, top, 1.0f, 0.0f },
+        { right, bottom, 1.0f, 1.0f },
+        { left, bottom, 0.0f, 1.0f },
+    };
+    draw_quad( quad );
+}
+
+void draw_square( int x, int y )   // (x, y) is a grid cell, not a pixel position
+{
+    draw_sprite( x * 64, y * 64 );   // one cell is 64 px, the same size draw_sprite gives its quad
+}
+
+void show_help()   // help screen: full-window background plus a column of sample sprites
+{
+    ZoneScoped;
+    Textures::menu.bind();
+    // Background quad covering the whole w x h window.
+    const float fw = static_cast<float>( w );
+    const float fh = static_cast<float>( h );
+    Vertex bg[4] = {
+        { 0.0f, fh, 0.0f, 0.0f },
+        { fw, fh, 832.0f / 1024, 0.0f },                 // u spans 832 of 1024 texels
+        { fw, 0.0f, 832.0f / 1024, 704.0f / 1024 },      // v spans 704 of 1024 texels
+        { 0.0f, 0.0f, 0.0f, 704.0f / 1024 },
+    };
+    draw_quad( bg );
+    // Frame index shared by the animated samples: advances once every 40 timestamp units.
+    int t = static_cast<int>( Timer::get_timestamp() / 40 );
+    // Each bind() selects the texture used by the draw_sprite() call that follows it.
+    Textures::p_r.bind( t );
+    draw_sprite( 150, 85 );
+    Textures::m1_r.bind( t );
+    draw_sprite( 75, 160 );
+    Textures::m2_r.bind( t );
+    draw_sprite( 150, 160 );
+    Textures::m3_r.bind( t );
+    draw_sprite( 225, 160 );
+    Textures::bomb.bind( static_cast<int>( Timer::get_timestamp() / 100 % 2 ) );   // alternates between 0 and 1 every 100 units
+    draw_sprite( 150, 235 );
+    Textures::wall.bind();
+    draw_sprite( 150, 310 );
+    Textures::crate.bind();
+    draw_sprite( 150, 385 );
+    Textures::vortex.bind( t );
+    draw_sprite( 150, 460 );
+    Textures::bonus1.bind( t );
+    draw_sprite( 112, 535 );
+    Textures::bonus2.bind( t );
+    draw_sprite( 187, 535 );
+}
+
+// Draw the title screen from the menu texture: a 594x180 logo centred
+// horizontally 50 px below the top edge, and a 527 px wide prompt spanning
+// y = 20..335 (bottom-left origin). Both quads are centred with integer
+// division before the conversion to float.
+void show_menu()
+{
+    ZoneScoped;
+    Textures::menu.bind();
+
+    // Logo: its texture coordinates walk the region with u and v swapped
+    // relative to the screen axes.
+    const float logo_left = static_cast<float>( ( w - 594 ) / 2 );
+    const float logo_right = static_cast<float>( ( w + 594 ) / 2 );
+    const float logo_top = static_cast<float>( h - 50 );
+    const float logo_bottom = static_cast<float>( h - 50 - 180 );
+    const float logo_u_far = 1.0f - 180.0f / 1024;
+    const float logo_v_far = 594.0f / 1024;
+    const Vertex logo[4] = {
+        { logo_left, logo_top, 1.0f, 0.0f },
+        { logo_right, logo_top, 1.0f, logo_v_far },
+        { logo_right, logo_bottom, logo_u_far, logo_v_far },
+        { logo_left, logo_bottom, logo_u_far, 0.0f },
+    };
+    draw_quad( logo );
+
+    // Prompt: sampled upright from the rows below the 704-texel background area.
+    const float prompt_left = static_cast<float>( ( w - 527 ) / 2 );
+    const float prompt_right = static_cast<float>( ( w + 527 ) / 2 );
+    const float prompt_top = 335.0f;
+    const float prompt_bottom = 20.0f;
+    const float prompt_u_far = 527.0f / 1024;
+    const float prompt_v_near = 704.0f / 1024;
+    const float prompt_v_far = 1019.0f / 1024;
+    const Vertex prompt[4] = {
+        { prompt_left, prompt_top, 0.0f, prompt_v_near },
+        { prompt_right, prompt_top, prompt_u_far, prompt_v_near },
+        { prompt_right, prompt_bottom, prompt_u_far, prompt_v_far },
+        { prompt_left, prompt_bottom, 0.0f, prompt_v_far },
+    };
+    draw_quad( prompt );
+}
+
+} // namespace Gfx
+
+namespace Init
+{
+
+bool all()   // returns false at the first failing step; NOTE: earlier steps are not undone on failure
+{
+    ZoneScoped;
+    if( !SDL_Init( SDL_INIT_VIDEO ) )
+    {
+        std::fprintf( stderr, "SDL_Init failed: %s\n", SDL_GetError() );
+        return false;
+    }
+    // Request a double-buffered OpenGL 3.3 core-profile context; set before the window is created.
+    SDL_GL_SetAttribute( SDL_GL_DOUBLEBUFFER, 1 );
+    SDL_GL_SetAttribute( SDL_GL_CONTEXT_MAJOR_VERSION, 3 );
+    SDL_GL_SetAttribute( SDL_GL_CONTEXT_MINOR_VERSION, 3 );
+    SDL_GL_SetAttribute( SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE );
+    // Fixed-size window of Gfx::w x Gfx::h pixels.
+    g_window = SDL_CreateWindow( "Dyna.net", Gfx::w, Gfx::h, SDL_WINDOW_OPENGL );
+    if( !g_window )
+    {
+        std::fprintf( stderr, "SDL_CreateWindow failed: %s\n", SDL_GetError() );
+        return false;
+    }
+    // The GL context must exist before glad can resolve any GL entry point.
+    g_gl_context = SDL_GL_CreateContext( g_window );
+    if( !g_gl_context )
+    {
+        std::fprintf( stderr, "SDL_GL_CreateContext failed: %s\n", SDL_GetError() );
+        return false;
+    }
+    // Load the GL function pointers through SDL's loader; glad reports 0 on failure.
+    int version = gladLoadGL( (GLADloadfunc)SDL_GL_GetProcAddress );
+    if( version == 0 )
+    {
+        std::fprintf( stderr, "gladLoadGL failed\n" );
+        return false;
+    }
+
+    SDL_GL_SetSwapInterval( 1 );   // vsync; the game is time-based so speed is unaffected
+    // Global GL state: full-window viewport, opaque black clear colour, standard alpha blending.
+    glViewport( 0, 0, Gfx::w, Gfx::h );
+    glClearColor( 0.0f, 0.0f, 0.0f, 1.0f );
+    glEnable( GL_BLEND );
+    glBlendFunc( GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA );
+
+    if( !Render::init() ) return false;   // Render::init() fails when its shader setup fails; nothing is logged here
+    // Reset the timer, then preload the textures, as the last startup steps.
+    Timer::reset();
+    Textures::preload();
+    return true;
+}
+
+void shutdown()   // teardown in reverse order of Init::all(); handles are not reset, so call it once
+{
+    ZoneScoped;
+    Render::shutdown();                                            // GL objects go first, while the context still exists
+    if( g_gl_context ) SDL_GL_DestroyContext( g_gl_context );      // skipped when context creation never succeeded
+    if( g_window ) SDL_DestroyWindow( g_window );                  // skipped when window creation never succeeded
+    SDL_Quit();
+}
+
+} // namespace Init
+
+}
--- a/examples/dyna/src/gfx.hpp
+++ b/examples/dyna/src/gfx.hpp
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <glad/gl.h>
+
+namespace dyna
+{
+
+// Screen dimensions, matching the original 13x11 grid of 64px tiles.
+namespace Gfx
+{
+constexpr int w = 832;   // 13 tiles * 64 px
+constexpr int h = 704;   // 11 tiles * 64 px
+
+void clear();   // clears the colour buffer only
+void swap();    // flushes the pending batch, captures the frame image, swaps the window, marks a Tracy frame
+
+// Drawing primitives ported from gfx.cs. They render with the currently
+// bound texture (see Texture::bind) and the current alpha. The coordinate
+// system is bottom-left origin with y growing upward, exactly as the C#
+// gluOrtho2D setup; draw_sprite/draw_square take y measured from the top
+// and flip internally, so game-side coordinates stay top-left based.
+void alpha( float a );              // alpha stored with every vertex appended afterwards; not clamped
+void draw_sprite( int x, int y );   // pixel position of the top-left corner; the quad is 64x64 px
+void draw_square( int x, int y );   // grid position (multiplied by 64)
+
+// A single textured quad given four explicit (position, texcoord) corners,
+// used by the menu/help screens which sample rotated regions of the atlas.
+struct Vertex
+{
+    float x, y, u, v;   // position in pixels (bottom-left origin) and texture coordinates
+};
+void draw_quad( const Vertex corners[4] );   // appended to the batch as triangles (0,1,2) and (0,2,3)
+
+void show_help();   // help screen: menu background plus a column of sample sprites
+void show_menu();   // title screen: logo and prompt quads from the menu texture
+}
+
+// Renderer back end shared by the texture loaders.
+namespace Render
+{
+bool init();         // shaders + quad VAO + frame images; false when shader setup fails
+void shutdown();     // release the frame images, then delete the program and buffers
+
+// Select the array texture (and layer within it) used by subsequent draws.
+void use_texture( GLuint tex, int layer );
+
+// Upload `layers` tightly packed RGBA8 images of size w*h as one GL_TEXTURE_2D_ARRAY and return its
+// name. No GL error is checked: 0 comes back only if no name was generated. Leaves the texture bound.
+GLuint make_texture( int w, int h, int layers, const void* rgba );
+}
+
+// One-time startup/shutdown, ported from the Init class in gfx.cs.
+namespace Init
+{
+bool all();        // SDL, GL context, renderer, textures, timer; false at the first failing step
+void shutdown();   // renderer, GL context, window, then SDL_Quit()
+}
+
+}
--- a/examples/dyna/src/main.cpp
+++ b/examples/dyna/src/main.cpp
@@ -0,0 +1,38 @@
+#include "game.hpp"
+#include "gfx.hpp"
+
+#include <SDL3/SDL_main.h>
+#include <tracy/Tracy.hpp>
+
+#include <cstdlib>
+#include <new>
+
+// Route every heap allocation through Tracy so the profiler can track memory
+// usage. The default array forms (operator new[]/delete[]) and the nothrow
+// forms forward to these, so overriding the scalar operators covers them too.
+// Replacement scalar allocation function: allocates with malloc and reports
+// the block to Tracy.
+//
+// A replacement operator new must return a non-null pointer or throw
+// std::bad_alloc, also for zero-sized requests. std::malloc(0) is allowed to
+// return a null pointer, which would have turned a legal `new` of size 0 into a
+// spurious bad_alloc, so at least one byte is always requested. Tracy is still
+// told the size the caller asked for.
+void* operator new( std::size_t count )
+{
+    void* ptr = std::malloc( count != 0 ? count : 1 );
+    if( !ptr ) throw std::bad_alloc();
+    TracyAlloc( ptr, count );
+    return ptr;
+}
+
+void operator delete( void* ptr ) noexcept   // counterpart of the replaced operator new above
+{
+    TracyFree( ptr );   // report the release to Tracy before the memory is handed back
+    std::free( ptr );   // the block came from std::malloc in operator new
+}
+
+// Program entry point: initialise everything, run the menu loop until it
+// returns, then shut down. Exits with 1 when initialisation fails (in which
+// case neither the menu loop nor the shutdown runs) and with 0 otherwise.
+int main( int /*argc*/, char* /*argv*/[] )
+{
+    TracyNoop;
+
+    const bool started = dyna::Init::all();
+    if( started )
+    {
+        dyna::Game::menu_loop();
+        dyna::Init::shutdown();
+    }
+    return started ? 0 : 1;
+}
--- a/examples/dyna/src/map.cpp
+++ b/examples/dyna/src/map.cpp
@@ -0,0 +1,334 @@
+#include "map.hpp"
+
+#include "bomb.hpp"
+#include "bonus.hpp"
+#include "gfx.hpp"
+#include "monster.hpp"
+#include "player.hpp"
+#include "texture.hpp"
+#include "timer.hpp"
+#include "world.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <fstream>
+#include <sstream>
+
+#include <tracy/Tracy.hpp>
+
+namespace dyna
+{
+
+// ---- Field --------------------------------------------------------------
+
+// Build an explosion tile of the given shape whose animation clock starts at
+// the current timestamp.
+Field Field::explosion( ExplosionType t )
+{
+    return Field{ Kind::explosion, t, Timer::get_timestamp() };
+}
+
+// True for the tile kinds treated as solid: walls, crates and bombs.
+bool Field::solid() const
+{
+    return kind == Kind::wall || kind == Kind::crate || kind == Kind::bomb;
+}
+
+// Classify how this tile reacts to a blast: floor is Destruction::multi, a
+// crate is Destruction::single and every other kind is Destruction::none.
+Destruction Field::destructible() const
+{
+    if( kind == Kind::floor )
+    {
+        return Destruction::multi;
+    }
+    if( kind == Kind::crate )
+    {
+        return Destruction::single;
+    }
+    return Destruction::none;
+}
+
+// Draw this tile at grid cell (x, y).
+//
+// Walls draw only the wall texture. Every other kind draws sand first; crates
+// and explosions then add their own texture on top. Floor, bomb and vortex
+// tiles show plain sand here because the bomb and vortex sprites are drawn by
+// their entities.
+void Field::draw( int x, int y ) const
+{
+    if( kind == Kind::wall )
+    {
+        Textures::wall.bind();
+        Gfx::draw_square( x, y );
+        return;
+    }
+
+    Textures::sand.bind();
+    Gfx::draw_square( x, y );
+
+    if( kind == Kind::crate )
+    {
+        Textures::crate.bind();
+        Gfx::draw_square( x, y );
+    }
+    else if( kind == Kind::explosion )
+    {
+        // One step per 40 timestamp units; the 8-step cycle ping-pongs over
+        // frames 0,1,2,3,4,3,2,1.
+        const int step = static_cast<int>( ( Timer::get_timestamp() - tstart ) / 40 % 8 );
+        const int frame = ( step > 4 ) ? 8 - step : step;
+
+        switch( etype )
+        {
+        case ExplosionType::center:
+            Textures::e_c.bind( frame );
+            break;
+        case ExplosionType::vertical:
+            Textures::e_v.bind( frame );
+            break;
+        case ExplosionType::horizontal:
+            Textures::e_h.bind( frame );
+            break;
+        case ExplosionType::left:
+            Textures::e_le.bind( frame );
+            break;
+        case ExplosionType::right:
+            Textures::e_re.bind( frame );
+            break;
+        case ExplosionType::up:
+            Textures::e_ue.bind( frame );
+            break;
+        case ExplosionType::down:
+            Textures::e_de.bind( frame );
+            break;
+        }
+        Gfx::draw_square( x, y );
+    }
+}
+
+// ---- Map ----------------------------------------------------------------
+
+Map::Map( const std::string& fn )   // builds a playable map from the level file `fn`
+{
+    ZoneScoped;
+    ZoneText( fn.c_str(), fn.size() );   // tag the profiler zone with the level file name
+    // Order matters: crates need the parsed grid, monsters must avoid the crates.
+    load( fn );
+    generate_destructibles();
+    populate_map();
+}
+
+Map::~Map() = default;   // defined out of line: the unique_ptr members need the complete entity types
+
+// Parse a level file into the grid.
+//
+// The first line holds up to four integers: the crate count and the number of
+// type 1, 2 and 3 monsters. The remaining text is read as Y rows of X cells:
+// '.' is floor, '#' is wall and '@' is floor plus the player start. Any other
+// character, or running out of data, leaves the cell as floor.
+//
+// Line terminators are skipped without consuming a cell. That includes '\r':
+// previously a carriage return was counted as a cell, so a level saved with
+// CRLF line endings had every row shifted one column further than the last.
+//
+// If the file cannot be opened, an error is logged and the grid is all floor.
+void Map::load( const std::string& fn )
+{
+    ZoneScoped;
+    std::ifstream f( fn );
+    if( !f )
+    {
+        std::fprintf( stderr, "Cannot open level %s\n", fn.c_str() );
+        grid.assign( X * Y, Field::floor() );
+        return;
+    }
+
+    std::stringstream buf;
+    buf << f.rdbuf();
+    std::string content = buf.str();
+
+    // Header line: fields that are missing keep their previous values.
+    size_t nl = content.find( '\n' );
+    std::string header = ( nl == std::string::npos ) ? content : content.substr( 0, nl );
+    std::sscanf( header.c_str(), "%d %d %d %d", &destructibles, &m1, &m2, &m3 );
+
+    grid.assign( X * Y, Field::floor() );
+    px = -1;   // stays -1 when the level has no '@'
+
+    size_t p = ( nl == std::string::npos ) ? content.size() : nl + 1;
+    for( int ry = 0; ry < Y; ry++ )
+    {
+        for( int rx = 0; rx < X; rx++ )
+        {
+            // Past the end of the data every remaining cell reads as '\0' (floor).
+            char c = ( p < content.size() ) ? content[p++] : '\0';
+            switch( c )
+            {
+            case '.':
+                at( rx, ry ) = Field::floor();
+                break;
+            case '#':
+                at( rx, ry ) = Field::wall();
+                break;
+            case '@':
+                at( rx, ry ) = Field::floor();
+                px = rx;
+                py = ry;
+                break;
+            case '\r':
+            case '\n':
+                rx--;   // line terminators (LF or CRLF) don't consume a grid cell
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+// Decide whether cell (rx, ry) may receive a randomly placed object.
+//
+// The cell must not be a wall or a crate, and it must lie outside the square
+// of radius r centred on (pxx, pyy): more than r cells away on at least one
+// axis.
+bool Map::monster_ok( int rx, int ry, int pxx, int pyy, int r ) const
+{
+    const Field& cell = at( rx, ry );
+    if( !cell.is_floor_family() || cell.kind == Field::Kind::crate )
+    {
+        return false;
+    }
+
+    const bool near_x = std::abs( rx - pxx ) <= r;
+    const bool near_y = std::abs( ry - pyy ) <= r;
+    return !( near_x && near_y );
+}
+
+// Scatter `destructibles` crates over random cells accepted by monster_ok()
+// with radius 1 around the player start.
+//
+// Placement is rejection sampling, so it only terminates while a free cell is
+// left. The requested count comes straight from the level header; a value
+// larger than the number of eligible cells, or a negative one, used to loop
+// forever. The count is therefore clamped to what fits, and `destructibles`
+// is updated so get_crates() reports the number of crates actually placed.
+// For a valid header the sequence of RNG draws is unchanged.
+void Map::generate_destructibles()
+{
+    ZoneScoped;
+
+    // Every placed crate makes exactly one eligible cell ineligible, so the
+    // number of eligible cells is the maximum number of crates.
+    int eligible = 0;
+    for( int ry = 0; ry < Y; ry++ )
+        for( int rx = 0; rx < X; rx++ )
+            if( monster_ok( rx, ry, px, py, 1 ) ) eligible++;
+
+    if( destructibles < 0 ) destructibles = 0;
+    if( destructibles > eligible )
+    {
+        std::fprintf( stderr, "Level asks for %d crates but only %d fit\n", destructibles, eligible );
+        destructibles = eligible;
+    }
+
+    int i = destructibles;
+    while( i > 0 )
+    {
+        int rx = RNG::next( X );
+        int ry = RNG::next( Y );
+        if( monster_ok( rx, ry, px, py, 1 ) )
+        {
+            at( rx, ry ) = Field::crate();
+            i--;
+        }
+    }
+}
+
+// Spawn the m1/m2/m3 monsters requested by the level header on random cells
+// accepted by monster_ok() with radius 2 around the player start.
+//
+// Placement is rejection sampling, so it only terminates when at least one
+// cell qualifies; that is now checked up front instead of spinning forever.
+// Counts are compared with `> 0`, so a negative header value spawns nothing
+// rather than adding monsters until the counter wraps. For a valid level the
+// sequence of RNG draws is unchanged.
+void Map::populate_map()
+{
+    ZoneScoped;
+
+    // Monsters do not alter the grid, so eligibility is fixed for the whole call.
+    bool any_cell = false;
+    for( int ry = 0; ry < Y && !any_cell; ry++ )
+        for( int rx = 0; rx < X && !any_cell; rx++ )
+            any_cell = monster_ok( rx, ry, px, py, 2 );
+
+    if( !any_cell )
+    {
+        if( m1 > 0 || m2 > 0 || m3 > 0 )
+            std::fprintf( stderr, "No free cell to spawn monsters on\n" );
+        return;
+    }
+
+    for( int type = 1; type <= 3; type++ )
+    {
+        int count = ( type == 1 ) ? m1 : ( type == 2 ) ? m2
+                                                       : m3;
+        while( count > 0 )
+        {
+            int rx = RNG::next( X );
+            int ry = RNG::next( Y );
+            if( monster_ok( rx, ry, px, py, 2 ) )
+            {
+                monsters.push_back( std::make_unique<Monster>( type, rx, ry ) );
+                count--;
+            }
+        }
+    }
+}
+
+// Draw the whole map: first every tile, row by row from the top-left cell,
+// then the bombs, the monsters and finally the bonuses on top.
+void Map::draw()
+{
+    ZoneScoped;
+
+    for( int cell = 0; cell < X * Y; cell++ )
+    {
+        const int rx = cell % X;
+        const int ry = cell / X;
+        at( rx, ry ).draw( rx, ry );
+    }
+
+    for( auto& bomb : bombs )
+    {
+        bomb->draw();
+    }
+    for( auto& monster : monsters )
+    {
+        monster->draw();
+    }
+    for( auto& bonus : bonuses )
+    {
+        bonus->draw();
+    }
+}
+
+// Advance everything the map owns by one frame.
+//
+// In order: bombs tick and finished ones are removed; monsters tick, each dead
+// one queues a respawn timer (10 s, 20 s or 30 s for type 1, 2 or 3) and is
+// removed; due respawns are placed; bonuses tick; and once the crate counter
+// reaches zero the exit portal is opened. Respawning and portal placement
+// need the player's position, so both are skipped while world.player() is
+// null (the player-less menu screen).
+void Map::tick( World& world )
+{
+    ZoneScoped;
+
+    // Bombs.
+    for( auto& bomb : bombs ) bomb->tick( world );
+    const auto dead_bombs = std::remove_if( bombs.begin(), bombs.end(),
+                                            []( const std::unique_ptr<Bomb>& bomb ) { return bomb->is_dead(); } );
+    bombs.erase( dead_bombs, bombs.end() );
+
+    // Monsters: all of them tick before any is inspected for death.
+    for( auto& monster : monsters ) monster->tick( world );
+    for( const auto& monster : monsters )
+    {
+        if( !monster->is_dead() ) continue;
+
+        int delay = 30000;
+        if( monster->type() == 1 )
+            delay = 10000;
+        else if( monster->type() == 2 )
+            delay = 20000;
+        mwait.push_back( { monster->type(), Timer::get_timestamp() + delay } );
+    }
+    const auto dead_monsters = std::remove_if( monsters.begin(), monsters.end(),
+                                               []( const std::unique_ptr<Monster>& monster ) { return monster->is_dead(); } );
+    monsters.erase( dead_monsters, monsters.end() );
+
+    Player* player = world.player();
+
+    // Draw random cells until one passes monster_ok() at the given distance
+    // from the player's current tile. Only called while `player` is non-null.
+    struct Cell
+    {
+        int x;
+        int y;
+    };
+    const auto pick_cell = [this, player]( int radius ) {
+        Cell c{ 0, 0 };
+        do
+        {
+            c.x = RNG::next( X );
+            c.y = RNG::next( Y );
+        } while( !monster_ok( c.x, c.y, player->getx() / 64, player->gety() / 64, radius ) );
+        return c;
+    };
+
+    // Respawn monsters whose wait has elapsed; everything else stays queued.
+    const std::int64_t now = Timer::get_timestamp();
+    std::vector<MWait> still_waiting;
+    for( const MWait& pending : mwait )
+    {
+        if( !player || pending.time >= now )
+        {
+            still_waiting.push_back( pending );
+            continue;
+        }
+
+        const Cell spot = pick_cell( 3 );
+        auto monster = std::make_unique<Monster>( pending.type, spot.x, spot.y );
+        monster->set_action( Action::appear );
+        monsters.push_back( std::move( monster ) );
+    }
+    mwait = std::move( still_waiting );
+
+    // Bonuses.
+    for( auto& bonus : bonuses ) bonus->tick( world );
+
+    // Once every crate is gone, open the exit portal somewhere clear. The
+    // counter is pushed below zero so this runs only once.
+    if( world.crates_left == 0 && player )
+    {
+        world.crates_left--;
+
+        const Cell spot = pick_cell( 4 );
+        at( spot.x, spot.y ) = Field::vortex();
+        bonuses.push_back( std::make_unique<Vortex>( spot.x, spot.y ) );
+    }
+}
+
+std::unique_ptr<Player> Map::create_player() const   // caller owns the returned player
+{
+    return std::make_unique<Player>( px, py );   // (px, py) is the '@' cell recorded by load(), in grid units
+}
+
+// Put a bomb on grid cell (x, y): the tile becomes a bomb tile and a Bomb
+// entity is added. Nothing happens when the tile is a wall or already holds a
+// bomb.
+void Map::place_bomb( int x, int y )
+{
+    Field& cell = at( x, y );
+    if( !cell.is_floor_family() || cell.kind == Field::Kind::bomb )
+    {
+        return;
+    }
+
+    cell = Field::bomb();
+    bombs.push_back( std::make_unique<Bomb>( x, y ) );
+}
+
+// True when any monster is on the same tile as pixel position (tx, ty). Both
+// positions are snapped to the tile containing the point 32 px right of and
+// below them before they are compared.
+bool Map::monster_collide( int tx, int ty ) const
+{
+    const int tile_x = ( tx + 32 ) / 64;
+    const int tile_y = ( ty + 32 ) / 64;
+
+    return std::any_of( monsters.begin(), monsters.end(),
+                        [tile_x, tile_y]( const std::unique_ptr<Monster>& monster ) {
+                            return ( monster->getx() + 32 ) / 64 == tile_x &&
+                                   ( monster->gety() + 32 ) / 64 == tile_y;
+                        } );
+}
+
+}
--- a/examples/dyna/src/map.hpp
+++ b/examples/dyna/src/map.hpp
@@ -0,0 +1,120 @@
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace dyna
+{
+
+class Player;
+class Bomb;
+class Monster;
+class Vortex;
+class World;
+
+// How a tile reacts to an explosion sweeping through it; see Field::destructible().
+enum class Destruction
+{
+    none,    // blocks the blast (wall, bomb, existing explosion, vortex)
+    single,  // destroyed and stops the blast (crate)
+    multi    // passable, blast continues (floor)
+};
+
+// A grid cell. The C# version used a small class hierarchy rooted at a Field
+// interface; since the variants differ only in a couple of flags and how they
+// draw, this collapses them into one value type tagged by Kind. Note that in
+// the original everything except Wall derived from Floor, so the "is Floor"
+// checks there map to "kind != Wall" here.
+struct Field
+{
+    enum class Kind
+    {
+        floor,
+        wall,
+        crate,
+        bomb,       // tile occupied by a live bomb (solid, indestructible)
+        explosion,  // transient blast tile
+        vortex      // level exit portal
+    };
+    // Which explosion sprite set an explosion tile is drawn with.
+    enum class ExplosionType
+    {
+        center,
+        vertical,
+        horizontal,
+        left,
+        right,
+        down,
+        up
+    };
+    // Member order matters: the factories below use positional aggregate initialisation.
+    Kind kind = Kind::floor;
+    ExplosionType etype = ExplosionType::center;   // only read when kind is explosion
+    std::int64_t tstart = 0;   // explosion animation start, set on creation
+    // Factories, one per kind; only explosion() fills in etype and tstart.
+    static Field floor() { return Field{}; }
+    static Field wall() { return Field{ Kind::wall, {}, 0 }; }
+    static Field crate() { return Field{ Kind::crate, {}, 0 }; }
+    static Field bomb() { return Field{ Kind::bomb, {}, 0 }; }
+    static Field vortex() { return Field{ Kind::vortex, {}, 0 }; }
+    static Field explosion( ExplosionType t );   // stamps tstart with the current timestamp
+
+    bool solid() const;                 // true for wall, crate and bomb
+    Destruction destructible() const;   // floor: multi, crate: single, anything else: none
+    void draw( int x, int y ) const;    // (x, y) is the grid cell to draw at
+
+    bool is_floor_family() const { return kind != Kind::wall; }   // every kind except wall
+};
+
+class Map   // the level grid plus the bombs, monsters and bonuses living on it
+{
+public:
+    explicit Map( const std::string& fn );   // loads the level file, then places crates and monsters
+    ~Map();   // defined in map.cpp where the entity types are complete
+    // Cell access in grid coordinates; the index is not range-checked.
+    Field& at( int x, int y ) { return grid[index( x, y )]; }
+    const Field& at( int x, int y ) const { return grid[index( x, y )]; }
+
+    void draw();                  // tiles first, then bombs, monsters and bonuses
+    void tick( World& world );    // advances the entities, respawns monsters, opens the exit portal
+
+    int getx() const { return X; }                     // grid width in cells
+    int gety() const { return Y; }                     // grid height in cells
+    int get_crates() const { return destructibles; }   // crate count from the level header
+
+    std::unique_ptr<Player> create_player() const;   // new player on the start cell (px, py)
+
+    void place_bomb( int x, int y );                   // grid coordinates; no-op on a wall or an existing bomb tile
+    bool monster_collide( int tx, int ty ) const;      // pixel coordinates; true if a monster shares that tile
+
+private:
+    static constexpr int X = 13, Y = 11;   // grid size in cells
+
+    // Deferred monster respawn timer, mirroring Map.MWait.
+    struct MWait
+    {
+        int type;             // 1, 2 or 3
+        std::int64_t time;    // timestamp at which it respawns
+    };
+
+    static int index( int x, int y ) { return x * Y + y; }   // column-major: cells of one column are adjacent
+
+    void load( const std::string& fn );     // parses the header counts and the tile grid
+    void generate_destructibles();          // scatters the crates at random
+    void populate_map();                    // spawns the initial monsters at random
+    bool monster_ok( int rx, int ry, int px, int py, int r ) const;   // not wall/crate and more than r cells from (px, py) on some axis
+
+    std::vector<Field> grid;                // X * Y cells, addressed through index()
+    int px = -10, py = -10;                 // player start cell; load() sets px to -1 until an '@' is found
+    int destructibles = 0;                  // first header field: number of crates
+    int m1 = 0, m2 = 0, m3 = 0;             // remaining header fields: monster count per type
+
+    std::vector<std::unique_ptr<Bomb>> bombs;
+    std::vector<std::unique_ptr<Monster>> monsters;
+    std::vector<std::unique_ptr<Vortex>> bonuses;   // currently only ever holds the exit portal
+    std::vector<MWait> mwait;                       // dead monsters waiting to respawn
+};
+
+}
--- a/examples/dyna/src/monster.cpp
+++ b/examples/dyna/src/monster.cpp
@@ -0,0 +1,227 @@
+#include "monster.hpp"
+
+#include "gfx.hpp"
+#include "map.hpp"
+#include "texture.hpp"
+#include "timer.hpp"
+#include "world.hpp"
+
+#include <tracy/Tracy.hpp>
+
+namespace dyna
+{
+
+namespace
+{
+
+// True when a and b are the two directions of one axis (up/down or
+// left/right, in either order); false for every other pair.
+bool is_opposite( Action a, Action b )
+{
+    switch( a )
+    {
+    case Action::up:
+        return b == Action::down;
+    case Action::down:
+        return b == Action::up;
+    case Action::left:
+        return b == Action::right;
+    case Action::right:
+        return b == Action::left;
+    default:
+        return false;
+    }
+}
+
+} // namespace
+
+// Milliseconds per movement sub-step for a monster type: 14 for type 1, 11 for
+// type 2 and 7 for anything else.
+static int monster_step_ms( int type )
+{
+    if( type == 1 ) return 14;
+    if( type == 2 ) return 11;
+    return 7;
+}
+
+// Create a monster of the given type on grid cell (gx, gy). The position is
+// kept in pixels, 64 per cell.
+Monster::Monster( int type, int gx, int gy )
+    : mtype( type )
+    , t( monster_step_ms( type ) )
+{
+    x = 64 * gx;
+    y = 64 * gy;
+}
+
+void Monster::set_action( Action a )   // override that adds the fade-in countdown for Action::appear
+{
+    Entity::set_action( a );
+    if( action == Action::appear )
+        left = 200;   // tick() counts this down; generic_draw() fades in with (200 - left) / 200
+}
+
+// List the directions whose neighbouring tile is inside the map and not
+// solid. The result is always ordered left, right, up, down; callers index
+// into it with a random number, so the order is part of the behaviour.
+std::vector<Action> Monster::possible_dirs( const Map& map ) const
+{
+    const int gx = x / 64;
+    const int gy = y / 64;
+
+    std::vector<Action> open;
+
+    const bool can_left = x > 0 && !map.at( gx - 1, gy ).solid();
+    if( can_left ) open.push_back( Action::left );
+
+    const bool can_right = gx < map.getx() - 1 && !map.at( gx + 1, gy ).solid();
+    if( can_right ) open.push_back( Action::right );
+
+    const bool can_up = y > 0 && !map.at( gx, gy - 1 ).solid();
+    if( can_up ) open.push_back( Action::up );
+
+    const bool can_down = gy < map.gety() - 1 && !map.at( gx, gy + 1 ).solid();
+    if( can_down ) open.push_back( Action::down );
+
+    return open;
+}
+
+bool Monster::straight( const std::vector<Action>& dirs )   // precondition: at least two entries (think() checks size() == 2)
+{
+    return is_opposite( dirs[0], dirs[1] );   // true when the two exits lie on one axis, i.e. a corridor
+}
+
+// Pick one of the currently open directions uniformly at random, or
+// Action::wait when the monster is boxed in. No random number is drawn in the
+// boxed-in case.
+Action Monster::any_dir( const Map& map )
+{
+    const std::vector<Action> options = possible_dirs( map );
+    if( !options.empty() )
+    {
+        const auto pick = RNG::next( static_cast<int>( options.size() ) );
+        return options[pick];
+    }
+    return Action::wait;
+}
+
+// Pick a random open direction, biased against turning straight back: if the
+// first pick is the opposite of the current action, one second pick is made
+// and used whatever it is.
+Action Monster::rand_dir( const Map& map )
+{
+    const Action first = any_dir( map );
+    return is_opposite( action, first ) ? any_dir( map ) : first;
+}
+
+// Decide what the monster does next; called by tick() when the current move
+// has run out.
+//
+// - While waiting or appearing it just picks a direction; no distance is set
+//   yet.
+// - In a corridor (exactly two open directions, opposite each other) it keeps
+//   its current action for another 64 sub-steps.
+// - Otherwise it draws a new direction, switches to it if it differs, and
+//   walks 64 sub-steps unless the draw came back as Action::wait.
+void Monster::think( const Map& map )
+{
+    ZoneScoped;
+
+    const bool idle = ( action == Action::wait || action == Action::appear );
+    if( idle )
+    {
+        set_action( rand_dir( map ) );
+        return;
+    }
+
+    const std::vector<Action> exits = possible_dirs( map );
+    if( exits.size() == 2 && straight( exits ) )
+    {
+        left = 64;
+        return;
+    }
+
+    const Action next = rand_dir( map );
+    if( next != action )
+    {
+        set_action( next );
+    }
+    // `action` is never wait at this point, so an unchanged action always
+    // gets a fresh distance as well.
+    if( next != Action::wait )
+    {
+        left = 64;
+    }
+}
+
+void Monster::tick( World& world )   // advances the monster in fixed sub-steps of t milliseconds
+{
+    ZoneScoped;
+    Map& map = world.map();
+    // Accumulate the frame time; each loop iteration below consumes t ms of it.
+    delta += Timer::delta;
+    // Strictly greater: a remainder of exactly t is carried over to the next frame.
+    while( delta > t )
+    {
+        delta -= t;
+        // Exactly one of three things happens per sub-step: think, move, or finish the action.
+        if( action == Action::wait )
+        {
+            think( map );
+        }
+        else if( left > 0 )
+        {
+            left--;   // one sub-step = one pixel for the four directions
+            // y grows downward in game coordinates; appear/death only count down.
+            switch( action )
+            {
+            case Action::down: y++; break;
+            case Action::up: y--; break;
+            case Action::left: x--; break;
+            case Action::right: x++; break;
+            default: break;
+            }
+        }
+        else   // the current action has run its full length
+        {
+            if( action == Action::death )
+                die( world );   // death countdown finished: mark the monster dead
+            else
+                think( map );
+        }
+        // Checked after every sub-step; a monster that is already dying is not killed again.
+        if( action != Action::death && killed( map ) )
+        {
+            set_action( Action::death );
+            left = 790 / t;   // number of sub-steps that add up to roughly 790 ms
+        }
+    }
+}
+
+void Monster::die( World& )   // the world argument is unused
+{
+    dead = true;   // Map::tick() queues the respawn timer and removes monsters whose is_dead() is true
+}
+
+// Look up the animation for action `a` in this monster type's sprite set.
+// Type 1 and type 2 have their own sets; every other type value uses the
+// type 3 set. Waiting, appearing and any unlisted action use the "down"
+// sprite.
+const AnimTexture& Monster::texture_for( Action a ) const
+{
+    // Columns: down, up, left, right, death. Rows: type 1, type 2, otherwise.
+    const AnimTexture* const sprites[3][5] = {
+        { &Textures::m1_d, &Textures::m1_u, &Textures::m1_l, &Textures::m1_r, &Textures::m1_death },
+        { &Textures::m2_d, &Textures::m2_u, &Textures::m2_l, &Textures::m2_r, &Textures::m2_death },
+        { &Textures::m3_d, &Textures::m3_u, &Textures::m3_l, &Textures::m3_r, &Textures::m3_death },
+    };
+
+    int row = 2;
+    if( mtype == 1 )
+        row = 0;
+    else if( mtype == 2 )
+        row = 1;
+
+    int column = 0;   // down, also the fallback
+    if( a == Action::up )
+        column = 1;
+    else if( a == Action::left )
+        column = 2;
+    else if( a == Action::right )
+        column = 3;
+    else if( a == Action::death )
+        column = 4;
+
+    return *sprites[row][column];
+}
+
+void Monster::draw()   // picks the sprite set for the current action and draws it
+{
+    ZoneScoped;
+    // The original returns without drawing for unexpected actions; texture_for
+    // falls back to the "down" sprite for those instead, so always draw.
+    generic_draw( texture_for( action ) );
+}
+
+// Draw the monster at its pixel position using `tex`.
+//
+// Waiting and appearing monsters show frame 0; otherwise the frame index
+// advances once per 40 timestamp units since the action started. An appearing
+// monster fades in: its alpha is (200 - left) / 200, and alpha is restored to
+// 1 right after the sprite has been queued.
+void Monster::generic_draw( const AnimTexture& tex )
+{
+    const bool fading_in = ( action == Action::appear );
+
+    int frame = 0;
+    if( fading_in )
+    {
+        Gfx::alpha( static_cast<float>( 200 - left ) / 200.0f );
+    }
+    else if( action != Action::wait )
+    {
+        frame = static_cast<int>( ( Timer::get_timestamp() - action_start ) / 40 );
+    }
+
+    tex.bind( frame );
+    Gfx::draw_sprite( x, y );
+
+    if( fading_in )
+    {
+        Gfx::alpha( 1.0f );
+    }
+}
+
+}
--- a/examples/dyna/src/monster.hpp
+++ b/examples/dyna/src/monster.hpp
@@ -0,0 +1,41 @@
+#pragma once
+
+#include "entity.hpp"
+
+#include <vector>
+
+namespace dyna
+{
+
+class AnimTexture;
+
+// The three monster variants from monster.cs differed only in speed, sprite set
+// and respawn delay, so they fold into one class parameterised by `type` (1-3).
+class Monster : public Entity
+{
+public:
+    Monster( int type, int gx, int gy );   // (gx, gy) is a grid cell; stored as pixels (64 per cell)
+
+    void set_action( Action a ) override;   // Action::appear additionally starts a 200-step fade-in
+    void tick( World& world ) override;     // moves in sub-steps of t ms and re-thinks when a move ends
+    void draw() override;
+    void die( World& world ) override;      // only sets the dead flag
+
+    bool is_dead() const { return dead; }   // Map::tick() removes monsters for which this is true
+    int type() const { return mtype; }
+
+private:
+    std::vector<Action> possible_dirs( const Map& map ) const;   // open neighbours, ordered left, right, up, down
+    static bool straight( const std::vector<Action>& dirs );     // dirs[0] and dirs[1] are opposite; needs two entries
+    Action rand_dir( const Map& map );       // any_dir(), re-drawn once if it would reverse the current action
+    Action any_dir( const Map& map );        // __rand_dir in the original
+    void think( const Map& map );            // chooses the next action and how far to walk
+    void generic_draw( const AnimTexture& tex );
+    const AnimTexture& texture_for( Action a ) const;   // sprite set for this type and action
+
+    int mtype;   // 1, 2 or 3
+    int t;       // ms per movement sub-step (per-type speed)
+    bool dead = false;
+};
+
+}
--- a/examples/dyna/src/player.cpp
+++ b/examples/dyna/src/player.cpp
@@ -0,0 +1,127 @@
+#include "player.hpp"
+
+#include "gfx.hpp"
+#include "map.hpp"
+#include "texture.hpp"
+#include "timer.hpp"
+#include "world.hpp"
+
+#include <tracy/Tracy.hpp>
+
+namespace dyna
+{
+
+Player::Player( int gx, int gy )   // (gx, gy) is the start cell in grid units
+{
+    x = gx * 64;   // positions are kept in pixels, 64 per cell
+    y = gy * 64;
+    set_action( Action::wait );   // start idle
+    queue = Action::wait;         // no direction queued yet
+}
+
+void Player::tick( World& world )   // advances the player in fixed sub-steps of t milliseconds
+{
+    ZoneScoped;
+    Map& map = world.map();
+    // Accumulate the frame time; each loop iteration below consumes t ms of it.
+    delta += Timer::delta;
+    // Strictly greater: a remainder of exactly t is carried over to the next frame.
+    while( delta > t )
+    {
+        delta -= t;
+        // Either continue the action in progress or, once it has run out, choose the next one.
+        if( left > 0 )
+        {
+            left--;   // one sub-step = one pixel for the four directions
+            // y grows downward in game coordinates.
+            switch( action )
+            {
+            case Action::down: y++; break;
+            case Action::up: y--; break;
+            case Action::left: x--; break;
+            case Action::right: x++; break;
+            case Action::place_bomb:
+                if( left == 0 )   // the bomb is dropped on the last sub-step of the action
+                    map.place_bomb( x / 64, y / 64 );
+                break;
+            default:
+                break;
+            }
+        }
+        else   // the current action has run its full length
+        {
+            if( action == Action::death )
+            {
+                die( world );   // death countdown finished: flag the world and stop stepping
+                return;
+            }
+            if( map.at( x / 64, y / 64 ).kind == Field::Kind::vortex )
+            {
+                world.next_level = true;   // standing on the exit portal ends the level
+                return;
+            }
+            if( !can_move( queue, map ) )   // a queued action that is not possible falls back to waiting
+                queue = Action::wait;
+            // Switch only on change, so a repeated action keeps its start time.
+            if( action != queue )
+                set_action( queue );
+            // Any action other than waiting lasts 64 sub-steps, except placing a bomb (32).
+            if( action != Action::wait )
+                left = 64;
+            if( action == Action::place_bomb )
+                left = 32;
+        }
+        // Checked after every sub-step; a player who is already dying is not killed again.
+        if( action != Action::death && killed( map ) )
+        {
+            set_action( Action::death );
+            left = 1140 / t;   // number of sub-steps that add up to 1140 ms
+        }
+    }
+}
+
+// Draw the player with the animation matching the current action. Placing a
+// bomb reuses the waiting animation; actions without a sprite draw nothing.
+// The frame index advances every 60 timestamp units while dying and every 40
+// otherwise, counted from the start of the action.
+void Player::draw()
+{
+    ZoneScoped;
+
+    const AnimTexture* sprite = nullptr;
+    switch( action )
+    {
+    case Action::wait:
+    case Action::place_bomb:
+        sprite = &Textures::p_wait;
+        break;
+    case Action::up:
+        sprite = &Textures::p_u;
+        break;
+    case Action::down:
+        sprite = &Textures::p_d;
+        break;
+    case Action::left:
+        sprite = &Textures::p_l;
+        break;
+    case Action::right:
+        sprite = &Textures::p_r;
+        break;
+    case Action::death:
+        sprite = &Textures::p_death;
+        break;
+    default:
+        return;
+    }
+
+    const int elapsed = static_cast<int>( Timer::get_timestamp() - action_start );
+    const int frame_length = ( action == Action::death ) ? 60 : 40;
+    sprite->bind( elapsed / frame_length );
+
+    Gfx::draw_sprite( x, y );
+}
+
+void Player::move( Action a )   // only records the request; nothing moves here
+{
+    queue = a;   // tick() applies it once the action in progress has finished
+}
+
+void Player::die( World& world )   // called by tick() when the death countdown has run out
+{
+    world.killed = true;   // only raises the flag on the world; the player object itself is left unchanged
+}
+
+// The player dies from whatever kills any entity (Entity::killed) and, in
+// addition, from sharing a tile with a monster. The monster check is skipped
+// when the base check already reports a kill.
+bool Player::killed( const Map& map ) const
+{
+    return Entity::killed( map ) || map.monster_collide( x, y );
+}
+
+}
--- a/examples/dyna/src/player.hpp
+++ b/examples/dyna/src/player.hpp
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "entity.hpp"
+
+namespace dyna
+{
+
+class Player : public Entity   // the user-controlled character
+{
+public:
+    Player( int gx, int gy );   // (gx, gy) is the start cell; stored as pixels (64 per cell)
+
+    void tick( World& world ) override;   // moves in sub-steps of t ms; handles bombs, the exit portal and death
+    void draw() override;
+    void die( World& world ) override;    // sets world.killed
+
+    void move( Action a );   // queues the next direction; applied between tiles
+
+protected:
+    bool killed( const Map& map ) const override;   // base check plus collision with a monster
+
+private:
+    static constexpr int t = 6;   // ms per movement sub-step
+    Action queue = Action::wait;  // action requested through move(), picked up by tick()
+};
+
+}
--- a/examples/dyna/src/texture.cpp
+++ b/examples/dyna/src/texture.cpp
@@ -0,0 +1,221 @@
+#include "texture.hpp"
+
+#include "datapath.hpp"
+#include "gfx.hpp"
+
+#include <SDL3/SDL.h>
+#include <SDL3_image/SDL_image.h>
+#include <tracy/Tracy.hpp>
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+namespace dyna
+{
+
+void GlTexture::reset()
+{
+    if( id_ == 0 ) return;
+
+    // Texture globals are destroyed after main() returns, possibly once the GL
+    // context (and with it every texture it owned) is already gone. So GL is
+    // only called while a context is current; otherwise the name is dropped.
+    const bool have_context = SDL_GL_GetCurrentContext() != nullptr;
+    if( have_context ) glDeleteTextures( 1, &id_ );
+
+    id_ = 0;
+}
+
+namespace
+{
+
+struct SurfaceDeleter
+{
+    void operator()( SDL_Surface* s ) const { SDL_DestroySurface( s ); }
+};
+using SurfacePtr = std::unique_ptr<SDL_Surface, SurfaceDeleter>;
+
+// Produce an RGBA32 copy of `src`, whatever pixel format it is in. A null
+// input or a failed conversion yields a null SurfacePtr.
+SurfacePtr to_rgba( SDL_Surface* src )
+{
+    ZoneScoped;
+    SDL_Surface* converted = src ? SDL_ConvertSurface( src, SDL_PIXELFORMAT_RGBA32 ) : nullptr;
+    return SurfacePtr{ converted };
+}
+
+} // namespace
+
+bool Texture::load( const char* fn )
+{
+    ZoneScoped;
+    ZoneText( fn, strlen( fn ) );
+
+    // Decode the file, then normalise whatever format it had to RGBA8.
+    const SurfacePtr decoded{ IMG_Load( fn ) };
+    if( !decoded )
+    {
+        std::fprintf( stderr, "Cannot open texture %s: %s\n", fn, SDL_GetError() );
+        return false;
+    }
+
+    const SurfacePtr rgba = to_rgba( decoded.get() );
+    if( !rgba )
+    {
+        std::fprintf( stderr, "Cannot convert texture %s: %s\n", fn, SDL_GetError() );
+        return false;
+    }
+
+    // Copy row by row into a buffer with no padding: consecutive surface rows
+    // are `pitch` bytes apart, while only w * 4 bytes of each row are copied.
+    const int w = rgba->w;
+    const int h = rgba->h;
+    const size_t row_bytes = static_cast<size_t>( w ) * 4;
+    std::vector<std::uint8_t> packed( row_bytes * h );
+    const auto* src = static_cast<const std::uint8_t*>( rgba->pixels );
+    for( int y = 0; y < h; y++ )
+        std::memcpy( &packed[y * row_bytes], src + y * static_cast<size_t>( rgba->pitch ), row_bytes );
+
+    tex_ = GlTexture{ Render::make_texture( w, h, 1, packed.data() ) };
+    return static_cast<bool>( tex_ );
+}
+
+void Texture::bind() const
+{
+    Render::use_texture( tex_.get(), 0 );
+}
+
+void AnimTexture::load( SDL_Surface* sheet, int tilex, int tiley, int n )
+{
+    ZoneScoped;
+
+    SurfacePtr rgba = to_rgba( sheet );
+    if( !rgba )
+    {
+        std::fprintf( stderr, "Cannot convert sprite sheet: %s\n", SDL_GetError() );
+        return;
+    }
+
+    // The copy below trusts the tile coordinates, so reject any strip that is
+    // empty or not fully inside the sheet instead of reading past its pixels.
+    // Comparisons are done in whole tiles to keep the arithmetic overflow-free.
+    if( n <= 0 || tilex < 0 || tiley < 0 || tilex >= rgba->w / 64 || n > rgba->h / 64 - tiley )
+    {
+        std::fprintf( stderr, "Sprite strip at tile (%d, %d) with %d frames does not fit a %dx%d sheet\n", tilex, tiley, n, rgba->w, rgba->h );
+        return;
+    }
+
+    // Lay the n frames out back to back as the layers of an array texture. The
+    // frames occupy consecutive sheet rows, so a single pass over 64 * n rows
+    // copies them all: 64 RGBA pixels per row, starting at column 64 * tilex.
+    constexpr size_t row_bytes = 64 * 4;
+    const auto* src = static_cast<const std::uint8_t*>( rgba->pixels ) + static_cast<size_t>( tilex ) * row_bytes;
+    std::vector<std::uint8_t> frames( static_cast<size_t>( n ) * 64 * row_bytes );
+    for( int row = 0; row < 64 * n; row++ )
+        std::memcpy( &frames[row * row_bytes], src + ( static_cast<size_t>( tiley ) * 64 + row ) * rgba->pitch, row_bytes );
+
+    tex_ = GlTexture{ Render::make_texture( 64, 64, n, frames.data() ) };
+    frames_ = n;
+}
+
+void AnimTexture::bind( int frame ) const
+{
+    // Nothing to bind until load() has stored at least one frame.
+    if( frames_ <= 0 ) return;
+    const int wrapped = frame % frames_;   // negative for negative frame numbers
+    Render::use_texture( tex_.get(), wrapped < 0 ? wrapped + frames_ : wrapped );
+}
+
+namespace Textures
+{
+Texture menu, sand, wall, crate;
+
+AnimTexture p_wait, p_u, p_d, p_l, p_r, p_death;
+
+AnimTexture bomb, bomb_appear, e_c, e_h, e_v, e_le, e_re, e_de, e_ue;
+
+AnimTexture m1_death, m1_l, m1_r, m1_d, m1_u;
+AnimTexture m2_death, m2_l, m2_r, m2_d, m2_u;
+AnimTexture m3_death, m3_l, m3_r, m3_d, m3_u;
+
+AnimTexture bonus1, bonus2;
+
+AnimTexture vortex_appear, vortex;
+
+void preload()
+{
+    ZoneScoped;
+
+    menu.load( data_path( "data/gfx/menu.png" ).c_str() );
+    sand.load( data_path( "data/gfx/sand.png" ).c_str() );
+    wall.load( data_path( "data/gfx/wall.png" ).c_str() );
+    crate.load( data_path( "data/gfx/crate.png" ).c_str() );
+
+    {
+        SurfacePtr img{ IMG_Load( data_path( "data/gfx/Player.png" ).c_str() ) };
+        p_wait.load( img.get(), 0, 0, 20 );
+        p_d.load( img.get(), 1, 0, 20 );
+        p_u.load( img.get(), 2, 0, 20 );
+        p_l.load( img.get(), 3, 0, 20 );
+        p_r.load( img.get(), 4, 0, 20 );
+        p_death.load( img.get(), 5, 0, 20 );
+    }
+
+    {
+        SurfacePtr img{ IMG_Load( data_path( "data/gfx/Bomb.png" ).c_str() ) };
+        bomb.load( img.get(), 0, 0, 10 );
+        bomb_appear.load( img.get(), 5, 0, 10 );
+        e_c.load( img.get(), 1, 0, 5 );
+        e_h.load( img.get(), 2, 0, 5 );
+        e_v.load( img.get(), 1, 5, 5 );
+        e_le.load( img.get(), 3, 0, 5 );
+        e_re.load( img.get(), 2, 5, 5 );
+        e_de.load( img.get(), 4, 0, 5 );
+        e_ue.load( img.get(), 3, 5, 5 );
+    }
+
+    {
+        SurfacePtr img{ IMG_Load( data_path( "data/gfx/monster1.png" ).c_str() ) };
+        m1_death.load( img.get(), 0, 0, 20 );
+        m1_u.load( img.get(), 1, 0, 10 );
+        m1_l.load( img.get(), 2, 0, 10 );
+        m1_d.load( img.get(), 1, 10, 10 );
+        m1_r.load( img.get(), 2, 10, 10 );
+    }
+
+    {
+        SurfacePtr img{ IMG_Load( data_path( "data/gfx/monster2.png" ).c_str() ) };
+        m2_death.load( img.get(), 0, 0, 20 );
+        m2_d.load( img.get(), 1, 0, 20 );
+        m2_u.load( img.get(), 2, 0, 20 );
+        m2_l.load( img.get(), 3, 0, 20 );
+        m2_r.load( img.get(), 4, 0, 20 );
+    }
+
+    {
+        SurfacePtr img{ IMG_Load( data_path( "data/gfx/monster3.png" ).c_str() ) };
+        m3_death.load( img.get(), 0, 0, 20 );
+        m3_d.load( img.get(), 1, 0, 9 );
+        m3_u.load( img.get(), 2, 0, 9 );
+        m3_l.load( img.get(), 1, 10, 9 );
+        m3_r.load( img.get(), 2, 10, 9 );
+    }
+
+    {
+        SurfacePtr img{ IMG_Load( data_path( "data/gfx/bonusy.png" ).c_str() ) };
+        bonus1.load( img.get(), 0, 0, 20 );
+        bonus2.load( img.get(), 1, 0, 20 );
+    }
+
+    {
+        SurfacePtr img{ IMG_Load( data_path( "data/gfx/portal.png" ).c_str() ) };
+        vortex_appear.load( img.get(), 0, 0, 20 );
+        vortex.load( img.get(), 1, 0, 20 );
+    }
+}
+}
+
+}
--- a/examples/dyna/src/texture.hpp
+++ b/examples/dyna/src/texture.hpp
@@ -0,0 +1,91 @@
+#pragma once
+
+#include <glad/gl.h>
+
+struct SDL_Surface;
+
+namespace dyna
+{
+
+// Move-only RAII owner of a GL texture name. Every texture in the game is a
+// GL_TEXTURE_2D_ARRAY (static images use a single layer, animations use one
+// layer per frame) so the renderer only ever has to deal with one sampler type.
+class GlTexture
+{
+public:
+    GlTexture() = default;
+    explicit GlTexture( GLuint id ) noexcept : id_( id ) {}
+    ~GlTexture() { reset(); }
+
+    GlTexture( GlTexture&& o ) noexcept : id_( o.id_ ) { o.id_ = 0; }
+    GlTexture& operator=( GlTexture&& o ) noexcept
+    {
+        if( this != &o )
+        {
+            reset();
+            id_ = o.id_;
+            o.id_ = 0;
+        }
+        return *this;
+    }
+
+    GlTexture( const GlTexture& ) = delete;
+    GlTexture& operator=( const GlTexture& ) = delete;
+
+    GLuint get() const { return id_; }
+    explicit operator bool() const { return id_ != 0; }
+
+    void reset();   // glDeleteTextures; safe on an empty handle
+
+private:
+    GLuint id_ = 0;
+};
+
+// A single static texture loaded from a whole image file. Ported from
+// texture.cs; binding just records the texture for the next draw call.
+class Texture
+{
+public:
+    bool load( const char* fn );
+    void bind() const;
+
+private:
+    GlTexture tex_;
+};
+
+// A vertical strip of 64x64 animation frames cut out of a sprite sheet, stored
+// as the layers of one array texture. Mirrors AnimTexture in texture.cs.
+class AnimTexture
+{
+public:
+    // Extract n frames from column `tilex`, starting at row `tiley`, where each
+    // coordinate is in 64px tile units. Mirrors AnimTexture.load in texture.cs.
+    void load( SDL_Surface* sheet, int tilex, int tiley, int n );
+    void bind( int frame ) const;   // frame is taken modulo the frame count
+
+private:
+    GlTexture tex_;
+    int frames_ = 0;
+};
+
+// All game textures, loaded once at startup. Mirrors the Textures class.
+namespace Textures
+{
+extern Texture menu, sand, wall, crate;
+
+extern AnimTexture p_wait, p_u, p_d, p_l, p_r, p_death;
+
+extern AnimTexture bomb, bomb_appear, e_c, e_h, e_v, e_le, e_re, e_de, e_ue;
+
+extern AnimTexture m1_death, m1_l, m1_r, m1_d, m1_u;
+extern AnimTexture m2_death, m2_l, m2_r, m2_d, m2_u;
+extern AnimTexture m3_death, m3_l, m3_r, m3_d, m3_u;
+
+extern AnimTexture bonus1, bonus2;
+
+extern AnimTexture vortex_appear, vortex;
+
+void preload();
+}
+
+}
--- a/examples/dyna/src/timer.cpp
+++ b/examples/dyna/src/timer.cpp
@@ -0,0 +1,51 @@
+#include "timer.hpp"
+
+#include <SDL3/SDL.h>
+
+#include <random>
+
+namespace dyna
+{
+
+namespace Timer
+{
+int delta = 0;                        // ms covered by the most recent tick()
+static std::int64_t timestamp = 0;    // SDL_GetTicks() reading at the last reset()/tick()
+
+// Restart timing from the current SDL tick count with a zero delta.
+void reset()
+{
+    timestamp = static_cast<std::int64_t>( SDL_GetTicks() );
+    delta = 0;
+}
+
+// Sample the clock and report how many ms passed since the previous sample.
+int tick()
+{
+    const std::int64_t now = static_cast<std::int64_t>( SDL_GetTicks() );
+    delta = static_cast<int>( now - timestamp );
+    timestamp = now;
+    return delta;
+}
+
+// Time of the last sample; it does not advance between tick() calls.
+std::int64_t get_timestamp() { return timestamp; }
+}
+
+namespace RNG
+{
+// Returns a uniformly distributed integer in [0, n), or 0 when n is not
+// positive.
+int next( int n )
+{
+    if( n <= 0 ) return 0;
+
+    // One generator for the whole process, seeded from std::random_device the
+    // first time a positive n is requested.
+    static std::mt19937 generator{ std::random_device{}() };
+    using Range = std::uniform_int_distribution<int>;
+    return Range{ 0, n - 1 }( generator );
+}
+}
+
+}
--- a/examples/dyna/src/timer.hpp
+++ b/examples/dyna/src/timer.hpp
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <cstdint>
+
+namespace dyna
+{
+
+// Frame timing, ported from timer.cs. Timestamps are SDL_GetTicks() readings
+// (ms since SDL init, not since Timer::reset()) taken at reset()/tick(); 64-bit
+// so the modulo arithmetic the animation code relies on never overflows during a session.
+namespace Timer
+{
+void reset();
+int tick();                  // advances the clock, returns delta in ms
+std::int64_t get_timestamp();
+extern int delta;            // ms elapsed during the last tick()
+}
+
+// Thin wrapper over a single global PRNG, mirroring the C# RNG helper.
+namespace RNG
+{
+int next( int n );             // uniform in [0, n)
+}
+
+}
--- a/examples/dyna/src/world.cpp
+++ b/examples/dyna/src/world.cpp
@@ -0,0 +1,40 @@
+#include "world.hpp"
+
+#include "map.hpp"
+#include "player.hpp"
+
+namespace dyna
+{
+
+World::World( const std::string& level_fn, bool with_player )
+    : map_( std::make_unique<Map>( level_fn ) )
+    // rfind() yields npos when there is no '/', and npos + 1 wraps to 0, so a
+    // bare file name is kept whole.
+    , name_( level_fn.substr( level_fn.rfind( '/' ) + 1 ) )
+{
+    // -1 marks the menu world, which never opens an exit portal.
+    crates_left = -1;
+    if( !with_player ) return;
+
+    // Gameplay world: spawn the player and take the crate count from the map.
+    player_ = map_->create_player();
+    crates_left = map_->get_crates();
+}
+
+World::~World() = default;
+
+void World::tick()
+{
+    // Map first, then the player when there is one (player_ may be null).
+    map_->tick( *this );
+    if( player_ ) player_->tick( *this );
+}
+
+void World::draw()
+{
+    // Map first so the player, when there is one, is drawn after it.
+    map_->draw();
+    if( player_ ) player_->draw();
+}
+
+}
--- a/examples/dyna/src/world.hpp
+++ b/examples/dyna/src/world.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <memory>
+#include <string>
+
+namespace dyna
+{
+
+class Map;
+class Player;
+
+// Owns the state for one running level: the map, the player (absent on the
+// menu screen), and the flags the gameplay code used to reach through global
+// variables. Passing a World& into the tick path replaces the old Game::p /
+// Game::current_map / Game::killed globals, so there are no non-owning pointers
+// to outlive the objects they point at.
+class World
+{
+public:
+    // Loads `level_fn`; spawns a player from the map's '@' marker when
+    // with_player is set (gameplay) and leaves it null otherwise (menu).
+    World( const std::string& level_fn, bool with_player );
+    ~World();
+
+    World( const World& ) = delete;
+    World& operator=( const World& ) = delete;
+
+    Map& map() { return *map_; }
+    const Map& map() const { return *map_; }
+    Player* player() { return player_.get(); }   // null on the menu screen
+    const std::string& name() const { return name_; }
+
+    void tick();
+    void draw();
+
+    bool killed = false;
+    bool next_level = false;
+    int crates_left = 0;
+
+private:
+    std::unique_ptr<Map> map_;
+    std::unique_ptr<Player> player_;
+    std::string name_;
+};
+
+}
--- a/examples/opengl/triangle/CMakeLists.txt
+++ b/examples/opengl/triangle/CMakeLists.txt
@@ -0,0 +1,83 @@
+# CMakeLists.txt — OpenGL spinning triangle demo
+#
+#   macOS:
+#     cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -B build/ninja .
+#     cmake --build build/ninja
+#
+#   Linux (requires libsdl3-dev libgl1-mesa-dev):
+#     cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -B build/ninja .
+#     cmake --build build/ninja
+#
+#   Windows:
+#     cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -B build/ninja .
+#     cmake --build build/ninja
+
+cmake_minimum_required(VERSION 3.16)
+project(gl_spinning_triangle LANGUAGES C CXX)
+
+# ---------------------------------------------------------------------------
+# Tracy root — defaults to three directories above this CMakeLists.txt.
+# ---------------------------------------------------------------------------
+set(TRACY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
+option(TRACY_ENABLE                  "Enable Tracy profiling"                    ON)
+
+# ---------------------------------------------------------------------------
+# Platform — SDL3 (cross-platform windowing, must be installed on the system)
+# ---------------------------------------------------------------------------
+find_package(SDL3 REQUIRED)
+
+# ---------------------------------------------------------------------------
+# GL extension loader — GLEW (Windows + Linux, fetched automatically)
+# ---------------------------------------------------------------------------
+if(NOT APPLE)
+    include(FetchContent)
+    set(glew-cmake_BUILD_SHARED OFF CACHE BOOL "" FORCE)
+    set(ONLY_LIBS               ON  CACHE BOOL "" FORCE)
+    FetchContent_Declare(glew
+        GIT_REPOSITORY https://github.com/Perlmint/glew-cmake.git
+        GIT_TAG        master   # pin to a specific commit for reproducible builds
+        GIT_SHALLOW    TRUE
+    )
+    FetchContent_MakeAvailable(glew)
+endif()
+
+set(PLATFORM_SOURCES  platform/platform_sdl3.cpp)
+
+if(APPLE)
+    set(PLATFORM_LIBS SDL3::SDL3 "-framework OpenGL")
+elseif(WIN32)
+    set(PLATFORM_LIBS SDL3::SDL3 opengl32 libglew_static)
+else()
+    set(PLATFORM_LIBS SDL3::SDL3 GL libglew_static)
+endif()
+
+# ---------------------------------------------------------------------------
+# Target
+# ---------------------------------------------------------------------------
+add_executable(gl_spinning_triangle
+    spinning_triangle.cpp
+    "${TRACY_DIR}/public/TracyClient.cpp"
+    ${PLATFORM_SOURCES}
+)
+
+# Suppress upstream warnings from TracyClient.cpp
+if(MSVC)
+    set_source_files_properties("${TRACY_DIR}/public/TracyClient.cpp"
+        PROPERTIES COMPILE_FLAGS "/w"
+    )
+else()
+    set_source_files_properties("${TRACY_DIR}/public/TracyClient.cpp"
+        PROPERTIES COMPILE_FLAGS "-w"
+    )
+endif()
+
+target_compile_features(gl_spinning_triangle PRIVATE cxx_std_17)
+
+if(TRACY_ENABLE)
+    target_compile_definitions(gl_spinning_triangle PRIVATE TRACY_ENABLE)
+endif()
+
+target_include_directories(gl_spinning_triangle PRIVATE
+    "${TRACY_DIR}/public"
+)
+target_link_libraries(gl_spinning_triangle PRIVATE ${PLATFORM_LIBS})
--- a/examples/opengl/triangle/platform/platform.h
+++ b/examples/opengl/triangle/platform/platform.h
@@ -0,0 +1,37 @@
+// platform.h — interface between platform-agnostic code and platform backends
+//
+// Each platform_*.mm / platform_*.cpp file implements these six functions.
+// Exactly one backend must be linked into the final binary.
+
+#pragma once
+
+#ifdef __APPLE__
+#  include <OpenGL/gl3.h>
+#else
+#  include <GL/glew.h>
+#endif
+
+// Initialize the windowing system, create a window, and make an OpenGL 3.3
+// Core Profile context current on the calling thread.
+// Returns true on success.
+bool platformInit(int width, int height, const char* title);
+
+// Load OpenGL function pointers (no-op on macOS where the framework exports them directly).
+// Must be called after platformInit() while the GL context is current.
+// Returns true on success.
+bool platformInitGL();
+
+// Elapsed wall-clock time in seconds since platformInit().
+double platformGetTime();
+
+// Swap front and back buffers (present the rendered frame).
+void platformSwapBuffers();
+
+// Pixel scaling factor relative to the logical window size (1.0 on non-HiDPI displays).
+// Must be called after platformInit().
+void platformGetPixelDensityScale(float* x, float* y);
+
+// Enter the platform event/render loop.
+// Calls render() each frame at ~60 fps.
+// Calls shutdown() exactly once before returning.
+void platformRunLoop(void (*render)(), void (*shutdown)());
--- a/examples/opengl/triangle/platform/platform_sdl3.cpp
+++ b/examples/opengl/triangle/platform/platform_sdl3.cpp
@@ -0,0 +1,85 @@
+// platform_sdl3.cpp — SDL3 windowing backend (cross-platform)
+#include "platform.h"   // GL headers first (gl3.h / glew.h) so SDL sees guards set
+
+#define SDL_MAIN_HANDLED    // we don't want SDL_main
+#include <SDL3/SDL.h>
+
+#include <chrono>
+#include <cstdio>
+
+static SDL_Window*   sWin = nullptr;
+static SDL_GLContext sCtx = nullptr;
+static std::chrono::steady_clock::time_point sStartTime;
+
+bool platformInit(int width, int height, const char* title) {
+    if (!SDL_Init(SDL_INIT_VIDEO)) {
+        fprintf(stderr, "ERROR: SDL_Init failed: %s\n", SDL_GetError());
+        return false;
+    }
+
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3);
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
+
+    sWin = SDL_CreateWindow(title, width, height, SDL_WINDOW_OPENGL);
+    if (!sWin) {
+        fprintf(stderr, "ERROR: SDL_CreateWindow failed: %s\n", SDL_GetError());
+        SDL_Quit();
+        return false;
+    }
+    SDL_SetWindowPosition(sWin, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED);
+
+    sCtx = SDL_GL_CreateContext(sWin);
+    if (!sCtx) {
+        fprintf(stderr, "ERROR: SDL_GL_CreateContext failed: %s\n", SDL_GetError());
+        SDL_DestroyWindow(sWin);
+        SDL_Quit();
+        return false;
+    }
+
+    SDL_GL_SetSwapInterval(1);
+    sStartTime = std::chrono::steady_clock::now();
+    return true;
+}
+
+bool platformInitGL() {
+#ifndef __APPLE__
+    glewExperimental = GL_TRUE;
+    if (glewInit() != GLEW_OK) {
+        fprintf(stderr, "Failed to initialize GLEW\n");
+        return false;
+    }
+#endif
+    return true;
+}
+
+double platformGetTime() {
+    const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - sStartTime;
+    return elapsed.count();   // seconds, as a floating-point value
+}
+
+void platformSwapBuffers() { SDL_GL_SwapWindow(sWin); }
+
+void platformGetPixelDensityScale(float* x, float* y) {
+    int pw = 0, ph = 0, ww = 0, wh = 0;   // stay 0 if an SDL query fails, selecting the 1.0 fallback below
+    SDL_GetWindowSizeInPixels(sWin, &pw, &ph);
+    SDL_GetWindowSize(sWin, &ww, &wh);
+    *x = (pw > 0 && ww > 0) ? static_cast<float>(pw) / static_cast<float>(ww) : 1.0f;
+    *y = (ph > 0 && wh > 0) ? static_cast<float>(ph) / static_cast<float>(wh) : 1.0f;
+}
+
+void platformRunLoop(void (*render)(), void (*shutdown)()) {
+    // Drain all pending events, then draw one frame, until a quit is requested.
+    for (bool quit = false; !quit;) {
+        for (SDL_Event ev; SDL_PollEvent(&ev);) {
+            const bool escape = ev.type == SDL_EVENT_KEY_DOWN && ev.key.key == SDLK_ESCAPE;
+            quit = quit || escape || ev.type == SDL_EVENT_QUIT;
+        }
+        if (!quit) render();
+    }
+    // shutdown() runs before the context is destroyed, so it may still make GL calls.
+    shutdown();
+    SDL_GL_DestroyContext(sCtx);
+    SDL_DestroyWindow(sWin);
+    SDL_Quit();
+}
--- a/examples/opengl/triangle/spinning_triangle.cpp
+++ b/examples/opengl/triangle/spinning_triangle.cpp
@@ -0,0 +1,145 @@
+// spinning_triangle.cpp — OpenGL spinning triangle demo with Tracy GPU profiling.
+
+#ifdef __APPLE__
+// NOTE: OpenGL is only available on MacOS (no iOS support)
+// Including and using anything related to OpenGL on Apple (like <OpenGL/gl3.h>)
+// will emit deprecation warnings, unless GL_SILENCE_DEPRECATION is defined
+#define GL_SILENCE_DEPRECATION
+// NOTE: TracyOpenGL.hpp will not work as expected even on Apple devices that
+// support OpenGL, because the OpenGL drivers do not implement ARB_timer_query
+// properly (querying GL_TIMESTAMP always resolves to 0). TracyOpenGL.hpp will
+// emit a compiler warning, and a Tracy message to the trace/profiler, but the
+// program will still run.
+#endif
+
+#include "platform/platform.h"  // also includes OpenGL headers
+
+#include <tracy/Tracy.hpp>
+
+// NOTE: opt-in toggle for periodic recalibrations during Collect()
+#define TRACY_OPENGL_AUTO_CALIBRATION
+#include <tracy/TracyOpenGL.hpp>
+
+static const int kWidth  = 800;
+static const int kHeight = 600;
+
+static GLuint gProgram  = 0;
+static GLuint gVao      = 0;
+static GLint  gAngleLoc = -1;
+
+// Vertex colors and positions are baked in; rotation is driven by a uniform.
+static const char* kVertSrc = R"(
+#version 150 core
+uniform float uAngle;
+const vec2 kPos[3] = vec2[3](
+    vec2( 0.0,    0.5  ),
+    vec2(-0.433, -0.25 ),
+    vec2( 0.433, -0.25 )
+);
+const vec3 kCol[3] = vec3[3](
+    vec3(1.0, 0.0, 0.0),
+    vec3(0.0, 1.0, 0.0),
+    vec3(0.0, 0.0, 1.0)
+);
+out vec3 vColor;
+void main() {
+    float c = cos(uAngle);
+    float s = sin(uAngle);
+    vec2  p = kPos[gl_VertexID];
+    gl_Position = vec4(p.x*c - p.y*s, p.x*s + p.y*c, 0.0, 1.0);
+    vColor = kCol[gl_VertexID];
+}
+)";
+
+static const char* kFragSrc = R"(
+#version 150 core
+in  vec3 vColor;
+out vec4 fragColor;
+void main() { fragColor = vec4(vColor, 1.0); }
+)";
+
+static GLuint compileShader(GLenum type, const char* src) {
+    const GLuint shader = glCreateShader(type);
+    glShaderSource(shader, 1, &src, nullptr);
+    glCompileShader(shader);
+    GLint compiled = GL_FALSE;
+    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
+    if (compiled) return shader;
+
+    // Compilation failed: print the info log, release the shader object, return 0.
+    char log[512];
+    glGetShaderInfoLog(shader, sizeof(log), nullptr, log);
+    fprintf(stderr, "Shader compile error: %s\n", log);
+    glDeleteShader(shader);
+    return 0;
+}
+
+static int initGL() {
+    if (!platformInitGL()) return 1;
+
+    TracyGpuContext;
+    TracyGpuContextName("OpenGL", 6);
+
+    GLuint vert = compileShader(GL_VERTEX_SHADER,   kVertSrc);
+    GLuint frag = compileShader(GL_FRAGMENT_SHADER, kFragSrc);
+    if (!vert || !frag) return 1;
+
+    gProgram = glCreateProgram();
+    glAttachShader(gProgram, vert);
+    glAttachShader(gProgram, frag);
+    glLinkProgram(gProgram);
+    glDeleteShader(vert);
+    glDeleteShader(frag);
+
+    GLint ok = 0;
+    glGetProgramiv(gProgram, GL_LINK_STATUS, &ok);
+    if (!ok) {
+        char log[512];
+        glGetProgramInfoLog(gProgram, sizeof(log), nullptr, log);
+        fprintf(stderr, "Program link error: %s\n", log);
+        return 1;
+    }
+
+    gAngleLoc = glGetUniformLocation(gProgram, "uAngle");
+
+    // Core profile requires a bound VAO even with no vertex attributes.
+    glGenVertexArrays(1, &gVao);
+    glBindVertexArray(gVao);
+
+    glClearColor(0.05f, 0.05f, 0.08f, 1.0f);
+    float scaleX, scaleY;
+    platformGetPixelDensityScale(&scaleX, &scaleY);
+    glViewport(0, 0, (int)(kWidth * scaleX), (int)(kHeight * scaleY));
+    return 0;
+}
+
+static void renderFrame() {
+    ZoneScoped;
+
+    glClear(GL_COLOR_BUFFER_BIT);
+    glUseProgram(gProgram);
+
+    {
+        TracyGpuZone("triangle draw");
+        glUniform1f(gAngleLoc, (float)platformGetTime());
+        glDrawArrays(GL_TRIANGLES, 0, 3);
+    }
+
+    platformSwapBuffers();
+    TracyGpuCollect;
+}
+
+static void shutdown() {
+    fprintf(stderr, "application is shutting down...\n");
+    glDeleteVertexArrays(1, &gVao);
+    glDeleteProgram(gProgram);
+}
+
+int main() {
+    if (!platformInit(kWidth, kHeight, "OpenGL Spinning Triangle"))
+        return 1;
+    if (initGL() != 0)
+        return 2;
+    platformRunLoop(renderFrame, shutdown);
+    return 0;
+}
--- a/examples/webgpu/triangle/CMakeLists.txt
+++ b/examples/webgpu/triangle/CMakeLists.txt
@@ -0,0 +1,157 @@
+# CMakeLists.txt — WebGPU spinning triangle demo
+#
+#   macOS:
+#     clang++ -std=c++17 -ObjC++ spinning_triangle.cpp platform/platform_macos.mm \
+#         -I/path/to/wgpu/include -L/path/to/wgpu/lib -lwgpu_native \
+#         -Wl,-rpath,@executable_path \
+#         -framework Cocoa -framework Metal -framework QuartzCore \
+#         -framework Foundation -framework IOKit -framework IOSurface \
+#         -o spinning_triangle
+#
+#   Windows (MSVC):
+#     cl /std:c++17 spinning_triangle.cpp platform/platform_windows.cpp \
+#         /I\path\to\wgpu\include \path\to\wgpu\lib\wgpu_native.lib \
+#         user32.lib gdi32.lib /Fe:spinning_triangle.exe
+#
+#   Linux (requires libsdl3-dev):
+#     g++ -std=c++17 spinning_triangle.cpp platform/platform_wayland.cpp \
+#         xdg-shell-protocol.c \
+#         -I/path/to/wgpu/include -L/path/to/wgpu/lib -lwgpu_native \
+#         -lwayland-client -o spinning_triangle
+
+cmake_minimum_required(VERSION 3.16)
+project(spinning_triangle LANGUAGES C CXX)
+
+# ---------------------------------------------------------------------------
+# WebGPU backend — set WGPU_PATH to your wgpu-native or Dawn installation.
+# The library name differs between backends:
+#   wgpu-native  →  wgpu_native
+#   Dawn         →  webgpu_dawn
+# ---------------------------------------------------------------------------
+set(WGPU_PATH "" CACHE PATH "Root of the WebGPU native installation (contains include/ and lib/)")
+set(WGPU_LIB  "" CACHE STRING "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty")
+
+if(NOT WGPU_PATH)
+    message(FATAL_ERROR "Set WGPU_PATH to the root of your WebGPU native installation.")
+endif()
+
+# When WGPU_PATH changes, discard any previously auto-detected WGPU_LIB so
+# detection re-runs against the new path.
+if(NOT "${WGPU_PATH}" STREQUAL "${_WGPU_PATH_LAST}")
+    unset(WGPU_LIB CACHE)
+    set(WGPU_LIB "" CACHE STRING "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty")
+endif()
+set(_WGPU_PATH_LAST "${WGPU_PATH}" CACHE INTERNAL "")
+
+if(NOT WGPU_LIB)
+    unset(_WGPU_NATIVE_LIB CACHE)
+    unset(_WEBGPU_DAWN_LIB CACHE)
+    find_library(_WGPU_NATIVE_LIB NAMES wgpu_native wgpu_native.dll PATHS "${WGPU_PATH}/lib" NO_DEFAULT_PATH)
+    find_library(_WEBGPU_DAWN_LIB NAMES webgpu_dawn                 PATHS "${WGPU_PATH}/lib" NO_DEFAULT_PATH)
+    if(_WGPU_NATIVE_LIB)
+        set(WGPU_LIB "wgpu_native" CACHE STRING "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty" FORCE)
+    elseif(_WEBGPU_DAWN_LIB)
+        set(WGPU_LIB "webgpu_dawn" CACHE STRING "WebGPU library name (wgpu_native or webgpu_dawn); auto-detected if empty" FORCE)
+    else()
+        message(FATAL_ERROR "Could not detect a WebGPU library in ${WGPU_PATH}/lib. Set WGPU_LIB explicitly (wgpu_native or webgpu_dawn).")
+    endif()
+    message(STATUS "WebGPU library auto-detected: ${WGPU_LIB}")
+endif()
+
+# ---------------------------------------------------------------------------
+# Tracy root — defaults to three directories above this CMakeLists.txt.
+# ---------------------------------------------------------------------------
+set(TRACY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
+option(TRACY_ENABLE "Enable Tracy profiling" ON)
+
+# ---------------------------------------------------------------------------
+# macOS quarantine — pre-built WebGPU binaries downloaded from the internet
+# carry a com.apple.quarantine extended attribute that prevents dyld from
+# loading them ("damaged or incomplete" / Gatekeeper block).  Strip it once
+# at configure time so the linker and the runtime loader can both access the
+# library directory without further user intervention.
+# ---------------------------------------------------------------------------
+if(APPLE)
+    execute_process(
+        COMMAND xattr -dr com.apple.quarantine "${WGPU_PATH}/lib"
+    )
+endif()
+
+# ---------------------------------------------------------------------------
+# Platform — SDL3 (cross-platform windowing, must be installed on the system)
+# ---------------------------------------------------------------------------
+find_package(SDL3 REQUIRED)
+
+set(PLATFORM_SOURCES platform/platform_sdl3.cpp)
+
+if(APPLE)
+    set(PLATFORM_LIBS
+        SDL3::SDL3
+        "-framework Cocoa"
+        "-framework Metal"
+        "-framework QuartzCore"
+        "-framework Foundation"
+        "-framework IOKit"
+        "-framework IOSurface"
+    )
+elseif(WIN32)
+    # wgpu-native (Rust stdlib) pull-ins: NtReadFile, GetUserProfileDirectoryW, ...
+    set(WGPU_NATIVE_WIN32_LIBS ntdll userenv)
+    # Dawn pull-ins: WKPDID_D3DDebugObjectName GUID, CompareObjectHandles, ...
+    set(WEBGPU_DAWN_WIN32_LIBS dxguid onecore)
+    set(PLATFORM_LIBS SDL3::SDL3 ${WGPU_NATIVE_WIN32_LIBS} ${WEBGPU_DAWN_WIN32_LIBS})
+else()
+    set(PLATFORM_LIBS SDL3::SDL3)
+endif()
+
+# ---------------------------------------------------------------------------
+# Target
+# ---------------------------------------------------------------------------
+add_executable(spinning_triangle
+    spinning_triangle.cpp
+    "${TRACY_DIR}/public/TracyClient.cpp"
+    ${PLATFORM_SOURCES}
+)
+
+# Treat TracyClient.cpp as third-party code — suppress all warnings so that
+# upstream changes don't pollute our build output.
+if(MSVC)
+    set_source_files_properties("${TRACY_DIR}/public/TracyClient.cpp"
+        PROPERTIES COMPILE_FLAGS "/w"
+    )
+else()
+    set_source_files_properties("${TRACY_DIR}/public/TracyClient.cpp"
+        PROPERTIES COMPILE_FLAGS "-w"
+    )
+endif()
+
+target_compile_features(spinning_triangle PRIVATE cxx_std_17)
+
+if(TRACY_ENABLE)
+    target_compile_definitions(spinning_triangle PRIVATE TRACY_ENABLE)
+endif()
+
+target_include_directories(spinning_triangle PRIVATE
+    "${WGPU_PATH}/include"
+    "${TRACY_DIR}/public"
+)
+
+target_link_directories(spinning_triangle PRIVATE "${WGPU_PATH}/lib")
+
+target_link_libraries(spinning_triangle PRIVATE
+    ${WGPU_LIB}
+    ${PLATFORM_LIBS}
+)
+
+# Embed the rpath so the binary finds the WebGPU dylib/so next to itself.
+if(APPLE)
+    set_target_properties(spinning_triangle PROPERTIES
+        BUILD_RPATH "${WGPU_PATH}/lib"
+        INSTALL_RPATH "@executable_path"
+    )
+elseif(UNIX)
+    set_target_properties(spinning_triangle PROPERTIES
+        BUILD_RPATH "${WGPU_PATH}/lib"
+        INSTALL_RPATH "$ORIGIN"
+    )
+endif()
--- a/examples/webgpu/triangle/platform/platform.h
+++ b/examples/webgpu/triangle/platform/platform.h
@@ -0,0 +1,23 @@
+// platform.h — interface between platform-agnostic code and platform backends
+//
+// Each platform_*.mm / platform_*.cpp file implements these four functions.
+// Exactly one backend must be linked into the final binary.
+
+#pragma once
+#include <webgpu/webgpu.h>
+
+// Initialize the windowing system and create a window of the given dimensions.
+// Returns true on success.
+bool platformInit(int width, int height, const char* title);
+
+// Create a WebGPU surface backed by the platform window.
+// Must be called after wgpuCreateInstance() and platformInit().
+WGPUSurface platformCreateSurface(WGPUInstance instance);
+
+// Elapsed wall-clock time in seconds since platformInit().
+double platformGetTime();
+
+// Enter the platform event/render loop.
+// Calls render() each frame at ~60 fps.
+// Calls shutdown() exactly once before returning.
+void platformRunLoop(void (*render)(), void (*shutdown)());
--- a/examples/webgpu/triangle/platform/platform_sdl3.cpp
+++ b/examples/webgpu/triangle/platform/platform_sdl3.cpp
@@ -0,0 +1,95 @@
+// platform_sdl3.cpp — SDL3 windowing backend for the WebGPU example
+#include "platform.h"   // webgpu/webgpu.h first
+
+#define SDL_MAIN_HANDLED    // we don't want SDL_main
+#include <SDL3/SDL.h>
+
+#ifdef __APPLE__
+#  include <SDL3/SDL_metal.h>
+#endif
+
+#include <chrono>
+#include <cstdio>
+
+static SDL_Window* sWin = nullptr;
+static std::chrono::steady_clock::time_point sStartTime;
+#ifdef __APPLE__
+static SDL_MetalView sMetalView = nullptr;
+#endif
+
+// Brings up SDL's video subsystem, opens a centred window of the requested
+// size and records the reference instant used by platformGetTime().
+// Returns false (after logging to stderr) if SDL or the window cannot be set up.
+bool platformInit(int width, int height, const char* title) {
+    const bool sdlReady = SDL_Init(SDL_INIT_VIDEO);
+    if (!sdlReady) {
+        fprintf(stderr, "ERROR: SDL_Init failed: %s\n", SDL_GetError());
+        return false;
+    }
+
+    // Apple builds request a Metal-capable window; every other build uses no flags.
+#ifdef __APPLE__
+    const SDL_WindowFlags windowFlags = SDL_WINDOW_METAL;
+#else
+    const SDL_WindowFlags windowFlags = 0;
+#endif
+
+    sWin = SDL_CreateWindow(title, width, height, windowFlags);
+    if (sWin == nullptr) {
+        fprintf(stderr, "ERROR: SDL_CreateWindow failed: %s\n", SDL_GetError());
+        SDL_Quit();
+        return false;
+    }
+
+    SDL_SetWindowPosition(sWin, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED);
+    sStartTime = std::chrono::steady_clock::now();
+    return true;
+}
+
+// Builds a WGPUSurface for the SDL window created by platformInit().
+//
+// The native handle is selected at compile time: a Metal layer (obtained
+// through an SDL Metal view) on Apple, an HWND on Windows, and an Xlib
+// window everywhere else.
+//
+// Returns nullptr (after logging to stderr) when no window exists or the
+// window exposes no usable native handle; otherwise returns the result of
+// wgpuInstanceCreateSurface().
+WGPUSurface platformCreateSurface(WGPUInstance instance) {
+    // Guard against being called before a successful platformInit().
+    if (!sWin) {
+        fprintf(stderr, "ERROR: platformCreateSurface called without a window\n");
+        return nullptr;
+    }
+
+    WGPUSurfaceDescriptor desc = {};
+
+#if defined(__APPLE__)
+    // Reuse an existing view so a repeated call does not leak the previous one.
+    if (!sMetalView) {
+        sMetalView = SDL_Metal_CreateView(sWin);
+    }
+    if (!sMetalView) {
+        fprintf(stderr, "ERROR: SDL_Metal_CreateView failed\n");
+        return nullptr;
+    }
+    WGPUSurfaceSourceMetalLayer metalDesc = {};
+    metalDesc.chain.sType = WGPUSType_SurfaceSourceMetalLayer;
+    metalDesc.layer       = SDL_Metal_GetLayer(sMetalView);
+    desc.nextInChain      = &metalDesc.chain;
+#elif defined(_WIN32)
+    const SDL_PropertiesID props = SDL_GetWindowProperties(sWin);
+    WGPUSurfaceSourceWindowsHWND hwndDesc = {};
+    hwndDesc.chain.sType = WGPUSType_SurfaceSourceWindowsHWND;
+    hwndDesc.hinstance   = SDL_GetPointerProperty(props, SDL_PROP_WINDOW_WIN32_INSTANCE_POINTER, nullptr);
+    hwndDesc.hwnd        = SDL_GetPointerProperty(props, SDL_PROP_WINDOW_WIN32_HWND_POINTER, nullptr);
+    if (!hwndDesc.hwnd) {
+        fprintf(stderr, "ERROR: SDL window has no Win32 HWND\n");
+        return nullptr;
+    }
+    desc.nextInChain     = &hwndDesc.chain;
+#else   // Linux / X11
+    const SDL_PropertiesID props = SDL_GetWindowProperties(sWin);
+    WGPUSurfaceSourceXlibWindow x11Desc = {};
+    x11Desc.chain.sType = WGPUSType_SurfaceSourceXlibWindow;
+    x11Desc.display     = SDL_GetPointerProperty(props, SDL_PROP_WINDOW_X11_DISPLAY_POINTER, nullptr);
+    x11Desc.window      = (uint32_t)SDL_GetNumberProperty(props, SDL_PROP_WINDOW_X11_WINDOW_NUMBER, 0);
+    // Both properties fall back to their defaults (nullptr / 0) when the
+    // window is not an X11 window, e.g. when SDL picked another video driver.
+    if (!x11Desc.display || x11Desc.window == 0) {
+        fprintf(stderr, "ERROR: SDL window has no X11 display/window handle\n");
+        return nullptr;
+    }
+    desc.nextInChain    = &x11Desc.chain;
+#endif
+
+    return wgpuInstanceCreateSurface(instance, &desc);
+}
+
+// Seconds elapsed on the steady clock since platformInit() stored sStartTime.
+double platformGetTime() {
+    using Seconds = std::chrono::duration<double>;
+    const auto now = std::chrono::steady_clock::now();
+    const Seconds elapsed = now - sStartTime;
+    return elapsed.count();
+}
+
+// Pumps SDL events and calls render() once per iteration until the window is
+// closed or Escape is pressed. render() is skipped on the iteration in which
+// the quit request is seen. Afterwards calls shutdown() and tears down the
+// Metal view (Apple only), the window and SDL itself.
+void platformRunLoop(void (*render)(), void (*shutdown)()) {
+    bool quitRequested = false;
+    do {
+        SDL_Event event;
+        // Drain the whole queue even after a quit request has been seen.
+        while (SDL_PollEvent(&event)) {
+            switch (event.type) {
+            case SDL_EVENT_QUIT:
+                quitRequested = true;
+                break;
+            case SDL_EVENT_KEY_DOWN:
+                if (event.key.key == SDLK_ESCAPE) {
+                    quitRequested = true;
+                }
+                break;
+            default:
+                break;
+            }
+        }
+        if (!quitRequested) {
+            render();
+        }
+    } while (!quitRequested);
+
+    shutdown();
+#ifdef __APPLE__
+    SDL_Metal_DestroyView(sMetalView);
+#endif
+    SDL_DestroyWindow(sWin);
+    SDL_Quit();
+}
--- a/examples/webgpu/triangle/spinning_triangle.cpp
+++ b/examples/webgpu/triangle/spinning_triangle.cpp
@@ -0,0 +1,352 @@
+// spinning_triangle.cpp — platform-agnostic WebGPU spinning triangle demo.
+
+#include "platform/platform.h"
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <webgpu/webgpu.h>
+
+#include <tracy/Tracy.hpp>
+#include <tracy/TracyWebGPU.hpp>
+
+// ---------------------------------------------------------------------------
+// Globals
+// ---------------------------------------------------------------------------
+
+// Window size in pixels; also used as the surface configuration size.
+static const int kWidth  = 800;
+static const int kHeight = 600;
+
+// WebGPU objects shared between init, per-frame rendering and shutdown.
+// gInstance and gSurface are created in main(); the rest in initWebGPU().
+static WGPUInstance       gInstance   = nullptr;
+static WGPUSurface        gSurface    = nullptr;
+static WGPUAdapter        gAdapter    = nullptr;
+static WGPUDevice         gDevice     = nullptr;
+static WGPUQueue          gQueue      = nullptr;
+static WGPURenderPipeline gPipeline   = nullptr;
+static WGPUBuffer         gUniformBuf = nullptr;   // holds one float: the rotation angle
+static WGPUBindGroup      gBindGroup  = nullptr;
+
+// Tracy GPU profiling context, created in initWebGPU().
+static TracyWebGPUCtx     gTracyCtx   = nullptr;
+
+// Format used for both the surface configuration and the pipeline's colour target.
+static WGPUTextureFormat gSurfaceFormat = WGPUTextureFormat_BGRA8Unorm;
+
+// TODO: this can become platformError() instead
+// Writes `message` and `code` to stderr and hands `code` back unchanged, so
+// callers can write `return error(n, "...")`.
+int error(int code, const char* message) {
+    std::fprintf(stderr, "ERROR: %s (code: %d)\n", message, code);
+    return code;
+}
+
+// ---------------------------------------------------------------------------
+// WGSL shader — vertex colours baked in, rotation via a uniform float.
+// ---------------------------------------------------------------------------
+
+// WGSL source for the whole pipeline.
+//  - The uniform at group 0 / binding 0 carries the rotation angle; it must
+//    match the bind group layout built in initWebGPU().
+//  - vs_main derives position and colour from the vertex index (no vertex
+//    buffers) and rotates the position by the angle.
+//  - fs_main outputs the interpolated colour with alpha 1.
+// The entry point names are referenced by string when the pipeline is created.
+static const char* kShaderSource = R"(
+struct Uniforms {
+    angle: f32,
+};
+@group(0) @binding(0) var<uniform> u: Uniforms;
+
+struct VSOut {
+    @builtin(position) pos: vec4f,
+    @location(0) color: vec3f,
+};
+
+@vertex
+fn vs_main(@builtin(vertex_index) vi: u32) -> VSOut {
+    var positions = array<vec2f, 3>(
+        vec2f( 0.0,  0.5),
+        vec2f(-0.433, -0.25),
+        vec2f( 0.433, -0.25),
+    );
+    var colors = array<vec3f, 3>(
+        vec3f(1.0, 0.0, 0.0),
+        vec3f(0.0, 1.0, 0.0),
+        vec3f(0.0, 0.0, 1.0),
+    );
+
+    let c = cos(u.angle);
+    let s = sin(u.angle);
+    let p = positions[vi];
+    let rotated = vec2f(p.x * c - p.y * s, p.x * s + p.y * c);
+
+    var out: VSOut;
+    out.pos   = vec4f(rotated, 0.0, 1.0);
+    out.color = colors[vi];
+    return out;
+}
+
+@fragment
+fn fs_main(@location(0) color: vec3f) -> @location(0) vec4f {
+    return vec4f(color, 1.0);
+}
+)";
+
+// ---------------------------------------------------------------------------
+// Adapter / Device request callbacks  (current wgpu-native API)
+// ---------------------------------------------------------------------------
+
+// Adapter-request callback. On success stores the adapter through userdata1
+// (a WGPUAdapter*); on failure logs the message and leaves the target untouched.
+static void onAdapterReady(WGPURequestAdapterStatus status,
+                           WGPUAdapter adapter,
+                           WGPUStringView message,
+                           void* userdata1, void* /*userdata2*/) {
+    if (status != WGPURequestAdapterStatus_Success) {
+        fprintf(stderr, "Adapter request failed: %.*s\n",
+                (int)message.length, message.data);
+        return;
+    }
+
+    WGPUAdapter* const target = static_cast<WGPUAdapter*>(userdata1);
+    *target = adapter;
+}
+
+// Device-request callback. On success stores the device through userdata1
+// (a WGPUDevice*); on failure logs the message and leaves the target untouched.
+static void onDeviceReady(WGPURequestDeviceStatus status,
+                          WGPUDevice device,
+                          WGPUStringView message,
+                          void* userdata1, void* /*userdata2*/) {
+    if (status != WGPURequestDeviceStatus_Success) {
+        fprintf(stderr, "Device request failed: %.*s\n",
+                (int)message.length, message.data);
+        return;
+    }
+
+    WGPUDevice* const target = static_cast<WGPUDevice*>(userdata1);
+    *target = device;
+}
+
+// ---------------------------------------------------------------------------
+// WebGPU init
+// ---------------------------------------------------------------------------
+
+// Requests an adapter and a device, configures the surface and builds every
+// GPU object the demo needs (uniform buffer, bind group, render pipeline).
+//
+// Expects gInstance and gSurface to be set already. Fills gAdapter, gDevice,
+// gQueue, gTracyCtx, gUniformBuf, gBindGroup and gPipeline.
+//
+// Returns 0 on success, 11 if no adapter was obtained, 12 if no device was
+// obtained, 13 if the render pipeline could not be created.
+static int initWebGPU() {
+    // Adapter
+    WGPURequestAdapterOptions adapterOpts = {};
+    adapterOpts.compatibleSurface = gSurface;
+
+    // Wait on a completion flag rather than on gAdapter itself: a failed
+    // request leaves gAdapter null, so polling on it would never terminate.
+    bool adapterDone = false;
+    WGPURequestAdapterCallbackInfo adapterCB = {};
+    adapterCB.mode      = WGPUCallbackMode_AllowProcessEvents;
+    adapterCB.callback  = [](WGPURequestAdapterStatus status, WGPUAdapter adapter,
+                             WGPUStringView message, void* userdata1, void* userdata2) {
+        onAdapterReady(status, adapter, message, userdata1, nullptr);
+        *static_cast<bool*>(userdata2) = true;
+    };
+    adapterCB.userdata1 = &gAdapter;
+    adapterCB.userdata2 = &adapterDone;
+    wgpuInstanceRequestAdapter(gInstance, &adapterOpts, adapterCB);
+    while (!adapterDone) { wgpuInstanceProcessEvents(gInstance); }
+    if (!gAdapter) return error(11, "No adapter");
+
+    WGPUUncapturedErrorCallbackInfo errorCB = {};
+    errorCB.callback = [](WGPUDevice const*, WGPUErrorType type,
+                          WGPUStringView message, void*, void*) {
+        fprintf(stderr, "[WGPU ERROR] type=%d  %.*s\n",
+                (int)type, (int)message.length, message.data);
+    };
+
+    WGPUDeviceDescriptor deviceDesc = {};
+    deviceDesc.uncapturedErrorCallbackInfo = errorCB;
+
+    TracyWebGPUSetupDeviceDescriptor(deviceDesc);
+
+    // Device: same completion-flag scheme as for the adapter.
+    bool deviceDone = false;
+    WGPURequestDeviceCallbackInfo deviceCB = {};
+    deviceCB.mode      = WGPUCallbackMode_AllowProcessEvents;
+    deviceCB.callback  = [](WGPURequestDeviceStatus status, WGPUDevice device,
+                            WGPUStringView message, void* userdata1, void* userdata2) {
+        onDeviceReady(status, device, message, userdata1, nullptr);
+        *static_cast<bool*>(userdata2) = true;
+    };
+    deviceCB.userdata1 = &gDevice;
+    deviceCB.userdata2 = &deviceDone;
+    wgpuAdapterRequestDevice(gAdapter, &deviceDesc, deviceCB);
+    while (!deviceDone) { wgpuInstanceProcessEvents(gInstance); }
+    if (!gDevice) return error(12, "No device");
+
+    gQueue = wgpuDeviceGetQueue(gDevice);
+    gTracyCtx = TracyWebGPUContext(gInstance, gDevice, gQueue);
+    TracyWebGPUContextName(gTracyCtx, "WebGPU", 6);
+
+    // Configure surface
+    WGPUSurfaceConfiguration config = {};
+    config.device      = gDevice;
+    config.format      = gSurfaceFormat;
+    config.usage       = WGPUTextureUsage_RenderAttachment;
+    config.alphaMode   = WGPUCompositeAlphaMode_Opaque;
+    config.width       = kWidth;
+    config.height      = kHeight;
+    config.presentMode = WGPUPresentMode_Fifo;
+    wgpuSurfaceConfigure(gSurface, &config);
+
+    // Shader module
+    WGPUShaderSourceWGSL wgslSrc = {};
+    wgslSrc.chain.sType = WGPUSType_ShaderSourceWGSL;
+    wgslSrc.code = { kShaderSource, WGPU_STRLEN };
+
+    WGPUShaderModuleDescriptor smDesc = {};
+    smDesc.nextInChain = (WGPUChainedStruct*)&wgslSrc;
+    WGPUShaderModule shaderMod = wgpuDeviceCreateShaderModule(gDevice, &smDesc);
+
+    // Uniform buffer (one f32 for rotation angle)
+    WGPUBufferDescriptor bufDesc = {};
+    bufDesc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+    bufDesc.size  = sizeof(float);
+    gUniformBuf = wgpuDeviceCreateBuffer(gDevice, &bufDesc);
+
+    // Bind group layout + bind group (binding 0, visible to the vertex stage)
+    WGPUBindGroupLayoutEntry bglEntry = {};
+    bglEntry.binding    = 0;
+    bglEntry.visibility = WGPUShaderStage_Vertex;
+    bglEntry.buffer.type            = WGPUBufferBindingType_Uniform;
+    bglEntry.buffer.minBindingSize  = sizeof(float);
+
+    WGPUBindGroupLayoutDescriptor bglDesc = {};
+    bglDesc.entryCount = 1;
+    bglDesc.entries    = &bglEntry;
+    WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(gDevice, &bglDesc);
+
+    WGPUBindGroupEntry bgEntry = {};
+    bgEntry.binding = 0;
+    bgEntry.buffer  = gUniformBuf;
+    bgEntry.size    = sizeof(float);
+
+    WGPUBindGroupDescriptor bgDesc = {};
+    bgDesc.layout     = bgl;
+    bgDesc.entryCount = 1;
+    bgDesc.entries    = &bgEntry;
+    gBindGroup = wgpuDeviceCreateBindGroup(gDevice, &bgDesc);
+
+    // Pipeline layout
+    WGPUPipelineLayoutDescriptor plDesc = {};
+    plDesc.bindGroupLayoutCount = 1;
+    plDesc.bindGroupLayouts     = &bgl;
+    WGPUPipelineLayout pipelineLayout = wgpuDeviceCreatePipelineLayout(gDevice, &plDesc);
+
+    // Render pipeline
+    WGPUColorTargetState colorTarget = {};
+    colorTarget.format    = gSurfaceFormat;
+    colorTarget.writeMask = WGPUColorWriteMask_All;
+
+    WGPUFragmentState fragState = {};
+    fragState.module      = shaderMod;
+    fragState.entryPoint  = { "fs_main", WGPU_STRLEN };
+    fragState.targetCount = 1;
+    fragState.targets     = &colorTarget;
+
+    WGPURenderPipelineDescriptor rpDesc = {};
+    rpDesc.layout = pipelineLayout;
+    rpDesc.vertex.module     = shaderMod;
+    rpDesc.vertex.entryPoint = { "vs_main", WGPU_STRLEN };
+    rpDesc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
+    rpDesc.multisample.count  = 1;
+    rpDesc.multisample.mask   = 0xFFFFFFFF;
+    rpDesc.fragment = &fragState;
+
+    gPipeline = wgpuDeviceCreateRenderPipeline(gDevice, &rpDesc);
+
+    // Cleanup intermediates (done before the pipeline check so nothing leaks on failure)
+    wgpuShaderModuleRelease(shaderMod);
+    wgpuPipelineLayoutRelease(pipelineLayout);
+    wgpuBindGroupLayoutRelease(bgl);
+
+    if (!gPipeline) return error(13, "Failed to create render pipeline");
+    return 0;
+}
+
+// ---------------------------------------------------------------------------
+// Frame rendering
+// ---------------------------------------------------------------------------
+
+// Acquires the surface texture for the current frame.
+// When the status is SuccessOptimal or SuccessSuboptimal the struct is returned
+// as-is and the caller is responsible for releasing its texture. For any other
+// status the texture (if any) is released here and the returned struct has
+// texture == nullptr. A Timeout status (and Occluded when WGPU_H_ is defined)
+// is not logged; every other failure status is reported on stderr.
+static WGPUSurfaceTexture getWindowSurface() {
+    WGPUSurfaceTexture frame = {};
+    wgpuSurfaceGetCurrentTexture(gSurface, &frame);
+
+    const bool usable =
+        frame.status == WGPUSurfaceGetCurrentTextureStatus_SuccessOptimal ||
+        frame.status == WGPUSurfaceGetCurrentTextureStatus_SuccessSuboptimal;
+    if (usable) {
+        return frame;
+    }
+
+    // Timeout and Occluded are normal OS events (window covered / on a different Space).
+    bool expected = (frame.status == WGPUSurfaceGetCurrentTextureStatus_Timeout);
+#ifdef WGPU_H_
+    if (frame.status == (WGPUSurfaceGetCurrentTextureStatus)WGPUSurfaceGetCurrentTextureStatus_Occluded) {
+        expected = true;
+    }
+#endif
+    if (!expected) {
+        fprintf(stderr, "Failed to get surface texture (status %d)\n", frame.status);
+    }
+
+    if (frame.texture != nullptr) {
+        wgpuTextureRelease(frame.texture);
+        frame.texture = nullptr;
+    }
+    return frame;
+}
+
+// Renders one frame: uploads the current rotation angle, records a single
+// render pass that clears the surface and draws the triangle, submits it and
+// presents. Returns early (drawing nothing) when no surface texture is available.
+static void renderFrame() {
+    ZoneScoped;
+
+    // Update rotation angle
+    // The angle is the elapsed time in seconds, used directly by the shader's cos/sin.
+    float angle = (float)platformGetTime();
+    wgpuQueueWriteBuffer(gQueue, gUniformBuf, 0, &angle, sizeof(float));
+
+    // getWindowSurface() has already released the texture when it returns null.
+    WGPUSurfaceTexture surfTex = getWindowSurface();
+    if (!surfTex.texture) return;
+
+    WGPUTextureView view = wgpuTextureCreateView(surfTex.texture, nullptr);
+
+    // Command encoder
+    WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(gDevice, nullptr);
+
+    // Render pass
+    WGPURenderPassColorAttachment colorAtt = {};
+    colorAtt.view       = view;
+    colorAtt.loadOp     = WGPULoadOp_Clear;
+    colorAtt.storeOp    = WGPUStoreOp_Store;
+    colorAtt.clearValue  = { 0.05, 0.05, 0.08, 1.0 };
+    colorAtt.depthSlice  = WGPU_DEPTH_SLICE_UNDEFINED;
+
+    WGPURenderPassDescriptor passDesc = {};
+    passDesc.colorAttachmentCount = 1;
+    passDesc.colorAttachments     = &colorAtt;
+
+    {
+        ZoneScopedN("render-pass");
+        // NOTE(review): presumably a scoped GPU zone that ends with this brace
+        // block and may adjust passDesc before the pass begins; confirm against
+        // TracyWebGPU.hpp.
+        TracyWebGPUNamedZone(gTracyCtx, tracyZone, encoder, passDesc, "triangle draw", true);
+        WGPURenderPassEncoder pass = wgpuCommandEncoderBeginRenderPass(encoder, &passDesc);
+        wgpuRenderPassEncoderSetPipeline(pass, gPipeline);
+        wgpuRenderPassEncoderSetBindGroup(pass, 0, gBindGroup, 0, nullptr);
+        // 3 vertices, 1 instance; positions come from the vertex index in the shader.
+        wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
+        wgpuRenderPassEncoderEnd(pass);
+        wgpuRenderPassEncoderRelease(pass);
+    }
+
+    // Submit
+    WGPUCommandBuffer cmdBuf = wgpuCommandEncoderFinish(encoder, nullptr);
+    wgpuQueueSubmit(gQueue, 1, &cmdBuf);
+
+    // Present
+    wgpuSurfacePresent(gSurface);
+
+    // Process Events
+    wgpuInstanceProcessEvents(gInstance);
+    // NOTE(review): presumably gathers finished GPU timing data for Tracy; confirm.
+    TracyWebGPUCollect(gTracyCtx);
+
+    // Cleanup
+    // Per-frame objects are released only after submit and present.
+    wgpuCommandBufferRelease(cmdBuf);
+    wgpuCommandEncoderRelease(encoder);
+    wgpuTextureViewRelease(view);
+    wgpuTextureRelease(surfTex.texture);
+}
+
+// ---------------------------------------------------------------------------
+// Shutdown
+// ---------------------------------------------------------------------------
+
+// Tears down the Tracy GPU context and releases every global WebGPU object.
+// Passed to platformRunLoop(), which calls it once after the loop exits.
+static void shutdown() {
+    fprintf(stderr, "application is shutting down...\n");
+    // NOTE(review): called without a null check, unlike the releases below;
+    // confirm it tolerates a null context if this ever runs after a failed init.
+    TracyWebGPUDestroy(gTracyCtx);
+    // Release order: bind group, buffer, pipeline, queue, device, adapter,
+    // surface, and the instance last. Null handles are skipped.
+    if (gBindGroup)  wgpuBindGroupRelease(gBindGroup);
+    if (gUniformBuf) wgpuBufferRelease(gUniformBuf);
+    if (gPipeline)   wgpuRenderPipelineRelease(gPipeline);
+    if (gQueue)      wgpuQueueRelease(gQueue);
+    if (gDevice)     wgpuDeviceRelease(gDevice);
+    if (gAdapter)    wgpuAdapterRelease(gAdapter);
+    if (gSurface)    wgpuSurfaceRelease(gSurface);
+    if (gInstance)   wgpuInstanceRelease(gInstance);
+}
+
+// ---------------------------------------------------------------------------
+// main
+// ---------------------------------------------------------------------------
+
+// Entry point: opens the window, creates the WebGPU instance and surface,
+// initialises the GPU objects and hands control to the platform loop.
+// Exit codes: 0 normal exit, 1 window setup failed, 2 no instance,
+// 3 no surface, 4 WebGPU initialisation failed.
+int main(int argc, char* argv[]) {
+    const bool windowReady = platformInit(kWidth, kHeight, "WebGPU Spinning Triangle");
+    if (!windowReady) {
+        return 1;
+    }
+
+    gInstance = wgpuCreateInstance(nullptr);
+    if (gInstance == nullptr) {
+        return error(2, "Failed to create WebGPU instance.");
+    }
+
+    gSurface = platformCreateSurface(gInstance);
+    if (gSurface == nullptr) {
+        return error(3, "Failed to create surface.");
+    }
+
+    const int initStatus = initWebGPU();
+    if (initStatus != 0) {
+        return 4;
+    }
+
+    // The loop calls shutdown() itself before it returns.
+    platformRunLoop(renderFrame, shutdown);
+    return 0;
+}
--- a/extra/identify.cpp
+++ b/extra/identify.cpp
@@ -1,4 +1,4 @@
-// g++ identify.cpp -lpthread ../public/common/tracy_lz4.cpp ../zstd/common/*.c ../zstd/decompress/*.c ../zstd/decompress/huf_decompress_amd64.S
+// g++ identify.cpp -lpthread ../public/common/tracy_lz4.cpp -lzstd

 #include <memory>
 #include <stdint.h>
@@ -8,7 +8,7 @@
 #include "../public/common/TracyVersion.hpp"

 static const uint8_t FileHeader[8] { 't', 'r', 'a', 'c', 'y', tracy::Version::Major, tracy::Version::Minor, tracy::Version::Patch };
-enum { FileHeaderMagic = 5 };
+constexpr size_t FileHeaderMagic = 5;

 int main( int argc, char** argv )
 {
--- a/extra/make-build.sh
+++ b/extra/make-build.sh
@@ -1,26 +0,0 @@
-#!/bin/sh
-
-rm -rf tracy-build
-mkdir tracy-build
-
-./update-meson-version.sh
-
-if [ ! -f vswhere.exe ]; then
-    wget https://github.com/microsoft/vswhere/releases/download/2.8.4/vswhere.exe
-fi
-
-MSVC=`./vswhere.exe -property installationPath -version '[17.0,17.999]' | head -n 1`
-MSVC=`wslpath "$MSVC" | tr -d '\r'`
-MSBUILD=$MSVC/MSBuild/Current/Bin/MSBuild.exe
-
-for i in capture csvexport import-chrome update; do
-    echo $i...
-    "$MSBUILD" ../$i/build/win32/$i.sln /t:Clean /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
-    "$MSBUILD" ../$i/build/win32/$i.sln /t:Build /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
-    cp ../$i/build/win32/x64/Release/$i.exe tracy-build/
-done
-
-echo profiler...
-"$MSBUILD" ../profiler/build/win32/Tracy.sln /t:Clean /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
-"$MSBUILD" ../profiler/build/win32/Tracy.sln /t:Build /p:Configuration=Release /p:Platform=x64 /noconsolelogger /nologo -m
-cp ../profiler/build/win32/x64/Release/Tracy.exe tracy-build/
--- a/Show More
+++ b/Show More