Compare commits

..

3 Commits

Author SHA1 Message Date
Bartosz Taudul
19519bbeb0 Merge pull request #1394 from bruno-dasilva/bruno/opengl-drift-correction
fix: add opengl drift correction for gpu zones
2026-06-07 22:50:40 +02:00
Bruno Da Silva
fc4f52e61d add opengl drift correction option to meson.options/meson.build 2026-06-07 20:14:36 +00:00
Bruno Da Silva
e2ac8f7973 fix: add opengl drift correction for gpu zones 2026-06-07 00:23:14 +00:00
8 changed files with 75 additions and 20 deletions

View File

@@ -27,22 +27,8 @@ jobs:
run: git config --global --add safe.directory '*'
- name: Build profiler
run: |
# NOTE: disabling LTO to speed-up the GitHub macOS build bots (would take 30+ min otherwise)
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }} -DNO_LTO=ON -DCMAKE_CXX_FLAGS="-ftime-trace"
cmake --build profiler/build --parallel --config Release -- VERBOSE=1
- name: Profiler compile-time report
run: |
python3 -c "
import json, glob, os
times = []
for f in glob.glob('profiler/build/**/*.json', recursive=True):
try:
d = json.load(open(f))
total = next((e['dur'] for e in d['traceEvents'] if e.get('name')=='Total'), None)
if total: times.append((total/1e6, f))
except: pass
for t,f in sorted(times, reverse=True)[:20]: print(f'{t:6.1f}s {os.path.basename(f)}')
"
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
cmake --build profiler/build --parallel --config Release
- name: Build update
run: |
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}

View File

@@ -137,6 +137,7 @@ set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resoluti
set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution after the first symbol resolve operation" OFF TracyClient)
set_option(TRACY_DEBUGINFOD "Enable debuginfod support" OFF TracyClient)
set_option(TRACY_IGNORE_MEMORY_FAULTS "Ignore instrumentation errors from memory free events that do not have a matching allocation" OFF TracyClient)
set_option(TRACY_OPENGL_AUTO_CALIBRATION "Periodically recalibrate OpenGL GPU/CPU clock drift (forces a CPU/GPU sync each time)" OFF TracyClient)
# advanced
set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF TracyClient)

View File

@@ -1701,6 +1701,12 @@ logo=\bcattention
\end{itemize}
\end{bclogo}
\subparagraph{Calibrated context}
By default, the OpenGL context is uncalibrated: the CPU and GPU clocks are aligned only once, when the context is created, so over long captures the two time domains may drift apart (section~\ref{options} describes correcting this drift manually). Defining \texttt{TRACY\_OPENGL\_AUTO\_CALIBRATION} before including \texttt{TracyOpenGL.hpp} enables periodic recalibration instead: roughly once per second Tracy samples the GPU and CPU clocks together and emits a calibration event, allowing the profiler to track and remove the drift automatically.
This is opt-in because OpenGL exposes no atomic CPU+GPU timestamp query (unlike Vulkan's \texttt{VK\_EXT\_calibrated\_timestamps} or Direct3D~12, whose contexts are always calibrated). Recalibration therefore reads the GPU clock with \texttt{glGetInteger64v(GL\_TIMESTAMP)}, which forces a CPU/GPU synchronization (a pipeline stall) each time it runs. Enable it only when the improved long-capture alignment is worth the periodic stall.
\subsubsection{Vulkan}
Similarly, for Vulkan support you should include the \texttt{public/tracy/TracyVulkan.hpp} header file. Tracing Vulkan devices and queues is a bit more involved, and the Vulkan initialization macro \texttt{TracyVkContext(physdev, device, queue, cmdbuf)} returns an instance of \texttt{TracyVkCtx} object, which tracks an associated Vulkan queue. Cleanup is performed using the \texttt{TracyVkDestroy(ctx)} macro. You may create multiple Vulkan contexts. To set a custom name for the context, use the \texttt{TracyVkContextName(ctx, name, size)} macro.

View File

@@ -135,6 +135,10 @@ if get_option('ignore_memory_faults')
tracy_common_args += ['-DTRACY_IGNORE_MEMORY_FAULTS']
endif
# When the 'opengl_auto_calibration' option is enabled, add the
# TRACY_OPENGL_AUTO_CALIBRATION define to the common compiler arguments.
if get_option('opengl_auto_calibration')
tracy_common_args += ['-DTRACY_OPENGL_AUTO_CALIBRATION']
endif
tracy_shared_libs = get_option('default_library') == 'shared'
if tracy_shared_libs

View File

@@ -29,3 +29,4 @@ option('verbose', type : 'boolean', value : false, description : 'Enable verbose
option('no_internal_message', type : 'boolean', value : false, description : 'Prevent the profiler from logging messages')
option('debuginfod', type : 'boolean', value : false, description : 'Enable debuginfod support')
option('ignore_memory_faults', type : 'boolean', value : false, description : 'Ignore instrumentation errors from memory free events that do not have a matching allocation')
option('opengl_auto_calibration', type : 'boolean', value : false, description : 'Periodically recalibrate OpenGL GPU/CPU clock drift (forces a CPU/GPU sync each time)')

View File

@@ -1,7 +1,7 @@
#ifndef __TRACYDISASSEMBLY_HPP__
#define __TRACYDISASSEMBLY_HPP__
#include <nlohmann/json_fwd.hpp>
#include <nlohmann/json.hpp>
#include <stdint.h>
#include <string>
#include <vector>

View File

@@ -6,7 +6,7 @@
#include <chrono>
#include <functional>
#include <memory>
#include <nlohmann/json_fwd.hpp>
#include <nlohmann/json.hpp>
#include <string>
#include <thread>
#include <vector>

View File

@@ -34,6 +34,9 @@ public:
#include <atomic>
#include <assert.h>
#include <stdlib.h>
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
# include <chrono>
#endif
#include "Tracy.hpp"
#include "../client/TracyProfiler.hpp"
@@ -106,6 +109,14 @@ public:
GLint bits;
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
// The anchor above is never refreshed; advertise calibration and emit periodic
// GpuCalibration events to correct CPU/GPU drift (see Recalibrate). Opt-in,
// because Recalibrate() calls glGetInteger64v( GL_TIMESTAMP ), which forces a
// CPU/GPU sync.
m_prevCalibration = GetHostTimeNs();
#endif
const float period = 1.f;
const auto thread = GetThreadHandle();
TracyLfqPrepare( QueueType::GpuNewContext );
@@ -114,7 +125,11 @@ public:
MemWrite( &item->gpuNewContext.thread, thread );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
MemWrite( &item->gpuNewContext.flags, GpuContextFlags( GpuContextCalibration ) );
#else
MemWrite( &item->gpuNewContext.flags, GpuContextFlags( 0 ) );
#endif
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
#ifdef TRACY_ON_DEMAND
@@ -143,8 +158,6 @@ public:
{
ZoneScopedC( Color::Red4 );
if( m_tail == m_head ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
@@ -153,6 +166,14 @@ public:
}
#endif
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
// Before the drain's early-returns, so it runs even on frames with no
// completed queries.
Recalibrate();
#endif
if( m_tail == m_head ) return;
while( m_tail != m_head )
{
GLint available;
@@ -173,6 +194,38 @@ public:
}
private:
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
// Reads std::chrono::steady_clock and returns the value as nanoseconds since
// that clock's epoch. This monotonic host time is what the interval between
// two calibrations (cpuDelta) is measured with; it is deliberately not
// Profiler::GetTime(), following the D3D12/Vulkan backends.
static tracy_force_inline int64_t GetHostTimeNs()
{
    using HostClock = std::chrono::steady_clock;
    using Nanoseconds = std::chrono::nanoseconds;

    const auto sinceEpoch = HostClock::now().time_since_epoch();
    return std::chrono::duration_cast<Nanoseconds>( sinceEpoch ).count();
}
// Emits a GpuCalibration queue item for this context, throttled to roughly one
// per second of host time since the previous one.
//
// OpenGL offers no way to read the CPU and GPU clocks atomically, so the GPU
// timestamp and the profiler CPU time are sampled one right after the other;
// the gap between the two reads is negligible next to the one-second interval.
// The glGetInteger64v( GL_TIMESTAMP ) read forces a CPU/GPU sync, which is why
// this whole path only exists when TRACY_OPENGL_AUTO_CALIBRATION is defined.
tracy_force_inline void Recalibrate()
{
    constexpr int64_t MinIntervalNs = 1000ll * 1000 * 1000;

    const int64_t now = GetHostTimeNs();
    const int64_t elapsed = now - m_prevCalibration;
    if( elapsed >= MinIntervalNs )
    {
        // Keep these two reads adjacent: GPU clock first, then CPU clock.
        int64_t gpuTimestamp;
        glGetInteger64v( GL_TIMESTAMP, &gpuTimestamp );
        const int64_t cpuTimestamp = Profiler::GetTime();

        // The next interval is measured from the host time taken on entry.
        m_prevCalibration = now;

        TracyLfqPrepare( QueueType::GpuCalibration );
        MemWrite( &item->gpuCalibration.gpuTime, gpuTimestamp );
        MemWrite( &item->gpuCalibration.cpuTime, cpuTimestamp );
        MemWrite( &item->gpuCalibration.cpuDelta, elapsed );
        MemWrite( &item->gpuCalibration.context, m_context );
        TracyLfqCommit;
    }
}
#endif
tracy_force_inline unsigned int NextQueryId()
{
const auto id = m_head;
@@ -196,6 +249,10 @@ private:
unsigned int m_head;
unsigned int m_tail;
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
int64_t m_prevCalibration; // host-ns timestamp of the last emitted calibration
#endif
};
class GpuCtxScope