mirror of
https://github.com/wolfpld/tracy.git
synced 2026-06-08 08:33:48 +00:00
Compare commits
22 Commits
slomp/cuda
...
19519bbeb0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
19519bbeb0 | ||
|
|
fc4f52e61d | ||
|
|
e2ac8f7973 | ||
|
|
e5aa8eba51 | ||
|
|
7437c41514 | ||
|
|
f441a5070b | ||
|
|
00b6abd67b | ||
|
|
e4e3d75eb8 | ||
|
|
fc5318dcad | ||
|
|
661c664b75 | ||
|
|
6dbebca666 | ||
|
|
73d78ad517 | ||
|
|
e5371d7987 | ||
|
|
9806f35714 | ||
|
|
d40289d594 | ||
|
|
86fbe529ed | ||
|
|
9b169ef3f9 | ||
|
|
64797dc735 | ||
|
|
76797799c0 | ||
|
|
7cb98245ce | ||
|
|
55d5436fb9 | ||
|
|
2b11785b05 |
@@ -137,6 +137,7 @@ set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resoluti
|
||||
set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution after the first symbol resolve operation" OFF TracyClient)
|
||||
set_option(TRACY_DEBUGINFOD "Enable debuginfod support" OFF TracyClient)
|
||||
set_option(TRACY_IGNORE_MEMORY_FAULTS "Ignore instrumentation errors from memory free events that do not have a matching allocation" OFF TracyClient)
|
||||
set_option(TRACY_OPENGL_AUTO_CALIBRATION "Periodically recalibrate OpenGL GPU/CPU clock drift (forces a CPU/GPU sync each time)" OFF TracyClient)
|
||||
|
||||
# advanced
|
||||
set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF TracyClient)
|
||||
|
||||
0
examples/cuda/README.md
Normal file
0
examples/cuda/README.md
Normal file
39
examples/cuda/graph/CMakeLists.txt
Normal file
39
examples/cuda/graph/CMakeLists.txt
Normal file
@@ -0,0 +1,39 @@
|
||||
cmake_minimum_required(VERSION 3.18)

# Default to the build machine's GPU architecture ("native" needs CMake 3.24+).
# This is decided before project(): once the CUDA language is enabled the
# variable is always defined, and a plain set() after that point would shadow
# an architecture list given via -DCMAKE_CUDA_ARCHITECTURES=... or CUDAARCHS.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES
   AND NOT DEFINED ENV{CUDAARCHS}
   AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
    set(CMAKE_CUDA_ARCHITECTURES native)
endif()

project(CUDAGraphDemo LANGUAGES CXX CUDA)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)

set(TRACY_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../.."
    CACHE PATH "Root of the Tracy repository")
set(TRACY_PUBLIC "${TRACY_PATH}/public")

find_package(CUDAToolkit REQUIRED)
find_package(Threads REQUIRED)

# cuda-graph-demo.cu embeds Tracy via #include <TracyClient.cpp> (unity build),
# so no separate TracyClient library is needed — just expose the public headers.
add_executable(cuda-graph-demo cuda-graph-demo.cu)
target_include_directories(cuda-graph-demo PRIVATE "${TRACY_PUBLIC}")
target_link_libraries(cuda-graph-demo PRIVATE
    CUDA::cupti CUDA::cuda_driver Threads::Threads ${CMAKE_DL_LIBS})

# ctest-related integration below
# to run the binaries via ctest:
#   ctest --test-dir <cmake-build-dir> -R <binary-name> -C <build-config>

enable_testing()
add_test(NAME cuda-graph-demo COMMAND cuda-graph-demo)

# On Windows, CUPTI's DLL must be on PATH at runtime.
if(WIN32)
    set(_cupti_dir "$<TARGET_FILE_DIR:CUDA::cupti>")
    set_target_properties(cuda-graph-demo PROPERTIES
        VS_DEBUGGER_ENVIRONMENT "PATH=${_cupti_dir};$ENV{PATH}")

    # The ENVIRONMENT test property is a ;-separated list of VAR=VALUE entries.
    # Every ';' that belongs to the PATH value must therefore be escaped, or
    # the list is split there and PATH ends up holding only the first entry.
    string(REPLACE ";" "\\;" _escaped_path "$ENV{PATH}")
    set_tests_properties(cuda-graph-demo PROPERTIES
        ENVIRONMENT "PATH=${_cupti_dir}\\;${_escaped_path}")
endif()
|
||||
11
examples/cuda/graph/build.sh
Normal file
11
examples/cuda/graph/build.sh
Normal file
@@ -0,0 +1,11 @@
|
||||
#!/bin/sh
# Builds cuda-graph-demo directly with nvcc, without CMake.
# Run it from this directory. Each path below can be overridden from the
# environment, e.g.:
#   TRACY_PATH=/src/tracy CUDA_TOOLKIT_PATH=/opt/cuda sh build.sh
set -e

# Root of the Tracy checkout; the default matches the CMake build of this
# example, which also resolves it as three levels up.
TRACY_PATH="${TRACY_PATH:-../../..}"
CUDA_TOOLKIT_PATH="${CUDA_TOOLKIT_PATH:-/usr/local/cuda}"
CUDA_CUPTI_PATH="${CUDA_CUPTI_PATH:-${CUDA_TOOLKIT_PATH}/extras/CUPTI}"

# pass -v to nvcc for verbose build information
# pthread and dl are linked to mirror the CMake build (Threads, CMAKE_DL_LIBS).
nvcc -O2 -std=c++17 cuda-graph-demo.cu \
    -o cuda-graph-demo \
    -I "${TRACY_PATH}/public" \
    -I "${CUDA_CUPTI_PATH}/include" -I "${CUDA_TOOLKIT_PATH}/include" \
    -L "${CUDA_CUPTI_PATH}/lib64" -L "${CUDA_TOOLKIT_PATH}/lib64" \
    -lcupti -lcuda -lpthread -ldl
|
||||
146
examples/cuda/graph/cuda-graph-demo.cu
Normal file
146
examples/cuda/graph/cuda-graph-demo.cu
Normal file
@@ -0,0 +1,146 @@
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// WARN: for simplicity, we enable and "embed" the Tracy client directly into the code
|
||||
#define TRACY_ENABLE
|
||||
#include <TracyClient.cpp>
|
||||
|
||||
#include <tracy/Tracy.hpp>
|
||||
#include <tracy/TracyCUDA.hpp>
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
// Evaluates a CUDA runtime call exactly once. Any status other than
// cudaSuccess is reported on stderr (error name, call site, description)
// and terminates the process with EXIT_FAILURE.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        const cudaError_t status__ = (call);                                  \
        if (status__ == cudaSuccess) break;                                   \
        std::fprintf(stderr, "CUDA error %s at %s:%d: %s\n",                  \
                     cudaGetErrorName(status__), __FILE__, __LINE__,          \
                     cudaGetErrorString(status__));                           \
        std::exit(EXIT_FAILURE);                                              \
    } while (0)
|
||||
|
||||
// Computes y[i] = a * x[i] + y[i] for i in [0, n), one element per thread.
// Launch with a 1D grid of 1D blocks that covers at least n threads.
__global__ void saxpy(float a, const float* x, float* y, int n)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) {
        return;  // surplus threads in the last block have nothing to do
    }
    y[idx] = a * x[idx] + y[idx];
}
|
||||
|
||||
// Times a chain of saxpy kernels launched directly on a stream against the
// same chain replayed as an instantiated CUDA graph, with a Tracy CUDA context
// profiling the run. Prints both timings and the speedup, then verifies the
// output of the graph replay.
//
// Returns EXIT_SUCCESS when every element is within 1% of the expected value,
// EXIT_FAILURE otherwise (so the ctest registration of this binary actually
// detects wrong results). Any failing CUDA call exits through CUDA_CHECK.
int main()
{
    // CUPTI-backed Tracy context. Auto-captures all CUDA activity from the
    // point StartProfiling() is called until StopProfiling(). The background
    // collector thread flushes activity into Tracy; the explicit Collect()
    // calls below just force a flush at known phase boundaries.
    auto* cudaCtx = TracyCUDAContext();
    {
        constexpr char ctxName[] = "CUDA Graph Demo";
        TracyCUDAContextName(cudaCtx, ctxName, sizeof(ctxName) - 1);
    }
    TracyCUDAStartProfiling(cudaCtx);

    constexpr int N = 1 << 16;             // small N => kernel is short => launch overhead dominates
    constexpr int KERNELS_PER_GRAPH = 32;  // chain length captured into the graph
    constexpr int OUTER_ITERS = 2000;      // how many times we replay the chain

    // allocate device buffers
    float *dX = nullptr, *dY = nullptr;
    CUDA_CHECK(cudaMalloc(&dX, N * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&dY, N * sizeof(float)));

    std::vector<float> hX(N, 1.0f);
    CUDA_CHECK(cudaMemcpy(dX, hX.data(), N * sizeof(float), cudaMemcpyHostToDevice));

    cudaStream_t stream = nullptr;
    CUDA_CHECK(cudaStreamCreate(&stream));

    const dim3 block(256);
    const dim3 grid((N + block.x - 1) / block.x);

    cudaEvent_t evStart, evStop;
    CUDA_CHECK(cudaEventCreate(&evStart));
    CUDA_CHECK(cudaEventCreate(&evStop));

    // warm-up (so first-launch lazy-init and/or JIT doesn't bias the measurement)
    // dY is zeroed first: saxpy reads y, and cudaMalloc does not initialize it.
    CUDA_CHECK(cudaMemsetAsync(dY, 0, N * sizeof(float), stream));
    saxpy<<<grid, block, 0, stream>>>(0.0f, dX, dY, N);
    // A <<<>>> launch returns nothing; launch failures are read back here.
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaStreamSynchronize(stream));

    // baseline: launch each kernel directly on the stream
    float msStream = 0.0f;
    {
        ZoneScopedN("stream-launches");
        CUDA_CHECK(cudaMemsetAsync(dY, 0, N * sizeof(float), stream));
        CUDA_CHECK(cudaEventRecord(evStart, stream));
        for (int outer = 0; outer < OUTER_ITERS; ++outer) {
            for (int k = 0; k < KERNELS_PER_GRAPH; ++k) {
                saxpy<<<grid, block, 0, stream>>>(1.0e-6f, dX, dY, N);
            }
        }
        // Checked once after the loop so the timed launches stay untouched.
        CUDA_CHECK(cudaGetLastError());
        CUDA_CHECK(cudaEventRecord(evStop, stream));
        CUDA_CHECK(cudaEventSynchronize(evStop));
        CUDA_CHECK(cudaEventElapsedTime(&msStream, evStart, evStop));
        TracyCUDACollect(cudaCtx);
    }

    // capture: record the same kernel chain into a graph
    cudaGraph_t graph = nullptr;
    cudaGraphExec_t graphExec = nullptr;
    {
        ZoneScopedN("graph-capture");
        // cudaStreamCaptureModeRelaxed allows the calling thread to perform
        // unrelated CUDA work during capture; ThreadLocal is stricter if you need
        // isolation. Most short, single-stream captures work fine in either mode.
        CUDA_CHECK(cudaStreamBeginCapture(stream, cudaStreamCaptureModeRelaxed));
        for (int k = 0; k < KERNELS_PER_GRAPH; ++k) {
            saxpy<<<grid, block, 0, stream>>>(1.0e-6f, dX, dY, N);
        }
        CUDA_CHECK(cudaGetLastError());
        CUDA_CHECK(cudaStreamEndCapture(stream, &graph));

        // Instantiate once -> reusable executable graph.
        CUDA_CHECK(cudaGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));

        // The template graph isn't needed once instantiated.
        CUDA_CHECK(cudaGraphDestroy(graph));
    }

    // replay: launch the instantiated graph OUTER_ITERS times
    float msGraph = 0.0f;
    {
        ZoneScopedN("graph-launches");
        CUDA_CHECK(cudaMemsetAsync(dY, 0, N * sizeof(float), stream));
        CUDA_CHECK(cudaEventRecord(evStart, stream));
        for (int outer = 0; outer < OUTER_ITERS; ++outer) {
            CUDA_CHECK(cudaGraphLaunch(graphExec, stream));
        }
        CUDA_CHECK(cudaEventRecord(evStop, stream));
        CUDA_CHECK(cudaEventSynchronize(evStop));
        CUDA_CHECK(cudaEventElapsedTime(&msGraph, evStart, evStop));
        TracyCUDACollect(cudaCtx);
    }

    // sanity check: y[i] = OUTER_ITERS * KERNELS_PER_GRAPH * 1e-6 * x[i]
    // dY was re-zeroed before the replay, so this validates the graph path.
    std::vector<float> hY(N);
    CUDA_CHECK(cudaMemcpy(hY.data(), dY, N * sizeof(float), cudaMemcpyDeviceToHost));
    const float expected = float(OUTER_ITERS) * float(KERNELS_PER_GRAPH) * 1.0e-6f;

    // The sum is built from many rounded float additions, so compare with a
    // relative tolerance instead of expecting a bit-exact match.
    const float tolerance = 1.0e-2f * expected;
    int mismatches = 0;
    for (const float value : hY) {
        const float diff = value > expected ? value - expected : expected - value;
        if (!(diff <= tolerance)) ++mismatches;  // negated form also counts NaN
    }

    std::printf("Stream launches: %8.3f ms (%d kernels)\n",
                msStream, OUTER_ITERS * KERNELS_PER_GRAPH);
    std::printf("Graph launches: %8.3f ms (%d graph launches x %d kernels)\n",
                msGraph, OUTER_ITERS, KERNELS_PER_GRAPH);
    std::printf("Speedup : %8.2fx\n", msStream / msGraph);
    std::printf("hY[0] = %.6e (expected %.6e)\n", hY[0], expected);
    if (mismatches != 0) {
        std::fprintf(stderr, "Verification failed: %d of %d elements differ from %.6e by more than 1%%\n",
                     mismatches, N, expected);
    }

    // shutdown
    CUDA_CHECK(cudaGraphExecDestroy(graphExec));
    CUDA_CHECK(cudaEventDestroy(evStart));
    CUDA_CHECK(cudaEventDestroy(evStop));
    CUDA_CHECK(cudaStreamDestroy(stream));
    CUDA_CHECK(cudaFree(dX));
    CUDA_CHECK(cudaFree(dY));

    TracyCUDAStopProfiling(cudaCtx);
    TracyCUDAContextDestroy(cudaCtx);
    return mismatches == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
|
||||
@@ -3,3 +3,151 @@ function Link(el)
|
||||
el.attributes['reference'] = nil
|
||||
return el
|
||||
end
|
||||
|
||||
-- Unwrap every Div (e.g. table/titlepage containers): the wrapper itself is
-- discarded and whatever it contained is returned in its place.
function Div(el)
    local inner = el.content
    return inner
end
|
||||
|
||||
-- ---------------------------------------------------------------------------
|
||||
-- LaTeX math -> plain-text approximation.
|
||||
--
|
||||
-- The target Markdown renderer has no math support, so a raw "$\frac{1}{2}$"
|
||||
-- would show verbatim. We turn each math node into the closest Unicode/ASCII
|
||||
-- equivalent: fractions become "a/b", \times becomes "x", super/subscripts use
|
||||
-- Unicode digits, and the one multi-line display equation becomes a fenced
|
||||
-- code block (Markdown collapses plain newlines, a code block keeps them).
|
||||
-- ---------------------------------------------------------------------------
|
||||
|
||||
-- Unicode superscript forms of the ASCII digits and of + - = ( ).
local sup = {
    ['0'] = '⁰', ['1'] = '¹', ['2'] = '²', ['3'] = '³', ['4'] = '⁴',
    ['5'] = '⁵', ['6'] = '⁶', ['7'] = '⁷', ['8'] = '⁸', ['9'] = '⁹',
    ['+'] = '⁺', ['-'] = '⁻', ['='] = '⁼', ['('] = '⁽', [')'] = '⁾',
}

-- Unicode subscript forms of the same characters.
local sub = {
    ['0'] = '₀', ['1'] = '₁', ['2'] = '₂', ['3'] = '₃', ['4'] = '₄',
    ['5'] = '₅', ['6'] = '₆', ['7'] = '₇', ['8'] = '₈', ['9'] = '₉',
    ['+'] = '₊', ['-'] = '₋', ['='] = '₌', ['('] = '₍', [')'] = '₎',
}
|
||||
|
||||
-- Symbol replacements, applied in list order as literal substitutions.
-- Ordering invariant: a command must come before every entry that is a prefix
-- of it, otherwise the shorter entry consumes its beginning first
-- (\rightarrow before \right, \left before \le, \cdots before \cdot, ...).
local symbols = {
    -- arrows
    {'\\leftrightarrow','↔'}, {'\\rightarrow','→'}, {'\\leftarrow','←'},
    {'\\Rightarrow','⇒'}, {'\\Leftarrow','⇐'}, {'\\to','→'}, {'\\mapsto','↦'},
    -- delimiter sizing: dropped. Must precede \le, which is a prefix of \left.
    {'\\left',''}, {'\\right',''},
    -- ellipses. \cdots must precede \cdot, which is a prefix of it.
    {'\\ldots','…'}, {'\\cdots','…'}, {'\\dots','…'}, {'\\infty','∞'},
    -- operators and relations
    {'\\times','×'}, {'\\cdot','·'}, {'\\div','÷'}, {'\\ast','*'}, {'\\star','*'},
    {'\\leq','≤'}, {'\\geq','≥'}, {'\\neq','≠'}, {'\\approx','≈'}, {'\\equiv','≡'},
    {'\\ll','«'}, {'\\gg','»'}, {'\\le','≤'}, {'\\ge','≥'},
    {'\\pm','±'}, {'\\mp','∓'}, {'\\propto','∝'}, {'\\sum','Σ'}, {'\\prod','Π'},
    -- Greek letters
    {'\\alpha','α'}, {'\\beta','β'}, {'\\gamma','γ'}, {'\\delta','δ'}, {'\\Delta','Δ'},
    {'\\mu','µ'}, {'\\sigma','σ'}, {'\\pi','π'}, {'\\lambda','λ'}, {'\\theta','θ'},
    -- spacing commands
    {'\\qquad',' '}, {'\\quad',' '}, {'\\,',' '}, {'\\;',' '}, {'\\:',' '},
    {'\\ ',' '}, {'\\!',''},
    -- escaped literal characters
    {'\\%','%'}, {'\\#','#'}, {'\\&','&'}, {'\\_','_'}, {'\\{','{'}, {'\\}','}'},
    {'\\$','$'},
}
|
||||
|
||||
-- Replace every occurrence of the literal string `a` in `s` with `b`.
-- `a` is matched as plain text (no Lua pattern magic), scanning left to right
-- without re-examining text that was just inserted. Returns the new string.
-- An empty `a` leaves `s` unchanged: a plain find of '' succeeds at every
-- position without consuming anything, so the scan would never advance.
local function lit_replace(s, a, b)
    if a == '' then return s end
    local out, i = {}, 1
    while true do
        local p = s:find(a, i, true)
        if not p then
            out[#out + 1] = s:sub(i)  -- remainder after the last match
            break
        end
        out[#out + 1] = s:sub(i, p - 1)
        out[#out + 1] = b
        i = p + #a
    end
    return table.concat(out)
end
|
||||
|
||||
-- Return a "%b{}" capture without its first and last character (the braces).
local function grp(b)
    return b:sub(2, -2)
end
|
||||
|
||||
-- Translate `txt` one character at a time through the lookup table `map`.
-- Returns the concatenated translations, or nil as soon as a character has
-- no entry in `map`.
local function map_script(txt, map)
    local pieces = {}
    local pos = 1
    while pos <= #txt do
        local mapped = map[txt:sub(pos, pos)]
        if not mapped then
            return nil
        end
        pieces[#pieces + 1] = mapped
        pos = pos + 1
    end
    return table.concat(pieces)
end
|
||||
|
||||
-- Turn a LaTeX math string into a plain-text approximation.
-- The passes run in a fixed order: text/font wrappers, fractions, roots,
-- single-character scripts, braced scripts and finally the literal symbol
-- table. Braced arguments are converted recursively.
local function convert(s)
    -- Shared callback: convert the inside of a "%b{}" capture.
    local function unwrap(braced)
        return convert(grp(braced))
    end

    -- Text/font wrappers carry no meaning in plain text; keep only the content.
    local wrappers = {'text', 'mathrm', 'mathit', 'mathbf', 'mathbb',
                      'mathsf', 'mathtt', 'mathcal', 'operatorname',
                      'textbf', 'textit', 'textrm'}
    for _, name in ipairs(wrappers) do
        s = s:gsub('\\' .. name .. '(%b{})', unwrap)
    end

    -- Fractions become "num/den"; the slash is padded with spaces when either
    -- side itself contains a space.
    local function as_ratio(num, den)
        local top, bottom = unwrap(num), unwrap(den)
        if top:find(' ', 1, true) or bottom:find(' ', 1, true) then
            return top .. ' / ' .. bottom
        end
        return top .. '/' .. bottom
    end
    for _, name in ipairs({'frac', 'dfrac', 'tfrac', 'sfrac'}) do
        s = s:gsub('\\' .. name .. '(%b{})(%b{})', as_ratio)
    end

    -- Roots.
    s = s:gsub('\\sqrt(%b{})', function(radicand)
        return '√(' .. unwrap(radicand) .. ')'
    end)

    -- Single-character scripts go first, so the output of the braced fallback
    -- below (e.g. "_native") is not scanned again and turned into subscripts.
    s = s:gsub('%^(%w)', function(ch)
        local mapped = sup[ch]
        if mapped then return mapped end
        return '^' .. ch
    end)
    s = s:gsub('_(%w)', function(ch)
        local mapped = sub[ch]
        if mapped then return mapped end
        return '_' .. ch
    end)

    -- Braced scripts: Unicode when every character has a mapping, otherwise a
    -- readable "^(...)" / "_..." form.
    s = s:gsub('%^(%b{})', function(braced)
        local body = unwrap(braced)
        local mapped = map_script(body, sup)
        if mapped then return mapped end
        return '^(' .. body .. ')'
    end)
    s = s:gsub('_(%b{})', function(braced)
        local body = unwrap(braced)
        local mapped = map_script(body, sub)
        if mapped then return mapped end
        return '_' .. body
    end)

    -- Whatever commands remain are handled by the literal symbol table.
    for _, entry in ipairs(symbols) do
        s = lit_replace(s, entry[1], entry[2])
    end
    return s
end
|
||||
|
||||
-- Convert a display equation to plain text, preserving its line structure so
-- it can be placed in a code block. Returns the lines joined with '\n'; every
-- line after the first gets a leading ' '.
local function convert_display(s)
    -- Row breaks are turned into newlines BEFORE the symbol pass. Otherwise a
    -- break followed by a space ("\\ ") is seen by convert() as a backslash
    -- plus the "\ " spacing command, which swallows the break and leaves a
    -- stray backslash behind.
    s = lit_replace(s, '\\\\', '\n')
    s = convert(s)
    -- Environment markers carry no content of their own.
    for _, env in ipairs({'cases', 'aligned', 'align', 'array', 'matrix',
                          'gathered', 'split'}) do
        s = lit_replace(s, '\\begin{' .. env .. '}', '')
        s = lit_replace(s, '\\end{' .. env .. '}', '')
    end
    s = s:gsub('%s*&%s*', ' ') -- column separator -> spacing
    -- Trim each line and drop the empty ones.
    local lines = {}
    for line in (s .. '\n'):gmatch('(.-)\n') do
        line = line:gsub('^%s+', ''):gsub('%s+$', '')
        if line ~= '' then lines[#lines + 1] = line end
    end
    for i = 2, #lines do lines[i] = ' ' .. lines[i] end -- indent continuations
    return table.concat(lines, '\n')
end
|
||||
|
||||
-- Inline math is replaced by a plain Str holding its text approximation.
-- Display math is returned untouched: Para handles it at block level so that
-- it can be emitted as a code block.
function Math(el)
    if el.mathtype ~= 'DisplayMath' then
        return pandoc.Str(convert(el.text))
    end
    return el
end
|
||||
|
||||
-- Turn a paragraph that consists only of display equations (plus optional
-- Space/SoftBreak/LineBreak elements) into a single code block. Returns nil,
-- leaving the paragraph as it is, when it holds anything else or no display
-- equation at all.
function Para(el)
    local displays = {}
    for _, item in ipairs(el.content) do
        if item.t == 'Math' and item.mathtype == 'DisplayMath' then
            displays[#displays + 1] = item
        elseif not (item.t == 'Space' or item.t == 'SoftBreak' or item.t == 'LineBreak') then
            return nil  -- real text present: not a pure display-math paragraph
        end
    end
    if #displays == 0 then
        return nil
    end

    local rendered = {}
    for idx, eq in ipairs(displays) do
        rendered[idx] = convert_display(eq.text)
    end
    -- Separate equations by a blank line inside the block.
    return pandoc.CodeBlock(table.concat(rendered, '\n\n'))
end
|
||||
|
||||
@@ -7,12 +7,18 @@ sed -i -e 's@\\ctrl@Ctrl@g' _tmp.tex
|
||||
sed -i -e 's@\\shift@Shift@g' _tmp.tex
|
||||
sed -i -e 's@\\Alt@Alt@g' _tmp.tex
|
||||
sed -i -e 's@\\del@Delete@g' _tmp.tex
|
||||
python3 fa-icons.py ../profiler/src/profiler/IconsFontAwesome6.h _tmp.tex
|
||||
python3 fa-icons.py ../profiler/src/profiler/IconsFontAwesome7.h _tmp.tex
|
||||
sed -i -e 's@\\LMB{}~@@g' _tmp.tex
|
||||
sed -i -e 's@\\MMB{}~@@g' _tmp.tex
|
||||
sed -i -e 's@\\RMB{}~@@g' _tmp.tex
|
||||
sed -i -e 's@\\Scroll{}~@@g' _tmp.tex
|
||||
|
||||
# Resolve \circled{} markers and lstlisting escapeinside (@...@) snippets, which
|
||||
# pandoc would otherwise emit verbatim or drop, to their Unicode equivalents.
|
||||
sed -i -e 's|@\\circled{a}@|(a)|g' -e 's|@\\circled{b}@|(b)|g' -e 's|@\\circled{c}@|(c)|g' _tmp.tex
|
||||
sed -i -e 's|\\circled{a}|(a)|g' -e 's|\\circled{b}|(b)|g' -e 's|\\circled{c}|(c)|g' _tmp.tex
|
||||
sed -i -e 's|@\\ldots@|…|g' _tmp.tex
|
||||
|
||||
sed -i -e 's@\\nameref{quicklook}@A quick look at Tracy Profiler@g' _tmp.tex
|
||||
sed -i -e 's@\\nameref{firststeps}@First steps@g' _tmp.tex
|
||||
sed -i -e 's@\\nameref{client}@Client markup@g' _tmp.tex
|
||||
@@ -26,7 +32,10 @@ sed -i -e 's@\\nameref{configurationfiles}@Configuration files@g' _tmp.tex
|
||||
awk -f bclogo2quote.awk _tmp.tex > _tmp_quoted.tex
|
||||
mv _tmp_quoted.tex _tmp.tex
|
||||
|
||||
pandoc --wrap=none --reference-location=block --number-sections -L filter.lua -s _tmp.tex -o tracy.md
|
||||
pandoc --wrap=none --reference-location=block --number-sections -L filter.lua -t 'markdown-simple_tables-multiline_tables-grid_tables+pipe_tables' -s _tmp.tex -o tracy.md
|
||||
|
||||
awk -f tablecaption.awk tracy.md > _tmp_caption.md
|
||||
mv _tmp_caption.md tracy.md
|
||||
|
||||
sed -i -e 's/^> \*\*IMPORTANT:\([^*]*\)\*\*/> [!IMPORTANT]\
|
||||
> **\1**/' tracy.md
|
||||
@@ -37,6 +46,6 @@ sed -i -e 's/^> \*\*CAUTION:\([^*]*\)\*\*/> [!CAUTION]\
|
||||
sed -i -e 's/^> \*\*NOTE:\([^*]*\)\*\*/> [!NOTE]\
|
||||
> **\1**/' tracy.md
|
||||
|
||||
python3 icon-explain.py ../profiler/src/profiler/IconsFontAwesome6.h tracy.md
|
||||
python3 icon-explain.py ../profiler/src/profiler/IconsFontAwesome7.h tracy.md
|
||||
|
||||
rm -f _tmp.tex
|
||||
|
||||
16
manual/tablecaption.awk
Normal file
16
manual/tablecaption.awk
Normal file
@@ -0,0 +1,16 @@
|
||||
# GitHub renders pandoc's table-caption lines (those starting with ": ")
# literally rather than as captions. This script removes the ": " marker and
# wraps the caption in "_..._" so it shows up in italics. A caption continues
# onto the next physical line for as long as the current line ends in a
# backslash (hard line break). Underscores are used for the emphasis so that
# "*...*" markup already inside a caption is left alone.
#
# State: incap is 1 while inside a caption that has not been closed yet.
{
    if (!incap && /^: /) {
        # Caption starts: swap the ": " marker for the opening underscore.
        incap = 1
        $0 = "_" substr($0, 3)
    }
    if (incap && !/\\$/) {
        # Last line of the caption: add the closing underscore.
        print $0 "_"
        incap = 0
        next
    }
    # Either a caption line that continues, or an ordinary line.
    print
}
|
||||
390
manual/tracy.md
390
manual/tracy.md
@@ -3,7 +3,6 @@ bibliography:
|
||||
- tracy.bib
|
||||
---
|
||||
|
||||
::: titlepage
|
||||
Tracy Profiler
|
||||
|
||||
The user manual
|
||||
@@ -12,8 +11,7 @@ The user manual
|
||||
|
||||
**Bartosz Taudul** [\<wolf@nereid.pl\>](mailto:wolf@nereid.pl)
|
||||
|
||||
2026-06-05 <https://github.com/wolfpld/tracy>
|
||||
:::
|
||||
2026-06-06 <https://github.com/wolfpld/tracy>
|
||||
|
||||
# Quick overview {#quick-overview .unnumbered}
|
||||
|
||||
@@ -95,11 +93,11 @@ The concept of Tracy being a real-time profiler may be explained in a couple of
|
||||
|
||||
It is hard to imagine how long a nanosecond is. One good analogy is to compare it with a measure of length. Let's say that one second is one meter (the average doorknob is at the height of one meter).
|
||||
|
||||
One millisecond ($\frac{1}{1000}$ of a second) would be then the length of a millimeter. The average size of a red ant or the width of a pencil is 5 or 6 mm. A modern game running at 60 frames per second has only 16 ms to update the game world and render the entire scene.
|
||||
One millisecond (1/1000 of a second) would be then the length of a millimeter. The average size of a red ant or the width of a pencil is 5 or 6 mm. A modern game running at 60 frames per second has only 16 ms to update the game world and render the entire scene.
|
||||
|
||||
One microsecond ($\frac{1}{1000}$ of a millisecond) in our comparison equals one micron. The diameter of a typical bacterium ranges from 1 to 10 microns. The diameter of a red blood cell or width of a strand of spider web silk is about 7 μm.
|
||||
One microsecond (1/1000 of a millisecond) in our comparison equals one micron. The diameter of a typical bacterium ranges from 1 to 10 microns. The diameter of a red blood cell or width of a strand of spider web silk is about 7 μm.
|
||||
|
||||
And finally, one nanosecond ($\frac{1}{1000}$ of a microsecond) would be one nanometer. The modern microprocessor transistor gate, the width of the DNA helix, or the thickness of a cell membrane are in the range of 5 nm. In one ns the light can travel only 30 cm.
|
||||
And finally, one nanosecond (1/1000 of a microsecond) would be one nanometer. The modern microprocessor transistor gate, the width of the DNA helix, or the thickness of a cell membrane are in the range of 5 nm. In one ns the light can travel only 30 cm.
|
||||
|
||||
Tracy can achieve single-digit nanosecond measurement resolution due to usage of hardware timing mechanisms on the x86 and ARM architectures[^4]. Other profilers may rely on the timers provided by the operating system, which do have significantly reduced resolution (about 300 ns -- 1 μs). This is enough to hide the subtle impact of cache access optimization, etc.
|
||||
|
||||
@@ -115,7 +113,7 @@ It is wrong to think so. Optimizing a function to execute in 430 ns, instead of
|
||||
|
||||
[^6]: This is a real optimization case. The values are median function run times and do not reflect the real execution time, which explains the discrepancy in the total reported time.
|
||||
|
||||
You also need to understand how timer precision is reflected in measurement errors. Take a look at figure [1](#timer). There you can see three discrete timer tick events, which increase the value reported by the timer by 300 ns. You can also see four readings of time ranges, marked $A_1$, $A_2$; $B_1$, $B_2$; $C_1$, $C_2$ and $D_1$, $D_2$.
|
||||
You also need to understand how timer precision is reflected in measurement errors. Take a look at figure [1](#timer). There you can see three discrete timer tick events, which increase the value reported by the timer by 300 ns. You can also see four readings of time ranges, marked A₁, A₂; B₁, B₂; C₁, C₂ and D₁, D₂.
|
||||
|
||||
<figure id="timer" data-latex-placement="h">
|
||||
|
||||
@@ -124,11 +122,11 @@ You also need to understand how timer precision is reflected in measurement erro
|
||||
|
||||
Now let's take a look at the timer readings.
|
||||
|
||||
- The $A$ and $D$ ranges both take a very short amount of time (10 ns), but the $A$ range is reported as 300 ns, and the $D$ range is reported as 0 ns.
|
||||
- The A and D ranges both take a very short amount of time (10 ns), but the A range is reported as 300 ns, and the D range is reported as 0 ns.
|
||||
|
||||
- The $B$ range takes a considerable amount of time (590 ns), but according to the timer readings, it took the same time (300 ns) as the short lived $A$ range.
|
||||
- The B range takes a considerable amount of time (590 ns), but according to the timer readings, it took the same time (300 ns) as the short lived A range.
|
||||
|
||||
- The $C$ range (610 ns) is only 20 ns longer than the $B$ range, but it is reported as 900 ns, a 600 ns difference!
|
||||
- The C range (610 ns) is only 20 ns longer than the B range, but it is reported as 900 ns, a 600 ns difference!
|
||||
|
||||
Here, you can see why using a high-precision timer is essential. While there is no escape from the measurement errors, a profiler can reduce their impact by increasing the timer accuracy.
|
||||
|
||||
@@ -190,20 +188,18 @@ You may wonder why you should use Tracy when so many other profilers are availab
|
||||
|
||||
## Performance impact {#perfimpact}
|
||||
|
||||
Let's profile an example application to check how much slowdown is introduced by using Tracy. For this purpose we have used etcpak[^10]. The input data was a $16384 \times 16384$ pixels test image, and the $4 \times 4$ pixel block compression function was selected to be instrumented. The image was compressed on 12 parallel threads, and the timing data represents a mean compression time of a single image.
|
||||
Let's profile an example application to check how much slowdown is introduced by using Tracy. For this purpose we have used etcpak[^10]. The input data was a 16384 × 16384 pixels test image, and the 4 × 4 pixel block compression function was selected to be instrumented. The image was compressed on 12 parallel threads, and the timing data represents a mean compression time of a single image.
|
||||
|
||||
[^10]: <https://github.com/wolfpld/etcpak>
|
||||
|
||||
The results are presented in table [1](#PerformanceImpact). Dividing the average of run time differences (37.7 ms) by the count of captured zones per single image (16777216) shows us that the impact of profiling is only 2.25 ns per zone (this includes two events: start and end of a zone).
|
||||
|
||||
::: {#PerformanceImpact}
|
||||
**Mode** **Zones (total)** **Zones (single image)** **Clean run** **Profiling run** **Difference**
|
||||
---------- ------------------- -------------------------- --------------- ------------------- ----------------
|
||||
ETC1 201326592 16777216 110.9 ms 148.2 ms +37.3 ms
|
||||
ETC2 201326592 16777216 212.4 ms 250.5 ms +38.1 ms
|
||||
| **Mode** | **Zones (total)** | **Zones (single image)** | **Clean run** | **Profiling run** | **Difference** |
|
||||
|:--:|:--:|:--:|:--:|:--:|:--:|
|
||||
| ETC1 | 201326592 | 16777216 | 110.9 ms | 148.2 ms | +37.3 ms |
|
||||
| ETC2 | 201326592 | 16777216 | 212.4 ms | 250.5 ms | +38.1 ms |
|
||||
|
||||
: Zone capture time cost.
|
||||
:::
|
||||
_Zone capture time cost._
|
||||
|
||||
### Assembly analysis
|
||||
|
||||
@@ -401,7 +397,7 @@ Here's a sample command to set up a build directory with profiling enabled. The
|
||||
|
||||
### Short-lived applications
|
||||
|
||||
In case you want to profile a short-lived program (for example, a compression utility that finishes its work in one second), set the `TRACY_NO_EXIT` environment variable to $1$. With this option enabled, Tracy will not exit until an incoming connection is made, even if the application has already finished executing. If your platform doesn't support an easy setup of environment variables, you may also add the `TRACY_NO_EXIT` define to your build configuration, which has the same effect.
|
||||
In case you want to profile a short-lived program (for example, a compression utility that finishes its work in one second), set the `TRACY_NO_EXIT` environment variable to 1. With this option enabled, Tracy will not exit until an incoming connection is made, even if the application has already finished executing. If your platform doesn't support an easy setup of environment variables, you may also add the `TRACY_NO_EXIT` define to your build configuration, which has the same effect.
|
||||
|
||||
### On-demand profiling {#ondemand}
|
||||
|
||||
@@ -426,11 +422,11 @@ The program name that is sent out in the broadcast messages can be customized by
|
||||
|
||||
### Client network interface
|
||||
|
||||
By default, the Tracy client will listen on all network interfaces. If you want to restrict it to only listening on the localhost interface, define the `TRACY_ONLY_LOCALHOST` macro at compile-time, or set the `TRACY_ONLY_LOCALHOST` environment variable to $1$ at runtime.
|
||||
By default, the Tracy client will listen on all network interfaces. If you want to restrict it to only listening on the localhost interface, define the `TRACY_ONLY_LOCALHOST` macro at compile-time, or set the `TRACY_ONLY_LOCALHOST` environment variable to 1 at runtime.
|
||||
|
||||
If you need to use a specific Tracy client address, such as QNX requires, define the `TRACY_CLIENT_ADDRESS` macro at compile-time as the desired string address.
|
||||
|
||||
By default, the Tracy client will listen on IPv6 interfaces, falling back to IPv4 only if IPv6 is unavailable. If you want to restrict it to only listening on IPv4 interfaces, define the `TRACY_ONLY_IPV4` macro at compile-time, or set the `TRACY_ONLY_IPV4` environment variable to $1$ at runtime.
|
||||
By default, the Tracy client will listen on IPv6 interfaces, falling back to IPv4 only if IPv6 is unavailable. If you want to restrict it to only listening on IPv4 interfaces, define the `TRACY_ONLY_IPV4` macro at compile-time, or set the `TRACY_ONLY_IPV4` environment variable to 1 at runtime.
|
||||
|
||||
### Setup for multi-DLL projects
|
||||
|
||||
@@ -522,15 +518,13 @@ The best way to run Tracy is on bare metal. Avoid profiling applications in virt
|
||||
|
||||
Additionally, you can rebuild your application with the `TRACY_DISALLOW_HW_TIMER` define, which will disable usage of the hardware timer, even if it *appears* to be available. See table [2](#timeroptions) for details.
|
||||
|
||||
::: {#timeroptions}
|
||||
**Scenario** **HW timer** **Fallback timer**
|
||||
---------------------------------------------------- -------------- -----------------------
|
||||
Neither defined Used Not compiled in
|
||||
Only `TRACY_TIMER_FALLBACK` Used Compiled in as backup
|
||||
`TRACY_DISALLOW_HW_TIMER` + `TRACY_TIMER_FALLBACK` Disabled Used
|
||||
| **Scenario** | **HW timer** | **Fallback timer** |
|
||||
|:--:|:--:|:--:|
|
||||
| Neither defined | Used | Not compiled in |
|
||||
| Only `TRACY_TIMER_FALLBACK` | Used | Compiled in as backup |
|
||||
| `TRACY_DISALLOW_HW_TIMER` + `TRACY_TIMER_FALLBACK` | Disabled | Used |
|
||||
|
||||
: Timer options interaction
|
||||
:::
|
||||
_Timer options interaction_
|
||||
|
||||
#### Docker on Linux
|
||||
|
||||
@@ -558,13 +552,13 @@ Inside that header, enable any subset of the hooks you need by defining the corr
|
||||
|
||||
The available hooks are:
|
||||
|
||||
- `TRACY_HAS_CUSTOM_THREAD_ID` $\rightarrow$ `tracy::PlatformGetThreadId()`. Required.
|
||||
- `TRACY_HAS_CUSTOM_THREAD_ID` → `tracy::PlatformGetThreadId()`. Required.
|
||||
|
||||
- `TRACY_HAS_CUSTOM_USER_INFO` $\rightarrow$ `tracy::PlatformGetHostname()`, `tracy::PlatformGetUserLogin()`, `tracy::PlatformGetUserFullName()`.
|
||||
- `TRACY_HAS_CUSTOM_USER_INFO` → `tracy::PlatformGetHostname()`, `tracy::PlatformGetUserLogin()`, `tracy::PlatformGetUserFullName()`.
|
||||
|
||||
- `TRACY_HAS_CUSTOM_SAFE_COPY` $\rightarrow$ `tracy::PlatformSafeMemcpy()`.
|
||||
- `TRACY_HAS_CUSTOM_SAFE_COPY` → `tracy::PlatformSafeMemcpy()`.
|
||||
|
||||
- `TRACY_HAS_CUSTOM_ALLOCATOR` $\rightarrow$ `tracy::PlatformMalloc()`, `tracy::PlatformFree()`, `tracy::PlatformRealloc()`, `tracy::PlatformAllocatorInit()`, `tracy::PlatformAllocatorThreadInit()`, `tracy::PlatformAllocatorFinalize()`, `tracy::PlatformAllocatorThreadFinalize()`.
|
||||
- `TRACY_HAS_CUSTOM_ALLOCATOR` → `tracy::PlatformMalloc()`, `tracy::PlatformFree()`, `tracy::PlatformRealloc()`, `tracy::PlatformAllocatorInit()`, `tracy::PlatformAllocatorThreadInit()`, `tracy::PlatformAllocatorFinalize()`, `tracy::PlatformAllocatorThreadFinalize()`.
|
||||
|
||||
Template files are provided in the repository ( `examples/CustomPlatform/CustomPlatform(.h|.cpp)` ). See `CustomPlatform.h` for the contract each `Platform*` function must satisfy (return values, threading guarantees, and footguns to avoid). Copy these files into your project, fill in the bodies for the hooks you enable, and point Tracy at the header.
|
||||
|
||||
@@ -604,11 +598,11 @@ When using Tracy Profiler, keep in mind the following requirements:
|
||||
|
||||
- If there are recursive zones at any point in a zone stack, each unique zone source location should not appear more than 255 times.
|
||||
|
||||
- Profiling session cannot be longer than 1.6 days ($2^{47}$ ns). This also includes on-demand sessions.
|
||||
- Profiling session cannot be longer than 1.6 days (2⁴⁷ ns). This also includes on-demand sessions.
|
||||
|
||||
- No more than 4 billion ($2^{32}$) memory free events may be recorded.
|
||||
- No more than 4 billion (2³²) memory free events may be recorded.
|
||||
|
||||
- No more than 16 million ($2^{24}$) unique call stacks can be captured.
|
||||
- No more than 16 million (2²⁴) unique call stacks can be captured.
|
||||
|
||||
[^18]: A source location is a place in the code, which is identified by source file name and line number, for example, when you markup a zone.
|
||||
|
||||
@@ -900,31 +894,29 @@ This is an automatic process, and it doesn't require user interaction. If you ar
|
||||
|
||||
Some features of the profiler are only available on selected platforms. Please refer to table [3](#featuretable) for details.
|
||||
|
||||
::: {#featuretable}
|
||||
**Feature** **Windows** **Linux** **Android** **OSX** **iOS** **BSD** **QNX**
|
||||
-------------------------- ------------- ----------- ------------- --------- --------- --------- ---------
|
||||
Profiling program init
|
||||
CPU zones
|
||||
Locks
|
||||
Plots
|
||||
Messages
|
||||
Memory
|
||||
GPU zones (OpenGL)
|
||||
GPU zones (Vulkan)
|
||||
GPU zones (Metal) ^*b*^ ^*b*^
|
||||
Call stacks
|
||||
Symbol resolution
|
||||
Crash handling
|
||||
CPU usage probing
|
||||
Context switches
|
||||
Wait stacks
|
||||
CPU topology information
|
||||
Call stack sampling
|
||||
Hardware sampling ^*a*^
|
||||
VSync capture
|
||||
| **Feature** | **Windows** | **Linux** | **Android** | **OSX** | **iOS** | **BSD** | **QNX** |
|
||||
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
|
||||
| Profiling program init | | | | | | | |
|
||||
| CPU zones | | | | | | | |
|
||||
| Locks | | | | | | | |
|
||||
| Plots | | | | | | | |
|
||||
| Messages | | | | | | | |
|
||||
| Memory | | | | | | | |
|
||||
| GPU zones (OpenGL) | | | | | | | |
|
||||
| GPU zones (Vulkan) | | | | | | | |
|
||||
| GPU zones (Metal) | | | | ^*b*^ | ^*b*^ | | |
|
||||
| Call stacks | | | | | | | |
|
||||
| Symbol resolution | | | | | | | |
|
||||
| Crash handling | | | | | | | |
|
||||
| CPU usage probing | | | | | | | |
|
||||
| Context switches | | | | | | | |
|
||||
| Wait stacks | | | | | | | |
|
||||
| CPU topology information | | | | | | | |
|
||||
| Call stack sampling | | | | | | | |
|
||||
| Hardware sampling | ^*a*^ | | | | | | |
|
||||
| VSync capture | | | | | | | |
|
||||
|
||||
: Feature support matrix
|
||||
:::
|
||||
_Feature support matrix_
|
||||
|
||||
-- Not possible to support due to platform limitations.\
|
||||
^*a*^Possible through WSL2. ^*b*^Only tested on Apple Silicon M1 series
|
||||
@@ -1045,7 +1037,7 @@ Images are sent using the `FrameImage(image, width, height, offset, flip)` macro
|
||||
|
||||
[^36]: For example, OpenGL flips images, but Vulkan does not.
|
||||
|
||||
Handling image data requires a lot of memory and bandwidth[^37]. To achieve sane memory usage, you should scale down taken screenshots to a suitable size, e.g., $320\times180$.
|
||||
Handling image data requires a lot of memory and bandwidth[^37]. To achieve sane memory usage, you should scale down taken screenshots to a suitable size, e.g., 320×180.
|
||||
|
||||
[^37]: One uncompressed 1080p image takes 8 MB.
|
||||
|
||||
@@ -1055,18 +1047,16 @@ To further reduce image data size, frame images are internally compressed using
|
||||
|
||||
[^39]: One pixel is stored in a nibble (4 bits) instead of 32 bits.
|
||||
|
||||
::: {#EtcSimd}
|
||||
**Implementation** **Required define** **Time**
|
||||
-------------------- --------------------- ----------
|
||||
x86 Reference --- 198.2 μs
|
||||
x86 SSE4.1^a^ `__SSE4_1__` 25.4 μs
|
||||
x86 AVX2 `__AVX2__` 17.4 μs
|
||||
ARM Reference --- 1.04 ms
|
||||
ARM32 NEON^b^ `__ARM_NEON` 529 μs
|
||||
ARM64 NEON `__ARM_NEON` 438 μs
|
||||
| **Implementation** | **Required define** | **Time** |
|
||||
|:------------------:|:-------------------:|:--------:|
|
||||
| x86 Reference | --- | 198.2 μs |
|
||||
| x86 SSE4.1^a^ | `__SSE4_1__` | 25.4 μs |
|
||||
| x86 AVX2 | `__AVX2__` | 17.4 μs |
|
||||
| ARM Reference | --- | 1.04 ms |
|
||||
| ARM32 NEON^b^ | `__ARM_NEON` | 529 μs |
|
||||
| ARM64 NEON | `__ARM_NEON` | 438 μs |
|
||||
|
||||
: Client compression time of $320\times180$ image. x86: Ryzen 9 3900X (MSVC); ARM: ODROID-C2 (gcc).
|
||||
:::
|
||||
_Client compression time of 320×180 image. x86: Ryzen 9 3900X (MSVC); ARM: ODROID-C2 (gcc)._
|
||||
|
||||
^a)^ VEX encoding; ^b)^ ARM32 NEON code compiled for ARM64
|
||||
|
||||
@@ -1077,7 +1067,7 @@ To further reduce image data size, frame images are internally compressed using
|
||||
>
|
||||
> - This second thread will be periodically woken up, even if there are no frame images to compress[^41]. If you are not using the frame image capture functionality and you don't wish this thread to be running, you can define the `TRACY_NO_FRAME_IMAGE` macro.
|
||||
>
|
||||
> - Due to implementation details of the network buffer, a single frame image cannot be greater than 256 KB after compression. Note that a $960\times540$ image fits in this limit.
|
||||
> - Due to implementation details of the network buffer, a single frame image cannot be greater than 256 KB after compression. Note that a 960×540 image fits in this limit.
|
||||
|
||||
[^40]: Small part of compression task is offloaded to the server.
|
||||
|
||||
@@ -1118,7 +1108,7 @@ Everything needs to be correctly initialized (the cleanup is left for the reader
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, 320*180*4, nullptr, GL_STREAM_READ);
|
||||
}
|
||||
|
||||
We will now set up a screen capture, which will downscale the screen contents to $320\times180$ pixels and copy the resulting image to a buffer accessible by the CPU when the operation is done. This should be placed right before *swap buffers* or *present* call.
|
||||
We will now set up a screen capture, which will downscale the screen contents to 320×180 pixels and copy the resulting image to a buffer accessible by the CPU when the operation is done. This should be placed right before *swap buffers* or *present* call.
|
||||
|
||||
assert(m_fiQueue.empty() || m_fiQueue.front() != m_fiIdx); // check for buffer overrun
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_fiFramebuffer[m_fiIdx]);
|
||||
@@ -1179,25 +1169,19 @@ With all this done, you can perform the screen capture as follows:
|
||||
While this approach is much more complex than the previously discussed one, the resulting image quality increase makes it worthwhile.
|
||||
|
||||
<figure id="highqualityss" data-latex-placement="h">
|
||||
<div class="minipage">
|
||||
<img src="images/screenshot-lo.png" style="width:90.0%" />
|
||||
</div>
|
||||
<div class="minipage">
|
||||
<img src="images/screenshot-hi.png" style="width:90.0%" />
|
||||
</div>
|
||||
<figcaption>High-quality screen shot</figcaption>
|
||||
</figure>
|
||||
|
||||
You can see the performance results you may expect in a simple application in table [5](#asynccapture). The naïve capture performs synchronous retrieval of a full-screen image and resizes it using *stb_image_resize*. The proper and high-quality captures do things as described in this chapter.
|
||||
|
||||
::: {#asynccapture}
|
||||
**Resolution** **Naïve capture** **Proper capture** **High quality**
|
||||
------------------ ------------------- -------------------- ------------------
|
||||
$1280\times720$ 80 FPS 4200 FPS 2800 FPS
|
||||
$2560\times1440$ 23 FPS 3300 FPS 1600 FPS
|
||||
| **Resolution** | **Naïve capture** | **Proper capture** | **High quality** |
|
||||
|:--------------:|:-----------------:|:------------------:|:----------------:|
|
||||
| 1280×720 | 80 FPS | 4200 FPS | 2800 FPS |
|
||||
| 2560×1440 | 23 FPS | 3300 FPS | 1600 FPS |
|
||||
|
||||
: Frame capture efficiency
|
||||
:::
|
||||
_Frame capture efficiency_
|
||||
|
||||
## Marking zones {#markingzones}
|
||||
|
||||
@@ -1241,15 +1225,15 @@ Zone objects can't be moved or copied.
|
||||
>
|
||||
> {
|
||||
> ZoneNamed(Zone1, true);
|
||||
> @\circled{a}@
|
||||
> (a)
|
||||
> {
|
||||
> ZoneNamed(Zone2, true);
|
||||
> @\circled{b}@
|
||||
> (b)
|
||||
> }
|
||||
> @\circled{c}@
|
||||
> (c)
|
||||
> }
|
||||
>
|
||||
> It is valid to set the `Zone1` text or name *only* in places or . After `Zone2` is created at you can no longer perform operations on `Zone1`, until `Zone2` is destroyed.
|
||||
> It is valid to set the `Zone1` text or name *only* in places (a) or (c). After `Zone2` is created at (b) you can no longer perform operations on `Zone1`, until `Zone2` is destroyed.
|
||||
|
||||
### Filtering zones {#filteringzones}
|
||||
|
||||
@@ -1366,7 +1350,7 @@ To configure how plot values are presented by the profiler, you may use the `Tra
|
||||
|
||||
- `tracy::PlotFormatType::Memory` -- treats the values as memory sizes. Will display kilobytes, megabytes, etc.
|
||||
|
||||
- `tracy::PlotFormatType::Percentage` -- values will be displayed as percentage (with value $100$ being equal to $100\%$).
|
||||
- `tracy::PlotFormatType::Percentage` -- values will be displayed as percentage (with value 100 being equal to 100%).
|
||||
|
||||
The `step` parameter determines whether the plot will be displayed as a staircase or will smoothly change between plot points (see figure [5](#plotconfig)). The `fill` parameter can be used to disable filling the area below the plot with a solid color.
|
||||
|
||||
@@ -1678,28 +1662,26 @@ Capture of true calls stacks can be performed by using macros with the `S` postf
|
||||
|
||||
Be aware that call stack collection is a relatively slow operation. Table [6](#CallstackTimes) and figure [6](#CallstackPlot) show how long it took to perform a single capture of varying depth on multiple CPU architectures.
|
||||
|
||||
::: {#CallstackTimes}
|
||||
**Depth** **x86** **x64** **ARM** **ARM64**
|
||||
----------- --------- --------- ---------- -----------
|
||||
1 34 ns 98 ns 6.62 μs 6.63 μs
|
||||
2 35 ns 150 ns 8.08 μs 8.25 μs
|
||||
3 36 ns 168 ns 9.75 μs 10 μs
|
||||
4 39 ns 190 ns 10.92 μs 11.58 μs
|
||||
5 42 ns 206 ns 12.5 μs 13.33 μs
|
||||
10 52 ns 306 ns 19.62 μs 21.71 μs
|
||||
15 63 ns 415 ns 26.83 μs 30.13 μs
|
||||
20 77 ns 531 ns 34.25 μs 38.71 μs
|
||||
25 89 ns 630 ns 41.17 μs 47.17 μs
|
||||
30 109 ns 735 ns 48.33 μs 55.63 μs
|
||||
35 123 ns 843 ns 55.87 μs 64.09 μs
|
||||
40 142 ns 950 ns 63.12 μs 72.59 μs
|
||||
45 154 ns 1.05 μs 70.54 μs 81 μs
|
||||
50 167 ns 1.16 μs 78 μs 89.5 μs
|
||||
55 179 ns 1.26 μs 85.04 μs 98 μs
|
||||
60 193 ns 1.37 μs 92.75 μs 106.59 μs
|
||||
| **Depth** | **x86** | **x64** | **ARM** | **ARM64** |
|
||||
|:---------:|:-------:|:-------:|:--------:|:---------:|
|
||||
| 1 | 34 ns | 98 ns | 6.62 μs | 6.63 μs |
|
||||
| 2 | 35 ns | 150 ns | 8.08 μs | 8.25 μs |
|
||||
| 3 | 36 ns | 168 ns | 9.75 μs | 10 μs |
|
||||
| 4 | 39 ns | 190 ns | 10.92 μs | 11.58 μs |
|
||||
| 5 | 42 ns | 206 ns | 12.5 μs | 13.33 μs |
|
||||
| 10 | 52 ns | 306 ns | 19.62 μs | 21.71 μs |
|
||||
| 15 | 63 ns | 415 ns | 26.83 μs | 30.13 μs |
|
||||
| 20 | 77 ns | 531 ns | 34.25 μs | 38.71 μs |
|
||||
| 25 | 89 ns | 630 ns | 41.17 μs | 47.17 μs |
|
||||
| 30 | 109 ns | 735 ns | 48.33 μs | 55.63 μs |
|
||||
| 35 | 123 ns | 843 ns | 55.87 μs | 64.09 μs |
|
||||
| 40 | 142 ns | 950 ns | 63.12 μs | 72.59 μs |
|
||||
| 45 | 154 ns | 1.05 μs | 70.54 μs | 81 μs |
|
||||
| 50 | 167 ns | 1.16 μs | 78 μs | 89.5 μs |
|
||||
| 55 | 179 ns | 1.26 μs | 85.04 μs | 98 μs |
|
||||
| 60 | 193 ns | 1.37 μs | 92.75 μs | 106.59 μs |
|
||||
|
||||
: Median times of zone capture with call stack. x86, x64: i7 8700K; ARM: Banana Pi; ARM64: ODROID-C2. Selected architectures are plotted on figure [6](#CallstackPlot)
|
||||
:::
|
||||
_Median times of zone capture with call stack. x86, x64: i7 8700K; ARM: Banana Pi; ARM64: ODROID-C2. Selected architectures are plotted on figure [6](#CallstackPlot)_
|
||||
|
||||
<figure id="CallstackPlot" data-latex-placement="h">
|
||||
|
||||
@@ -1845,34 +1827,30 @@ Be aware that for Lua call stack retrieval to work, you need to be on a platform
|
||||
|
||||
Cost of performing Lua call stack capture is presented in table [7](#CallstackTimesLua) and figure [7](#CallstackPlotLua). Lua call stacks include native call stacks, which have a capture cost of their own (table [6](#CallstackTimes)), and the `depth` parameter is applied for both captures. The presented data were captured with full Lua stack depth, but only 13 frames were available on the native call stack. Hence, to explain the non-linearity of the graph, you need to consider what was truly measured:
|
||||
|
||||
$$\text{Cost}_{\text{total}}(\text{depth}) =
|
||||
\begin{cases}
|
||||
\text{Cost}_{\text{Lua}}(\text{depth}) + \text{Cost}_{\text{native}}(\text{depth}) & \text{when depth} \leq 13 \\
|
||||
\text{Cost}_{\text{Lua}}(\text{depth}) + \text{Cost}_{\text{native}}(13) & \text{when depth} > 13
|
||||
\end{cases}$$
|
||||
Cost_total(depth) =
|
||||
Cost_Lua(depth) + Cost_native(depth) when depth ≤ 13
|
||||
Cost_Lua(depth) + Cost_native(13) when depth > 13
|
||||
|
||||
::: {#CallstackTimesLua}
|
||||
**Depth** **Time**
|
||||
----------- ----------
|
||||
1 707 ns
|
||||
2 699 ns
|
||||
3 624 ns
|
||||
4 727 ns
|
||||
5 836 ns
|
||||
10 1.77 μs
|
||||
15 2.44 μs
|
||||
20 2.51 μs
|
||||
25 2.98 μs
|
||||
30 3.6 μs
|
||||
35 4.33 μs
|
||||
40 5.17 μs
|
||||
45 6.01 μs
|
||||
50 6.99 μs
|
||||
55 8.11 μs
|
||||
60 9.17 μs
|
||||
| **Depth** | **Time** |
|
||||
|:---------:|:--------:|
|
||||
| 1 | 707 ns |
|
||||
| 2 | 699 ns |
|
||||
| 3 | 624 ns |
|
||||
| 4 | 727 ns |
|
||||
| 5 | 836 ns |
|
||||
| 10 | 1.77 μs |
|
||||
| 15 | 2.44 μs |
|
||||
| 20 | 2.51 μs |
|
||||
| 25 | 2.98 μs |
|
||||
| 30 | 3.6 μs |
|
||||
| 35 | 4.33 μs |
|
||||
| 40 | 5.17 μs |
|
||||
| 45 | 6.01 μs |
|
||||
| 50 | 6.99 μs |
|
||||
| 55 | 8.11 μs |
|
||||
| 60 | 9.17 μs |
|
||||
|
||||
: Median times of Lua zone capture with call stack (x64, 13 native frames)
|
||||
:::
|
||||
_Median times of Lua zone capture with call stack (x64, 13 native frames)_
|
||||
|
||||
<figure id="CallstackPlotLua" data-latex-placement="h">
|
||||
|
||||
@@ -2677,11 +2655,11 @@ While the call stack sampling is a generic software-implemented functionality of
|
||||
|
||||
Tracy can use these counters to present you the following three statistics, which may help guide you in discovering why your code is not as fast as possible:
|
||||
|
||||
1. *Instructions Per Cycle (IPC)* -- shows how many instructions were executing concurrently within a single core cycle. Higher values are better. The maximum achievable value depends on the design of the CPU, including things such as the number of execution units and their individual capabilities. Calculated as $\frac{\text{\#instructions retired}}{\text{\#cycles}}$. You can disable it with the `TRACY_NO_SAMPLE_RETIREMENT` macro.
|
||||
1. *Instructions Per Cycle (IPC)* -- shows how many instructions were executing concurrently within a single core cycle. Higher values are better. The maximum achievable value depends on the design of the CPU, including things such as the number of execution units and their individual capabilities. Calculated as #instructions retired / #cycles. You can disable it with the `TRACY_NO_SAMPLE_RETIREMENT` macro.
|
||||
|
||||
2. *Branch miss rate* -- shows how frequently the CPU branch predictor makes a wrong choice. Lower values are better. Calculated as $\frac{\text{\#branch misses}}{\text{\#branch instructions}}$. You can disable it with the `TRACY_NO_SAMPLE_BRANCH` macro.
|
||||
2. *Branch miss rate* -- shows how frequently the CPU branch predictor makes a wrong choice. Lower values are better. Calculated as #branch misses / #branch instructions. You can disable it with the `TRACY_NO_SAMPLE_BRANCH` macro.
|
||||
|
||||
3. *Cache miss rate* -- shows how frequently the CPU has to retrieve data from memory. Lower values are better. The specifics of which cache level is taken into account here vary from one implementation to another. Calculated as $\frac{\text{\#cache misses}}{\text{\#cache references}}$. You can disable it with the `TRACY_NO_SAMPLE_CACHE` macro.
|
||||
3. *Cache miss rate* -- shows how frequently the CPU has to retrieve data from memory. Lower values are better. The specifics of which cache level is taken into account here vary from one implementation to another. Calculated as #cache misses / #cache references. You can disable it with the `TRACY_NO_SAMPLE_CACHE` macro.
|
||||
|
||||
Each performance counter has to be collected by a dedicated Performance Monitoring Unit (PMU). However, the availability of PMUs is very limited, so you may not be able to capture all the statistics mentioned above at the same time (as each requires capture of two different counters). In such a case, you will need to manually select what needs to be sampled with the macros specified above.
|
||||
|
||||
@@ -2918,7 +2896,7 @@ You can also adjust some settings that affect global profiler behavior in this w
|
||||
|
||||
- *Zone name shortening* -- Sets the default zone name shortening behavior used in new traces. See section [5.4](#options) for more information.
|
||||
|
||||
- *Scroll multipliers* -- Allows you to fine-tune the sensitivity of the horizontal and vertical scroll in the timeline. The default values ($1.0$) are an attempt at the best possible settings, but differences in hardware manufacturers, platform implementations, and user expectations may require adjustments.
|
||||
- *Scroll multipliers* -- Allows you to fine-tune the sensitivity of the horizontal and vertical scroll in the timeline. The default values (1.0) are an attempt at the best possible settings, but differences in hardware manufacturers, platform implementations, and user expectations may require adjustments.
|
||||
|
||||
- *Memory limit* -- When enabled, profiler will stop recording data when memory usage exceeds the specified percentage of the total system memory. This mechanism does not measure the current system memory usage or limits. The upper value is not capped, as you may use swap. See section [4.6](#memoryusage) for more information.
|
||||
|
||||
@@ -3004,52 +2982,46 @@ The `update` utility supports optional higher levels of data compression, which
|
||||
|
||||
- `-z level` -- selects Zstandard algorithm, with a specified compression level.
|
||||
|
||||
::: {#compressiontimes}
|
||||
**Mode** **Size** **Ratio** **Save time** **Load time**
|
||||
------------- ----------- ----------- --------------- ---------------
|
||||
lz4 162.48 MB 17.19% 1.91 s 470 ms
|
||||
lz4 hc 77.33 MB 8.18% 39.24 s 401 ms
|
||||
lz4 extreme 72.67 MB 7.68% 4:30 406 ms
|
||||
zstd 1 63.17 MB 6.68% 2.27 s 868 ms
|
||||
zstd 2 63.29 MB 6.69% 2.31 s 884 ms
|
||||
zstd 3 62.94 MB 6.65% 2.43 s 867 ms
|
||||
zstd 4 62.81 MB 6.64% 2.44 s 855 ms
|
||||
zstd 5 61.04 MB 6.45% 3.98 s 855 ms
|
||||
zstd 6 60.27 MB 6.37% 4.19 s 827 ms
|
||||
zstd 7 61.53 MB 6.5% 6.6 s 761 ms
|
||||
zstd 8 60.44 MB 6.39% 7.84 s 746 ms
|
||||
zstd 9 59.58 MB 6.3% 9.6 s 724 ms
|
||||
zstd 10 59.36 MB 6.28% 10.29 s 706 ms
|
||||
zstd 11 59.2 MB 6.26% 11.23 s 717 ms
|
||||
zstd 12 58.51 MB 6.19% 15.43 s 695 ms
|
||||
zstd 13 56.16 MB 5.94% 35.55 s 642 ms
|
||||
zstd 14 55.76 MB 5.89% 37.74 s 627 ms
|
||||
zstd 15 54.65 MB 5.78% 1:01 600 ms
|
||||
zstd 16 50.94 MB 5.38% 1:34 537 ms
|
||||
zstd 17 50.18 MB 5.30% 1:44 542 ms
|
||||
zstd 18 49.91 MB 5.28% 2:17 554 ms
|
||||
zstd 19 46.99 MB 4.97% 7:09 605 ms
|
||||
zstd 20 46.81 MB 4.95% 7:08 608 ms
|
||||
zstd 21 45.77 MB 4.84% 13:01 614 ms
|
||||
zstd 22 45.52 MB 4.81% 15:11 621 ms
|
||||
| **Mode** | **Size** | **Ratio** | **Save time** | **Load time** |
|
||||
|:-----------:|:---------:|:---------:|:-------------:|:-------------:|
|
||||
| lz4 | 162.48 MB | 17.19% | 1.91 s | 470 ms |
|
||||
| lz4 hc | 77.33 MB | 8.18% | 39.24 s | 401 ms |
|
||||
| lz4 extreme | 72.67 MB | 7.68% | 4:30 | 406 ms |
|
||||
| zstd 1 | 63.17 MB | 6.68% | 2.27 s | 868 ms |
|
||||
| zstd 2 | 63.29 MB | 6.69% | 2.31 s | 884 ms |
|
||||
| zstd 3 | 62.94 MB | 6.65% | 2.43 s | 867 ms |
|
||||
| zstd 4 | 62.81 MB | 6.64% | 2.44 s | 855 ms |
|
||||
| zstd 5 | 61.04 MB | 6.45% | 3.98 s | 855 ms |
|
||||
| zstd 6 | 60.27 MB | 6.37% | 4.19 s | 827 ms |
|
||||
| zstd 7 | 61.53 MB | 6.5% | 6.6 s | 761 ms |
|
||||
| zstd 8 | 60.44 MB | 6.39% | 7.84 s | 746 ms |
|
||||
| zstd 9 | 59.58 MB | 6.3% | 9.6 s | 724 ms |
|
||||
| zstd 10 | 59.36 MB | 6.28% | 10.29 s | 706 ms |
|
||||
| zstd 11 | 59.2 MB | 6.26% | 11.23 s | 717 ms |
|
||||
| zstd 12 | 58.51 MB | 6.19% | 15.43 s | 695 ms |
|
||||
| zstd 13 | 56.16 MB | 5.94% | 35.55 s | 642 ms |
|
||||
| zstd 14 | 55.76 MB | 5.89% | 37.74 s | 627 ms |
|
||||
| zstd 15 | 54.65 MB | 5.78% | 1:01 | 600 ms |
|
||||
| zstd 16 | 50.94 MB | 5.38% | 1:34 | 537 ms |
|
||||
| zstd 17 | 50.18 MB | 5.30% | 1:44 | 542 ms |
|
||||
| zstd 18 | 49.91 MB | 5.28% | 2:17 | 554 ms |
|
||||
| zstd 19 | 46.99 MB | 4.97% | 7:09 | 605 ms |
|
||||
| zstd 20 | 46.81 MB | 4.95% | 7:08 | 608 ms |
|
||||
| zstd 21 | 45.77 MB | 4.84% | 13:01 | 614 ms |
|
||||
| zstd 22 | 45.52 MB | 4.81% | 15:11 | 621 ms |
|
||||
|
||||
: Compression results for an example trace.\
|
||||
Tests performed on Ryzen 9 3900X.
|
||||
:::
|
||||
_Compression results for an example trace.\
|
||||
Tests performed on Ryzen 9 3900X._
|
||||
|
||||
<figure id="savetime">
|
||||
<div class="minipage">
|
||||
<figure id="savesize" data-latex-placement="H">
|
||||
|
||||
<figcaption>Plot of trace sizes for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
||||
</figure>
|
||||
</div>
|
||||
<div class="minipage">
|
||||
<figure id="savetime" data-latex-placement="H">
|
||||
|
||||
<figcaption>Logarithmic plot of trace compression times for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
||||
</figure>
|
||||
</div>
|
||||
<figcaption>Logarithmic plot of trace compression times for different compression modes (see table <a href="#compressiontimes">8</a>).</figcaption>
|
||||
</figure>
|
||||
|
||||
@@ -3068,37 +3040,33 @@ Saving and loading trace data can be parallelized using the `-j streams` paramet
|
||||
|
||||
Going overboard with the number of streams is not recommended, especially with the fast compression modes where it will be difficult to keep each stream busy. Also, complex compression codecs (e.g. zstd at level 22) have significantly worse compression rates when the work is divided. This is a fairly nuanced topic, and you are encouraged to do your own measurements, but for a rough guideline on the behavior, you can refer to tables [9](#streamsize) and [10](#streamspeedup).
|
||||
|
||||
::: {#streamsize}
|
||||
**4** **8** **16** **32**
|
||||
--------- --------- --------- --------- ---------
|
||||
lz4 100.30% 100.30% 100.61% 102.73%
|
||||
lz4 hc 100.80% 101.20% 101.61% 102.41%
|
||||
lz4 ext 100.40% 101.21% 101.62% 102.02%
|
||||
zstd 1 100.90% 101.36% 101.81% 102.26%
|
||||
zstd 3 100.51% 101.02% 101.53% 102.04%
|
||||
zstd 6 100.55% 101.10% 101.65% 102.75%
|
||||
zstd 9 101.27% 103.16% 105.06% 108.23%
|
||||
zstd 18 103.08% 106.15% 109.23% 115.38%
|
||||
zstd 22 107.08% 113.27% 122.12% 130.97%
|
||||
| | **4** | **8** | **16** | **32** |
|
||||
|:-------:|:-------:|:-------:|:-------:|:-------:|
|
||||
| lz4 | 100.30% | 100.30% | 100.61% | 102.73% |
|
||||
| lz4 hc | 100.80% | 101.20% | 101.61% | 102.41% |
|
||||
| lz4 ext | 100.40% | 101.21% | 101.62% | 102.02% |
|
||||
| zstd 1 | 100.90% | 101.36% | 101.81% | 102.26% |
|
||||
| zstd 3 | 100.51% | 101.02% | 101.53% | 102.04% |
|
||||
| zstd 6 | 100.55% | 101.10% | 101.65% | 102.75% |
|
||||
| zstd 9 | 101.27% | 103.16% | 105.06% | 108.23% |
|
||||
| zstd 18 | 103.08% | 106.15% | 109.23% | 115.38% |
|
||||
| zstd 22 | 107.08% | 113.27% | 122.12% | 130.97% |
|
||||
|
||||
: The increase in file size for different compression modes, as compared to a single stream.
|
||||
:::
|
||||
_The increase in file size for different compression modes, as compared to a single stream._
|
||||
|
||||
::: {#streamspeedup}
|
||||
**4** **8** **16** **32**
|
||||
--------- ------- ------- -------- --------
|
||||
lz4 2.04 2.52 2.11 3.24
|
||||
lz4 hc 3.56 6.73 9.49 15.26
|
||||
lz4 ext 3.38 6.53 9.57 17.03
|
||||
zstd 1 2.24 3.68 3.40 3.37
|
||||
zstd 3 3.23 4.13 4.07 4.50
|
||||
zstd 6 3.52 6.00 6.53 6.95
|
||||
zstd 9 3.10 4.26 5.12 5.40
|
||||
zstd 18 3.22 5.41 8.49 14.51
|
||||
zstd 22 3.99 7.47 11.10 18.20
|
||||
| | **4** | **8** | **16** | **32** |
|
||||
|:-------:|:-----:|:-----:|:------:|:------:|
|
||||
| lz4 | 2.04 | 2.52 | 2.11 | 3.24 |
|
||||
| lz4 hc | 3.56 | 6.73 | 9.49 | 15.26 |
|
||||
| lz4 ext | 3.38 | 6.53 | 9.57 | 17.03 |
|
||||
| zstd 1 | 2.24 | 3.68 | 3.40 | 3.37 |
|
||||
| zstd 3 | 3.23 | 4.13 | 4.07 | 4.50 |
|
||||
| zstd 6 | 3.52 | 6.00 | 6.53 | 6.95 |
|
||||
| zstd 9 | 3.10 | 4.26 | 5.12 | 5.40 |
|
||||
| zstd 18 | 3.22 | 5.41 | 8.49 | 14.51 |
|
||||
| zstd 22 | 3.99 | 7.47 | 11.10 | 18.20 |
|
||||
|
||||
: The speedup (*x* times faster) in saving time for different modes of compression, as compared to a single stream.
|
||||
:::
|
||||
_The speedup (*x* times faster) in saving time for different modes of compression, as compared to a single stream._
|
||||
|
||||
### Frame images dictionary {#fidict}
|
||||
|
||||
@@ -3152,7 +3120,7 @@ The workflow is identical, whether you are viewing a previously saved trace or i
|
||||
|
||||
In most cases Tracy will display an approximation of time value, depending on how big it is. For example, a short time range will be displayed as 123 ns, and some longer ones will be shortened to 123.45 μs, 123.45 ms, 12.34 s, 1:23.4, 12:34:56, or even 1d12:34:56 to indicate more than a day has passed.
|
||||
|
||||
While such a presentation makes time values easy to read, it is not always appropriate. For example, you may have multiple events happen at a time approximated to 1:23.4, giving you the precision of only $\sfrac{1}{10}$ of a second. And there's certainly a lot that can happen in 100 ms.
|
||||
While such a presentation makes time values easy to read, it is not always appropriate. For example, you may have multiple events happen at a time approximated to 1:23.4, giving you the precision of only 1/10 of a second. And there's certainly a lot that can happen in 100 ms.
|
||||
|
||||
An alternative time display is used in appropriate places to solve this problem. It combines a day--hour--minute--second value with full nanosecond resolution, resulting in values such as 1:23 456,789,012 ns.
|
||||
|
||||
@@ -4314,8 +4282,8 @@ You need to take special care when reading call stacks. Contrary to their name,
|
||||
|
||||
Let's say you are looking at the call stack of some function called within `Application::Run`. This is the result you might get:
|
||||
|
||||
0. @\ldots@
|
||||
1. @\ldots@
|
||||
0. …
|
||||
1. …
|
||||
2. Application::Run
|
||||
3. std::unique_ptr<Application>::reset
|
||||
4. main
|
||||
@@ -4513,9 +4481,9 @@ As described in chapter [3.17.6](#hardwaresampling), on some platforms, Tracy c
|
||||
|
||||
- *Cycles* -- an option very similar to the *sample count*, but the data is collected directly by the CPU hardware counters. This may make the results more reliable.
|
||||
|
||||
- *Branch impact* -- indicates places where many branch instructions are issued, and at the same time, incorrectly predicted. Calculated as $\sqrt{\text{\#branch instructions}*\text{\#branch misses}}$. This is more useful than the raw branch miss rate, as it considers the number of events taking place.
|
||||
- *Branch impact* -- indicates places where many branch instructions are issued, and at the same time, incorrectly predicted. Calculated as √(#branch instructions\*#branch misses). This is more useful than the raw branch miss rate, as it considers the number of events taking place.
|
||||
|
||||
- *Cache impact* -- similar to *branch impact*, but it shows cache miss data instead. These values are calculated as $\sqrt{\text{\#cache references}*\text{\#cache misses}}$ and will highlight places with lots of cache accesses that also miss.
|
||||
- *Cache impact* -- similar to *branch impact*, but it shows cache miss data instead. These values are calculated as √(#cache references\*#cache misses) and will highlight places with lots of cache accesses that also miss.
|
||||
|
||||
- The rest of the available selections just show raw values gathered from the hardware counters. These are: *Retirements*, *Branches taken*, *Branch miss*, *Cache access* and *Cache miss*.
|
||||
|
||||
@@ -4570,7 +4538,7 @@ This window presents information and statistics about a lock. The lock events co
|
||||
|
||||
You may view a live replay of the profiled application screen captures (see section [3.3.3](#frameimages)) using this window. Playback is controlled by the *Play* and *Pause* buttons, and the *Frame image* slider can be used to scrub to the desired timestamp. Alternatively, you may use the step-back and step-forward buttons to move a single frame back or forward.
|
||||
|
||||
If the *Sync timeline* option is selected, the profiler will focus the timeline view on the frame corresponding to the currently displayed screenshot. The *Zoom 2$\times$* option enlarges the image for easier viewing.
|
||||
If the *Sync timeline* option is selected, the profiler will focus the timeline view on the frame corresponding to the currently displayed screenshot. The *Zoom 2×* option enlarges the image for easier viewing.
|
||||
|
||||
The following parameters also accompany each displayed frame image: *timestamp*, showing at which time the image was captured, *frame*, displaying the numerical value of the corresponding frame, and *ratio*, telling how well the in-memory loss-less compression was able to reduce the image data size.
|
||||
|
||||
@@ -4748,7 +4716,7 @@ So, which model should you run and what hardware you need to be able to do so? L
|
||||
|
||||
As a rule of thumb, the specified number of parameters is how much total memory is needed to run the model with 8-bit quantization. Another way to get a rough estimate is to look at the model file size. Strive to fit the active parameters completely into VRAM, leaving space for computation scratch space and the context.
|
||||
|
||||
To make this practical, the 35B-A3B model at 2 bit quantization requires $35 * 2 / 8 = 8.75$ GB, which fits into the 4 + 16 GB budget in the example above. The 3B active parameters similarly calculate to 0.75 GB, with additional 1 GB or so needed for computation buffer and another 1 GB for the 50K context, which is less than the 4 GB of VRAM available, making everything fit.
|
||||
To make this practical, the 35B-A3B model at 2-bit quantization requires 35 \* 2 / 8 = 8.75 GB, which fits into the 4 + 16 GB budget in the example above. The 3B active parameters similarly calculate to 0.75 GB, with additional 1 GB or so needed for computation buffer and another 1 GB for the 50K context, which is less than the 4 GB of VRAM available, making everything fit.
|
||||
|
||||
## Usage {#llmusage}
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
\usepackage{verbatim}
|
||||
\usepackage[hyphens]{url}
|
||||
\usepackage{hyperref} % For hyperlinks in the PDF
|
||||
\usepackage{fontawesome6}
|
||||
\usepackage{fontawesome7}
|
||||
\usepackage[os=win]{menukeys}
|
||||
\usepackage{xfrac}
|
||||
\usepackage[euler]{textgreek}
|
||||
@@ -1701,6 +1701,12 @@ logo=\bcattention
|
||||
\end{itemize}
|
||||
\end{bclogo}
|
||||
|
||||
\subparagraph{Calibrated context}
|
||||
|
||||
By default, the OpenGL context is uncalibrated: the CPU and GPU clocks are aligned only once, when the context is created, so over long captures the two time domains may drift apart (section~\ref{options} describes correcting this drift manually). Defining \texttt{TRACY\_OPENGL\_AUTO\_CALIBRATION} before including \texttt{TracyOpenGL.hpp} enables periodic recalibration instead: roughly once per second Tracy samples the GPU and CPU clocks together and emits a calibration event, allowing the profiler to track and remove the drift automatically.
|
||||
|
||||
This is opt-in because OpenGL exposes no atomic CPU+GPU timestamp query (unlike Vulkan's \texttt{VK\_EXT\_calibrated\_timestamps} or Direct3D~12, whose contexts are always calibrated). Recalibration therefore reads the GPU clock with \texttt{glGetInteger64v(GL\_TIMESTAMP)}, which forces a CPU/GPU synchronization (a pipeline stall) each time it runs. Enable it only when the improved long-capture alignment is worth the periodic stall.
|
||||
|
||||
\subsubsection{Vulkan}
|
||||
|
||||
Similarly, for Vulkan support you should include the \texttt{public/tracy/TracyVulkan.hpp} header file. Tracing Vulkan devices and queues is a bit more involved, and the Vulkan initialization macro \texttt{TracyVkContext(physdev, device, queue, cmdbuf)} returns an instance of \texttt{TracyVkCtx} object, which tracks an associated Vulkan queue. Cleanup is performed using the \texttt{TracyVkDestroy(ctx)} macro. You may create multiple Vulkan contexts. To set a custom name for the context, use the \texttt{TracyVkContextName(ctx, name, size)} macro.
|
||||
@@ -2041,6 +2047,20 @@ filesystem setup as the one used to run the tracy instrumented application).
|
||||
You can do path substitution with the \texttt{-p} option to perform any number of path
|
||||
substitutions in order to use symbols located elsewhere.
|
||||
|
||||
By default symbol resolution is performed with the platform's native facility: the DbgHelp
|
||||
library on Windows, and the \texttt{addr2line} tool found in \texttt{PATH} elsewhere. You can
|
||||
override this with the \texttt{-a} option, passing the path to a custom
|
||||
\texttt{addr2line}-compatible tool (for instance an \texttt{addr2line} from a cross-compilation
|
||||
toolchain, or \texttt{llvm-addr2line}). The \texttt{-a} option works on all platforms, including
|
||||
Windows, and takes precedence over the platform default.
|
||||
|
||||
Extra arguments can be passed verbatim to the resolution tool with the \texttt{-A} option. Tracy
|
||||
records callstack frame offsets relative to the image base, but \texttt{addr2line}-compatible
|
||||
tools expect a full virtual address for images that have a non-zero preferred image base (such as
|
||||
PE on Windows or Mach-O on Apple). For these, pass \texttt{-A "--relative-address"} so that
|
||||
\texttt{llvm-addr2line} or \texttt{llvm-symbolizer} adds the image base back. ELF images need no
|
||||
such adjustment.
|
||||
|
||||
\begin{bclogo}[
|
||||
noborder=true,
|
||||
couleur=black!5,
|
||||
|
||||
@@ -135,6 +135,10 @@ if get_option('ignore_memory_faults')
|
||||
tracy_common_args += ['-DTRACY_IGNORE_MEMORY_FAULTS']
|
||||
endif
|
||||
|
||||
if get_option('opengl_auto_calibration')
|
||||
tracy_common_args += ['-DTRACY_OPENGL_AUTO_CALIBRATION']
|
||||
endif
|
||||
|
||||
tracy_shared_libs = get_option('default_library') == 'shared'
|
||||
|
||||
if tracy_shared_libs
|
||||
|
||||
@@ -29,3 +29,4 @@ option('verbose', type : 'boolean', value : false, description : 'Enable verbose
|
||||
option('no_internal_message', type : 'boolean', value : false, description : 'Prevent the profiler from logging messages')
|
||||
option('debuginfod', type : 'boolean', value : false, description : 'Enable debuginfod support')
|
||||
option('ignore_memory_faults', type : 'boolean', value : false, description : 'Ignore instrumentation errors from memory free events that do not have a matching allocation')
|
||||
option('opengl_auto_calibration', type : 'boolean', value : false, description : 'Periodically recalibrate OpenGL GPU/CPU clock drift (forces a CPU/GPU sync each time)')
|
||||
|
||||
@@ -149,15 +149,30 @@ Embed(PROFILER_FILES SystemPrompt src/llm/system.prompt.md)
|
||||
Embed(PROFILER_FILES SkillCallstack src/llm/skill.callstack.md)
|
||||
Embed(PROFILER_FILES SkillOptimization src/llm/skill.optimization.md)
|
||||
Embed(PROFILER_FILES ToolsJson src/llm/tools.json)
|
||||
|
||||
Embed(PROFILER_FILES FontFixed src/font/FiraCode-Retina.ttf)
|
||||
Embed(PROFILER_FILES FontIcons src/font/Font\ Awesome\ 6\ Free-Solid-900.otf)
|
||||
Embed(PROFILER_FILES FontIcons src/font/Font\ Awesome\ 7\ Free-Solid-900.otf)
|
||||
Embed(PROFILER_FILES FontNormal src/font/Roboto-Regular.ttf)
|
||||
Embed(PROFILER_FILES FontBold src/font/Roboto-Bold.ttf)
|
||||
Embed(PROFILER_FILES FontItalic src/font/Roboto-Italic.ttf)
|
||||
Embed(PROFILER_FILES FontBoldItalic src/font/Roboto-BoldItalic.ttf)
|
||||
Embed(PROFILER_FILES FontEmoji src/font/NotoEmoji-Regular.ttf)
|
||||
|
||||
Embed(PROFILER_FILES Manual ../manual/tracy.md)
|
||||
|
||||
Embed(PROFILER_FILES Text100Million src/achievements/100Million.md)
|
||||
Embed(PROFILER_FILES TextConnectToClient src/achievements/ConnectToClient.md)
|
||||
Embed(PROFILER_FILES TextFindZone src/achievements/FindZone.md)
|
||||
Embed(PROFILER_FILES TextFrameImages src/achievements/FrameImages.md)
|
||||
Embed(PROFILER_FILES TextGlobalSettings src/achievements/GlobalSettings.md)
|
||||
Embed(PROFILER_FILES TextInstrumentationIntro src/achievements/InstrumentationIntro.md)
|
||||
Embed(PROFILER_FILES TextInstrumentationStatistics src/achievements/InstrumentationStatistics.md)
|
||||
Embed(PROFILER_FILES TextInstrumentFrames src/achievements/InstrumentFrames.md)
|
||||
Embed(PROFILER_FILES TextIntro src/achievements/Intro.md)
|
||||
Embed(PROFILER_FILES TextLoadTrace src/achievements/LoadTrace.md)
|
||||
Embed(PROFILER_FILES TextSamplingIntro src/achievements/SamplingIntro.md)
|
||||
Embed(PROFILER_FILES TextSaveTrace src/achievements/SaveTrace.md)
|
||||
|
||||
set(INCLUDES "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
set(LIBS "")
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
#include <misc/freetype/imgui_freetype.h>
|
||||
|
||||
#include "Fonts.hpp"
|
||||
#include "profiler/IconsFontAwesome6.h"
|
||||
#include "profiler/TracyEmbed.hpp"
|
||||
|
||||
#include "data/FontFixed.hpp"
|
||||
|
||||
12
profiler/src/achievements/100Million.md
Normal file
12
profiler/src/achievements/100Million.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# It's over 100 million!
|
||||
|
||||
Tracy can handle a lot of data. How about 100 million zones in a single trace? Add a lot of zones to your program and see how it handles it!
|
||||
|
||||
Capturing a long-running profile trace is easy. Need to profile an hour of your program execution? You can do it.
|
||||
|
||||
Note that it doesn't make much sense to instrument every little function you might have. The cost of the instrumentation itself will be higher than the cost of the function in such a case.
|
||||
|
||||
> [!TIP]
|
||||
> Keep in mind that the more zones you have, the more memory and CPU time the profiler will use. Be careful not to run out of memory.
|
||||
>
|
||||
> To capture 100 million zones, you will need approximately 4 GB of RAM.
|
||||
10
profiler/src/achievements/ConnectToClient.md
Normal file
10
profiler/src/achievements/ConnectToClient.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# First profiling session
|
||||
|
||||
Let's start our adventure by instrumenting your application and connecting it to the profiler. Here's a quick refresher:
|
||||
|
||||
1. Integrate Tracy Profiler into your application. This can be done using CMake, Meson, or simply by adding the source files to your project.
|
||||
2. Make sure that `TracyClient.cpp` (or the Tracy library) is included in your build.
|
||||
3. Define `TRACY_ENABLE` in your build configuration, for the whole application. Do not do it in a single source file because it won't work.
|
||||
4. Start your application, and * Connect* to it with the profiler.
|
||||
|
||||
Please refer to the [user manual](https://github.com/wolfpld/tracy/releases) for more details.
|
||||
11
profiler/src/achievements/FindZone.md
Normal file
11
profiler/src/achievements/FindZone.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Find some zones
|
||||
|
||||
You can search for zones in the trace by opening the search window with the * Find zone* button on the top bar. It will ask you for the zone name, which in most cases will be the function name in the code.
|
||||
|
||||
The search may find more than one zone with the same name. A list of all the zones found is displayed, and you can select any of them.
|
||||
|
||||
Alternatively, you can open the Statistics window and click an entry there. This will open the Find zone window as if you had searched for that zone.
|
||||
|
||||
When a zone is selected, a number of statistics are displayed to help you understand the performance of your application. In addition, a histogram of the zone execution times is displayed to make it easier for you to determine the performance of the profiled code. Be sure to select a zone with a large number of calls to make the histogram look interesting!
|
||||
|
||||
Note that you can draw a range on the histogram to limit the number of entries displayed in the zone list below. This list allows you to examine each zone individually. There are also a number of zone groupings that you can select. Each group can be selected and the time associated with the selected group will be highlighted on the histogram.
|
||||
11
profiler/src/achievements/FrameImages.md
Normal file
11
profiler/src/achievements/FrameImages.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# A picture is worth a thousand words
|
||||
|
||||
Tracy allows you to add context to each frame, by attaching a screenshot. You can do this with the `FrameImage` macro.
|
||||
|
||||
You will have to do the screen capture and resizing yourself, which can be a bit complicated. The manual provides a sample code that shows how to do this in a performant way.
|
||||
|
||||
The frame images are displayed in the context of a frame, for example, when you hover over the frame in the timeline or in the frame graph at the top of the screen.
|
||||
|
||||
You can even view a recording of what your application was doing by clicking the * Tools* icon and then selecting the * Playback* option. Try it out!
|
||||
|
||||
The `FrameImage` macro is a great way to see what happened in your application at a particular time. Maybe you have a performance problem that only occurs when a certain object is on the screen?
|
||||
5
profiler/src/achievements/GlobalSettings.md
Normal file
5
profiler/src/achievements/GlobalSettings.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Global settings
|
||||
|
||||
Tracy has a variety of settings that can be adjusted to suit your needs. These settings can be found by clicking on the * Wrench* icon on the welcome screen. This will open the about window, where you can expand the * Global settings* menu.
|
||||
|
||||
The settings are saved between sessions, so you only need to set them once.
|
||||
22
profiler/src/achievements/InstrumentFrames.md
Normal file
22
profiler/src/achievements/InstrumentFrames.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Instrumenting frames
|
||||
|
||||
In addition to instrumenting functions, you can also instrument frames. This allows you to see how much time is spent in each frame of your application.
|
||||
|
||||
To instrument frames, you need to add the `FrameMark` macro at the boundary between frames, typically right after the frame has been presented (for example, just after the buffer swap). This can be done in the main loop of your application, or in a separate function that is called once per frame.
|
||||
|
||||
```c++
|
||||
#include "Tracy.hpp"
|
||||
|
||||
void Render()
|
||||
{
|
||||
// Render the frame
|
||||
SwapBuffers();
|
||||
FrameMark;
|
||||
}
|
||||
```
|
||||
|
||||
When you profile your application, you will see a new frame appear on the timeline each time the `FrameMark` macro is called. This allows you to see how much time is spent in each frame and how many frames are rendered per second.
|
||||
|
||||
The `FrameMark` macro is a great way to see at a glance how your application is performing over time. Maybe there are some performance problems that only appear after a few minutes of running the application? A frame graph is drawn at the top of the profiler window where you can see the timing of all frames.
|
||||
|
||||
Note that some applications do not have a frame-based structure, and in such cases, frame instrumentation may not be useful. That's ok.
|
||||
22
profiler/src/achievements/InstrumentationIntro.md
Normal file
22
profiler/src/achievements/InstrumentationIntro.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Instrumenting your application
|
||||
|
||||
Instrumentation is a powerful feature that allows you to see the exact runtime of each call to the selected set of functions. The downside is that it takes a bit of manual work to get it set up.
|
||||
|
||||
To get started, open a source file and include the `Tracy.hpp` header. This will give you access to a variety of macros provided by Tracy. Next, add the `ZoneScoped` macro to the beginning of one of your functions, like this:
|
||||
|
||||
```c++
|
||||
#include "Tracy.hpp"
|
||||
|
||||
void SomeFunction()
|
||||
{
|
||||
ZoneScoped;
|
||||
// Your code here
|
||||
}
|
||||
```
|
||||
|
||||
Now, when you profile your application, you will see a new zone appear on the timeline for each call to the function. This allows you to see how much time is spent in each call and how many times the function is called.
|
||||
|
||||
> [!NOTE]
|
||||
> The `ZoneScoped` macro is just one of the many macros provided by Tracy. See the documentation for more information.
|
||||
|
||||
The above description applies to C++ code, but things are done similarly in other programming languages. Refer to the documentation for your language for more information.
|
||||
5
profiler/src/achievements/InstrumentationStatistics.md
Normal file
5
profiler/src/achievements/InstrumentationStatistics.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Show me the stats!
|
||||
|
||||
Once you have instrumented your application, you can view the statistics for each zone in the timeline. This allows you to see how much time is spent in each zone and how many times it is called.
|
||||
|
||||
To view the statistics, click on the * Statistics* button on the top bar. This will open a new window with a list of all zones in the trace.
|
||||
12
profiler/src/achievements/Intro.md
Normal file
12
profiler/src/achievements/Intro.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# Click here to discover achievements!
|
||||
|
||||
Clicking on the * Achievements* button opens the Achievements List. Here you can see the tasks to be completed along with a short description of what needs to be done.
|
||||
|
||||
As you complete each Achievement, new Achievements will appear, so be sure to keep checking the list for new ones!
|
||||
|
||||
To make the new things easier to spot, the Achievements List will show a marker next to them. The achievements * Achievements* button will glow yellow when there are new things to see.
|
||||
|
||||
- New tasks: orange
|
||||
- Completed tasks: green
|
||||
|
||||
Good luck!
|
||||
3
profiler/src/achievements/LoadTrace.md
Normal file
3
profiler/src/achievements/LoadTrace.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Load a trace
|
||||
|
||||
You can open a previously saved trace file (or one received from a friend) with the * Open saved trace* button on the welcome screen.
|
||||
10
profiler/src/achievements/SamplingIntro.md
Normal file
10
profiler/src/achievements/SamplingIntro.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Sampling program execution
|
||||
|
||||
Sampling program execution is a great way to find out where the hot spots are in your program. It can be used to find out which functions take the most time, or which lines of code are executed the most often.
|
||||
|
||||
While instrumentation requires changes to your code, sampling does not. However, because of the way it works, the results are coarser and it's not possible to know when functions are called or when they return.
|
||||
|
||||
Sampling is automatic on Linux. On Windows, you must run the profiled application as an administrator for it to work.
|
||||
|
||||
> [!WARNING]
|
||||
> Depending on your system configuration, some additional steps may be required. Please refer to the user manual for more information.
|
||||
12
profiler/src/achievements/SaveTrace.md
Normal file
12
profiler/src/achievements/SaveTrace.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# Save a trace
|
||||
|
||||
Now that you have traced your application (or are in the process of doing so), you can save it to disk for future reference. You can do this by clicking on the * Connection* icon in the top left corner of the screen and then clicking on the * Save trace* button.
|
||||
|
||||
Keeping old traces on hand can be beneficial, as you can compare the performance of your optimizations with what you had before.
|
||||
|
||||
You can also share the trace with your friends or co-workers by sending them the trace file.
|
||||
|
||||
> [!WARNING]
|
||||
> **Warning**
|
||||
>
|
||||
> Trace files can contain sensitive information about your application, such as program code, or even the contents of source files. Be careful when sharing them with others.
|
||||
Binary file not shown.
BIN
profiler/src/font/Font Awesome 7 Free-Solid-900.otf
Normal file
BIN
profiler/src/font/Font Awesome 7 Free-Solid-900.otf
Normal file
Binary file not shown.
@@ -39,7 +39,7 @@
|
||||
#include "profiler/TracyTexture.hpp"
|
||||
#include "profiler/TracyView.hpp"
|
||||
#include "profiler/TracyWeb.hpp"
|
||||
#include "profiler/IconsFontAwesome6.h"
|
||||
#include "profiler/IconsFontAwesome7.h"
|
||||
#include "../../server/tracy_pdqsort.h"
|
||||
#include "../../server/tracy_robin_hood.h"
|
||||
#include "../../server/TracyFileHeader.hpp"
|
||||
@@ -1466,9 +1466,17 @@ Would you like to enable achievements?
|
||||
{
|
||||
ImGui::Columns( 2 );
|
||||
ImGui::SetColumnWidth( 0, 300 * dpiScale );
|
||||
ImGui::BeginChild( "##achievementtoc", ImVec2( 0, 0 ), ImGuiChildFlags_AlwaysUseWindowPadding );
|
||||
DrawAchievements( c->items );
|
||||
ImGui::EndChild();
|
||||
ImGui::NextColumn();
|
||||
if( s_achievementItem ) s_achievementItem->description();
|
||||
ImGui::BeginChild( "##achievementtext", ImVec2( 0, 0 ), ImGuiChildFlags_AlwaysUseWindowPadding );
|
||||
if( s_achievementItem )
|
||||
{
|
||||
tracy::Markdown md( nullptr, nullptr );
|
||||
md.Print( s_achievementItem->text.c_str(), s_achievementItem->text.size() );
|
||||
}
|
||||
ImGui::EndChild();
|
||||
ImGui::EndColumns();
|
||||
ImGui::EndTabItem();
|
||||
}
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
// Generated by https://github.com/juliettef/IconFontCppHeaders script GenerateIconFontCppHeaders.py for languages C and C++
|
||||
// from https://github.com/FortAwesome/Font-Awesome/raw/6.x/metadata/icons.yml
|
||||
// for use with https://github.com/FortAwesome/Font-Awesome/blob/6.x/webfonts/fa-regular-400.ttf, https://github.com/FortAwesome/Font-Awesome/blob/6.x/webfonts/fa-solid-900.ttf
|
||||
// Generated by https://github.com/juliettef/IconFontCppHeaders script GenerateIconFontCppHeaders.py
|
||||
// for C and C++
|
||||
// from codepoints https://github.com/FortAwesome/Font-Awesome/raw/7.x/metadata/icons.yml
|
||||
// for use with font https://github.com/FortAwesome/Font-Awesome/blob/7.x/webfonts/fa-regular-400.woff2 (You may need to convert the .woff2 files to .ttf depending upon your loader.), https://github.com/FortAwesome/Font-Awesome/blob/7.x/webfonts/fa-solid-900.woff2 (You may need to convert the .woff2 files to .ttf depending upon your loader.)
|
||||
|
||||
#pragma once
|
||||
|
||||
#define FONT_ICON_FILE_NAME_FAR "fa-regular-400.ttf"
|
||||
#define FONT_ICON_FILE_NAME_FAS "fa-solid-900.ttf"
|
||||
#define FONT_ICON_FILE_NAME_FAR "fa-regular-400.woff2"
|
||||
#define FONT_ICON_FILE_NAME_FAS "fa-solid-900.woff2"
|
||||
|
||||
#define ICON_MIN_FA 0xe005
|
||||
#define ICON_MAX_16_FA 0xf8ff
|
||||
#define ICON_MAX_FA 0xf8ff
|
||||
|
||||
#define ICON_FA_0 "0" // U+0030
|
||||
#define ICON_FA_1 "1" // U+0031
|
||||
#define ICON_FA_2 "2" // U+0032
|
||||
@@ -22,6 +25,7 @@
|
||||
#define ICON_FA_A "A" // U+0041
|
||||
#define ICON_FA_ADDRESS_BOOK "\xef\x8a\xb9" // U+f2b9
|
||||
#define ICON_FA_ADDRESS_CARD "\xef\x8a\xbb" // U+f2bb
|
||||
#define ICON_FA_ALARM_CLOCK "\xef\x8d\x8e" // U+f34e
|
||||
#define ICON_FA_ALIGN_CENTER "\xef\x80\xb7" // U+f037
|
||||
#define ICON_FA_ALIGN_JUSTIFY "\xef\x80\xb9" // U+f039
|
||||
#define ICON_FA_ALIGN_LEFT "\xef\x80\xb6" // U+f036
|
||||
@@ -41,7 +45,9 @@
|
||||
#define ICON_FA_ANGLES_UP "\xef\x84\x82" // U+f102
|
||||
#define ICON_FA_ANKH "\xef\x99\x84" // U+f644
|
||||
#define ICON_FA_APPLE_WHOLE "\xef\x97\x91" // U+f5d1
|
||||
#define ICON_FA_AQUARIUS "\xee\xa1\x85" // U+e845
|
||||
#define ICON_FA_ARCHWAY "\xef\x95\x97" // U+f557
|
||||
#define ICON_FA_ARIES "\xee\xa1\x86" // U+e846
|
||||
#define ICON_FA_ARROW_DOWN "\xef\x81\xa3" // U+f063
|
||||
#define ICON_FA_ARROW_DOWN_1_9 "\xef\x85\xa2" // U+f162
|
||||
#define ICON_FA_ARROW_DOWN_9_1 "\xef\xa2\x86" // U+f886
|
||||
@@ -116,6 +122,7 @@
|
||||
#define ICON_FA_BAN "\xef\x81\x9e" // U+f05e
|
||||
#define ICON_FA_BAN_SMOKING "\xef\x95\x8d" // U+f54d
|
||||
#define ICON_FA_BANDAGE "\xef\x91\xa2" // U+f462
|
||||
#define ICON_FA_BANGLADESHI_TAKA_SIGN "\xee\x8b\xa6" // U+e2e6
|
||||
#define ICON_FA_BARCODE "\xef\x80\xaa" // U+f02a
|
||||
#define ICON_FA_BARS "\xef\x83\x89" // U+f0c9
|
||||
#define ICON_FA_BARS_PROGRESS "\xef\xa0\xa8" // U+f828
|
||||
@@ -214,6 +221,7 @@
|
||||
#define ICON_FA_BURGER "\xef\xa0\x85" // U+f805
|
||||
#define ICON_FA_BURST "\xee\x93\x9c" // U+e4dc
|
||||
#define ICON_FA_BUS "\xef\x88\x87" // U+f207
|
||||
#define ICON_FA_BUS_SIDE "\xee\xa0\x9d" // U+e81d
|
||||
#define ICON_FA_BUS_SIMPLE "\xef\x95\x9e" // U+f55e
|
||||
#define ICON_FA_BUSINESS_TIME "\xef\x99\x8a" // U+f64a
|
||||
#define ICON_FA_C "C" // U+0043
|
||||
@@ -232,8 +240,10 @@
|
||||
#define ICON_FA_CAMERA_RETRO "\xef\x82\x83" // U+f083
|
||||
#define ICON_FA_CAMERA_ROTATE "\xee\x83\x98" // U+e0d8
|
||||
#define ICON_FA_CAMPGROUND "\xef\x9a\xbb" // U+f6bb
|
||||
#define ICON_FA_CANCER "\xee\xa1\x87" // U+e847
|
||||
#define ICON_FA_CANDY_CANE "\xef\x9e\x86" // U+f786
|
||||
#define ICON_FA_CANNABIS "\xef\x95\x9f" // U+f55f
|
||||
#define ICON_FA_CAPRICORN "\xee\xa1\x88" // U+e848
|
||||
#define ICON_FA_CAPSULES "\xef\x91\xab" // U+f46b
|
||||
#define ICON_FA_CAR "\xef\x86\xb9" // U+f1b9
|
||||
#define ICON_FA_CAR_BATTERY "\xef\x97\x9f" // U+f5df
|
||||
@@ -266,6 +276,7 @@
|
||||
#define ICON_FA_CHART_AREA "\xef\x87\xbe" // U+f1fe
|
||||
#define ICON_FA_CHART_BAR "\xef\x82\x80" // U+f080
|
||||
#define ICON_FA_CHART_COLUMN "\xee\x83\xa3" // U+e0e3
|
||||
#define ICON_FA_CHART_DIAGRAM "\xee\x9a\x95" // U+e695
|
||||
#define ICON_FA_CHART_GANTT "\xee\x83\xa4" // U+e0e4
|
||||
#define ICON_FA_CHART_LINE "\xef\x88\x81" // U+f201
|
||||
#define ICON_FA_CHART_PIE "\xef\x88\x80" // U+f200
|
||||
@@ -287,9 +298,9 @@
|
||||
#define ICON_FA_CHEVRON_RIGHT "\xef\x81\x94" // U+f054
|
||||
#define ICON_FA_CHEVRON_UP "\xef\x81\xb7" // U+f077
|
||||
#define ICON_FA_CHILD "\xef\x86\xae" // U+f1ae
|
||||
#define ICON_FA_CHILD_COMBATANT "\xee\x93\xa0" // U+e4e0
|
||||
#define ICON_FA_CHILD_DRESS "\xee\x96\x9c" // U+e59c
|
||||
#define ICON_FA_CHILD_REACHING "\xee\x96\x9d" // U+e59d
|
||||
#define ICON_FA_CHILD_RIFLE "\xee\x93\xa0" // U+e4e0
|
||||
#define ICON_FA_CHILDREN "\xee\x93\xa1" // U+e4e1
|
||||
#define ICON_FA_CHURCH "\xef\x94\x9d" // U+f51d
|
||||
#define ICON_FA_CIRCLE "\xef\x84\x91" // U+f111
|
||||
@@ -334,6 +345,7 @@
|
||||
#define ICON_FA_CLOCK_ROTATE_LEFT "\xef\x87\x9a" // U+f1da
|
||||
#define ICON_FA_CLONE "\xef\x89\x8d" // U+f24d
|
||||
#define ICON_FA_CLOSED_CAPTIONING "\xef\x88\x8a" // U+f20a
|
||||
#define ICON_FA_CLOSED_CAPTIONING_SLASH "\xee\x84\xb5" // U+e135
|
||||
#define ICON_FA_CLOUD "\xef\x83\x82" // U+f0c2
|
||||
#define ICON_FA_CLOUD_ARROW_DOWN "\xef\x83\xad" // U+f0ed
|
||||
#define ICON_FA_CLOUD_ARROW_UP "\xef\x83\xae" // U+f0ee
|
||||
@@ -360,6 +372,7 @@
|
||||
#define ICON_FA_COMMENT_DOLLAR "\xef\x99\x91" // U+f651
|
||||
#define ICON_FA_COMMENT_DOTS "\xef\x92\xad" // U+f4ad
|
||||
#define ICON_FA_COMMENT_MEDICAL "\xef\x9f\xb5" // U+f7f5
|
||||
#define ICON_FA_COMMENT_NODES "\xee\x9a\x96" // U+e696
|
||||
#define ICON_FA_COMMENT_SLASH "\xef\x92\xb3" // U+f4b3
|
||||
#define ICON_FA_COMMENT_SMS "\xef\x9f\x8d" // U+f7cd
|
||||
#define ICON_FA_COMMENTS "\xef\x82\x86" // U+f086
|
||||
@@ -522,6 +535,8 @@
|
||||
#define ICON_FA_FILE_CSV "\xef\x9b\x9d" // U+f6dd
|
||||
#define ICON_FA_FILE_EXCEL "\xef\x87\x83" // U+f1c3
|
||||
#define ICON_FA_FILE_EXPORT "\xef\x95\xae" // U+f56e
|
||||
#define ICON_FA_FILE_FRAGMENT "\xee\x9a\x97" // U+e697
|
||||
#define ICON_FA_FILE_HALF_DASHED "\xee\x9a\x98" // U+e698
|
||||
#define ICON_FA_FILE_IMAGE "\xef\x87\x85" // U+f1c5
|
||||
#define ICON_FA_FILE_IMPORT "\xef\x95\xaf" // U+f56f
|
||||
#define ICON_FA_FILE_INVOICE "\xef\x95\xb0" // U+f570
|
||||
@@ -585,6 +600,7 @@
|
||||
#define ICON_FA_GEAR "\xef\x80\x93" // U+f013
|
||||
#define ICON_FA_GEARS "\xef\x82\x85" // U+f085
|
||||
#define ICON_FA_GEM "\xef\x8e\xa5" // U+f3a5
|
||||
#define ICON_FA_GEMINI "\xee\xa1\x89" // U+e849
|
||||
#define ICON_FA_GENDERLESS "\xef\x88\xad" // U+f22d
|
||||
#define ICON_FA_GHOST "\xef\x9b\xa2" // U+f6e2
|
||||
#define ICON_FA_GIFT "\xef\x81\xab" // U+f06b
|
||||
@@ -642,8 +658,6 @@
|
||||
#define ICON_FA_HANDS_PRAYING "\xef\x9a\x84" // U+f684
|
||||
#define ICON_FA_HANDSHAKE "\xef\x8a\xb5" // U+f2b5
|
||||
#define ICON_FA_HANDSHAKE_ANGLE "\xef\x93\x84" // U+f4c4
|
||||
#define ICON_FA_HANDSHAKE_SIMPLE "\xef\x93\x86" // U+f4c6
|
||||
#define ICON_FA_HANDSHAKE_SIMPLE_SLASH "\xee\x81\x9f" // U+e05f
|
||||
#define ICON_FA_HANDSHAKE_SLASH "\xee\x81\xa0" // U+e060
|
||||
#define ICON_FA_HANUKIAH "\xef\x9b\xa6" // U+f6e6
|
||||
#define ICON_FA_HARD_DRIVE "\xef\x82\xa0" // U+f0a0
|
||||
@@ -657,7 +671,6 @@
|
||||
#define ICON_FA_HEAD_SIDE_VIRUS "\xee\x81\xa4" // U+e064
|
||||
#define ICON_FA_HEADING "\xef\x87\x9c" // U+f1dc
|
||||
#define ICON_FA_HEADPHONES "\xef\x80\xa5" // U+f025
|
||||
#define ICON_FA_HEADPHONES_SIMPLE "\xef\x96\x8f" // U+f58f
|
||||
#define ICON_FA_HEADSET "\xef\x96\x90" // U+f590
|
||||
#define ICON_FA_HEART "\xef\x80\x84" // U+f004
|
||||
#define ICON_FA_HEART_CIRCLE_BOLT "\xee\x93\xbc" // U+e4fc
|
||||
@@ -672,6 +685,9 @@
|
||||
#define ICON_FA_HELICOPTER_SYMBOL "\xee\x94\x82" // U+e502
|
||||
#define ICON_FA_HELMET_SAFETY "\xef\xa0\x87" // U+f807
|
||||
#define ICON_FA_HELMET_UN "\xee\x94\x83" // U+e503
|
||||
#define ICON_FA_HEXAGON "\xef\x8c\x92" // U+f312
|
||||
#define ICON_FA_HEXAGON_NODES "\xee\x9a\x99" // U+e699
|
||||
#define ICON_FA_HEXAGON_NODES_BOLT "\xee\x9a\x9a" // U+e69a
|
||||
#define ICON_FA_HIGHLIGHTER "\xef\x96\x91" // U+f591
|
||||
#define ICON_FA_HILL_AVALANCHE "\xee\x94\x87" // U+e507
|
||||
#define ICON_FA_HILL_ROCKSLIDE "\xee\x94\x88" // U+e508
|
||||
@@ -767,8 +783,10 @@
|
||||
#define ICON_FA_LEFT_LONG "\xef\x8c\x8a" // U+f30a
|
||||
#define ICON_FA_LEFT_RIGHT "\xef\x8c\xb7" // U+f337
|
||||
#define ICON_FA_LEMON "\xef\x82\x94" // U+f094
|
||||
#define ICON_FA_LEO "\xee\xa1\x8a" // U+e84a
|
||||
#define ICON_FA_LESS_THAN "<" // U+003c
|
||||
#define ICON_FA_LESS_THAN_EQUAL "\xef\x94\xb7" // U+f537
|
||||
#define ICON_FA_LIBRA "\xee\xa1\x8b" // U+e84b
|
||||
#define ICON_FA_LIFE_RING "\xef\x87\x8d" // U+f1cd
|
||||
#define ICON_FA_LIGHTBULB "\xef\x83\xab" // U+f0eb
|
||||
#define ICON_FA_LINES_LEANING "\xee\x94\x9e" // U+e51e
|
||||
@@ -842,6 +860,7 @@
|
||||
#define ICON_FA_MOBILE_RETRO "\xee\x94\xa7" // U+e527
|
||||
#define ICON_FA_MOBILE_SCREEN "\xef\x8f\x8f" // U+f3cf
|
||||
#define ICON_FA_MOBILE_SCREEN_BUTTON "\xef\x8f\x8d" // U+f3cd
|
||||
#define ICON_FA_MOBILE_VIBRATE "\xee\xa0\x96" // U+e816
|
||||
#define ICON_FA_MONEY_BILL "\xef\x83\x96" // U+f0d6
|
||||
#define ICON_FA_MONEY_BILL_1 "\xef\x8f\x91" // U+f3d1
|
||||
#define ICON_FA_MONEY_BILL_1_WAVE "\xef\x94\xbb" // U+f53b
|
||||
@@ -871,6 +890,7 @@
|
||||
#define ICON_FA_NETWORK_WIRED "\xef\x9b\xbf" // U+f6ff
|
||||
#define ICON_FA_NEUTER "\xef\x88\xac" // U+f22c
|
||||
#define ICON_FA_NEWSPAPER "\xef\x87\xaa" // U+f1ea
|
||||
#define ICON_FA_NON_BINARY "\xee\xa0\x87" // U+e807
|
||||
#define ICON_FA_NOT_EQUAL "\xef\x94\xbe" // U+f53e
|
||||
#define ICON_FA_NOTDEF "\xee\x87\xbe" // U+e1fe
|
||||
#define ICON_FA_NOTE_STICKY "\xef\x89\x89" // U+f249
|
||||
@@ -878,6 +898,7 @@
|
||||
#define ICON_FA_O "O" // U+004f
|
||||
#define ICON_FA_OBJECT_GROUP "\xef\x89\x87" // U+f247
|
||||
#define ICON_FA_OBJECT_UNGROUP "\xef\x89\x88" // U+f248
|
||||
#define ICON_FA_OCTAGON "\xef\x8c\x86" // U+f306
|
||||
#define ICON_FA_OIL_CAN "\xef\x98\x93" // U+f613
|
||||
#define ICON_FA_OIL_WELL "\xee\x94\xb2" // U+e532
|
||||
#define ICON_FA_OM "\xef\x99\xb9" // U+f679
|
||||
@@ -906,6 +927,7 @@
|
||||
#define ICON_FA_PEN_RULER "\xef\x96\xae" // U+f5ae
|
||||
#define ICON_FA_PEN_TO_SQUARE "\xef\x81\x84" // U+f044
|
||||
#define ICON_FA_PENCIL "\xef\x8c\x83" // U+f303
|
||||
#define ICON_FA_PENTAGON "\xee\x9e\x90" // U+e790
|
||||
#define ICON_FA_PEOPLE_ARROWS "\xee\x81\xa8" // U+e068
|
||||
#define ICON_FA_PEOPLE_CARRY_BOX "\xef\x93\x8e" // U+f4ce
|
||||
#define ICON_FA_PEOPLE_GROUP "\xee\x94\xb3" // U+e533
|
||||
@@ -968,8 +990,10 @@
|
||||
#define ICON_FA_PHONE_SLASH "\xef\x8f\x9d" // U+f3dd
|
||||
#define ICON_FA_PHONE_VOLUME "\xef\x8a\xa0" // U+f2a0
|
||||
#define ICON_FA_PHOTO_FILM "\xef\xa1\xbc" // U+f87c
|
||||
#define ICON_FA_PICTURE_IN_PICTURE "\xee\xa0\x8b" // U+e80b
|
||||
#define ICON_FA_PIGGY_BANK "\xef\x93\x93" // U+f4d3
|
||||
#define ICON_FA_PILLS "\xef\x92\x84" // U+f484
|
||||
#define ICON_FA_PISCES "\xee\xa1\x8c" // U+e84c
|
||||
#define ICON_FA_PIZZA_SLICE "\xef\xa0\x98" // U+f818
|
||||
#define ICON_FA_PLACE_OF_WORSHIP "\xef\x99\xbf" // U+f67f
|
||||
#define ICON_FA_PLANE "\xef\x81\xb2" // U+f072
|
||||
@@ -1060,6 +1084,7 @@
|
||||
#define ICON_FA_S "S" // U+0053
|
||||
#define ICON_FA_SACK_DOLLAR "\xef\xa0\x9d" // U+f81d
|
||||
#define ICON_FA_SACK_XMARK "\xee\x95\xaa" // U+e56a
|
||||
#define ICON_FA_SAGITTARIUS "\xee\xa1\x8d" // U+e84d
|
||||
#define ICON_FA_SAILBOAT "\xee\x91\x85" // U+e445
|
||||
#define ICON_FA_SATELLITE "\xef\x9e\xbf" // U+f7bf
|
||||
#define ICON_FA_SATELLITE_DISH "\xef\x9f\x80" // U+f7c0
|
||||
@@ -1073,6 +1098,7 @@
|
||||
#define ICON_FA_SCHOOL_FLAG "\xee\x95\xae" // U+e56e
|
||||
#define ICON_FA_SCHOOL_LOCK "\xee\x95\xaf" // U+e56f
|
||||
#define ICON_FA_SCISSORS "\xef\x83\x84" // U+f0c4
|
||||
#define ICON_FA_SCORPIO "\xee\xa1\x8e" // U+e84e
|
||||
#define ICON_FA_SCREWDRIVER "\xef\x95\x8a" // U+f54a
|
||||
#define ICON_FA_SCREWDRIVER_WRENCH "\xef\x9f\x99" // U+f7d9
|
||||
#define ICON_FA_SCROLL "\xef\x9c\x8e" // U+f70e
|
||||
@@ -1080,6 +1106,7 @@
|
||||
#define ICON_FA_SD_CARD "\xef\x9f\x82" // U+f7c2
|
||||
#define ICON_FA_SECTION "\xee\x91\x87" // U+e447
|
||||
#define ICON_FA_SEEDLING "\xef\x93\x98" // U+f4d8
|
||||
#define ICON_FA_SEPTAGON "\xee\xa0\xa0" // U+e820
|
||||
#define ICON_FA_SERVER "\xef\x88\xb3" // U+f233
|
||||
#define ICON_FA_SHAPES "\xef\x98\x9f" // U+f61f
|
||||
#define ICON_FA_SHARE "\xef\x81\xa4" // U+f064
|
||||
@@ -1108,6 +1135,8 @@
|
||||
#define ICON_FA_SIGNATURE "\xef\x96\xb7" // U+f5b7
|
||||
#define ICON_FA_SIGNS_POST "\xef\x89\xb7" // U+f277
|
||||
#define ICON_FA_SIM_CARD "\xef\x9f\x84" // U+f7c4
|
||||
#define ICON_FA_SINGLE_QUOTE_LEFT "\xee\xa0\x9b" // U+e81b
|
||||
#define ICON_FA_SINGLE_QUOTE_RIGHT "\xee\xa0\x9c" // U+e81c
|
||||
#define ICON_FA_SINK "\xee\x81\xad" // U+e06d
|
||||
#define ICON_FA_SITEMAP "\xef\x83\xa8" // U+f0e8
|
||||
#define ICON_FA_SKULL "\xef\x95\x8c" // U+f54c
|
||||
@@ -1131,12 +1160,14 @@
|
||||
#define ICON_FA_SPELL_CHECK "\xef\xa2\x91" // U+f891
|
||||
#define ICON_FA_SPIDER "\xef\x9c\x97" // U+f717
|
||||
#define ICON_FA_SPINNER "\xef\x84\x90" // U+f110
|
||||
#define ICON_FA_SPIRAL "\xee\xa0\x8a" // U+e80a
|
||||
#define ICON_FA_SPLOTCH "\xef\x96\xbc" // U+f5bc
|
||||
#define ICON_FA_SPOON "\xef\x8b\xa5" // U+f2e5
|
||||
#define ICON_FA_SPRAY_CAN "\xef\x96\xbd" // U+f5bd
|
||||
#define ICON_FA_SPRAY_CAN_SPARKLES "\xef\x97\x90" // U+f5d0
|
||||
#define ICON_FA_SQUARE "\xef\x83\x88" // U+f0c8
|
||||
#define ICON_FA_SQUARE_ARROW_UP_RIGHT "\xef\x85\x8c" // U+f14c
|
||||
#define ICON_FA_SQUARE_BINARY "\xee\x9a\x9b" // U+e69b
|
||||
#define ICON_FA_SQUARE_CARET_DOWN "\xef\x85\x90" // U+f150
|
||||
#define ICON_FA_SQUARE_CARET_LEFT "\xef\x86\x91" // U+f191
|
||||
#define ICON_FA_SQUARE_CARET_RIGHT "\xef\x85\x92" // U+f152
|
||||
@@ -1194,7 +1225,10 @@
|
||||
#define ICON_FA_T "T" // U+0054
|
||||
#define ICON_FA_TABLE "\xef\x83\x8e" // U+f0ce
|
||||
#define ICON_FA_TABLE_CELLS "\xef\x80\x8a" // U+f00a
|
||||
#define ICON_FA_TABLE_CELLS_COLUMN_LOCK "\xee\x99\xb8" // U+e678
|
||||
#define ICON_FA_TABLE_CELLS_LARGE "\xef\x80\x89" // U+f009
|
||||
#define ICON_FA_TABLE_CELLS_ROW_LOCK "\xee\x99\xba" // U+e67a
|
||||
#define ICON_FA_TABLE_CELLS_ROW_UNLOCK "\xee\x9a\x91" // U+e691
|
||||
#define ICON_FA_TABLE_COLUMNS "\xef\x83\x9b" // U+f0db
|
||||
#define ICON_FA_TABLE_LIST "\xef\x80\x8b" // U+f00b
|
||||
#define ICON_FA_TABLE_TENNIS_PADDLE_BALL "\xef\x91\x9d" // U+f45d
|
||||
@@ -1208,6 +1242,7 @@
|
||||
#define ICON_FA_TAPE "\xef\x93\x9b" // U+f4db
|
||||
#define ICON_FA_TARP "\xee\x95\xbb" // U+e57b
|
||||
#define ICON_FA_TARP_DROPLET "\xee\x95\xbc" // U+e57c
|
||||
#define ICON_FA_TAURUS "\xee\xa1\x8f" // U+e84f
|
||||
#define ICON_FA_TAXI "\xef\x86\xba" // U+f1ba
|
||||
#define ICON_FA_TEETH "\xef\x98\xae" // U+f62e
|
||||
#define ICON_FA_TEETH_OPEN "\xef\x98\xaf" // U+f62f
|
||||
@@ -1235,6 +1270,7 @@
|
||||
#define ICON_FA_THUMBS_DOWN "\xef\x85\xa5" // U+f165
|
||||
#define ICON_FA_THUMBS_UP "\xef\x85\xa4" // U+f164
|
||||
#define ICON_FA_THUMBTACK "\xef\x82\x8d" // U+f08d
|
||||
#define ICON_FA_THUMBTACK_SLASH "\xee\x9a\x8f" // U+e68f
|
||||
#define ICON_FA_TICKET "\xef\x85\x85" // U+f145
|
||||
#define ICON_FA_TICKET_SIMPLE "\xef\x8f\xbf" // U+f3ff
|
||||
#define ICON_FA_TIMELINE "\xee\x8a\x9c" // U+e29c
|
||||
@@ -1310,8 +1346,6 @@
|
||||
#define ICON_FA_USER_GRADUATE "\xef\x94\x81" // U+f501
|
||||
#define ICON_FA_USER_GROUP "\xef\x94\x80" // U+f500
|
||||
#define ICON_FA_USER_INJURED "\xef\x9c\xa8" // U+f728
|
||||
#define ICON_FA_USER_LARGE "\xef\x90\x86" // U+f406
|
||||
#define ICON_FA_USER_LARGE_SLASH "\xef\x93\xba" // U+f4fa
|
||||
#define ICON_FA_USER_LOCK "\xef\x94\x82" // U+f502
|
||||
#define ICON_FA_USER_MINUS "\xef\x94\x83" // U+f503
|
||||
#define ICON_FA_USER_NINJA "\xef\x94\x84" // U+f504
|
||||
@@ -1336,7 +1370,6 @@
|
||||
#define ICON_FA_V "V" // U+0056
|
||||
#define ICON_FA_VAN_SHUTTLE "\xef\x96\xb6" // U+f5b6
|
||||
#define ICON_FA_VAULT "\xee\x8b\x85" // U+e2c5
|
||||
#define ICON_FA_VECTOR_SQUARE "\xef\x97\x8b" // U+f5cb
|
||||
#define ICON_FA_VENUS "\xef\x88\xa1" // U+f221
|
||||
#define ICON_FA_VENUS_DOUBLE "\xef\x88\xa6" // U+f226
|
||||
#define ICON_FA_VENUS_MARS "\xef\x88\xa8" // U+f228
|
||||
@@ -1349,6 +1382,7 @@
|
||||
#define ICON_FA_VIDEO "\xef\x80\xbd" // U+f03d
|
||||
#define ICON_FA_VIDEO_SLASH "\xef\x93\xa2" // U+f4e2
|
||||
#define ICON_FA_VIHARA "\xef\x9a\xa7" // U+f6a7
|
||||
#define ICON_FA_VIRGO "\xee\xa1\x90" // U+e850
|
||||
#define ICON_FA_VIRUS "\xee\x81\xb4" // U+e074
|
||||
#define ICON_FA_VIRUS_COVID "\xee\x92\xa8" // U+e4a8
|
||||
#define ICON_FA_VIRUS_COVID_SLASH "\xee\x92\xa9" // U+e4a9
|
||||
@@ -1357,6 +1391,7 @@
|
||||
#define ICON_FA_VOICEMAIL "\xef\xa2\x97" // U+f897
|
||||
#define ICON_FA_VOLCANO "\xef\x9d\xb0" // U+f770
|
||||
#define ICON_FA_VOLLEYBALL "\xef\x91\x9f" // U+f45f
|
||||
#define ICON_FA_VOLUME "\xef\x9a\xa8" // U+f6a8
|
||||
#define ICON_FA_VOLUME_HIGH "\xef\x80\xa8" // U+f028
|
||||
#define ICON_FA_VOLUME_LOW "\xef\x80\xa7" // U+f027
|
||||
#define ICON_FA_VOLUME_OFF "\xef\x80\xa6" // U+f026
|
||||
@@ -1372,6 +1407,7 @@
|
||||
#define ICON_FA_WATER "\xef\x9d\xb3" // U+f773
|
||||
#define ICON_FA_WATER_LADDER "\xef\x97\x85" // U+f5c5
|
||||
#define ICON_FA_WAVE_SQUARE "\xef\xa0\xbe" // U+f83e
|
||||
#define ICON_FA_WEB_AWESOME "\xee\x9a\x82" // U+e682
|
||||
#define ICON_FA_WEIGHT_HANGING "\xef\x97\x8d" // U+f5cd
|
||||
#define ICON_FA_WEIGHT_SCALE "\xef\x92\x96" // U+f496
|
||||
#define ICON_FA_WHEAT_AWN "\xee\x8b\x8d" // U+e2cd
|
||||
@@ -1,52 +1,60 @@
|
||||
#include "IconsFontAwesome6.h"
|
||||
#include "TracyAchievements.hpp"
|
||||
#include "TracyImGui.hpp"
|
||||
#include "TracySourceContents.hpp"
|
||||
#include "TracyWeb.hpp"
|
||||
#include "../Fonts.hpp"
|
||||
#include "TracyEmbed.hpp"
|
||||
|
||||
#include "data/Text100Million.hpp"
|
||||
#include "data/TextConnectToClient.hpp"
|
||||
#include "data/TextFindZone.hpp"
|
||||
#include "data/TextFrameImages.hpp"
|
||||
#include "data/TextGlobalSettings.hpp"
|
||||
#include "data/TextInstrumentFrames.hpp"
|
||||
#include "data/TextInstrumentationIntro.hpp"
|
||||
#include "data/TextInstrumentationStatistics.hpp"
|
||||
#include "data/TextIntro.hpp"
|
||||
#include "data/TextLoadTrace.hpp"
|
||||
#include "data/TextSamplingIntro.hpp"
|
||||
#include "data/TextSaveTrace.hpp"
|
||||
|
||||
namespace tracy::data
|
||||
{
|
||||
|
||||
AchievementItem ai_samplingIntro = { "samplingIntro", "Sampling program execution", [](){
|
||||
ImGui::TextWrapped( "Sampling program execution is a great way to find out where the hot spots are in your program. It can be used to find out which functions take the most time, or which lines of code are executed the most often." );
|
||||
ImGui::TextWrapped( "While instrumentation requires changes to your code, sampling does not. However, because of the way it works, the results are coarser and it's not possible to know when functions are called or when they return." );
|
||||
ImGui::TextWrapped( "Sampling is automatic on Linux. On Windows, you must run the profiled application as an administrator for it to work." );
|
||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
||||
ImGui::TextWrapped( "Depending on your system configuration, some additional steps may be required. Please refer to the user manual for more information." );
|
||||
ImGui::PopStyleColor();
|
||||
ImGui::PopFont();
|
||||
} };
|
||||
static std::string UnpackImpl( size_t size, size_t lz4Size, const uint8_t* data )
|
||||
{
|
||||
std::string ret;
|
||||
const EmbedData unembed( size, lz4Size, data );
|
||||
ret.assign( unembed.data(), unembed.size() );
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define Unpack( name ) UnpackImpl( Embed::name##Size, Embed::name##Lz4Size, Embed::name##Data )
|
||||
|
||||
|
||||
AchievementItem ai_samplingIntro = {
|
||||
.id = "samplingIntro",
|
||||
.name = "Sampling program execution",
|
||||
.text = Unpack( TextSamplingIntro ),
|
||||
};
|
||||
|
||||
AchievementItem* ac_samplingItems[] = { &ai_samplingIntro, nullptr };
|
||||
AchievementCategory ac_sampling = { "sampling", "Sampling", ac_samplingItems };
|
||||
|
||||
|
||||
AchievementItem ai_100million = { "100million", "It's over 100 million!", [](){
|
||||
ImGui::TextWrapped( "Tracy can handle a lot of data. How about 100 million zones in a single trace? Add a lot of zones to your program and see how it handles it!" );
|
||||
ImGui::TextWrapped( "Capturing a long-running profile trace is easy. Need to profile an hour of your program execution? You can do it." );
|
||||
ImGui::TextWrapped( "Note that it doesn't make much sense to instrument every little function you might have. The cost of the instrumentation itself will be higher than the cost of the function in such a case." );
|
||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
||||
ImGui::TextWrapped( "Keep in mind that the more zones you have, the more memory and CPU time the profiler will use. Be careful not to run out of memory." );
|
||||
ImGui::TextWrapped( "To capture 100 million zones, you will need approximately 4 GB of RAM." );
|
||||
ImGui::PopStyleColor();
|
||||
ImGui::PopFont();
|
||||
} };
|
||||
AchievementItem ai_100million = {
|
||||
.id = "100million",
|
||||
.name = "It's over 100 million!",
|
||||
.text = Unpack( Text100Million )
|
||||
};
|
||||
|
||||
AchievementItem ai_instrumentationStatistics = { "instrumentationStatistics", "Show me the stats!", [](){
|
||||
ImGui::TextWrapped( "Once you have instrumented your application, you can view the statistics for each zone in the timeline. This allows you to see how much time is spent in each zone and how many times it is called." );
|
||||
ImGui::TextWrapped( "To view the statistics, click on the \"" ICON_FA_ARROW_UP_WIDE_SHORT " Statistics\" button on the top bar. This will open a new window with a list of all zones in the trace." );
|
||||
} };
|
||||
AchievementItem ai_instrumentationStatistics = {
|
||||
.id = "instrumentationStatistics",
|
||||
.name = "Show me the stats!",
|
||||
.text = Unpack( TextInstrumentationStatistics )
|
||||
};
|
||||
|
||||
AchievementItem ai_findZone = { "findZone", "Find some zones", [](){
|
||||
ImGui::TextWrapped( "You can search for zones in the trace by opening the search window with the \"" ICON_FA_MAGNIFYING_GLASS " Find zone\" button on the top bar. It will ask you for the zone name, which in most cases will be the function name in the code." );
|
||||
ImGui::TextWrapped( "The search may find more than one zone with the same name. A list of all the zones found is displayed, and you can select any of them." );
|
||||
ImGui::TextWrapped( "Alternatively, you can open the Statistics window and click an entry there. This will open the Find zone window as if you had searched for that zone." );
|
||||
ImGui::TextWrapped( "When a zone is selected, a number of statistics are displayed to help you understand the performance of your application. In addition, a histogram of the zone execution times is displayed to make it easier for you to determine the performance of the profiled code. Be sure to select a zone with a large number of calls to make the histogram look interesting!" );
|
||||
ImGui::TextWrapped( "Note that you can draw a range on the histogram to limit the number of entries displayed in the zone list below. This list allows you to examine each zone individually. There are also a number of zone groupings that you can select. Each group can be selected and the time associated with the selected group will be highlighted on the histogram." );
|
||||
} };
|
||||
AchievementItem ai_findZone = {
|
||||
.id = "findZone",
|
||||
.name = "Find some zones",
|
||||
.text = Unpack( TextFindZone )
|
||||
};
|
||||
|
||||
AchievementItem* ac_instrumentationIntroItems[] = {
|
||||
&ai_100million,
|
||||
@@ -55,90 +63,46 @@ AchievementItem* ac_instrumentationIntroItems[] = {
|
||||
nullptr
|
||||
};
|
||||
|
||||
AchievementItem ai_instrumentationIntro = { "instrumentationIntro", "Instrumentating your application", [](){
|
||||
constexpr const char* src = R"(#include "Tracy.hpp"
|
||||
// Introductory instrumentation achievement. Its description text is unpacked
// from the embedded TextInstrumentationIntro data, and its child items are
// listed in ac_instrumentationIntroItems.
AchievementItem ai_instrumentationIntro = {
    .id = "instrumentationIntro",
    // Display name only; the lookup key is .id above, so correcting the
    // spelling ("Instrumentating" -> "Instrumenting") leaves the key unchanged.
    .name = "Instrumenting your application",
    .text = Unpack( TextInstrumentationIntro ),
    .items = ac_instrumentationIntroItems
};
|
||||
|
||||
void SomeFunction()
|
||||
{
|
||||
ZoneScoped;
|
||||
// Your code here
|
||||
}
|
||||
)";
|
||||
|
||||
static SourceContents sc;
|
||||
sc.Parse( src );
|
||||
|
||||
ImGui::TextWrapped( "Instrumentation is a powerful feature that allows you to see the exact runtime of each call to the selected set of functions. The downside is that it takes a bit of manual work to get it set up." );
|
||||
ImGui::TextWrapped( "To get started, open a source file and include the Tracy.hpp header. This will give you access to a variety of macros provided by Tracy. Next, add the ZoneScoped macro to the beginning of one of your functions, like this:" );
|
||||
ImGui::PushFont( g_fonts.mono, FontNormal );
|
||||
PrintSource( sc.get() );
|
||||
ImGui::PopFont();
|
||||
ImGui::TextWrapped( "Now, when you profile your application, you will see a new zone appear on the timeline for each call to the function. This allows you to see how much time is spent in each call and how many times the function is called." );
|
||||
ImGui::PushFont( g_fonts.normal, FontSmall );
|
||||
ImGui::PushStyleColor( ImGuiCol_Text, GImGui->Style.Colors[ImGuiCol_TextDisabled] );
|
||||
ImGui::TextWrapped( "Note: The ZoneScoped macro is just one of the many macros provided by Tracy. See the documentation for more information." );
|
||||
ImGui::TextWrapped( "The above description applies to C++ code, but things are done similarly in other programming languages. Refer to the documentation for your language for more information." );
|
||||
ImGui::PopStyleColor();
|
||||
ImGui::PopFont();
|
||||
}, ac_instrumentationIntroItems };
|
||||
|
||||
AchievementItem ai_frameImages = { "frameImages", "A picture is worth a thousand words", [](){
|
||||
ImGui::TextWrapped( "Tracy allows you to add context to each frame, by attaching a screenshot. You can do this with the FrameImage macro." );
|
||||
ImGui::TextWrapped( "You will have to do the screen capture and resizing yourself, which can be a bit complicated. The manual provides a sample code that shows how to do this in a performant way." );
|
||||
ImGui::TextWrapped( "The frame images are displayed in the context of a frame, for example, when you hover over the frame in the timeline or in the frame graph at the top of the screen." );
|
||||
ImGui::TextWrapped( "You can even view a recording of what your application was doing by clicking the " ICON_FA_SCREWDRIVER_WRENCH " icon and then selecting the \"" ICON_FA_PLAY " Playback\" option. Try it out!" );
|
||||
ImGui::TextWrapped( "The FrameImage macro is a great way to see what happened in your application at a particular time. Maybe you have a performance problem that only occurs when a certain object is on the screen?" );
|
||||
} };
|
||||
AchievementItem ai_frameImages = {
|
||||
.id = "frameImages",
|
||||
.name = "A picture is worth a thousand words",
|
||||
.text = Unpack( TextFrameImages )
|
||||
};
|
||||
|
||||
AchievementItem* ac_instrumentFramesItems[] = {
|
||||
&ai_frameImages,
|
||||
nullptr
|
||||
};
|
||||
|
||||
AchievementItem ai_instrumentFrames = { "instrumentFrames", "Instrumenting frames", [](){
|
||||
constexpr const char* src = R"(#include "Tracy.hpp"
|
||||
|
||||
void Render()
|
||||
{
|
||||
// Render the frame
|
||||
SwapBuffers();
|
||||
FrameMark;
|
||||
}
|
||||
)";
|
||||
|
||||
static SourceContents sc;
|
||||
sc.Parse( src );
|
||||
|
||||
ImGui::TextWrapped( "In addition to instrumenting functions, you can also instrument frames. This allows you to see how much time is spent in each frame of your application." );
|
||||
ImGui::TextWrapped( "To instrument frames, you need to add the FrameMark macro at the beginning of each frame. This can be done in the main loop of your application, or in a separate function that is called at the beginning of each frame." );
|
||||
ImGui::PushFont( g_fonts.mono, FontNormal );
|
||||
PrintSource( sc.get() );
|
||||
ImGui::PopFont();
|
||||
ImGui::TextWrapped( "When you profile your application, you will see a new frame appear on the timeline each time the FrameMark macro is called. This allows you to see how much time is spent in each frame and how many frames are rendered per second." );
|
||||
ImGui::TextWrapped( "The FrameMark macro is a great way to see at a glance how your application is performing over time. Maybe there are some performance problems that only appear after a few minutes of running the application? A frame graph is drawn at the top of the profiler window where you can see the timing of all frames." );
|
||||
ImGui::TextWrapped( "Note that some applications do not have a frame-based structure, and in such cases, frame instrumentation may not be useful. That's ok." );
|
||||
}, ac_instrumentFramesItems };
|
||||
AchievementItem ai_instrumentFrames = {
|
||||
.id = "instrumentFrames",
|
||||
.name = "Instrumenting frames",
|
||||
.text = Unpack( TextInstrumentFrames ),
|
||||
.items = ac_instrumentFramesItems
|
||||
};
|
||||
|
||||
AchievementItem* ac_instrumentationItems[] = { &ai_instrumentationIntro, &ai_instrumentFrames, nullptr };
|
||||
AchievementCategory ac_instrumentation = { "instrumentation", "Instrumentation", ac_instrumentationItems };
|
||||
|
||||
|
||||
AchievementItem ai_loadTrace = { "loadTrace", "Load a trace", [](){
|
||||
ImGui::TextWrapped( "You can open a previously saved trace file (or one received from a friend) with the \"" ICON_FA_FOLDER_OPEN " Open saved trace\" button on the welcome screen." );
|
||||
} };
|
||||
AchievementItem ai_loadTrace = {
|
||||
.id = "loadTrace",
|
||||
.name = "Load a trace",
|
||||
.text = Unpack( TextLoadTrace )
|
||||
};
|
||||
|
||||
AchievementItem ai_saveTrace = { "saveTrace", "Save a trace", [](){
|
||||
ImGui::TextWrapped( "Now that you have traced your application (or are in the process of doing so), you can save it to disk for future reference. You can do this by clicking on the " ICON_FA_WIFI " icon in the top left corner of the screen and then clicking on the \"" ICON_FA_FLOPPY_DISK " Save trace\" button." );
|
||||
ImGui::TextWrapped( "Keeping old traces on hand can be beneficial, as you can compare the performance of your optimizations with what you had before." );
|
||||
ImGui::TextWrapped( "You can also share the trace with your friends or co-workers by sending them the trace file." );
|
||||
ImGui::Spacing();
|
||||
tracy::TextColoredUnformatted( 0xFF44FFFF, ICON_FA_TRIANGLE_EXCLAMATION );
|
||||
ImGui::SameLine();
|
||||
ImGui::TextUnformatted( "Warning" );
|
||||
ImGui::SameLine();
|
||||
tracy::TextColoredUnformatted( 0xFF44FFFF, ICON_FA_TRIANGLE_EXCLAMATION );
|
||||
ImGui::TextWrapped( "Trace files can contain sensitive information about your application, such as program code, or even the contents of source files. Be careful when sharing them with others." );
|
||||
} };
|
||||
AchievementItem ai_saveTrace = {
|
||||
.id = "saveTrace",
|
||||
.name = "Save a trace",
|
||||
.text = Unpack( TextSaveTrace )
|
||||
};
|
||||
|
||||
AchievementItem* ac_connectToServerItems[] = {
|
||||
&ai_saveTrace,
|
||||
@@ -152,23 +116,19 @@ AchievementItem* ac_connectToServerUnlock[] = {
|
||||
nullptr
|
||||
};
|
||||
|
||||
AchievementItem ai_connectToServer = { "connectToClient", "First profiling session", [](){
|
||||
ImGui::TextWrapped( "Let's start our adventure by instrumenting your application and connecting it to the profiler. Here's a quick refresher:" );
|
||||
ImGui::TextWrapped( " 1. Integrate Tracy Profiler into your application. This can be done using CMake, Meson, or simply by adding the source files to your project." );
|
||||
ImGui::TextWrapped( " 2. Make sure that TracyClient.cpp (or the Tracy library) is included in your build." );
|
||||
ImGui::TextWrapped( " 3. Define TRACY_ENABLE in your build configuration, for the whole application. Do not do it in a single source file because it won't work." );
|
||||
ImGui::TextWrapped( " 4. Start your application, and \"" ICON_FA_WIFI " Connect\" to it with the profiler." );
|
||||
ImGui::TextWrapped( "Please refer to the user manual for more details." );
|
||||
if( ImGui::SmallButton( "Download the user manual" ) )
|
||||
{
|
||||
tracy::OpenWebpage( "https://github.com/wolfpld/tracy/releases" );
|
||||
}
|
||||
}, ac_connectToServerItems, ac_connectToServerUnlock };
|
||||
AchievementItem ai_connectToServer = {
|
||||
.id = "connectToClient",
|
||||
.name = "First profiling session",
|
||||
.text = Unpack( TextConnectToClient ),
|
||||
.items = ac_connectToServerItems,
|
||||
.unlocks = ac_connectToServerUnlock
|
||||
};
|
||||
|
||||
AchievementItem ai_globalSettings = { "globalSettings", "Global settings", [](){
|
||||
ImGui::TextWrapped( "Tracy has a variety of settings that can be adjusted to suit your needs. These settings can be found by clicking on the " ICON_FA_WRENCH " icon on the welcome screen. This will open the about window, where you can expand the \"" ICON_FA_TOOLBOX " Global settings\" menu." );
|
||||
ImGui::TextWrapped( "The settings are saved between sessions, so you only need to set them once." );
|
||||
} };
|
||||
AchievementItem ai_globalSettings = {
|
||||
.id = "globalSettings",
|
||||
.name = "Global settings",
|
||||
.text = Unpack( TextGlobalSettings )
|
||||
};
|
||||
|
||||
AchievementItem* ac_achievementsIntroItems[] = {
|
||||
&ai_connectToServer,
|
||||
@@ -176,18 +136,14 @@ AchievementItem* ac_achievementsIntroItems[] = {
|
||||
nullptr
|
||||
};
|
||||
|
||||
AchievementItem ai_achievementsIntro = { "achievementsIntro", "Click here to discover achievements!", [](){
|
||||
ImGui::TextWrapped( "Clicking on the " ICON_FA_STAR " button opens the Achievements List. Here you can see the tasks to be completed along with a short description of what needs to be done." );
|
||||
ImGui::TextWrapped( "As you complete each Achievement, new Achievements will appear, so be sure to keep checking the list for new ones!" );
|
||||
ImGui::TextWrapped( "To make the new things easier to spot, the Achievements List will show a marker next to them. The achievements " ICON_FA_STAR " button will glow yellow when there are new things to see." );
|
||||
ImGui::TextUnformatted( "New tasks:" );
|
||||
ImGui::SameLine();
|
||||
TextColoredUnformatted( 0xFF4488FF, ICON_FA_CIRCLE_EXCLAMATION );
|
||||
ImGui::TextUnformatted( "Completed tasks:" );
|
||||
ImGui::SameLine();
|
||||
TextColoredUnformatted( 0xFF44FF44, ICON_FA_CIRCLE_CHECK );
|
||||
ImGui::TextWrapped( "Good luck!" );
|
||||
}, ac_achievementsIntroItems, nullptr, true, 1 };
|
||||
AchievementItem ai_achievementsIntro = {
|
||||
.id = "achievementsIntro",
|
||||
.name = "Click here to discover achievements!",
|
||||
.text = Unpack( TextIntro ),
|
||||
.items = ac_achievementsIntroItems,
|
||||
.keepOpen = true,
|
||||
.unlockTime = 1
|
||||
};
|
||||
|
||||
AchievementItem* ac_firstStepsItems[] = { &ai_achievementsIntro, nullptr };
|
||||
AchievementCategory ac_firstSteps = { "firstSteps", "First steps", ac_firstStepsItems, 1 };
|
||||
|
||||
@@ -20,7 +20,7 @@ struct AchievementItem
|
||||
{
|
||||
const char* id;
|
||||
const char* name;
|
||||
void(*description)();
|
||||
std::string text;
|
||||
AchievementItem** items;
|
||||
AchievementItem** unlocks;
|
||||
bool keepOpen;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#include "imgui.h"
|
||||
#include "../Fonts.hpp"
|
||||
|
||||
#include "IconsFontAwesome6.h"
|
||||
#include "IconsFontAwesome7.h"
|
||||
#include "TracyBadVersion.hpp"
|
||||
#include "TracyImGui.hpp"
|
||||
#include "TracyWeb.hpp"
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "imgui_internal.h"
|
||||
|
||||
#include "../public/common/TracyForceInline.hpp"
|
||||
#include "IconsFontAwesome6.h"
|
||||
#include "IconsFontAwesome7.h"
|
||||
#include "TracySourceTokenizer.hpp"
|
||||
|
||||
ImTextureID GetProfilerIconTexture();
|
||||
|
||||
@@ -166,6 +166,40 @@ public:
|
||||
ImGui::TextUnformatted( ". " );
|
||||
break;
|
||||
}
|
||||
case MD_BLOCK_ADMONITION:
|
||||
{
|
||||
Separate();
|
||||
ImGui::Indent();
|
||||
origin = ImGui::GetCursorScreenPos();
|
||||
auto admonition = ((MD_BLOCK_ADMONITION_DETAIL*)detail);
|
||||
switch( admonition->type.text[0] )
|
||||
{
|
||||
case 'n': // note
|
||||
color = 0xFFEB6F1F;
|
||||
TextColoredUnformatted( color, ICON_FA_CIRCLE_INFO " " );
|
||||
break;
|
||||
case 't': // tip
|
||||
color = 0xFF368623;
|
||||
TextColoredUnformatted( color, ICON_FA_LIGHTBULB " " );
|
||||
break;
|
||||
case 'i': // important
|
||||
color = 0xFFE55789;
|
||||
TextColoredUnformatted( color, ICON_FA_MESSAGE " " );
|
||||
break;
|
||||
case 'w': // warning
|
||||
color = 0xFF036A9E;
|
||||
TextColoredUnformatted( color, ICON_FA_TRIANGLE_EXCLAMATION " " );
|
||||
break;
|
||||
case 'c': // caution
|
||||
color = 0xFF3336DA;
|
||||
TextColoredUnformatted( color, ICON_FA_HAND " " );
|
||||
break;
|
||||
default:
|
||||
assert( false );
|
||||
}
|
||||
Glue();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -223,6 +257,15 @@ public:
|
||||
case MD_BLOCK_FOOTNOTE_DEF:
|
||||
ImGui::PopFont();
|
||||
break;
|
||||
case MD_BLOCK_ADMONITION:
|
||||
{
|
||||
const auto scale = GetScale();
|
||||
const auto pos = ImGui::GetCursorScreenPos();
|
||||
const auto offset = ImVec2( 8.f * scale, 0 );
|
||||
ImGui::Unindent();
|
||||
ImGui::GetWindowDrawList()->AddLine( origin - offset, pos - offset, color, 2.f * scale );
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -493,6 +536,9 @@ private:
|
||||
|
||||
int idx = 0;
|
||||
|
||||
uint32_t color;
|
||||
ImVec2 origin;
|
||||
|
||||
std::vector<List> lists;
|
||||
std::string link;
|
||||
|
||||
@@ -507,7 +553,7 @@ Markdown::Markdown( View* view, Worker* worker )
|
||||
, m_worker( worker )
|
||||
{
|
||||
memset( m_parser, 0, sizeof( MD_PARSER ) );
|
||||
m_parser->flags = MD_FLAG_COLLAPSEWHITESPACE | MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_NOHTML | MD_FLAG_TABLES | MD_FLAG_TASKLISTS | MD_FLAG_STRIKETHROUGH | MD_FLAG_FOOTNOTES;
|
||||
m_parser->flags = MD_FLAG_COLLAPSEWHITESPACE | MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_NOHTML | MD_FLAG_TABLES | MD_FLAG_TASKLISTS | MD_FLAG_STRIKETHROUGH | MD_FLAG_FOOTNOTES | MD_FLAG_ADMONITIONS;
|
||||
m_parser->enter_block = []( MD_BLOCKTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->EnterBlock( type, detail ); };
|
||||
m_parser->leave_block = []( MD_BLOCKTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->LeaveBlock( type, detail ); };
|
||||
m_parser->enter_span = []( MD_SPANTYPE type, void* detail, void* ud ) -> int { return ((MarkdownContext*)ud)->EnterSpan( type, detail ); };
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
#include "tracy_pdqsort.h"
|
||||
#include "../Fonts.hpp"
|
||||
|
||||
#include "IconsFontAwesome6.h"
|
||||
#include "IconsFontAwesome7.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#include "../Fonts.hpp"
|
||||
|
||||
#include "imgui_internal.h"
|
||||
#include "IconsFontAwesome6.h"
|
||||
#include "IconsFontAwesome7.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
@@ -25,6 +25,8 @@ void View::DrawManual()
|
||||
ImGui::PopStyleColor();
|
||||
ImGui::SameLine();
|
||||
TextDisabledUnformatted( "This user manual is missing features. See the PDF file for the proper version." );
|
||||
ImGui::SameLine();
|
||||
if( ImGui::Button( ICON_FA_BOOK " PDF Manual" ) ) OpenWebpage( "https://github.com/wolfpld/tracy/releases" );
|
||||
|
||||
ImGui::Separator();
|
||||
ImGui::BeginChild( "##usermanual" );
|
||||
@@ -88,6 +90,7 @@ void View::DrawManual()
|
||||
if( ImGui::IsItemClicked() && !ImGui::IsItemToggledOpen() )
|
||||
{
|
||||
m_activeManualChunk = i;
|
||||
m_manualPositionReset = true;
|
||||
}
|
||||
}
|
||||
while( level-- > 0 ) ImGui::TreePop();
|
||||
@@ -142,8 +145,8 @@ void View::DrawManual()
|
||||
ImGui::Dummy( ImVec2( 0, ImGui::GetTextLineHeight() * 0.25f ) );
|
||||
ImGui::PopFont();
|
||||
|
||||
const auto separator = chunk.text.find( "-----" );
|
||||
const auto size = separator == std::string::npos ? chunk.text.size() : separator;
|
||||
const auto separator = chunk.text.find( "\n-----" );
|
||||
const auto size = separator == std::string::npos ? chunk.text.size() : ( separator + 1 );
|
||||
|
||||
m_markdown.Print( chunk.text.c_str(), size );
|
||||
}
|
||||
|
||||
@@ -34,6 +34,9 @@ public:
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||
# include <chrono>
|
||||
#endif
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "../client/TracyProfiler.hpp"
|
||||
@@ -106,6 +109,14 @@ public:
|
||||
GLint bits;
|
||||
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
|
||||
|
||||
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||
// The anchor above is never refreshed; advertise calibration and emit periodic
|
||||
// GpuCalibration events to correct CPU/GPU drift (see Recalibrate). Opt-in,
|
||||
// because Recalibrate() calls glGetInteger64v( GL_TIMESTAMP ), which forces a
|
||||
// CPU/GPU sync.
|
||||
m_prevCalibration = GetHostTimeNs();
|
||||
#endif
|
||||
|
||||
const float period = 1.f;
|
||||
const auto thread = GetThreadHandle();
|
||||
TracyLfqPrepare( QueueType::GpuNewContext );
|
||||
@@ -114,7 +125,11 @@ public:
|
||||
MemWrite( &item->gpuNewContext.thread, thread );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||
MemWrite( &item->gpuNewContext.flags, GpuContextFlags( GpuContextCalibration ) );
|
||||
#else
|
||||
MemWrite( &item->gpuNewContext.flags, GpuContextFlags( 0 ) );
|
||||
#endif
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
@@ -143,8 +158,6 @@ public:
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
@@ -153,6 +166,14 @@ public:
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||
// Before the drain's early-returns, so it runs even on frames with no
|
||||
// completed queries.
|
||||
Recalibrate();
|
||||
#endif
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
|
||||
while( m_tail != m_head )
|
||||
{
|
||||
GLint available;
|
||||
@@ -173,6 +194,38 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||
// Monotonic host ns for the inter-calibration interval (cpuDelta), kept
|
||||
// separate from Profiler::GetTime() as in the D3D12/Vulkan backends.
|
||||
static tracy_force_inline int64_t GetHostTimeNs()
|
||||
{
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||
std::chrono::steady_clock::now().time_since_epoch() ).count();
|
||||
}
|
||||
|
||||
// OpenGL has no atomic CPU+GPU timestamp query, so sample back-to-back; the
|
||||
// gap is negligible against the recalibration interval below. Note this forces
|
||||
// a CPU/GPU sync, which is why the whole path is opt-in (TRACY_OPENGL_AUTO_CALIBRATION).
|
||||
tracy_force_inline void Recalibrate()
|
||||
{
|
||||
const int64_t hostNow = GetHostTimeNs();
|
||||
const int64_t delta = hostNow - m_prevCalibration;
|
||||
if( delta < 1000ll * 1000 * 1000 ) return; // throttle: ~once per second
|
||||
|
||||
int64_t tgpu;
|
||||
glGetInteger64v( GL_TIMESTAMP, &tgpu );
|
||||
const int64_t refCpu = Profiler::GetTime();
|
||||
m_prevCalibration = hostNow;
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuCalibration );
|
||||
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
|
||||
MemWrite( &item->gpuCalibration.cpuDelta, delta );
|
||||
MemWrite( &item->gpuCalibration.context, m_context );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
#endif
|
||||
|
||||
tracy_force_inline unsigned int NextQueryId()
|
||||
{
|
||||
const auto id = m_head;
|
||||
@@ -196,6 +249,10 @@ private:
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
|
||||
#ifdef TRACY_OPENGL_AUTO_CALIBRATION
|
||||
int64_t m_prevCalibration; // host-ns timestamp of the last emitted calibration
|
||||
#endif
|
||||
};
|
||||
|
||||
class GpuCtxScope
|
||||
|
||||
@@ -11,6 +11,22 @@
|
||||
|
||||
#include "OfflineSymbolResolver.h"
|
||||
|
||||
bool ResolveSymbols( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// On Windows the default (no custom tool given) is the DbgHelp backend.
|
||||
if( addr2lineToolPath.empty() )
|
||||
{
|
||||
return ResolveSymbolsDbgHelp( imagePath, inputEntryList, resolvedEntries );
|
||||
}
|
||||
#endif
|
||||
// Everywhere else, and whenever a custom tool is given, use the addr2line-compatible backend.
|
||||
// An empty path lets that backend fall back to the 'addr2line' found in PATH.
|
||||
return ResolveSymbolsAddr2Line( addr2lineToolPath, addr2lineArgs, imagePath, inputEntryList, resolvedEntries );
|
||||
}
|
||||
|
||||
bool ApplyPathSubstitutions( std::string& path, const PathSubstitutionList& pathSubstitutionlist )
|
||||
{
|
||||
for( const auto& substitution : pathSubstitutionlist )
|
||||
@@ -31,7 +47,35 @@ tracy::StringIdx AddSymbolString( tracy::Worker& worker, const std::string& str
|
||||
return tracy::StringIdx( location.idx );
|
||||
}
|
||||
|
||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstitutionlist, bool verbose )
|
||||
// Puts every callstack frame known to the worker back into the unresolved state:
// name "[unresolved]", file "[unknown]", line 0. Map entries without frame data are skipped.
// Prints the number of frames that were reset.
void ResetSymbols( tracy::Worker& worker )
{
    std::cout << "Resetting callstack frame symbols to the unresolved state..." << std::endl;

    // Register the placeholder strings once and reuse their indices for every frame.
    const tracy::StringIdx placeholderName = AddSymbolString( worker, "[unresolved]" );
    const tracy::StringIdx placeholderFile = AddSymbolString( worker, "[unknown]" );

    uint64_t resetCount = 0;
    for( auto& entry : worker.GetCallstackFrameMap() )
    {
        if( !entry.second ) continue;

        tracy::CallstackFrameData& frames = *entry.second;
        for( uint8_t idx = 0; idx < frames.size; idx++ )
        {
            tracy::CallstackFrame& frame = frames.data[idx];
            frame.name = placeholderName;
            frame.file = placeholderFile;
            frame.line = 0;
        }
        resetCount += frames.size;
    }

    std::cout << "Reset " << resetCount << " callstack frames." << std::endl;
}
|
||||
|
||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstitutionlist,
|
||||
const std::string& addr2lineToolPath, const std::string& addr2lineArgs, bool verbose )
|
||||
{
|
||||
uint64_t callstackFrameCount = worker.GetCallstackFrameCount();
|
||||
std::string relativeSoNameMatch = "[unresolved]";
|
||||
@@ -91,7 +135,7 @@ bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& p
|
||||
}
|
||||
|
||||
SymbolEntryList resolvedEntries;
|
||||
ResolveSymbols( imagePath, entries, resolvedEntries );
|
||||
ResolveSymbols( addr2lineToolPath, addr2lineArgs, imagePath, entries, resolvedEntries );
|
||||
|
||||
if( resolvedEntries.size() != entries.size() )
|
||||
{
|
||||
@@ -131,7 +175,8 @@ bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& p
|
||||
return true;
|
||||
}
|
||||
|
||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings, bool verbose )
|
||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings,
|
||||
const std::string& addr2lineToolPath, const std::string& addr2lineArgs, bool verbose )
|
||||
{
|
||||
std::cout << "Resolving and patching symbols..." << std::endl;
|
||||
|
||||
@@ -160,7 +205,7 @@ void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSu
|
||||
}
|
||||
}
|
||||
|
||||
if ( !PatchSymbolsWithRegex(worker, pathSubstitutionList, verbose) )
|
||||
if ( !PatchSymbolsWithRegex(worker, pathSubstitutionList, addr2lineToolPath, addr2lineArgs, verbose) )
|
||||
{
|
||||
std::cerr << "Failed to patch symbols" << std::endl;
|
||||
}
|
||||
|
||||
@@ -29,12 +29,41 @@ struct SymbolEntry
|
||||
|
||||
using SymbolEntryList = std::vector<SymbolEntry>;
|
||||
|
||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
// Dispatches to the appropriate backend depending on the platform and whether a custom
|
||||
// addr2line-compatible tool was specified. When addr2lineToolPath is non-empty, the tool at
|
||||
// that path is invoked (on any platform); otherwise the platform default is used (DbgHelp on
|
||||
// Windows, the 'addr2line' found in PATH elsewhere). addr2lineArgs are extra arguments passed
|
||||
// verbatim to the addr2line-compatible tool (e.g. "--relative-address").
|
||||
bool ResolveSymbols( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries );
|
||||
|
||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings, bool verbose = false );
|
||||
// Backend invoking an addr2line-compatible tool. Available on all platforms. An empty
|
||||
// addr2lineToolPath falls back to the 'addr2line' found in PATH. addr2lineArgs are inserted
|
||||
// verbatim into the tool's command line.
|
||||
bool ResolveSymbolsAddr2Line( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries );
|
||||
|
||||
#ifdef _WIN32
|
||||
// Backend using the Windows DbgHelp library.
|
||||
bool ResolveSymbolsDbgHelp( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries );
|
||||
#endif
|
||||
|
||||
// Resets all callstack frame symbols back to the unresolved state ("[unresolved]" / "[unknown]"),
|
||||
// so a subsequent PatchSymbols pass re-resolves every frame. This is useful to chain several
|
||||
// resolution passes with different path substitutions. Only meaningful for traces captured with
|
||||
// TRACY_SYMBOL_OFFLINE_RESOLVE, where each frame's symAddr holds the image-relative offset.
|
||||
void ResetSymbols( tracy::Worker& worker );
|
||||
|
||||
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings,
|
||||
const std::string& addr2lineToolPath = std::string(),
|
||||
const std::string& addr2lineArgs = std::string(), bool verbose = false );
|
||||
|
||||
using PathSubstitutionList = std::vector<std::pair<std::regex, std::string> >;
|
||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstituionlist, bool verbose = false );
|
||||
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstituionlist,
|
||||
const std::string& addr2lineToolPath = std::string(),
|
||||
const std::string& addr2lineArgs = std::string(), bool verbose = false );
|
||||
|
||||
#endif // __SYMBOLRESOLVER_HPP__
|
||||
@@ -1,5 +1,3 @@
|
||||
#ifndef _WIN32
|
||||
|
||||
#include "OfflineSymbolResolver.h"
|
||||
|
||||
#include <fstream>
|
||||
@@ -10,6 +8,11 @@
|
||||
#include <memory>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
# define popen _popen
|
||||
# define pclose _pclose
|
||||
#endif
|
||||
|
||||
std::string ExecShellCommand( const char* cmd )
|
||||
{
|
||||
std::array<char, 128> buffer;
|
||||
@@ -29,23 +32,66 @@ std::string ExecShellCommand( const char* cmd )
|
||||
class SymbolResolver
|
||||
{
|
||||
public:
|
||||
SymbolResolver()
|
||||
SymbolResolver( const std::string& addr2lineToolPath, const std::string& addr2lineArgs )
|
||||
{
|
||||
// Extra arguments are inserted verbatim into the tool invocation. Tracy records frame
|
||||
// offsets as RVAs; for images with a non-zero preferred image base (PE, Mach-O) the user
|
||||
// can pass "--relative-address" here so llvm-addr2line / llvm-symbolizer add the base back.
|
||||
if( !addr2lineArgs.empty() )
|
||||
{
|
||||
m_addr2LineArgs = " " + addr2lineArgs;
|
||||
}
|
||||
|
||||
if( !addr2lineToolPath.empty() )
|
||||
{
|
||||
// If the value looks like a path (not a bare command name resolved via PATH), verify
|
||||
// it exists so a wrong path fails with an actionable error instead of a cryptic shell one.
|
||||
const bool looksLikePath = addr2lineToolPath.find( '/' ) != std::string::npos ||
|
||||
addr2lineToolPath.find( '\\' ) != std::string::npos;
|
||||
if( looksLikePath && !std::ifstream( addr2lineToolPath ).good() )
|
||||
{
|
||||
std::cerr << "Specified symbol resolution tool not found: '" << addr2lineToolPath
|
||||
<< "' (check the path passed to the '-a' option)" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// A user-provided path may contain spaces or other shell-special characters.
|
||||
escapeShellParam( addr2lineToolPath, m_addr2LinePath );
|
||||
std::cout << "Using user-specified symbol resolution tool: '" << addr2lineToolPath.c_str() << "'" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
std::cerr << "No symbol resolution tool specified (use the '-a' option to provide one)" << std::endl;
|
||||
#else
|
||||
std::stringstream result( ExecShellCommand("which addr2line") );
|
||||
std::getline(result, m_addr2LinePath);
|
||||
|
||||
if( !m_addr2LinePath.length() )
|
||||
{
|
||||
std::cerr << "'addr2line' was not found in the system, please installed it" << std::endl;
|
||||
std::cerr << "'addr2line' was not found in the system, please install it" << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Using 'addr2line' found at: '" << m_addr2LinePath.c_str() << "'" << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void escapeShellParam(std::string const& s, std::string& out)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// cmd.exe / the CRT command parser do not understand POSIX backslash escapes, and
|
||||
// backslashes are path separators on Windows. Wrap the parameter in double quotes
|
||||
// (which handles spaces) and drop any embedded quotes, which cannot appear in a path.
|
||||
out.reserve( s.size() + 2 );
|
||||
out.push_back( '"' );
|
||||
for( char c : s )
|
||||
{
|
||||
if( c != '"' ) out.push_back( c );
|
||||
}
|
||||
out.push_back( '"' );
|
||||
#else
|
||||
out.reserve( s.size() + 2 );
|
||||
out.push_back( '"' );
|
||||
for( unsigned char c : s )
|
||||
@@ -73,34 +119,51 @@ public:
|
||||
}
|
||||
}
|
||||
out.push_back( '"' );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
{
|
||||
if( !m_addr2LinePath.length() ) return false;
|
||||
|
||||
|
||||
std:: string escapedPath;
|
||||
escapeShellParam( imagePath, escapedPath );
|
||||
|
||||
// Command-line length limits: cmd.exe (used by _popen on Windows) allows ~8191 characters;
|
||||
// a single POSIX 'sh -c' argument is capped by MAX_ARG_STRLEN (128 KiB on Linux).
|
||||
// 8000 stays under all of these, so a single conservative budget works on every platform.
|
||||
const size_t maxCmdLength = 8000;
|
||||
|
||||
size_t entryIdx = 0;
|
||||
while( entryIdx < inputEntryList.size() )
|
||||
{
|
||||
const size_t startIdx = entryIdx;
|
||||
const size_t batchEndIdx = std::min( inputEntryList.size(), startIdx + (size_t)1024 );
|
||||
|
||||
printf( "Resolving symbols [%zu-%zu]\n", startIdx, batchEndIdx );
|
||||
|
||||
// generate a single addr2line cmd line for all addresses in one invocation
|
||||
// generate a single addr2line cmd line for as many addresses as fit the length budget
|
||||
std::stringstream ss;
|
||||
ss << m_addr2LinePath << " -C -f -e " << escapedPath << " -a ";
|
||||
for( ; entryIdx < batchEndIdx; entryIdx++ )
|
||||
ss << m_addr2LinePath << " -C -f" << m_addr2LineArgs << " -e " << escapedPath << " -a ";
|
||||
while( entryIdx < inputEntryList.size() )
|
||||
{
|
||||
const FrameEntry& entry = inputEntryList[entryIdx];
|
||||
ss << " 0x" << std::hex << entry.symbolOffset;
|
||||
entryIdx++;
|
||||
// always include at least one address, then stop once near the length limit
|
||||
if( static_cast<size_t>( ss.tellp() ) >= maxCmdLength ) break;
|
||||
}
|
||||
const size_t batchEndIdx = entryIdx;
|
||||
|
||||
std::string resultStr = ExecShellCommand( ss.str().c_str() );
|
||||
printf( "Resolving symbols [%zu-%zu]\n", startIdx, batchEndIdx );
|
||||
|
||||
std::string cmd = ss.str();
|
||||
#ifdef _WIN32
|
||||
// _popen runs the command through 'cmd.exe /c', which strips the outermost pair of
|
||||
// quotes. Wrap the whole command so the quoting around the (possibly spaced) tool
|
||||
// and image paths survives.
|
||||
cmd = "\"" + cmd + "\"";
|
||||
#endif
|
||||
|
||||
std::string resultStr = ExecShellCommand( cmd.c_str() );
|
||||
std::stringstream result( resultStr );
|
||||
|
||||
//printf("executing: '%s' got '%s'\n", ss.str().c_str(), result.str().c_str());
|
||||
@@ -147,13 +210,13 @@ public:
|
||||
|
||||
private:
|
||||
std::string m_addr2LinePath;
|
||||
std::string m_addr2LineArgs;
|
||||
};
|
||||
|
||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
bool ResolveSymbolsAddr2Line( const std::string& addr2lineToolPath, const std::string& addr2lineArgs,
|
||||
const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
{
|
||||
static SymbolResolver symbolResolver;
|
||||
static SymbolResolver symbolResolver( addr2lineToolPath, addr2lineArgs );
|
||||
return symbolResolver.ResolveSymbols( imagePath, inputEntryList, resolvedEntries );
|
||||
}
|
||||
|
||||
#endif // #ifndef _WIN32
|
||||
|
||||
@@ -122,8 +122,8 @@ private:
|
||||
|
||||
char SymbolResolver::s_symbolResolutionBuffer[symbolResolutionBufferSize];
|
||||
|
||||
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
bool ResolveSymbolsDbgHelp( const std::string& imagePath, const FrameEntryList& inputEntryList,
|
||||
SymbolEntryList& resolvedEntries )
|
||||
{
|
||||
static SymbolResolver resolver;
|
||||
return resolver.ResolveSymbolsForModule( imagePath, inputEntryList, resolvedEntries );
|
||||
|
||||
@@ -38,7 +38,12 @@ void Usage()
|
||||
printf( " c: context switches, s: sampling data, C: symbol code, S: source cache\n" );
|
||||
printf( " -c: scan for source files missing in cache and add if found\n" );
|
||||
printf( " -r: resolve symbols and patch callstack frames\n");
|
||||
printf( " -R: reset all callstack frame symbols to unresolved (e.g. to re-run resolution)\n");
|
||||
printf( " -p: substitute symbol resolution path with an alternative: \"REGEX_MATCH;REPLACEMENT\"\n");
|
||||
printf( " -a: path to a custom addr2line-compatible tool to use for symbol resolution\n");
|
||||
printf( " -A: extra arguments passed verbatim to the symbol resolution tool,\n");
|
||||
printf( " e.g. \"--relative-address\" for llvm-addr2line on PE/Mach-O images\n");
|
||||
printf( " -v: verbose output while resolving symbols\n");
|
||||
printf( " -j: number of threads to use for compression (-1 to use all cores)\n" );
|
||||
|
||||
exit( 1 );
|
||||
@@ -61,10 +66,14 @@ int main( int argc, char** argv )
|
||||
bool buildDict = false;
|
||||
bool cacheSource = false;
|
||||
bool resolveSymbols = false;
|
||||
bool resetSymbols = false;
|
||||
std::vector<std::string> pathSubstitutions;
|
||||
std::string addr2lineToolPath;
|
||||
std::string addr2lineArgs;
|
||||
bool verboseSymbols = false;
|
||||
|
||||
int c;
|
||||
while( ( c = getopt( argc, argv, "4hez:ds:crp:j:" ) ) != -1 )
|
||||
while( ( c = getopt( argc, argv, "4hez:ds:crRp:a:A:vj:" ) ) != -1 )
|
||||
{
|
||||
switch( c )
|
||||
{
|
||||
@@ -137,9 +146,21 @@ int main( int argc, char** argv )
|
||||
case 'r':
|
||||
resolveSymbols = true;
|
||||
break;
|
||||
case 'R':
|
||||
resetSymbols = true;
|
||||
break;
|
||||
case 'p':
|
||||
pathSubstitutions.push_back(optarg);
|
||||
break;
|
||||
case 'a':
|
||||
addr2lineToolPath = optarg;
|
||||
break;
|
||||
case 'A':
|
||||
addr2lineArgs = optarg;
|
||||
break;
|
||||
case 'v':
|
||||
verboseSymbols = true;
|
||||
break;
|
||||
case 'j':
|
||||
streams = atoi( optarg );
|
||||
break;
|
||||
@@ -171,7 +192,7 @@ int main( int argc, char** argv )
|
||||
{
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
const bool allowBgThreads = false;
|
||||
const bool allowStringModification = resolveSymbols;
|
||||
const bool allowStringModification = resolveSymbols || resetSymbols;
|
||||
tracy::Worker worker( *f, (tracy::EventType::Type)events, allowBgThreads, allowStringModification );
|
||||
|
||||
#ifndef TRACY_NO_STATISTICS
|
||||
@@ -181,7 +202,8 @@ int main( int argc, char** argv )
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
if( cacheSource ) worker.CacheSourceFiles();
|
||||
if( resolveSymbols ) PatchSymbols( worker, pathSubstitutions );
|
||||
if( resetSymbols ) ResetSymbols( worker );
|
||||
if( resolveSymbols ) PatchSymbols( worker, pathSubstitutions, addr2lineToolPath, addr2lineArgs, verboseSymbols );
|
||||
|
||||
auto w = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, clev, zstdLevel, streams ) );
|
||||
if( !w )
|
||||
|
||||
Reference in New Issue
Block a user