mirror of
https://github.com/wolfpld/tracy.git
synced 2026-07-03 04:28:54 +00:00
Compare commits
33 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d192badd23 | ||
|
|
6045199577 | ||
|
|
eb6c6a48f5 | ||
|
|
41fb476cb0 | ||
|
|
e93d72143b | ||
|
|
f12c4f3e88 | ||
|
|
7b12fcdacf | ||
|
|
ffdf5bbd95 | ||
|
|
6db81069bf | ||
|
|
8aac5d49d0 | ||
|
|
d062e1699d | ||
|
|
b3a9231808 | ||
|
|
a70ebef673 | ||
|
|
422229bf1a | ||
|
|
c7944fda98 | ||
|
|
235ac90b85 | ||
|
|
ab91480f6d | ||
|
|
9fed0ef938 | ||
|
|
7d68b16341 | ||
|
|
89254ab353 | ||
|
|
8588b8b4a6 | ||
|
|
a35e1e7a8c | ||
|
|
be5c94ee09 | ||
|
|
9b0044838e | ||
|
|
4af26880dd | ||
|
|
e7cb1fe52b | ||
|
|
4355c686af | ||
|
|
25a260dcd1 | ||
|
|
a298c4333e | ||
|
|
6054a301c2 | ||
|
|
40186956f6 | ||
|
|
92fded825e | ||
|
|
6b64fbc3be |
25
.appveyor.yml
Normal file
25
.appveyor.yml
Normal file
@@ -0,0 +1,25 @@
|
||||
version: '{build}'
|
||||
platform:
|
||||
- x64
|
||||
image:
|
||||
- Visual Studio 2019
|
||||
- Ubuntu1804
|
||||
install:
|
||||
- cmd: cd c:\tools\vcpkg
|
||||
- cmd: git pull
|
||||
- cmd: bootstrap-vcpkg.bat
|
||||
- cmd: vcpkg install freetype glfw3 --triplet x64-windows-static
|
||||
- cmd: vcpkg integrate install
|
||||
- cmd: cd %APPVEYOR_BUILD_FOLDER%
|
||||
build_script:
|
||||
- cmd: msbuild .\update\build\win32\update.vcxproj
|
||||
- cmd: msbuild .\profiler\build\win32\Tracy.vcxproj
|
||||
- cmd: msbuild .\capture\build\win32\capture.vcxproj
|
||||
- sh: sudo apt-get update && sudo apt-get -y install libglfw3-dev libgtk2.0-dev
|
||||
- sh: make -C update/build/unix debug release
|
||||
- sh: make -C profiler/build/unix debug release
|
||||
- sh: make -C capture/build/unix debug release
|
||||
- sh: make -C test
|
||||
- sh: make -C test clean
|
||||
- sh: make -C test TRACYFLAGS=-DTRACY_ON_DEMAND
|
||||
test: off
|
||||
1
.github/FUNDING.yml
vendored
1
.github/FUNDING.yml
vendored
@@ -1 +0,0 @@
|
||||
github: wolfpld
|
||||
BIN
.github/sponsor.png
vendored
BIN
.github/sponsor.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 1.0 KiB |
44
.github/workflows/gcc.yml
vendored
44
.github/workflows/gcc.yml
vendored
@@ -1,44 +0,0 @@
|
||||
name: gcc
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-22.04, macOS-latest]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Install linux libraries
|
||||
if: ${{ matrix.os == 'ubuntu-22.04' }}
|
||||
run: sudo apt-get update && sudo apt-get -y install libglfw3-dev libdbus-1-dev libcapstone-dev libtbb-dev libdebuginfod-dev
|
||||
- name: Install macos libraries
|
||||
if: ${{ matrix.os == 'macOS-latest' }}
|
||||
run: brew install capstone tbb pkg-config glfw
|
||||
- name: Profiler GUI
|
||||
run: make -j -C profiler/build/unix debug release
|
||||
- name: Update utility
|
||||
run: make -j -C update/build/unix debug release
|
||||
- name: Capture utility
|
||||
run: make -j -C capture/build/unix debug release
|
||||
- name: Csvexport utility
|
||||
run: make -j -C csvexport/build/unix debug release
|
||||
- name: Import-chrome utility
|
||||
run: make -j -C import-chrome/build/unix debug release
|
||||
- name: Library
|
||||
run: make -j -C library/unix debug release
|
||||
- name: Test application
|
||||
run: |
|
||||
make -j -C test
|
||||
make -j -C test clean
|
||||
make -j -C test TRACYFLAGS=-DTRACY_ON_DEMAND
|
||||
make -j -C test clean
|
||||
make -j -C test TRACYFLAGS="-DTRACY_DELAYED_INIT -DTRACY_MANUAL_LIFETIME"
|
||||
28
.github/workflows/latex.yml
vendored
28
.github/workflows/latex.yml
vendored
@@ -1,28 +0,0 @@
|
||||
name: Manual
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Fix stupidity
|
||||
run: |
|
||||
cp AUTHORS AUTHORS.
|
||||
cp LICENSE LICENSE.
|
||||
- name: Compile LaTeX
|
||||
uses: xu-cheng/latex-action@v2
|
||||
with:
|
||||
working_directory: manual
|
||||
root_file: tracy.tex
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: manual
|
||||
path: manual/tracy.pdf
|
||||
72
.github/workflows/msvc.yml
vendored
72
.github/workflows/msvc.yml
vendored
@@ -1,72 +0,0 @@
|
||||
name: MSVC
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build-with-vcpkg-integration:
|
||||
|
||||
runs-on: windows-2022
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: microsoft/setup-msbuild@v1.0.2
|
||||
- name: Integrate vcpkg
|
||||
run: vcpkg integrate install
|
||||
- name: Profiler GUI Debug
|
||||
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Profiler GUI Release
|
||||
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Update utility Debug
|
||||
run: msbuild .\update\build\win32\update.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Update utility Release
|
||||
run: msbuild .\update\build\win32\update.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Capture utility Debug
|
||||
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Capture utility Release
|
||||
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Csvexport utility Debug
|
||||
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Csvexport utility Release
|
||||
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Import-chrome utility Debug
|
||||
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Debug /property:Platform=x64
|
||||
- name: Import-chrome utility Release
|
||||
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Library
|
||||
run: msbuild .\library\win32\TracyProfiler.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Package binaries
|
||||
run: |
|
||||
mkdir bin
|
||||
mkdir bin\dev
|
||||
copy profiler\build\win32\x64\Release\Tracy.exe bin
|
||||
copy update\build\win32\x64\Release\update.exe bin
|
||||
copy capture\build\win32\x64\Release\capture.exe bin
|
||||
copy import-chrome\build\win32\x64\Release\import-chrome.exe bin
|
||||
copy csvexport\build\win32\x64\Release\csvexport.exe bin
|
||||
copy library\win32\x64\Release\TracyProfiler.dll bin\dev
|
||||
copy library\win32\x64\Release\TracyProfiler.lib bin\dev
|
||||
7z a Tracy.7z bin
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
path: Tracy.7z
|
||||
|
||||
build-without-vcpkg-integration:
|
||||
|
||||
runs-on: windows-2022
|
||||
|
||||
env:
|
||||
VCPKG_ROOT: ''
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: microsoft/setup-msbuild@v1.0.2
|
||||
- name: Install vcpkg dependencies
|
||||
run: vcpkg/install_vcpkg_dependencies.bat
|
||||
- name: Profiler GUI Release
|
||||
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
- name: Capture utility Release
|
||||
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Release /property:Platform=x64
|
||||
20
.gitignore
vendored
20
.gitignore
vendored
@@ -5,12 +5,8 @@
|
||||
x64
|
||||
Release
|
||||
Debug
|
||||
_build
|
||||
_compiler
|
||||
tools/*
|
||||
*.d
|
||||
*.o
|
||||
*.so
|
||||
*.swp
|
||||
imgui.ini
|
||||
test/tracy_test
|
||||
@@ -22,21 +18,5 @@ manual/t*.out
|
||||
manual/t*.pdf
|
||||
manual/t*.synctex.gz
|
||||
manual/t*.toc
|
||||
manual/t*.bbl
|
||||
manual/t*.blg
|
||||
profiler/build/win32/packages
|
||||
profiler/build/win32/Tracy.aps
|
||||
# include the vcpkg install script but not the files it produces
|
||||
vcpkg/*
|
||||
!vcpkg/install_vcpkg_dependencies.bat
|
||||
/vcpkg_installed
|
||||
.deps/
|
||||
.dirstamp
|
||||
.vscode/
|
||||
|
||||
/_*/**
|
||||
/**/__pycache__/**
|
||||
extra/vswhere.exe
|
||||
extra/tracy-build
|
||||
/.cache
|
||||
compile_commands.json
|
||||
|
||||
17
.vscode/c_cpp_properties.json
vendored
17
.vscode/c_cpp_properties.json
vendored
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"/usr/include/freetype2"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/bin/clang++",
|
||||
"cStandard": "c11",
|
||||
"cppStandard": "c++17",
|
||||
"intelliSenseMode": "clang-x64"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
||||
58
.vscode/launch.json
vendored
58
.vscode/launch.json
vendored
@@ -1,58 +0,0 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Launch Profiler GUI",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/profiler/build/unix/Tracy-debug",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"preLaunchTask": "Build Profiler GUI",
|
||||
"console": "internalConsole",
|
||||
"internalConsoleOptions": "neverOpen"
|
||||
},
|
||||
{
|
||||
"name": "Launch capture tool",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/capture/build/unix/capture-debug",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"preLaunchTask": "Build capture tool"
|
||||
},
|
||||
{
|
||||
"name": "Launch update tool",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/update/build/unix/update-debug",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"preLaunchTask": "Build update tool"
|
||||
},
|
||||
{
|
||||
"name": "Launch test application",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/test/tracy_test",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}/test",
|
||||
"environment": [],
|
||||
"externalConsole": false,
|
||||
"preLaunchTask": "Build test application"
|
||||
},
|
||||
]
|
||||
}
|
||||
88
.vscode/tasks.json
vendored
88
.vscode/tasks.json
vendored
@@ -1,88 +0,0 @@
|
||||
{
|
||||
// See https://go.microsoft.com/fwlink/?LinkId=733558
|
||||
// for the documentation about the tasks.json format
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "Build Profiler GUI",
|
||||
"type": "shell",
|
||||
"command": "CC=clang CXX=clang++ make debug -C profiler/build/unix -j `nproc`",
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"presentation": {
|
||||
"close": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "Build capture tool",
|
||||
"type": "shell",
|
||||
"command": "CC=clang CXX=clang++ make debug -C capture/build/unix -j `nproc`",
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"presentation": {
|
||||
"close": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "Build csvexport tool",
|
||||
"type": "shell",
|
||||
"command": "CC=clang CXX=clang++ make debug -C csvexport/build/unix -j `nproc`",
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"presentation": {
|
||||
"close": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "Build import-chrome tool",
|
||||
"type": "shell",
|
||||
"command": "CC=clang CXX=clang++ make debug -C import-chrome/build/unix -j `nproc`",
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "Build update tool",
|
||||
"type": "shell",
|
||||
"command": "CC=clang CXX=clang++ make debug -C update/build/unix -j `nproc`",
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"presentation": {
|
||||
"close": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "Build client library",
|
||||
"type": "shell",
|
||||
"command": "CC=clang CXX=clang++ make debug -C library/unix -j `nproc`",
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"presentation": {
|
||||
"close": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "Build test application",
|
||||
"type": "shell",
|
||||
"command": "CC=clang CXX=clang++ make OPTFLAGS=-DTRACY_VERBOSE\\ -fno-omit-frame-pointer\\ -march=native\\ -g -C test -j `nproc`",
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
},
|
||||
"presentation": {
|
||||
"close": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
13
AUTHORS
13
AUTHORS
@@ -1,4 +1,4 @@
|
||||
Bartosz Taudul <wolf@nereid.pl>
|
||||
Bartosz Taudul <wolf.pld@gmail.com>
|
||||
Kamil Klimek <kamil.klimek@sharkbits.com> (initial find zone implementation)
|
||||
Bartosz Szreder <zgredder@gmail.com> (view/worker split)
|
||||
Arvid Gerstmann <dev@arvid-g.de> (compatibility fixes)
|
||||
@@ -7,14 +7,3 @@ Till Rathmann <till.rathmann@gmx.de> (DLL support)
|
||||
Sherief Farouk <sherief.personal@gmail.com> (compatibility fixes)
|
||||
Dedmen Miller <dedmen@dedmen.de> (find zone bug fixes, improvements)
|
||||
Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport)
|
||||
Thales Sabino <thales@codeplay.com> (OpenCL support)
|
||||
Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support)
|
||||
Simonas Kazlauskas <git@kazlauskas.me> (OSX CI, external bindings)
|
||||
Jakub Žádník <kubouch@gmail.com> (csvexport utility)
|
||||
Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes)
|
||||
Benoit Jacob <benoitjacob@google.com> (Android improvements)
|
||||
David Farrel <dafarrel@adobe.com> (Direct3D 11 support)
|
||||
Terence Rokop <rokopt@sharpears.net> (Non-reentrant zones)
|
||||
Lukas Berbuer <lukas.berbuer@gmail.com> (CMake integration)
|
||||
Xavier Bouchoux <xavierb@gmail.com> (sample data in find zone)
|
||||
Balazs Kovacsics <kovab93@gmail.com> (Universal Windows Platform)
|
||||
|
||||
132
CMakeLists.txt
132
CMakeLists.txt
@@ -1,132 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
project(Tracy LANGUAGES CXX)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
add_library(TracyClient TracyClient.cpp)
|
||||
target_compile_features(TracyClient PUBLIC cxx_std_11)
|
||||
target_include_directories(TracyClient SYSTEM PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||
$<INSTALL_INTERFACE:include>)
|
||||
target_link_libraries(
|
||||
TracyClient
|
||||
PUBLIC
|
||||
Threads::Threads
|
||||
${CMAKE_DL_LIBS}
|
||||
)
|
||||
|
||||
# Public dependency on some libraries required when using Mingw
|
||||
if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} MATCHES "GNU")
|
||||
target_link_libraries(TracyClient PUBLIC ws2_32 dbghelp)
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
find_library(EXECINFO_LIBRARY NAMES execinfo REQUIRED)
|
||||
target_link_libraries(TracyClient PUBLIC ${EXECINFO_LIBRARY})
|
||||
endif()
|
||||
|
||||
add_library(Tracy::TracyClient ALIAS TracyClient)
|
||||
|
||||
macro(set_option option help value)
|
||||
option(${option} ${help} ${value})
|
||||
if(${option})
|
||||
message(STATUS "${option}: ON")
|
||||
target_compile_definitions(TracyClient PUBLIC ${option})
|
||||
else()
|
||||
message(STATUS "${option}: OFF")
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
set_option(TRACY_ENABLE "Enable profiling" ON)
|
||||
set_option(TRACY_ON_DEMAND "On-demand profiling" OFF)
|
||||
set_option(TRACY_CALLSTACK "Enfore callstack collection for tracy regions" OFF)
|
||||
set_option(TRACY_NO_CALLSTACK "Disable all callstack related functionality" OFF)
|
||||
set_option(TRACY_NO_CALLSTACK_INLINES "Disables the inline functions in callstacks" OFF)
|
||||
set_option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" OFF)
|
||||
set_option(TRACY_NO_BROADCAST "Disable client discovery by broadcast to local network" OFF)
|
||||
set_option(TRACY_ONLY_IPV4 "Tracy will only accept connections on IPv4 addresses (disable IPv6)" OFF)
|
||||
set_option(TRACY_NO_CODE_TRANSFER "Disable collection of source code" OFF)
|
||||
set_option(TRACY_NO_CONTEXT_SWITCH "Disable capture of context switches" OFF)
|
||||
set_option(TRACY_NO_EXIT "Client executable does not exit until all profile data is sent to server" OFF)
|
||||
set_option(TRACY_NO_SAMPLING "Disable call stack sampling" OFF)
|
||||
set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
|
||||
set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
|
||||
set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF)
|
||||
set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF)
|
||||
set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
|
||||
set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
|
||||
set_option(TRACY_FIBERS "Enable fibers support" OFF)
|
||||
set_option(TRACY_NO_CRASH_HANDLER "Disable crash handling" OFF)
|
||||
set_option(TRACY_TIMER_FALLBACK "Use lower resolution timers" OFF)
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_compile_definitions(TracyClient PRIVATE TRACY_EXPORTS)
|
||||
target_compile_definitions(TracyClient PUBLIC TRACY_IMPORTS)
|
||||
endif()
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
set(includes
|
||||
${CMAKE_CURRENT_LIST_DIR}/TracyC.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/Tracy.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/TracyD3D11.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/TracyD3D12.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/TracyLua.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/TracyOpenCL.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/TracyOpenGL.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/TracyVulkan.hpp)
|
||||
|
||||
set(client_includes
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/tracy_concurrentqueue.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/tracy_rpmalloc.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/tracy_SPSCQueue.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyArmCpuTable.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyCallstack.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyCallstack.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyDebug.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyDxt1.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyFastVector.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyLock.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyProfiler.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyRingBuffer.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyScoped.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyStringHelpers.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracySysTime.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracySysTrace.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/client/TracyThread.hpp)
|
||||
|
||||
set(common_includes
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/tracy_lz4.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/tracy_lz4hc.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyAlign.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyAlign.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyAlloc.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyApi.h
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyColor.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyForceInline.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyMutex.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyProtocol.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyQueue.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracySocket.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyStackFrames.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracySystem.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyUwp.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/common/TracyYield.hpp)
|
||||
|
||||
install(TARGETS TracyClient
|
||||
EXPORT TracyConfig
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
install(FILES ${includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
install(FILES ${client_includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/client)
|
||||
install(FILES ${common_includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common)
|
||||
install(EXPORT TracyConfig
|
||||
NAMESPACE Tracy::
|
||||
FILE TracyConfig.cmake
|
||||
DESTINATION share/Tracy)
|
||||
4
LICENSE
4
LICENSE
@@ -1,7 +1,7 @@
|
||||
Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
|
||||
Tracy Profiler (https://bitbucket.org/wolfpld/tracy) is licensed under the
|
||||
3-clause BSD license.
|
||||
|
||||
Copyright (c) 2017-2022, Bartosz Taudul <wolf@nereid.pl>
|
||||
Copyright (c) 2017-2020, Bartosz Taudul <wolf.pld@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
428
NEWS
428
NEWS
@@ -2,422 +2,9 @@ Note: There is no guarantee that version mismatched client and server will
|
||||
be able to talk with each other. Network protocol breakages won't be listed
|
||||
here.
|
||||
|
||||
v0.8.2 (2022-06-28)
|
||||
-------------------
|
||||
|
||||
- Added support for debuginfod debug information services. Note that
|
||||
since this depends on proper system configuration, vendors providing
|
||||
the debug information, and network retrieval, it is disabled by
|
||||
default. To enable, compile the profiled application with the
|
||||
TRACY_DEBUGINFOD define and link with libdebuginfod.
|
||||
- When Tracy server-side utilities are built with MSVC, the required
|
||||
libraries will be now automatically retrieved and built with vcpkg.
|
||||
- Added microarchitecture data for: Bonnell, Airmont, Goldmont, Goldmont
|
||||
Plus, Tremont.
|
||||
- Recognize additional CPUIDs of Zen 3, Alder Lake, Ice Lake
|
||||
microarchitectures.
|
||||
- Assembly line width will be now extended, if needed. Previously the line
|
||||
width was calculated for the initial layout and changing amount of
|
||||
displayed data (especially listing the read/written registers) didn't
|
||||
affect this, which may have made some lines partially unreadable.
|
||||
- Added ability to filter call stacks in memory tab by inactive allocations.
|
||||
Filtering by inactive allocations helps to pinpoint wasteful allocations
|
||||
in the program.
|
||||
- Plot graph will no longer display min/max values interpolated for
|
||||
animation, but rather true values.
|
||||
- The CPU topology tree structure was replaced by a CPU schematic showing
|
||||
the same thing in a more concise way.
|
||||
|
||||
|
||||
v0.8.1 (2022-04-21)
|
||||
-------------------
|
||||
|
||||
- Support for pre-0.7 traces has been dropped.
|
||||
- Update utility can now scan for source files missing in the trace cache,
|
||||
if the '-c' parameter is given. Found files will be added to the cache.
|
||||
- Added high-priority queue for fast queries to bypass slow symbol queries.
|
||||
- Fixed Android documentation to show how to enable context switch tracing.
|
||||
- Workaround MSVC 2015 stupidity which prevented compilation as C++11.
|
||||
- Added support for showing branch cost data for CPUs that don't report
|
||||
branch retirement events (but do report branch misses).
|
||||
- The right-click context menu available for jump arrows in the symbol view
|
||||
window will now additionally display jump context, i.e. jump sources and
|
||||
jump target source code fragments.
|
||||
- Added freedesktop.org compliant desktop entry and MIME type definition.
|
||||
- The call stack column in list of messages will now be only displayed when
|
||||
at least one message on the list has call stack data.
|
||||
- File dialogs on Unix will be now native to the desktop environment you are
|
||||
using. Note that this relies on xdg-desktop-portal and dbus.
|
||||
|
||||
|
||||
v0.8.0 (2022-03-28)
|
||||
-------------------
|
||||
|
||||
- Support for Cygwin has been dropped. It was not working for a very long
|
||||
time and nobody had complained about it.
|
||||
- Mingw is deprecated due to lack of interest.
|
||||
- Added TRACY_NO_CALLSTACK_INLINES macro to disable inline functions
|
||||
resolution in call stacks on Windows.
|
||||
- Improved function matching algorithm in compare traces view.
|
||||
- Added CMake integration.
|
||||
- Reworked rpmalloc initialization.
|
||||
- Fixed display of messages with newlines on messages list.
|
||||
- Excluded some uninteresting wrapper functions from call stacks (for
|
||||
example SIMD pass-through intrinsics to the compiler built-ins).
|
||||
- Adjusted coloring of instruction hotness in symbol view.
|
||||
- Properly handle rare cases when sampling on Linux is momentarily not able to
|
||||
resolve time stamps.
|
||||
- Added Rocket Lake microarchitectural data.
|
||||
- Updated CPU identifier lists.
|
||||
- Implemented GPU timer overflow handling heuristics.
|
||||
- Assembly instructions are now assigned to inline symbols.
|
||||
- You can not only see the assembly source file and line, but also the
|
||||
originating function.
|
||||
- If symbol view is restricted to a single inline function, all assembly
|
||||
instructions not in this context will be dimmed out.
|
||||
- Likewise, the navigation in assembly code will be limited just to the
|
||||
inline context, if a single function is selected.
|
||||
- Kernel call stacks will be now properly captured and displayed in the
|
||||
profiler. Kernel functions are marked with the red color.
|
||||
- The CPU hardware performance counters can be now sampled on Linux.
|
||||
- Three inferred statistics are displayed for lines in both source and
|
||||
assembly code in the symbol view window:
|
||||
- Instructions executed per cycle.
|
||||
- Branch miss rate.
|
||||
- Cache miss rate.
|
||||
- Instruction cost estimation method is no longer tied to software call
|
||||
stack sampling.
|
||||
- The image name filter entry field is now providing a list of available
|
||||
images.
|
||||
- Reentrant function calls may be now excluded from calculations in the
|
||||
statistics view.
|
||||
- Crash handler is now properly removed during profiler destruction.
|
||||
- Repeatedly right-clicking on the same source line in the symbol view
|
||||
window will now cycle through assembly blocks associated with this source
|
||||
line.
|
||||
- Vulkan headers must be now explicitly included before including
|
||||
TracyVulkan.hpp.
|
||||
- The capture utility may now limit capture time to a specified number of
|
||||
seconds.
|
||||
- Fixed message thread assignment in the import-chrome utility.
|
||||
- Sampling data can be now also found in the find zone menu.
|
||||
- Instrumentation failures may now display their context, e.g. the zone text
|
||||
that was to be set.
|
||||
- A warning is now displayed when sampling data is out-of-order.
|
||||
- Average value for plots can be now viewed.
|
||||
- Moved symbol resolution to a separate thread. Profiling will no longer be
|
||||
stuck when there is a large number of symbols to resolve. This not only
|
||||
improves user experience, but also prevents buildup of data (and memory
|
||||
consumption) on the client side.
|
||||
- Android device name will be now reported.
|
||||
- Added support for capturing fibers.
|
||||
- Fibers require additional processing, which has to be enabled by adding
|
||||
the TRACY_FIBERS define on the client side.
|
||||
- Client code requires additional instrumentation using the new macros
|
||||
TracyFiberEnter and TracyFiberLeave (or the corresponding C API
|
||||
variants).
|
||||
- Fibers are represented in traces as separate threads, and are
|
||||
distinguished by green color. Faux context switch regions are used to
|
||||
indicate when a fiber is being run by the worker thread.
|
||||
- Continuous frame marks no longer need to be issued from a single thread.
|
||||
- Context switch call stacks are now captured on Windows and Linux.
|
||||
- Hovering the context switch wait region will now display wait stack,
|
||||
which may provide additional insight into why the switch happened.
|
||||
- Wait stacks inspection can be performed in a new view.
|
||||
- Stacks can be limited to certain threads and to a selected time range.
|
||||
- Stacks are presented either as a sorted list, or as bottom-up and
|
||||
top-down trees.
|
||||
- Entry call stacks can be now also viewed as bottom-up and top-down
|
||||
trees.
|
||||
- Updated project build files to MSVC 2022.
|
||||
- Call stack tooltips now also show the executable image name.
|
||||
- Playback frames can be now changed by interacting with the frame image
|
||||
slider using the mouse wheel.
|
||||
- Signal used to handle crashes on Linux can be now redefined.
|
||||
- Various DPI scaling improvements.
|
||||
- User interface can be now scaled in run time.
|
||||
- Symbol code retrieval now also supports kernel on Windows.
|
||||
- Added low-level C API interface for GPU zones.
|
||||
- Symbol child calls can be now listed.
|
||||
- Replaced "restrict time" in memory window with a proper time range limit.
|
||||
- Added Alder Lake microarchitectural data.
|
||||
- Added GPU zone statistics.
|
||||
- Universal Windows Platform support.
|
||||
- All call stack related functionality can be now disabled with the
|
||||
TRACY_NO_CALLSTACK macro.
|
||||
- Added ability to add full-view annotations from the annotations list
|
||||
window.
|
||||
|
||||
|
||||
v0.7.8 (2021-05-19)
|
||||
-------------------
|
||||
|
||||
- Updated Zen 3 and added Tiger Lake microarchitectural data.
|
||||
- Manually disconnecting from the server will no longer display erroneous
|
||||
warning message.
|
||||
- Added ability to display sample time spent in child function calls.
|
||||
- Fixed issue which may have prevented sampling on ARM64.
|
||||
- Added TRACY_NO_FRAME_IMAGE macro to disable frame image compression
|
||||
thread.
|
||||
- Ctrl and shift keys will now modify mouse wheel zoom speed.
|
||||
- Improved user experience in the symbol view window.
|
||||
- Added support for Direct3D 11 instrumentation.
|
||||
- Vulkan contexts can be now calibrated on Linux.
|
||||
- Support loading zstd-compressed chrome traces.
|
||||
- Chrome traces with multiple PID entries (and possibly conflicting TIDs)
|
||||
can be now imported.
|
||||
- Added support for custom source location tag ("loc") in chrome traces.
|
||||
- Sampling frequency can be now controlled using TRACY_SAMPLING_HZ macro.
|
||||
- Trace compression can be now selected when saving a trace.
|
||||
- If a trace cannot be saved, a failure dialog will be displayed.
|
||||
- Run-time memory usage of frame images can be reduced by calculating
|
||||
a compression dictionary. This can be only performed when a trace is saved
|
||||
or through the update utility.
|
||||
|
||||
|
||||
v0.7.7 (2021-04-01)
|
||||
-------------------
|
||||
|
||||
- Linux crash handler will now also catch SIGABRT.
|
||||
- Fixed invalid name assignment to source files discovered client-side.
|
||||
- Added ability to check if a zone is active (which may be used to avoid
|
||||
preparing zone text, etc., as it wouldn't be used anyway).
|
||||
- Improved sorting behavior of internal vectors.
|
||||
- Some data will now be always properly displayed during live capture.
|
||||
This was not particularly visible before, as it mainly concerns edge
|
||||
cases.
|
||||
- Sorting is performed only as needed.
|
||||
- In case of plots the performance during live capture may be decreased,
|
||||
as these were sorted with at least 0.25 second intervals before. Now
|
||||
the sorting is performed every frame.
|
||||
- Some other data, which previously was not sorted, is sorted now.
|
||||
- In headless capture mode sorting will be only performed when the trace
|
||||
is saved to disk.
|
||||
- Fixed some typos in macros.
|
||||
- Fixed handling of non-ANSI file names on Windows. You can now name your
|
||||
traces 'ęśąćż.tracy' and it should work as intended. This is supported on
|
||||
Windows 10 release 1903 and newer.
|
||||
- Fixed sending GPU context name in on-demand mode.
|
||||
- Fixed color channel order in ZoneColor() macro.
|
||||
- Handle failure state when a memory pointer allocation is reported twice,
|
||||
without an intermediate free.
|
||||
- Renamed "call stack parents" to "entry call stacks".
|
||||
- Display number of entry call stacks in assembly line sample count tooltip.
|
||||
- Added tooltips with preview of source code in various places in the UI.
|
||||
|
||||
|
||||
v0.7.6 (2021-02-06)
|
||||
-------------------
|
||||
|
||||
- Various fixes in build scripts.
|
||||
- Fixed a faulty rpmalloc initialization path when the first thing the
|
||||
thread did was sending a message with call stack.
|
||||
- Added fallback timer define for various virtualized environments, which
|
||||
may not be able to access the hardware timer registers. This will result
|
||||
in usage of the timer provided by the standard library, with reduced
|
||||
resolution.
|
||||
- Further OpenCL improvements.
|
||||
- Updated libbacktrace.
|
||||
- Adds Mach-O 64-bit FAT support.
|
||||
- Fixes memory corruption when processing Mach-O data.
|
||||
- Fixes missing matching entries during binary search.
|
||||
- Adds support for MiniDebugInfo.
|
||||
- Adds fallback to ELF symbol table if no debug info is available.
|
||||
- Various other fixes.
|
||||
- Store build time of profiled program in captures.
|
||||
- GPU contexts can be now named.
|
||||
- Implemented client -> server source code transfer.
|
||||
|
||||
|
||||
v0.7.5 (2021-01-23)
|
||||
-------------------
|
||||
|
||||
- More robust handling of system tracing on Android.
|
||||
- Added warning dialog when the connection is lost before all needed data
|
||||
can be retrieved.
|
||||
- Fixed handling of NaN plot entries (by skipping them).
|
||||
- Dynamic zone colors are now supported through the ZoneColor() macro.
|
||||
- Fixed Arm machine code printout to match the one printed by objdump.
|
||||
- Fixed client memory corruption when using colored messages.
|
||||
- Switched to the next-gen ImGui table UI.
|
||||
- Table columns can have their order rearranged, can be hidden, can be
|
||||
sorted both in ascending and descending order (where appropriate).
|
||||
- Table columns state is now preserved between runs.
|
||||
- Various fixes related to restricting listening to localhost.
|
||||
- Improved compatibility of ETW tracing with non-MSVC compilers.
|
||||
- Fixed Vulkan call stack transfer.
|
||||
- Added support for transient GPU zones (OpenGL, Vulkan, Direct3D 12).
|
||||
- OpenCL fixes for assert-less builds and non-active zones.
|
||||
- Added support for thread names and title bar description in traces
|
||||
imported from chrome tracing format.
|
||||
|
||||
|
||||
v0.7.4 (2020-11-15)
|
||||
-------------------
|
||||
|
||||
- Added support for user-provided locks to keep dbghelp calls thread-safe.
|
||||
- Call stacks can be now copied to clipboard.
|
||||
- Allow more control over which automated captures are performed.
|
||||
- Added textual descriptions for some assembly instructions.
|
||||
- Profiler memory usage is now also displayed as a percentage of available
|
||||
physical memory.
|
||||
- Microarchitecture mismatch is now clearly displayed in the source view
|
||||
window.
|
||||
- Added Zen 3 and Cascade Lake microarchitectural data.
|
||||
- Ghost zones now support all zone coloring modes and namespace
|
||||
shortening.
|
||||
- Extend C API to support memory pools.
|
||||
- Frame rate targets can be now visually represented on the timeline view.
|
||||
|
||||
|
||||
v0.7.3 (2020-10-06)
|
||||
-------------------
|
||||
|
||||
- Properly support DPI scaling on Linux (requires GLFW 3.3).
|
||||
- Added early checks for output file validity in the capture utility.
|
||||
- Improvements to presence broadcast handling.
|
||||
- Custom zone colors can be optionally ignored.
|
||||
- Added support for tracking multiple memory pools.
|
||||
- Memory free failure dialog can now show call stack pointing to the failure
|
||||
location.
|
||||
- Added support for Wayland on Linux.
|
||||
- If during the first 5 seconds of the trace there are no frames being
|
||||
reported, the profiler will switch to following the last 5 seconds of the
|
||||
trace, instead of displaying the last three frames.
|
||||
|
||||
|
||||
v0.7.2 (2020-09-14)
|
||||
-------------------
|
||||
|
||||
- Note: the bitbucket repository is obsolete and will soon stop receiving
|
||||
updates. Migrate to https://github.com/wolfpld/tracy, if you haven't
|
||||
already.
|
||||
- The "waiting for connection" dialog no longer has a "cancel" button. To
|
||||
abort the connection attempt, just use the "close window" button.
|
||||
- Added update notification.
|
||||
- The most recent traced events can be now viewed regardless of timeline
|
||||
zoom level.
|
||||
- Fixed going-to-line in source view (again).
|
||||
- Crash handling on the client is no longer performed if there is no active
|
||||
connection.
|
||||
- Added ability to listen only on IPv4 interfaces.
|
||||
|
||||
|
||||
v0.7.1 (2020-08-24)
|
||||
-------------------
|
||||
|
||||
- Dropped support for pre-v0.6 traces.
|
||||
- Fixed regression on non-AVX2 CPUs.
|
||||
- Fixed incorrect calculation of some ghost zones.
|
||||
- Added list of cached source files.
|
||||
- Added import of plot data.
|
||||
- Secure versions of alloc/free macros.
|
||||
- Automated tracing of vertical synchronization on Windows.
|
||||
- Fixed attachment of postponed frame images.
|
||||
- Source location data can be now copied to clipboard from zone info window.
|
||||
- Zones in find zones menu can be now grouped by zone name.
|
||||
- Vulkan and D3D12 GPU contexts can be now calibrated.
|
||||
- Added CSV export utility.
|
||||
- "Go to frame" popup no longer has a dedicated button. To show it, click on
|
||||
the frame counter.
|
||||
- Added macro for checking if profiler is connected.
|
||||
- Implemented optional data removal from traces in the update utility.
|
||||
- Allow manual management of profiler lifetime.
|
||||
- Adjusted priority of ETW threads to time critical.
|
||||
- Annotations can be now freely adjusted on the timeline.
|
||||
- Limiting time range for find zone functionality has been significantly
|
||||
improved.
|
||||
- Added time range limits for statistics and symbol view.
|
||||
- Implemented call stack sampling on Linux (including Android).
|
||||
- Exact time from start of profiling session can be now viewed by hovering
|
||||
the mouse over the time scale.
|
||||
- Code transfer can be now compiled-out.
|
||||
- Added support for zone markup in unloadable modules.
|
||||
- Added image name filter to sampling statistics results window.
|
||||
|
||||
|
||||
v0.7 (2020-06-11)
|
||||
-----------------
|
||||
|
||||
This is the last release which will be able to load pre-v0.6 traces. Use the
|
||||
update utility to convert your old traces now!
|
||||
|
||||
- chrome:tracing importer now imports zone metadata from "args" key.
|
||||
- Added display of statistical mode to find zone menu.
|
||||
- Automatic stack sampling is now available on Windows.
|
||||
- Properly handle tracing on long-running systems.
|
||||
- Message list entries can now show associated frame image.
|
||||
- Call stack window will now display module names.
|
||||
- Symbol location in call stack window may now also display symbol address.
|
||||
- Statistics menu can now be used to display call stack sampling data or
|
||||
list available symbols.
|
||||
- All call paths leading to the sampled instruction in a call stack can be
|
||||
now displayed.
|
||||
- Frame image compression ratio (lossless in-memory compression, not taking
|
||||
into account DXT compression) is displayed in playback window.
|
||||
- Allow reconnection straight from the discard data dialog.
|
||||
- Added ability to set custom names for locks.
|
||||
- Improved handling of network ports.
|
||||
- Added time percentage display to instrumentation statistics.
|
||||
- Display of ghost zones (generated from automated call stack sampling).
|
||||
- Notify when empty labels display is enabled.
|
||||
- Small fragments of executable code will be now sent from client to server.
|
||||
- Added notification about query backlog.
|
||||
- Fixed performance problem with query backlog.
|
||||
- Display number of in-flight queries, in addition to query backlog.
|
||||
- Improved failure reports.
|
||||
- The capture utility will connect to localhost by default.
|
||||
- Added optional support for QPC timer on Windows.
|
||||
- Complete rewrite of source file viewer. It is now 100% reliable when going
|
||||
to a source location.
|
||||
- Symbol source view was added.
|
||||
- Extension of source file viewer.
|
||||
- Can display source file, assembly view, or both at the same time.
|
||||
- May include display of statistical profiling data.
|
||||
- Ability to switch between source files which were used to build the
|
||||
symbol.
|
||||
- Ability to switch between inlined functions which are incorporated into
|
||||
the symbol.
|
||||
- Graphical representation of control flow in program.
|
||||
- Display of micro-architectural data for each assembly instruction.
|
||||
- Tracking register dependencies between assembly instructions.
|
||||
- Disassembly may be saved to a file, in order to be processed by external
|
||||
tools.
|
||||
- If the default listening port is occupied, profiler will now try listening
|
||||
on other ports.
|
||||
- Added possibility to perform source file names substitution.
|
||||
- Profiler windows can be now docked.
|
||||
- CPU usage tooltip now displays a list of running threads.
|
||||
- Added possibility to filter discovered clients list.
|
||||
- Source files are now cached during capture.
|
||||
- Profiler will now display a popup when application crashes.
|
||||
- Added ability to send simple integral values as extra payload for zones.
|
||||
- Per-frame zone times on the frames plot can now display self time.
|
||||
- Ability to bind only on localhost interface.
|
||||
- OpenCL profiling.
|
||||
- Direct3D 12 profiling.
|
||||
|
||||
|
||||
v0.6.3 (2020-02-13)
|
||||
-------------------
|
||||
|
||||
- Fixed performance issues with loading saved traces on Ryzen CPUs.
|
||||
- Profiler window contents are now properly updated during window resize.
|
||||
- Improved tid to pid mapping on Windows.
|
||||
- Zero length and unfinished zones are no longer taken into account for
|
||||
statistics.
|
||||
- Build files for shared library are now available (experimental).
|
||||
- GPU zones now also have "active" parameter.
|
||||
- Further reduction of memory usage and on-disk trace size.
|
||||
- Replaced ska::flat_hash_map with robin-hood-hashing.
|
||||
- Speed-up rendering of long lists of items.
|
||||
- Exact event time is displayed in some places in the UI.
|
||||
- Memory allocation lists can now be sorted.
|
||||
- Added display of trace file compression ratio.
|
||||
- Optional Zstd compression of trace files.
|
||||
- Frame images are now internally compressed using Zstd (instead of LZ4).
|
||||
- Fix display of continuous frame set tooltips.
|
||||
|
||||
Note: Release numbers are nothing more than numbers. There are some
|
||||
"missing" versions due to trace file changes during development. This is not
|
||||
a mistake.
|
||||
|
||||
v0.6.2 (2019-12-30)
|
||||
-------------------
|
||||
@@ -433,7 +20,6 @@ v0.6.2 (2019-12-30)
|
||||
- Highlight hovered zone from find zone menu zone list on the histogram.
|
||||
- Allow copying user data directory location to the clipboard.
|
||||
|
||||
|
||||
v0.6.1 (2019-11-28)
|
||||
-------------------
|
||||
|
||||
@@ -445,7 +31,6 @@ v0.6.1 (2019-11-28)
|
||||
- Client parameters may be now set from the server.
|
||||
- Minor UI fixes.
|
||||
|
||||
|
||||
v0.6 (2019-11-17)
|
||||
-----------------
|
||||
|
||||
@@ -505,7 +90,6 @@ update utility to convert your old traces now!
|
||||
- Implemented configuration of plots.
|
||||
- Messages can now collect call stacks.
|
||||
|
||||
|
||||
v0.5 (2019-08-10)
|
||||
-----------------
|
||||
|
||||
@@ -593,7 +177,6 @@ update utility to convert your old traces now!
|
||||
- GPU drift value can be now automatically measured.
|
||||
- Connection window is now a popup hidden under a dedicated button.
|
||||
|
||||
|
||||
v0.4.1 (2018-12-30)
|
||||
-------------------
|
||||
|
||||
@@ -621,7 +204,6 @@ v0.4.1 (2018-12-30)
|
||||
- Pressing enter key after entering client address in the welcome dialog
|
||||
will now automatically begin connection process.
|
||||
|
||||
|
||||
v0.4 (2018-10-09)
|
||||
-----------------
|
||||
|
||||
@@ -753,8 +335,8 @@ v0.4 (2018-10-09)
|
||||
- The capture utility will now display time span of the ongoing capture.
|
||||
|
||||
|
||||
v0.3 (2018-07-03)
|
||||
-----------------
|
||||
v0.3.3 (2018-07-03)
|
||||
-------------------
|
||||
|
||||
- Breaking change: the format of trace files has changed.
|
||||
- Previous Tracy versions will crash when trying to open new traces.
|
||||
|
||||
22
README.md
22
README.md
@@ -1,25 +1,19 @@
|
||||
# Tracy Profiler
|
||||
|
||||
[![Sponsor](.github/sponsor.png)](https://github.com/sponsors/wolfpld/)
|
||||
|
||||
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
|
||||
|
||||
Tracy supports profiling CPU (Direct support is provided for C, C++, and Lua integration. At the same time, third-party bindings to many other languages exist on the internet, such as Rust, Zig, OCaml, Odin, etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, OpenCL.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
||||
|
||||
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
|
||||
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
|
||||
- [Changelog](NEWS)
|
||||
[](https://ci.appveyor.com/project/wolfpld/tracy/branch/master)
|
||||
|
||||

|
||||
|
||||

|
||||
### A real time, nanosecond resolution, remote telemetry frame profiler for games and other applications.
|
||||
|
||||

|
||||
Tracy supports profiling CPU (C, C++11, Lua), GPU (OpenGL, Vulkan), memory, locks, context switches, per-frame screenshots and more.
|
||||
|
||||
For usage instructions, consult the user manual [at the following address](https://bitbucket.org/wolfpld/tracy/downloads/tracy.pdf).
|
||||
|
||||
[Changelog](NEWS)
|
||||
|
||||
[Introduction to Tracy Profiler v0.2](https://www.youtube.com/watch?v=fB5B46lbapc)
|
||||
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
|
||||
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
|
||||
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
|
||||
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
|
||||
[New features in Tracy Profiler v0.7](https://www.youtube.com/watch?v=_hU7vw00MZ4)
|
||||
[New features in Tracy Profiler v0.8](https://www.youtube.com/watch?v=30wpRpHTTag)
|
||||
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
|
||||
|
||||
7
TODO
7
TODO
@@ -1,7 +0,0 @@
|
||||
"Would be nice to have" list for 1.0 release:
|
||||
=============================================
|
||||
|
||||
* Pack queue items tightly in the queues.
|
||||
* Use level-of-detail system for plots.
|
||||
* Use per-thread lock data structures.
|
||||
* Use DTrace for BSD/OSX context switch capture.
|
||||
138
Tracy.hpp
138
Tracy.hpp
@@ -11,24 +11,13 @@
|
||||
#define ZoneNamedC(x,y,z)
|
||||
#define ZoneNamedNC(x,y,z,w)
|
||||
|
||||
#define ZoneTransient(x,y)
|
||||
#define ZoneTransientN(x,y,z)
|
||||
|
||||
#define ZoneScoped
|
||||
#define ZoneScopedN(x)
|
||||
#define ZoneScopedC(x)
|
||||
#define ZoneScopedNC(x,y)
|
||||
|
||||
#define ZoneText(x,y)
|
||||
#define ZoneTextV(x,y,z)
|
||||
#define ZoneName(x,y)
|
||||
#define ZoneNameV(x,y,z)
|
||||
#define ZoneColor(x)
|
||||
#define ZoneColorV(x,y)
|
||||
#define ZoneValue(x)
|
||||
#define ZoneValueV(x,y)
|
||||
#define ZoneIsActive false
|
||||
#define ZoneIsActiveV(x) false
|
||||
|
||||
#define FrameMark
|
||||
#define FrameMarkNamed(x)
|
||||
@@ -44,7 +33,6 @@
|
||||
#define LockableBase( type ) type
|
||||
#define SharedLockableBase( type ) type
|
||||
#define LockMark(x) (void)x;
|
||||
#define LockableName(x,y,z);
|
||||
|
||||
#define TracyPlot(x,y)
|
||||
#define TracyPlotConfig(x,y)
|
||||
@@ -57,22 +45,12 @@
|
||||
|
||||
#define TracyAlloc(x,y)
|
||||
#define TracyFree(x)
|
||||
#define TracySecureAlloc(x,y)
|
||||
#define TracySecureFree(x)
|
||||
|
||||
#define TracyAllocN(x,y,z)
|
||||
#define TracyFreeN(x,y)
|
||||
#define TracySecureAllocN(x,y,z)
|
||||
#define TracySecureFreeN(x,y)
|
||||
|
||||
#define ZoneNamedS(x,y,z)
|
||||
#define ZoneNamedNS(x,y,z,w)
|
||||
#define ZoneNamedCS(x,y,z,w)
|
||||
#define ZoneNamedNCS(x,y,z,w,a)
|
||||
|
||||
#define ZoneTransientS(x,y,z)
|
||||
#define ZoneTransientNS(x,y,z,w)
|
||||
|
||||
#define ZoneScopedS(x)
|
||||
#define ZoneScopedNS(x,y)
|
||||
#define ZoneScopedCS(x,y)
|
||||
@@ -80,13 +58,6 @@
|
||||
|
||||
#define TracyAllocS(x,y,z)
|
||||
#define TracyFreeS(x,y)
|
||||
#define TracySecureAllocS(x,y,z)
|
||||
#define TracySecureFreeS(x,y)
|
||||
|
||||
#define TracyAllocNS(x,y,z,w)
|
||||
#define TracyFreeNS(x,y,z)
|
||||
#define TracySecureAllocNS(x,y,z,w)
|
||||
#define TracySecureFreeNS(x,y,z)
|
||||
|
||||
#define TracyMessageS(x,y,z)
|
||||
#define TracyMessageLS(x,y)
|
||||
@@ -95,35 +66,23 @@
|
||||
|
||||
#define TracyParameterRegister(x)
|
||||
#define TracyParameterSetup(x,y,z,w)
|
||||
#define TracyIsConnected false
|
||||
|
||||
#define TracyFiberEnter(x)
|
||||
#define TracyFiberLeave
|
||||
|
||||
#else
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "client/TracyLock.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyScoped.hpp"
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
|
||||
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, TRACY_CALLSTACK, active );
|
||||
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
|
||||
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, active );
|
||||
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
|
||||
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
#endif
|
||||
|
||||
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
|
||||
@@ -132,15 +91,7 @@
|
||||
#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true )
|
||||
|
||||
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size );
|
||||
#define ZoneTextV( varname, txt, size ) varname.Text( txt, size );
|
||||
#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size );
|
||||
#define ZoneNameV( varname, txt, size ) varname.Name( txt, size );
|
||||
#define ZoneColor( color ) ___tracy_scoped_zone.Color( color );
|
||||
#define ZoneColorV( varname, color ) varname.Color( color );
|
||||
#define ZoneValue( value ) ___tracy_scoped_zone.Value( value );
|
||||
#define ZoneValueV( varname, value ) varname.Value( value );
|
||||
#define ZoneIsActive ___tracy_scoped_zone.IsActive()
|
||||
#define ZoneIsActiveV( varname ) varname.IsActive()
|
||||
|
||||
#define FrameMark tracy::Profiler::SendFrameMark( nullptr );
|
||||
#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name );
|
||||
@@ -149,14 +100,13 @@
|
||||
|
||||
#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip );
|
||||
|
||||
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
|
||||
#define LockableBase( type ) tracy::Lockable<type>
|
||||
#define SharedLockableBase( type ) tracy::SharedLockable<type>
|
||||
#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
|
||||
#define LockableName( varname, txt, size ) varname.CustomName( txt, size );
|
||||
#define LockMark( varname ) static const tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
|
||||
|
||||
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val );
|
||||
#define TracyPlotConfig( name, type ) tracy::Profiler::ConfigurePlot( name, type );
|
||||
@@ -169,55 +119,31 @@
|
||||
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK );
|
||||
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK );
|
||||
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false );
|
||||
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true );
|
||||
# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true );
|
||||
|
||||
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name );
|
||||
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name );
|
||||
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name );
|
||||
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name );
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK );
|
||||
#else
|
||||
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 );
|
||||
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 );
|
||||
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 );
|
||||
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 );
|
||||
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false );
|
||||
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true );
|
||||
# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true );
|
||||
|
||||
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name );
|
||||
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name );
|
||||
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name );
|
||||
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name );
|
||||
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size );
|
||||
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr );
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
|
||||
# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, depth, active );
|
||||
# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
|
||||
# define ZoneNamedS( varname, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedCS( varname, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
|
||||
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
|
||||
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
|
||||
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
|
||||
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
|
||||
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color depth, true )
|
||||
|
||||
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false );
|
||||
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false );
|
||||
# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true );
|
||||
# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true );
|
||||
|
||||
# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name );
|
||||
# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name );
|
||||
# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name );
|
||||
# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name );
|
||||
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth );
|
||||
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth );
|
||||
|
||||
# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth );
|
||||
# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth );
|
||||
@@ -229,9 +155,6 @@
|
||||
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
|
||||
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
|
||||
|
||||
# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active )
|
||||
# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active )
|
||||
|
||||
# define ZoneScopedS( depth ) ZoneScoped
|
||||
# define ZoneScopedNS( name, depth ) ZoneScopedN( name )
|
||||
# define ZoneScopedCS( color, depth ) ZoneScopedC( color )
|
||||
@@ -239,13 +162,6 @@
|
||||
|
||||
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
|
||||
# define TracyFreeS( ptr, depth ) TracyFree( ptr )
|
||||
# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size )
|
||||
# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr )
|
||||
|
||||
# define TracyAllocNS( ptr, size, depth, name ) TracyAlloc( ptr, size, name )
|
||||
# define TracyFreeNS( ptr, depth, name ) TracyFree( ptr, name )
|
||||
# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAlloc( ptr, size, name )
|
||||
# define TracySecureFreeNS( ptr, depth, name ) TracySecureFree( ptr, name )
|
||||
|
||||
# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size )
|
||||
# define TracyMessageLS( txt, depth ) TracyMessageL( txt )
|
||||
@@ -255,12 +171,6 @@
|
||||
|
||||
#define TracyParameterRegister( cb ) tracy::Profiler::ParameterRegister( cb );
|
||||
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val );
|
||||
#define TracyIsConnected tracy::GetProfiler().IsConnected()
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber );
|
||||
# define TracyFiberLeave tracy::Profiler::LeaveFiber();
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
177
TracyC.h
177
TracyC.h
@@ -5,17 +5,11 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include "client/TracyCallstack.h"
|
||||
#include "common/TracyApi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
TRACY_API void ___tracy_set_thread_name( const char* name );
|
||||
|
||||
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
|
||||
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
typedef const void* TracyCZoneCtx;
|
||||
@@ -27,18 +21,9 @@ typedef const void* TracyCZoneCtx;
|
||||
#define TracyCZoneEnd(c)
|
||||
#define TracyCZoneText(c,x,y)
|
||||
#define TracyCZoneName(c,x,y)
|
||||
#define TracyCZoneColor(c,x)
|
||||
#define TracyCZoneValue(c,x)
|
||||
|
||||
#define TracyCAlloc(x,y)
|
||||
#define TracyCFree(x)
|
||||
#define TracyCSecureAlloc(x,y)
|
||||
#define TracyCSecureFree(x)
|
||||
|
||||
#define TracyCAllocN(x,y,z)
|
||||
#define TracyCFreeN(x,y)
|
||||
#define TracyCSecureAllocN(x,y,z)
|
||||
#define TracyCSecureFreeN(x,y)
|
||||
|
||||
#define TracyCFrameMark
|
||||
#define TracyCFrameMarkNamed(x)
|
||||
@@ -60,26 +45,12 @@ typedef const void* TracyCZoneCtx;
|
||||
|
||||
#define TracyCAllocS(x,y,z)
|
||||
#define TracyCFreeS(x,y)
|
||||
#define TracyCSecureAllocS(x,y,z)
|
||||
#define TracyCSecureFreeS(x,y)
|
||||
|
||||
#define TracyCAllocNS(x,y,z,w)
|
||||
#define TracyCFreeNS(x,y,z)
|
||||
#define TracyCSecureAllocNS(x,y,z,w)
|
||||
#define TracyCSecureFreeNS(x,y,z)
|
||||
|
||||
#define TracyCMessageS(x,y,z)
|
||||
#define TracyCMessageLS(x,y)
|
||||
#define TracyCMessageCS(x,y,z,w)
|
||||
#define TracyCMessageLCS(x,y,z)
|
||||
|
||||
#define TracyCIsConnected 0
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyCFiberEnter(fiber)
|
||||
# define TracyCFiberLeave
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifndef TracyConcat
|
||||
@@ -104,50 +75,12 @@ struct ___tracy_c_zone_context
|
||||
int active;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_time_data
|
||||
{
|
||||
int64_t gpuTime;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_zone_begin_data {
|
||||
uint64_t srcloc;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_zone_end_data {
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_new_context_data {
|
||||
int64_t gpuTime;
|
||||
float period;
|
||||
uint8_t context;
|
||||
uint8_t flags;
|
||||
uint8_t type;
|
||||
};
|
||||
|
||||
struct ___tracy_gpu_context_name_data {
|
||||
uint8_t context;
|
||||
const char* name;
|
||||
uint16_t len;
|
||||
};
|
||||
|
||||
// Some containers don't support storing const types.
|
||||
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
|
||||
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
|
||||
|
||||
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
TRACY_API void ___tracy_startup_profiler(void);
|
||||
TRACY_API void ___tracy_shutdown_profiler(void);
|
||||
#endif
|
||||
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz );
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz );
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, const char* function );
|
||||
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz );
|
||||
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
|
||||
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
|
||||
@@ -156,51 +89,29 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srclo
|
||||
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
|
||||
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
|
||||
TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color );
|
||||
TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value );
|
||||
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data );
|
||||
TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data );
|
||||
TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data );
|
||||
TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data );
|
||||
|
||||
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data );
|
||||
TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data );
|
||||
TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data );
|
||||
TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data );
|
||||
TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data );
|
||||
|
||||
TRACY_API int ___tracy_connected(void);
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
|
||||
#endif
|
||||
|
||||
#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx );
|
||||
|
||||
#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size );
|
||||
#define TracyCZoneName( ctx, txt, size ) ___tracy_emit_zone_name( ctx, txt, size );
|
||||
#define TracyCZoneColor( ctx, color ) ___tracy_emit_zone_color( ctx, color );
|
||||
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
|
||||
|
||||
|
||||
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name );
|
||||
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size );
|
||||
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth );
|
||||
TRACY_API void ___tracy_emit_memory_free( const void* ptr );
|
||||
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth );
|
||||
|
||||
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack );
|
||||
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack );
|
||||
@@ -208,30 +119,16 @@ TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t co
|
||||
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack );
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
|
||||
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
|
||||
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
|
||||
|
||||
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
|
||||
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
|
||||
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
|
||||
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK )
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_alloc_free_callstack( ptr, TRACY_CALLSTACK )
|
||||
|
||||
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
|
||||
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
|
||||
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
|
||||
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
|
||||
#else
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 );
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 );
|
||||
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 );
|
||||
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 );
|
||||
|
||||
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name );
|
||||
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name );
|
||||
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name );
|
||||
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name );
|
||||
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size );
|
||||
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr );
|
||||
|
||||
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 );
|
||||
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 );
|
||||
@@ -256,24 +153,17 @@ TRACY_API void ___tracy_emit_plot( const char* name, double val );
|
||||
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
|
||||
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
|
||||
#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size );
|
||||
#define TracyCAppInfo( txt, color ) ___tracy_emit_message_appinfo( txt, color );
|
||||
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
|
||||
|
||||
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
|
||||
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
|
||||
# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
|
||||
# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
|
||||
|
||||
# define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
|
||||
# define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
|
||||
# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
|
||||
# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
|
||||
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth )
|
||||
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_alloc_free_callstack( ptr, depth )
|
||||
|
||||
# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
|
||||
# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
|
||||
@@ -287,13 +177,6 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
|
||||
# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size )
|
||||
# define TracyCFreeS( ptr, depth ) TracyCFree( ptr )
|
||||
# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size )
|
||||
# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr )
|
||||
|
||||
# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name )
|
||||
# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name )
|
||||
# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name )
|
||||
# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name )
|
||||
|
||||
# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size )
|
||||
# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt )
|
||||
@@ -301,16 +184,6 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
|
||||
# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color )
|
||||
#endif
|
||||
|
||||
#define TracyCIsConnected ___tracy_connected()
|
||||
|
||||
TRACY_API void ___tracy_fiber_enter( const char* fiber );
|
||||
TRACY_API void ___tracy_fiber_leave( void );
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
# define TracyCFiberEnter( fiber ) ___tracy_fiber_enter( fiber );
|
||||
# define TracyCFiberLeave ___tracy_fiber_leave();
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -15,10 +15,6 @@
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(push, 0)
|
||||
#endif
|
||||
|
||||
#include "common/tracy_lz4.cpp"
|
||||
#include "client/TracyProfiler.cpp"
|
||||
#include "client/TracyCallstack.cpp"
|
||||
@@ -27,7 +23,7 @@
|
||||
#include "common/TracySocket.cpp"
|
||||
#include "client/tracy_rpmalloc.cpp"
|
||||
#include "client/TracyDxt1.cpp"
|
||||
#include "client/TracyAlloc.cpp"
|
||||
#include "client/TracyLfq.cpp"
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
|
||||
# include "libbacktrace/alloc.cpp"
|
||||
@@ -42,15 +38,11 @@
|
||||
# else
|
||||
# include "libbacktrace/elf.cpp"
|
||||
# endif
|
||||
# include "common/TracyStackFrames.cpp"
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma comment(lib, "ws2_32.lib")
|
||||
# pragma comment(lib, "dbghelp.lib")
|
||||
# pragma comment(lib, "advapi32.lib")
|
||||
# pragma comment(lib, "user32.lib")
|
||||
# pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
19
TracyClientDLL.cpp
Normal file
19
TracyClientDLL.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Tracy profiler
|
||||
// ----------------
|
||||
//
|
||||
// On multi-DLL projects compile and
|
||||
// link with this source file (and none
|
||||
// other) in the executable and in
|
||||
// DLLs / shared objects that link to
|
||||
// the main DLL.
|
||||
//
|
||||
|
||||
// Define TRACY_ENABLE to enable profiler.
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# ifndef TRACY_IMPORTS
|
||||
# define TRACY_IMPORTS 1
|
||||
# endif
|
||||
#endif
|
||||
#include "common/TracySystem.cpp"
|
||||
442
TracyD3D11.hpp
442
TracyD3D11.hpp
@@ -1,442 +0,0 @@
|
||||
#ifndef __TRACYD3D11_HPP__
|
||||
#define __TRACYD3D11_HPP__
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyD3D11Context(device,queue) nullptr
|
||||
#define TracyD3D11Destroy(ctx)
|
||||
#define TracyD3D11ContextName(ctx, name, size)
|
||||
|
||||
#define TracyD3D11NewFrame(ctx)
|
||||
|
||||
#define TracyD3D11Zone(ctx, name)
|
||||
#define TracyD3D11ZoneC(ctx, name, color)
|
||||
#define TracyD3D11NamedZone(ctx, varname, name, active)
|
||||
#define TracyD3D11NamedZoneC(ctx, varname, name, color, active)
|
||||
#define TracyD3D12ZoneTransient(ctx, varname, name, active)
|
||||
|
||||
#define TracyD3D11ZoneS(ctx, name, depth)
|
||||
#define TracyD3D11ZoneCS(ctx, name, color, depth)
|
||||
#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active)
|
||||
#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active)
|
||||
#define TracyD3D12ZoneTransientS(ctx, varname, name, depth, active)
|
||||
|
||||
#define TracyD3D11Collect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class D3D11ZoneScope {};
|
||||
}
|
||||
|
||||
using TracyD3D11Ctx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
#include "common/TracyAlign.hpp"
|
||||
#include "common/TracyAlloc.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class D3D11Ctx
|
||||
{
|
||||
friend class D3D11ZoneScope;
|
||||
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
// Creates the query ring, takes an initial GPU timestamp and announces the new
// GPU context to the profiler through the serial queue.
//
// device    - used to create one timestamp and one disjoint query per ring slot.
// devicectx - stored in m_devicectx; used here to issue the calibration queries.
D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
    : m_device( device )
    , m_devicectx( devicectx )
    , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
    , m_head( 0 )
    , m_tail( 0 )
{
    assert( m_context != 255 );

    for( int i = 0; i < QueryCount; i++ )
    {
        HRESULT hr = S_OK;
        D3D11_QUERY_DESC desc;
        desc.MiscFlags = 0;

        desc.Query = D3D11_QUERY_TIMESTAMP;
        hr |= device->CreateQuery( &desc, &m_queries[i] );

        desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
        hr |= device->CreateQuery( &desc, &m_disjoints[i] );

        m_disjointMap[i] = nullptr;

        assert( SUCCEEDED( hr ) );
    }

    // Force query the initial GPU timestamp (pipeline stall).
    // Both results are zero-initialized so they hold defined values even when
    // none of the attempts below writes them.
    D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = {};
    UINT64 timestamp = 0;
    for( int attempts = 0; attempts < 50; attempts++ )
    {
        devicectx->Begin( m_disjoints[0] );
        devicectx->End( m_queries[0] );
        devicectx->End( m_disjoints[0] );
        devicectx->Flush();

        // Spin until the disjoint query stops reporting S_FALSE.
        while( devicectx->GetData( m_disjoints[0], &disjoint, sizeof( disjoint ), 0 ) == S_FALSE )
            /* Nothing */;

        // A disjoint interval invalidates the timestamp; retry.
        if( disjoint.Disjoint )
            continue;

        while( devicectx->GetData( m_queries[0], &timestamp, sizeof( timestamp ), 0 ) == S_FALSE )
            /* Nothing */;

        break;
    }

    // Same tick-to-nanosecond scaling as Collect(); Frequency is still 0 when no
    // disjoint data was ever returned, so avoid dividing by it.
    const UINT64 nsPerTick = disjoint.Frequency != 0 ? 1000000000ull / disjoint.Frequency : 0;
    int64_t tgpu = timestamp * nsPerTick;
    int64_t tcpu = Profiler::GetTime();

    uint8_t flags = 0;

    // GPU times are reported already scaled by nsPerTick, hence a period of 1.
    const float period = 1.f;
    auto* item = Profiler::QueueSerial();
    MemWrite( &item->hdr.type, QueueType::GpuNewContext );
    MemWrite( &item->gpuNewContext.cpuTime, tcpu );
    MemWrite( &item->gpuNewContext.gpuTime, tgpu );
    memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
    MemWrite( &item->gpuNewContext.period, period );
    MemWrite( &item->gpuNewContext.context, m_context );
    MemWrite( &item->gpuNewContext.flags, flags );
    MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );

#ifdef TRACY_ON_DEMAND
    GetProfiler().DeferItem( *item );
#endif

    Profiler::QueueSerialFinish();
}
|
||||
|
||||
// Releases the timestamp and disjoint query of every ring slot and clears
// the slot's disjoint mapping.
~D3D11Ctx()
{
    int slot = 0;
    while( slot < QueryCount )
    {
        m_queries[slot]->Release();
        m_disjoints[slot]->Release();
        m_disjointMap[slot] = nullptr;
        ++slot;
    }
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
if( m_tail == m_head ) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto start = m_tail;
|
||||
auto end = m_head + QueryCount;
|
||||
auto cnt = (end - start) % QueryCount;
|
||||
while (cnt > 1)
|
||||
{
|
||||
auto mid = start + cnt / 2;
|
||||
|
||||
bool available =
|
||||
m_devicectx->GetData(m_disjointMap[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK &&
|
||||
m_devicectx->GetData(m_queries[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
|
||||
|
||||
if (available)
|
||||
{
|
||||
start = mid;
|
||||
}
|
||||
else
|
||||
{
|
||||
end = mid;
|
||||
}
|
||||
cnt = (end - start) % QueryCount;
|
||||
}
|
||||
|
||||
start %= QueryCount;
|
||||
|
||||
while (m_tail != start)
|
||||
{
|
||||
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
|
||||
UINT64 time;
|
||||
|
||||
m_devicectx->GetData(m_disjointMap[m_tail], &disjoint, sizeof(disjoint), 0);
|
||||
m_devicectx->GetData(m_queries[m_tail], &time, sizeof(time), 0);
|
||||
|
||||
time *= (1000000000ull / disjoint.Frequency);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, (int64_t)time);
|
||||
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
|
||||
MemWrite(&item->gpuTime.context, m_context);
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
m_tail = (m_tail + 1) % QueryCount;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Hands out the slot at m_head and advances m_head by one, wrapping at QueryCount.
// Asserts that the advanced head has not caught up with m_tail.
tracy_force_inline unsigned int NextQueryId()
{
    const unsigned int allocated = m_head;
    m_head = ( allocated + 1 ) % QueryCount;
    assert( m_head != m_tail );
    return allocated;
}
|
||||
|
||||
tracy_force_inline ID3D11Query* TranslateQueryId( unsigned int id )
|
||||
{
|
||||
return m_queries[id];
|
||||
}
|
||||
|
||||
tracy_force_inline ID3D11Query* MapDisjointQueryId( unsigned int id, unsigned int disjointId )
|
||||
{
|
||||
m_disjointMap[id] = m_disjoints[disjointId];
|
||||
return m_disjoints[disjointId];
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
|
||||
ID3D11Device* m_device;
|
||||
ID3D11DeviceContext* m_devicectx;
|
||||
|
||||
ID3D11Query* m_queries[QueryCount];
|
||||
ID3D11Query* m_disjoints[QueryCount];
|
||||
ID3D11Query* m_disjointMap[QueryCount]; // Multiple time queries can have one disjoint query
|
||||
uint8_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
};
|
||||
|
||||
class D3D11ZoneScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
// Begins a GPU zone with a static source location and a captured callstack.
//
// ctx       - context whose query ring and device context are used.
// srcloc    - source location; its address is written into the queue item.
// depth     - passed to Callstack() to capture the callstack.
// is_active - when false (or, with TRACY_ON_DEMAND, when the profiler is not
//             connected) the constructor does nothing.
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool is_active )
#ifdef TRACY_ON_DEMAND
    : m_active( is_active && GetProfiler().IsConnected() )
#else
    : m_active( is_active )
#endif
{
    if( !m_active ) return;
    m_ctx = ctx;

    // The begin slot doubles as the disjoint query for this zone; the destructor
    // ends that same disjoint query via m_disjointId.
    const auto queryId = ctx->NextQueryId();
    ctx->m_devicectx->Begin( ctx->MapDisjointQueryId( queryId, queryId ) );
    ctx->m_devicectx->End( ctx->TranslateQueryId( queryId ) );

    m_disjointId = queryId;

    // Queue the callstack together with the serial item, as the transient
    // callstack constructor of this class does, instead of sending it
    // separately after the item has been finished.
    auto* item = Profiler::QueueSerialCallstack( Callstack( depth ) );
    MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
    MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
    MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
    MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
    MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
    MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );

    Profiler::QueueSerialFinish();
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
|
||||
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
|
||||
|
||||
m_disjointId = queryId;
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline ~D3D11ZoneScope()
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
m_ctx->m_devicectx->End(m_ctx->TranslateQueryId(queryId));
|
||||
m_ctx->m_devicectx->End(m_ctx->MapDisjointQueryId(queryId, m_disjointId));
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
D3D11Ctx* m_ctx;
|
||||
unsigned int m_disjointId;
|
||||
};
|
||||
|
||||
// Allocates storage with tracy_malloc and constructs a D3D11Ctx in it.
// Returns the constructed context.
static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
{
    auto* storage = tracy_malloc( sizeof( D3D11Ctx ) );
    return new( storage ) D3D11Ctx( device, devicectx );
}
|
||||
|
||||
static inline void DestroyD3D11Context( D3D11Ctx* ctx )
|
||||
{
|
||||
ctx->~D3D11Ctx();
|
||||
tracy_free( ctx );
|
||||
}
|
||||
}
|
||||
|
||||
using TracyD3D11Ctx = tracy::D3D11Ctx*;
|
||||
|
||||
#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx );
|
||||
#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx);
|
||||
#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size);
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
// Forwards to the callstack variant with the default TRACY_CALLSTACK depth.
// TracyD3D11ZoneTransientS takes (ctx, varname, name, depth, active); D3D11 zones have no command list.
# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, ___tracy_gpu_zone, name, true )
|
||||
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, ___tracy_gpu_zone, name, color, true )
|
||||
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), active };
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, depth, true )
|
||||
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, depth, true )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), depth, active };
|
||||
#else
|
||||
// Fallback without callstack support: same parameter list as the callstack variant, depth is ignored.
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11Zone( ctx, name )
|
||||
// Fallback without callstack support: same parameter list as the callstack variant, depth is ignored.
// ctx must be forwarded because TracyD3D11ZoneC takes (ctx, name, color).
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11ZoneC( ctx, name, color )
|
||||
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active )
|
||||
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active )
|
||||
// Fallback without callstack support: depth is ignored and the D3D11 (not D3D12) transient zone is used.
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D11ZoneTransient(ctx, varname, name, active)
|
||||
#endif
|
||||
|
||||
#define TracyD3D11Collect( ctx ) ctx->Collect();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
506
TracyD3D12.hpp
506
TracyD3D12.hpp
@@ -1,506 +0,0 @@
|
||||
#ifndef __TRACYD3D12_HPP__
|
||||
#define __TRACYD3D12_HPP__
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#define TracyD3D12Context(device, queue) nullptr
|
||||
#define TracyD3D12Destroy(ctx)
|
||||
#define TracyD3D12ContextName(ctx, name, size)
|
||||
|
||||
#define TracyD3D12NewFrame(ctx)
|
||||
|
||||
#define TracyD3D12Zone(ctx, cmdList, name)
|
||||
#define TracyD3D12ZoneC(ctx, cmdList, name, color)
|
||||
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
|
||||
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
|
||||
#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
|
||||
|
||||
#define TracyD3D12ZoneS(ctx, cmdList, name, depth)
|
||||
#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth)
|
||||
#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active)
|
||||
#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active)
|
||||
#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active)
|
||||
|
||||
#define TracyD3D12Collect(ctx)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class D3D12ZoneScope {};
|
||||
}
|
||||
|
||||
using TracyD3D12Ctx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cassert>
|
||||
#include <d3d12.h>
|
||||
#include <dxgi.h>
|
||||
#include <wrl/client.h>
|
||||
#include <queue>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct D3D12QueryPayload
|
||||
{
|
||||
uint32_t m_queryIdStart = 0;
|
||||
uint32_t m_queryCount = 0;
|
||||
};
|
||||
|
||||
// Command queue context.
|
||||
class D3D12QueueCtx
|
||||
{
|
||||
friend class D3D12ZoneScope;
|
||||
|
||||
static constexpr uint32_t MaxQueries = 64 * 1024; // Queries are begin and end markers, so we can store half as many total time durations. Must be even!
|
||||
|
||||
bool m_initialized = false;
|
||||
|
||||
ID3D12Device* m_device = nullptr;
|
||||
ID3D12CommandQueue* m_queue = nullptr;
|
||||
uint8_t m_context;
|
||||
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
|
||||
|
||||
// In-progress payload.
|
||||
uint32_t m_queryLimit = MaxQueries;
|
||||
uint32_t m_queryCounter = 0;
|
||||
uint32_t m_previousQueryCounter = 0;
|
||||
|
||||
uint32_t m_activePayload = 0;
|
||||
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
|
||||
std::queue<D3D12QueryPayload> m_payloadQueue;
|
||||
|
||||
int64_t m_prevCalibration = 0;
|
||||
int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() };
|
||||
|
||||
public:
|
||||
// Sets up timestamp profiling for one command queue: creates the query heap,
// the readback buffer and the payload fence, then announces the new GPU
// context (with calibration enabled) to the profiler.
//
// device - used to create the query heap, readback buffer and fence.
// queue  - queue whose timestamp frequency and clock calibration are read.
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
    : m_device(device)
    , m_queue(queue)
    , m_context(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
{
    // Verify we support timestamp queries on this queue.

    if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
    {
        D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};

        bool Success = SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)));
        assert(Success && featureData.CopyQueueTimestampQueriesSupported && "Platform does not support profiling of copy queues.");
        (void)Success; // Only read by the assert above.
    }

    // Zero-initialized so a failed call leaves a defined value when asserts are compiled out.
    uint64_t timestampFrequency = 0;

    if (FAILED(queue->GetTimestampFrequency(&timestampFrequency)))
    {
        assert(false && "Failed to get timestamp frequency.");
    }

    uint64_t cpuTimestamp = 0;
    uint64_t gpuTimestamp = 0;

    if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
    {
        assert(false && "Failed to get queue clock calibration.");
    }

    // Save the device cpu timestamp, not the profiler's timestamp.
    m_prevCalibration = cpuTimestamp * m_qpcToNs;

    cpuTimestamp = Profiler::GetTime();

    D3D12_QUERY_HEAP_DESC heapDesc{};
    heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
    heapDesc.Count = m_queryLimit;
    heapDesc.NodeMask = 0; // #TODO: Support multiple adapters.

    // Halve the requested query count until heap creation succeeds.
    while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
    {
        m_queryLimit /= 2;
        heapDesc.Count = m_queryLimit;
    }

    // Create a readback buffer, which will be used as a destination for the query data.

    D3D12_RESOURCE_DESC readbackBufferDesc{};
    readbackBufferDesc.Alignment = 0;
    readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
    readbackBufferDesc.Height = 1;
    readbackBufferDesc.DepthOrArraySize = 1;
    readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
    readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major.
    readbackBufferDesc.MipLevels = 1;
    readbackBufferDesc.SampleDesc.Count = 1;
    readbackBufferDesc.SampleDesc.Quality = 0;
    readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

    D3D12_HEAP_PROPERTIES readbackHeapProps{};
    readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
    readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
    readbackHeapProps.CreationNodeMask = 0;
    readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters.

    if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
    {
        assert(false && "Failed to create query readback buffer.");
    }

    if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
    {
        assert(false && "Failed to create payload fence.");
    }

    auto* item = Profiler::QueueSerial();
    MemWrite(&item->hdr.type, QueueType::GpuNewContext);
    MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
    MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
    memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
    // Period is reported as nanoseconds per GPU tick.
    MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
    MemWrite(&item->gpuNewContext.context, m_context);
    MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
    MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);

#ifdef TRACY_ON_DEMAND
    GetProfiler().DeferItem(*item);
#endif

    Profiler::QueueSerialFinish();

    m_initialized = true;
}
|
||||
|
||||
void NewFrame()
|
||||
{
|
||||
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, m_queryCounter });
|
||||
m_previousQueryCounter += m_queryCounter;
|
||||
m_queryCounter = 0;
|
||||
|
||||
if (m_previousQueryCounter >= m_queryLimit)
|
||||
{
|
||||
m_previousQueryCounter -= m_queryLimit;
|
||||
}
|
||||
|
||||
m_queue->Signal(m_payloadFence.Get(), ++m_activePayload);
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_queryCounter = 0;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Find out what payloads are available.
|
||||
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
|
||||
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
|
||||
|
||||
if (!payloadCount)
|
||||
{
|
||||
return; // No payloads are available yet, exit out.
|
||||
}
|
||||
|
||||
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
|
||||
|
||||
// Map the readback buffer so we can fetch the query data from the GPU.
|
||||
void* readbackBufferMapping = nullptr;
|
||||
|
||||
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
|
||||
{
|
||||
assert(false && "Failed to map readback buffer.");
|
||||
}
|
||||
|
||||
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
|
||||
|
||||
for (uint32_t i = 0; i < payloadCount; ++i)
|
||||
{
|
||||
const auto& payload = m_payloadQueue.front();
|
||||
|
||||
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
|
||||
{
|
||||
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
|
||||
const auto timestamp = timestampData[counter];
|
||||
const auto queryId = counter;
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, timestamp);
|
||||
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
|
||||
MemWrite(&item->gpuTime.context, m_context);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
m_payloadQueue.pop();
|
||||
}
|
||||
|
||||
m_readbackBuffer->Unmap(0, nullptr);
|
||||
|
||||
// Recalibrate to account for drift.
|
||||
|
||||
uint64_t cpuTimestamp;
|
||||
uint64_t gpuTimestamp;
|
||||
|
||||
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
|
||||
{
|
||||
assert(false && "Failed to get queue clock calibration.");
|
||||
}
|
||||
|
||||
cpuTimestamp *= m_qpcToNs;
|
||||
|
||||
const auto cpuDelta = cpuTimestamp - m_prevCalibration;
|
||||
if (cpuDelta > 0)
|
||||
{
|
||||
m_prevCalibration = cpuTimestamp;
|
||||
cpuTimestamp = Profiler::GetTime();
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
|
||||
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
|
||||
MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta);
|
||||
MemWrite(&item->gpuCalibration.context, m_context);
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Reserves two consecutive query slots (begin and end) and returns the index
// of the first one, wrapped into the heap range.
tracy_force_inline uint32_t NextQueryId()
{
    assert(m_queryCounter < m_queryLimit && "Submitted too many GPU queries! Consider increasing MaxQueries.");

    const uint32_t offset = m_queryCounter;
    m_queryCounter = offset + 2; // One slot for the begin query, one for the end query.

    return (m_previousQueryCounter + offset) % m_queryLimit;
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
};
|
||||
|
||||
class D3D12ZoneScope
|
||||
{
|
||||
const bool m_active;
|
||||
D3D12QueueCtx* m_ctx = nullptr;
|
||||
ID3D12GraphicsCommandList* m_cmdList = nullptr;
|
||||
uint32_t m_queryId = 0; // Used for tracking in nested zones.
|
||||
|
||||
public:
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(active)
|
||||
#endif
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_ctx = ctx;
|
||||
m_cmdList = cmdList;
|
||||
|
||||
m_queryId = ctx->NextQueryId();
|
||||
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
|
||||
|
||||
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
|
||||
|
||||
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
// Closes the GPU zone: records the end timestamp, emits the zone-end event and resolves
// both timestamps (begin + end) into the context's readback buffer.
tracy_force_inline ~D3D12ZoneScope()
{
    if (!m_active) return;

    // The end timestamp lives in the slot directly following the begin slot.
    const auto endQueryId = m_queryId + 1;
    m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, endQueryId);

    auto* zone = Profiler::QueueSerial();
    MemWrite(&zone->hdr.type, QueueType::GpuZoneEndSerial);
    MemWrite(&zone->gpuZoneEnd.cpuTime, Profiler::GetTime());
    MemWrite(&zone->gpuZoneEnd.thread, GetThreadHandle());
    MemWrite(&zone->gpuZoneEnd.queryId, static_cast<uint16_t>(endQueryId));
    MemWrite(&zone->gpuZoneEnd.context, m_ctx->GetId());

    Profiler::QueueSerialFinish();

    // Copy the two consecutive query results, starting at the begin slot, to the same
    // index (in uint64_t units) of the readback buffer.
    m_cmdList->ResolveQueryData(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer.Get(), m_queryId * sizeof(uint64_t));
}
|
||||
};
|
||||
|
||||
// Allocates storage through tracy_malloc and constructs a D3D12QueueCtx in it.
// The returned context must be released with DestroyD3D12Context.
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
{
    void* storage = tracy_malloc(sizeof(D3D12QueueCtx));
    return new (storage) D3D12QueueCtx{ device, queue };
}
|
||||
|
||||
// Destroys a context created by CreateD3D12Context: runs the destructor explicitly
// (the object was placement-constructed) and returns the storage via tracy_free.
// A null context is ignored, so it is safe to call on a handle that was never created.
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
{
    if (!ctx) return;

    ctx->~D3D12QueueCtx();
    tracy_free(ctx);
}
|
||||
|
||||
}
|
||||
|
||||
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
|
||||
|
||||
#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue);
|
||||
#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx);
|
||||
#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size);
|
||||
|
||||
#define TracyD3D12NewFrame(ctx) ctx->NewFrame();
|
||||
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, TRACY_CALLSTACK, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), TRACY_CALLSTACK, active };
|
||||
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
|
||||
#else
|
||||
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, ___tracy_gpu_zone, cmdList, name, true)
|
||||
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, ___tracy_gpu_zone, cmdList, name, color, true)
|
||||
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
|
||||
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
|
||||
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), cmdList, active };
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, depth, true)
|
||||
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, depth, true)
|
||||
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), depth, active };
|
||||
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), depth, active };
|
||||
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), cmdList, depth, active };
|
||||
#else
|
||||
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name)
|
||||
// Without callstack support the depth is dropped; forward to the *colored* zone macro
// (TracyD3D12Zone takes no color argument).
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12ZoneC(ctx, cmdList, name, color)
|
||||
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
|
||||
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
|
||||
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
|
||||
#endif
|
||||
|
||||
#define TracyD3D12Collect(ctx) ctx->Collect();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
119
TracyLua.hpp
119
TracyLua.hpp
@@ -125,7 +125,6 @@ static inline void LuaRemove( char* script )
|
||||
#else
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits>
|
||||
|
||||
#include "common/TracyColor.hpp"
|
||||
#include "common/TracyAlign.hpp"
|
||||
@@ -151,9 +150,9 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
||||
const char* func[64];
|
||||
uint32_t fsz[64];
|
||||
uint32_t ssz[64];
|
||||
uint32_t spaceNeeded = 4; // cnt
|
||||
|
||||
uint8_t cnt;
|
||||
uint16_t spaceNeeded = sizeof( cnt );
|
||||
uint32_t cnt;
|
||||
for( cnt=0; cnt<depth; cnt++ )
|
||||
{
|
||||
if( lua_getstack( L, cnt+1, dbg+cnt ) == 0 ) break;
|
||||
@@ -163,29 +162,27 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
|
||||
ssz[cnt] = uint32_t( strlen( dbg[cnt].source ) );
|
||||
spaceNeeded += fsz[cnt] + ssz[cnt];
|
||||
}
|
||||
spaceNeeded += cnt * ( 4 + 2 + 2 ); // source line, function string length, source string length
|
||||
spaceNeeded += cnt * ( 4 + 4 + 4 ); // source line, function string length, source string length
|
||||
|
||||
auto ptr = (char*)tracy_malloc( spaceNeeded + 2 );
|
||||
auto ptr = (char*)tracy_malloc( spaceNeeded + 4 );
|
||||
auto dst = ptr;
|
||||
memcpy( dst, &spaceNeeded, 2 ); dst += 2;
|
||||
memcpy( dst, &cnt, 1 ); dst++;
|
||||
for( uint8_t i=0; i<cnt; i++ )
|
||||
memcpy( dst, &spaceNeeded, 4 ); dst += 4;
|
||||
memcpy( dst, &cnt, 4 ); dst += 4;
|
||||
for( uint32_t i=0; i<cnt; i++ )
|
||||
{
|
||||
const uint32_t line = dbg[i].currentline;
|
||||
memcpy( dst, &line, 4 ); dst += 4;
|
||||
assert( fsz[i] <= std::numeric_limits<uint16_t>::max() );
|
||||
memcpy( dst, fsz+i, 2 ); dst += 2;
|
||||
memcpy( dst, fsz+i, 4 ); dst += 4;
|
||||
memcpy( dst, func[i], fsz[i] ); dst += fsz[i];
|
||||
assert( ssz[i] <= std::numeric_limits<uint16_t>::max() );
|
||||
memcpy( dst, ssz+i, 2 ); dst += 2;
|
||||
memcpy( dst, ssz+i, 4 ); dst += 4;
|
||||
memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i];
|
||||
}
|
||||
assert( dst - ptr == spaceNeeded + 2 );
|
||||
assert( dst - ptr == spaceNeeded + 4 );
|
||||
|
||||
TracyQueuePrepare( QueueType::CallstackAlloc );
|
||||
MemWrite( &item->callstackAllocFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->callstackAllocFat.nativePtr, (uint64_t)Callstack( depth ) );
|
||||
TracyQueueCommit( callstackAllocFatThread );
|
||||
TracyLfqPrepare( QueueType::CallstackAlloc );
|
||||
MemWrite( &item->callstackAlloc.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->callstackAlloc.nativePtr, (uint64_t)Callstack( depth ) );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static inline int LuaZoneBeginS( lua_State* L )
|
||||
@@ -197,6 +194,16 @@ static inline int LuaZoneBeginS( lua_State* L )
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
|
||||
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyLfqCommit;
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
#else
|
||||
@@ -204,16 +211,6 @@ static inline int LuaZoneBeginS( lua_State* L )
|
||||
#endif
|
||||
SendLuaCallstack( L, depth );
|
||||
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -226,13 +223,6 @@ static inline int LuaZoneBeginNS( lua_State* L )
|
||||
if( !GetLuaZoneState().active ) return 0;
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
#else
|
||||
const auto depth = uint32_t( lua_tointeger( L, 2 ) );
|
||||
#endif
|
||||
SendLuaCallstack( L, depth );
|
||||
|
||||
lua_Debug dbg;
|
||||
lua_getstack( L, 1, &dbg );
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
@@ -240,10 +230,17 @@ static inline int LuaZoneBeginNS( lua_State* L )
|
||||
const auto name = lua_tolstring( L, 1, &nsz );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
TracyLfqCommit;
|
||||
|
||||
#ifdef TRACY_CALLSTACK
|
||||
const uint32_t depth = TRACY_CALLSTACK;
|
||||
#else
|
||||
const auto depth = uint32_t( lua_tointeger( L, 2 ) );
|
||||
#endif
|
||||
SendLuaCallstack( L, depth );
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -266,10 +263,10 @@ static inline int LuaZoneBegin( lua_State* L )
|
||||
lua_getinfo( L, "Snl", &dbg );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
@@ -293,10 +290,10 @@ static inline int LuaZoneBeginN( lua_State* L )
|
||||
const auto name = lua_tolstring( L, 1, &nsz );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
@@ -314,9 +311,9 @@ static inline int LuaZoneEnd( lua_State* L )
|
||||
}
|
||||
#endif
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneEnd );
|
||||
TracyLfqPrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
TracyQueueCommit( zoneEndThread );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -333,15 +330,13 @@ static inline int LuaZoneText( lua_State* L )
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
ptr[size] = '\0';
|
||||
TracyLfqPrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneText.text, (uint64_t)ptr );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -358,15 +353,13 @@ static inline int LuaZoneName( lua_State* L )
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneName );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
ptr[size] = '\0';
|
||||
TracyLfqPrepare( QueueType::ZoneName );
|
||||
MemWrite( &item->zoneText.text, (uint64_t)ptr );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -378,16 +371,14 @@ static inline int LuaMessage( lua_State* L )
|
||||
|
||||
auto txt = lua_tostring( L, 1 );
|
||||
const auto size = strlen( txt );
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
|
||||
TracyQueuePrepare( QueueType::Message );
|
||||
MemWrite( &item->messageFat.time, Profiler::GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( messageFatThread );
|
||||
ptr[size] = '\0';
|
||||
TracyLfqPrepare( QueueType::Message );
|
||||
MemWrite( &item->message.time, Profiler::GetTime() );
|
||||
MemWrite( &item->message.text, (uint64_t)ptr );
|
||||
TracyLfqCommit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
414
TracyOpenCL.hpp
414
TracyOpenCL.hpp
@@ -1,414 +0,0 @@
|
||||
#ifndef __TRACYOPENCL_HPP__
|
||||
#define __TRACYOPENCL_HPP__
|
||||
|
||||
#if !defined TRACY_ENABLE
|
||||
|
||||
#define TracyCLContext(c, x) nullptr
|
||||
#define TracyCLDestroy(c)
|
||||
#define TracyCLContextName(c, x, y)
|
||||
|
||||
#define TracyCLNamedZone(c, x, y, z)
|
||||
#define TracyCLNamedZoneC(c, x, y, z, w)
|
||||
#define TracyCLZone(c, x)
|
||||
#define TracyCLZoneC(c, x, y)
|
||||
#define TracyCLZoneTransient(c,x,y,z)
|
||||
|
||||
#define TracyCLNamedZoneS(c, x, y, z, w)
|
||||
#define TracyCLNamedZoneCS(c, x, y, z, w, v)
|
||||
#define TracyCLZoneS(c, x, y)
|
||||
#define TracyCLZoneCS(c, x, y, z)
|
||||
#define TracyCLZoneTransientS(c,x,y,z,w)
|
||||
|
||||
#define TracyCLNamedZoneSetEvent(x, e)
|
||||
#define TracyCLZoneSetEvent(e)
|
||||
|
||||
#define TracyCLCollect(c)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
class OpenCLCtxScope {};
|
||||
}
|
||||
|
||||
using TracyCLCtx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "common/TracyAlloc.hpp"
|
||||
|
||||
#define TRACY_CL_TO_STRING_INDIRECT(T) #T
|
||||
#define TRACY_CL_TO_STRING(T) TRACY_CL_TO_STRING_INDIRECT(T)
|
||||
#define TRACY_CL_ASSERT(p) if(!(p)) { \
|
||||
TracyMessageL( "TRACY_CL_ASSERT failed on " __FILE__ ":" TRACY_CL_TO_STRING(__LINE__) ); \
|
||||
assert(false && "TRACY_CL_ASSERT failed"); \
|
||||
}
|
||||
// Reports a failed OpenCL call as a profiler message and asserts.
// The argument is evaluated exactly once and cached, so passing a call expression, e.g.
// TRACY_CL_CHECK_ERROR(clReleaseEvent(e)), never re-issues the call while the message is built.
// Expands to a braced block so existing uses with or without a trailing semicolon
// (including `else TRACY_CL_CHECK_ERROR(err);`) keep compiling.
#define TRACY_CL_CHECK_ERROR(err) { \
    const cl_int tracy_cl_status_ = (err); \
    if(tracy_cl_status_ != CL_SUCCESS) { \
        std::ostringstream oss; \
        oss << "TRACY_CL_CHECK_ERROR failed on " << __FILE__ << ":" << __LINE__ \
            << ": error code " << tracy_cl_status_; \
        auto msg = oss.str(); \
        TracyMessage(msg.data(), msg.size()); \
        assert(false && "TRACY_CL_CHECK_ERROR failed"); \
    } \
}
|
||||
|
||||
namespace tracy {
|
||||
|
||||
enum class EventPhase : uint8_t
|
||||
{
|
||||
Begin,
|
||||
End
|
||||
};
|
||||
|
||||
struct EventInfo
|
||||
{
|
||||
cl_event event;
|
||||
EventPhase phase;
|
||||
};
|
||||
|
||||
class OpenCLCtx
|
||||
{
|
||||
public:
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
OpenCLCtx(cl_context context, cl_device_id device)
|
||||
: m_contextId(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
|
||||
, m_head(0)
|
||||
, m_tail(0)
|
||||
{
|
||||
int64_t tcpu, tgpu;
|
||||
TRACY_CL_ASSERT(m_contextId != 255);
|
||||
|
||||
cl_int err = CL_SUCCESS;
|
||||
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
TRACY_CL_CHECK_ERROR(err)
|
||||
uint32_t dummyValue = 42;
|
||||
cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err);
|
||||
TRACY_CL_CHECK_ERROR(err)
|
||||
cl_event writeBufferEvent;
|
||||
TRACY_CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent));
|
||||
TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &writeBufferEvent));
|
||||
|
||||
tcpu = Profiler::GetTime();
|
||||
|
||||
cl_int eventStatus;
|
||||
TRACY_CL_CHECK_ERROR(clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr));
|
||||
TRACY_CL_ASSERT(eventStatus == CL_COMPLETE);
|
||||
TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr));
|
||||
TRACY_CL_CHECK_ERROR(clReleaseEvent(writeBufferEvent));
|
||||
TRACY_CL_CHECK_ERROR(clReleaseMemObject(dummyBuffer));
|
||||
TRACY_CL_CHECK_ERROR(clReleaseCommandQueue(queue));
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
|
||||
MemWrite(&item->gpuNewContext.cpuTime, tcpu);
|
||||
MemWrite(&item->gpuNewContext.gpuTime, tgpu);
|
||||
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
|
||||
MemWrite(&item->gpuNewContext.period, 1.0f);
|
||||
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
|
||||
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
|
||||
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem(*item);
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, (uint8_t)m_contextId );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC(Color::Red4);
|
||||
|
||||
if (m_tail == m_head) return;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if (!GetProfiler().IsConnected())
|
||||
{
|
||||
m_head = m_tail = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; m_tail != m_head; m_tail = (m_tail + 1) % QueryCount)
|
||||
{
|
||||
EventInfo eventInfo = GetQuery(m_tail);
|
||||
cl_int eventStatus;
|
||||
cl_int err = clGetEventInfo(eventInfo.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "clGetEventInfo falied with error code " << err << ", on event " << eventInfo.event << ", skipping...";
|
||||
auto msg = oss.str();
|
||||
TracyMessage(msg.data(), msg.size());
|
||||
if (eventInfo.event == nullptr) {
|
||||
TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check your code!");
|
||||
}
|
||||
assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent");
|
||||
continue;
|
||||
}
|
||||
if (eventStatus != CL_COMPLETE) return;
|
||||
|
||||
cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin)
|
||||
? CL_PROFILING_COMMAND_START
|
||||
: CL_PROFILING_COMMAND_END;
|
||||
|
||||
cl_ulong eventTimeStamp = 0;
|
||||
err = clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr);
|
||||
if (err == CL_PROFILING_INFO_NOT_AVAILABLE)
|
||||
{
|
||||
TracyMessageL("command queue is not created with CL_QUEUE_PROFILING_ENABLE flag, check your code!");
|
||||
assert(false && "command queue is not created with CL_QUEUE_PROFILING_ENABLE flag");
|
||||
}
|
||||
else
|
||||
TRACY_CL_CHECK_ERROR(err);
|
||||
|
||||
TRACY_CL_ASSERT(eventTimeStamp != 0);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuTime);
|
||||
MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp);
|
||||
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
|
||||
MemWrite(&item->gpuTime.context, m_contextId);
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
if (eventInfo.phase == EventPhase::End)
|
||||
{
|
||||
// Done with the event, so release it
|
||||
TRACY_CL_CHECK_ERROR(clReleaseEvent(eventInfo.event));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline uint8_t GetId() const
|
||||
{
|
||||
return m_contextId;
|
||||
}
|
||||
|
||||
tracy_force_inline unsigned int NextQueryId(EventInfo eventInfo)
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = (m_head + 1) % QueryCount;
|
||||
TRACY_CL_ASSERT(m_head != m_tail);
|
||||
m_query[id] = eventInfo;
|
||||
return id;
|
||||
}
|
||||
|
||||
tracy_force_inline EventInfo& GetQuery(unsigned int id)
|
||||
{
|
||||
TRACY_CL_ASSERT(id < QueryCount);
|
||||
return m_query[id];
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
unsigned int m_contextId;
|
||||
|
||||
EventInfo m_query[QueryCount];
|
||||
unsigned int m_head; // index at which a new event should be inserted
|
||||
unsigned int m_tail; // oldest event
|
||||
|
||||
};
|
||||
|
||||
class OpenCLCtxScope {
|
||||
public:
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active&& GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
GetProfiler().SendCallstack(depth);
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, srcloc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active)
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active(is_active && GetProfiler().IsConnected())
|
||||
#else
|
||||
: m_active(is_active)
|
||||
#endif
|
||||
, m_ctx(ctx)
|
||||
, m_event(nullptr)
|
||||
{
|
||||
if (!m_active) return;
|
||||
|
||||
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
|
||||
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneBegin.srcloc, srcloc);
|
||||
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
|
||||
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline void SetEvent(cl_event event)
|
||||
{
|
||||
if (!m_active) return;
|
||||
m_event = event;
|
||||
TRACY_CL_CHECK_ERROR(clRetainEvent(m_event));
|
||||
m_ctx->GetQuery(m_beginQueryId).event = m_event;
|
||||
}
|
||||
|
||||
tracy_force_inline ~OpenCLCtxScope()
|
||||
{
|
||||
if (!m_active) return;
|
||||
const auto queryId = m_ctx->NextQueryId(EventInfo{ m_event, EventPhase::End });
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
|
||||
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
|
||||
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
|
||||
MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)queryId);
|
||||
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
const bool m_active;
|
||||
OpenCLCtx* m_ctx;
|
||||
cl_event m_event;
|
||||
unsigned int m_beginQueryId;
|
||||
};
|
||||
|
||||
// Allocates storage through tracy_malloc and constructs an OpenCLCtx in it.
// The returned context must be released with DestroyCLContext.
static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device)
{
    void* storage = tracy_malloc(sizeof(OpenCLCtx));
    return new (storage) OpenCLCtx(context, device);
}
|
||||
|
||||
// Destroys a context created by CreateCLContext: runs the destructor explicitly
// (the object was placement-constructed) and returns the storage via tracy_free.
// A null context is ignored, so it is safe to call on a handle that was never created.
static inline void DestroyCLContext(OpenCLCtx* ctx)
{
    if (!ctx) return;

    ctx->~OpenCLCtx();
    tracy_free(ctx);
}
|
||||
|
||||
} // namespace tracy
|
||||
|
||||
using TracyCLCtx = tracy::OpenCLCtx*;
|
||||
|
||||
#define TracyCLContext(context, device) tracy::CreateCLContext(context, device);
|
||||
#define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx);
|
||||
// Names the given context. The first parameter must be spelled `ctx` because the expansion
// refers to it; with any other parameter name the macro would silently bind to whatever
// variable called `ctx` exists at the call site.
#define TracyCLContextName(ctx, name, size) ctx->Name(name, size);
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyCLZone(ctx, name) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, TRACY_CALLSTACK, true)
|
||||
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, TRACY_CALLSTACK, true)
|
||||
# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
|
||||
#else
|
||||
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active);
|
||||
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active);
|
||||
# define TracyCLZone(ctx, name) TracyCLNamedZone(ctx, __tracy_gpu_zone, name, true)
|
||||
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneC(ctx, __tracy_gpu_zone, name, color, true )
|
||||
# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active);
|
||||
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active);
|
||||
# define TracyCLZoneS(ctx, name, depth) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, depth, true)
|
||||
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, depth, true)
|
||||
# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) tracy::OpenCLCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
|
||||
#else
|
||||
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) TracyCLNamedZone(ctx, varname, name, active)
|
||||
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) TracyCLNamedZoneC(ctx, varname, name, color, active)
|
||||
# define TracyCLZoneS(ctx, name, depth) TracyCLZone(ctx, name)
|
||||
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLZoneC(ctx, name, color)
|
||||
# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) TracyCLZoneTransient( ctx, varname, name, active )
|
||||
#endif
|
||||
|
||||
#define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event)
|
||||
#define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event)
|
||||
|
||||
#define TracyCLCollect(ctx) ctx->Collect()
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
152
TracyOpenGL.hpp
152
TracyOpenGL.hpp
@@ -1,22 +1,21 @@
|
||||
#ifndef __TRACYOPENGL_HPP__
|
||||
#define __TRACYOPENGL_HPP__
|
||||
|
||||
// Include this file after you include OpenGL 3.2 headers.
|
||||
|
||||
#if !defined TRACY_ENABLE || defined __APPLE__
|
||||
|
||||
#define TracyGpuContext
|
||||
#define TracyGpuContextName(x,y)
|
||||
#define TracyGpuNamedZone(x,y,z)
|
||||
#define TracyGpuNamedZoneC(x,y,z,w)
|
||||
#define TracyGpuNamedZone(x,y)
|
||||
#define TracyGpuNamedZoneC(x,y,z)
|
||||
#define TracyGpuZone(x)
|
||||
#define TracyGpuZoneC(x,y)
|
||||
#define TracyGpuZoneTransient(x,y,z)
|
||||
#define TracyGpuCollect
|
||||
|
||||
#define TracyGpuNamedZoneS(x,y,z,w)
|
||||
#define TracyGpuNamedZoneCS(x,y,z,w,a)
|
||||
#define TracyGpuNamedZoneS(x,y,z)
|
||||
#define TracyGpuNamedZoneCS(x,y,z,w)
|
||||
#define TracyGpuZoneS(x,y)
|
||||
#define TracyGpuZoneCS(x,y,z)
|
||||
#define TracyGpuZoneTransientS(x,y,z,w)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
@@ -24,8 +23,8 @@ struct SourceLocationData;
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
GpuCtxScope( const SourceLocationData*, bool ) {}
|
||||
GpuCtxScope( const SourceLocationData*, int, bool ) {}
|
||||
GpuCtxScope( const SourceLocationData* ) {}
|
||||
GpuCtxScope( const SourceLocationData*, int ) {}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -50,34 +49,29 @@ public:
|
||||
#endif
|
||||
|
||||
#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
|
||||
#define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
|
||||
# define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK );
|
||||
# define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK )
|
||||
#else
|
||||
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
|
||||
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
|
||||
# define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
|
||||
# define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
|
||||
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name )
|
||||
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color )
|
||||
#endif
|
||||
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
|
||||
# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
|
||||
# define TracyGpuNamedZoneS( varname, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth )
|
||||
#else
|
||||
# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active )
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active )
|
||||
# define TracyGpuNamedZoneS( varname, name, depth ) TracyGpuNamedZone( varname, name )
|
||||
# define TracyGpuNamedZoneCS( varname, name, color, depth ) TracyGpuNamedZoneC( varname, name, color )
|
||||
# define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
|
||||
# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
|
||||
# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active )
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
@@ -114,8 +108,7 @@ public:
|
||||
MemWrite( &item->gpuNewContext.thread, thread );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
||||
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
@@ -124,21 +117,6 @@ public:
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
@@ -201,108 +179,54 @@ private:
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active )
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
: m_active( GetProfiler().IsConnected() )
|
||||
#endif
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
|
||||
#endif
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuZoneBegin );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active )
|
||||
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
: m_active( GetProfiler().IsConnected() )
|
||||
#endif
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
|
||||
#endif
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
TracyLfqPrepare( QueueType::GpuZoneBegin );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
#else
|
||||
GetProfiler().SendCallstack( depth );
|
||||
const auto thread = GetThreadHandle();
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
#endif
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, thread );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
|
||||
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
||||
#else
|
||||
GetProfiler().SendCallstack( depth );
|
||||
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
#endif
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ~GpuCtxScope()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
|
||||
#endif
|
||||
const auto queryId = GetGpuCtx().ptr->NextQueryId();
|
||||
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
||||
|
||||
@@ -315,7 +239,9 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const bool m_active;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
314
TracyVulkan.hpp
314
TracyVulkan.hpp
@@ -4,21 +4,17 @@
|
||||
#if !defined TRACY_ENABLE
|
||||
|
||||
#define TracyVkContext(x,y,z,w) nullptr
|
||||
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
|
||||
#define TracyVkDestroy(x)
|
||||
#define TracyVkContextName(c,x,y)
|
||||
#define TracyVkNamedZone(c,x,y,z,w)
|
||||
#define TracyVkNamedZoneC(c,x,y,z,w,a)
|
||||
#define TracyVkNamedZone(c,x,y,z)
|
||||
#define TracyVkNamedZoneC(c,x,y,z,w)
|
||||
#define TracyVkZone(c,x,y)
|
||||
#define TracyVkZoneC(c,x,y,z)
|
||||
#define TracyVkZoneTransient(c,x,y,z,w)
|
||||
#define TracyVkCollect(c,x)
|
||||
|
||||
#define TracyVkNamedZoneS(c,x,y,z,w,a)
|
||||
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
|
||||
#define TracyVkNamedZoneS(c,x,y,z,w)
|
||||
#define TracyVkNamedZoneCS(c,x,y,z,w,v)
|
||||
#define TracyVkZoneS(c,x,y,z)
|
||||
#define TracyVkZoneCS(c,x,y,z,w)
|
||||
#define TracyVkZoneTransientS(c,x,y,z,w,a)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
@@ -29,12 +25,9 @@ using TracyVkCtx = void*;
|
||||
|
||||
#else
|
||||
|
||||
#if !defined VK_NULL_HANDLE
|
||||
# error "You must include Vulkan headers before including TracyVulkan.hpp"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "client/TracyCallstack.hpp"
|
||||
@@ -49,41 +42,16 @@ class VkCtx
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
|
||||
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
|
||||
: m_device( device )
|
||||
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
, m_oldCnt( 0 )
|
||||
, m_queryCount( QueryCount )
|
||||
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
|
||||
{
|
||||
assert( m_context != 255 );
|
||||
|
||||
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT )
|
||||
{
|
||||
uint32_t num;
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr );
|
||||
if( num > 4 ) num = 4;
|
||||
VkTimeDomainEXT data[4];
|
||||
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data );
|
||||
VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1;
|
||||
#if defined _WIN32
|
||||
supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
|
||||
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
|
||||
#endif
|
||||
for( uint32_t i=0; i<num; i++ )
|
||||
{
|
||||
if( data[i] == supportedDomain )
|
||||
{
|
||||
m_timeDomain = data[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VkPhysicalDeviceProperties prop;
|
||||
vkGetPhysicalDeviceProperties( physdev, &prop );
|
||||
const float period = prop.limits.timestampPeriod;
|
||||
@@ -113,58 +81,21 @@ public:
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
int64_t tcpu, tgpu;
|
||||
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
tcpu = Profiler::GetTime();
|
||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
int64_t tgpu;
|
||||
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
|
||||
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
}
|
||||
else
|
||||
{
|
||||
enum { NumProbes = 32 };
|
||||
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation[NumProbes];
|
||||
for( int i=0; i<NumProbes; i++ )
|
||||
{
|
||||
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
|
||||
}
|
||||
uint64_t minDeviation = deviation[0];
|
||||
for( int i=1; i<NumProbes; i++ )
|
||||
{
|
||||
if( minDeviation > deviation[i] )
|
||||
{
|
||||
minDeviation = deviation[i];
|
||||
}
|
||||
}
|
||||
m_deviation = minDeviation * 3 / 2;
|
||||
|
||||
#if defined _WIN32
|
||||
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
|
||||
#endif
|
||||
|
||||
Calibrate( device, m_prevCalibration, tgpu );
|
||||
tcpu = Profiler::GetTime();
|
||||
}
|
||||
|
||||
uint8_t flags = 0;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
|
||||
vkBeginCommandBuffer( cmdbuf, &beginInfo );
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
|
||||
vkEndCommandBuffer( cmdbuf );
|
||||
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
|
||||
vkQueueWaitIdle( queue );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
|
||||
@@ -173,9 +104,7 @@ public:
|
||||
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
|
||||
MemWrite( &item->gpuNewContext.period, period );
|
||||
MemWrite( &item->gpuNewContext.context, m_context );
|
||||
MemWrite( &item->gpuNewContext.flags, flags );
|
||||
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
|
||||
|
||||
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
@@ -190,22 +119,6 @@ public:
|
||||
vkDestroyQueryPool( m_device, m_query, nullptr );
|
||||
}
|
||||
|
||||
void Name( const char* name, uint16_t len )
|
||||
{
|
||||
auto ptr = (char*)tracy_malloc( len );
|
||||
memcpy( ptr, name, len );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuContextName );
|
||||
MemWrite( &item->gpuContextNameFat.context, m_context );
|
||||
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->gpuContextNameFat.size, len );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
void Collect( VkCommandBuffer cmdbuf )
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
@@ -216,9 +129,7 @@ public:
|
||||
if( !GetProfiler().IsConnected() )
|
||||
{
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
|
||||
m_head = m_tail = m_oldCnt = 0;
|
||||
int64_t tgpu;
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
|
||||
m_head = m_tail = 0;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@@ -250,25 +161,6 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
|
||||
{
|
||||
int64_t tgpu, tcpu;
|
||||
Calibrate( m_device, tcpu, tgpu );
|
||||
const auto refCpu = Profiler::GetTime();
|
||||
const auto delta = tcpu - m_prevCalibration;
|
||||
if( delta > 0 )
|
||||
{
|
||||
m_prevCalibration = tcpu;
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
|
||||
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
|
||||
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
|
||||
MemWrite( &item->gpuCalibration.cpuDelta, delta );
|
||||
MemWrite( &item->gpuCalibration.context, m_context );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
}
|
||||
|
||||
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
|
||||
|
||||
m_tail += cnt;
|
||||
@@ -289,38 +181,8 @@ private:
|
||||
return m_context;
|
||||
}
|
||||
|
||||
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
|
||||
{
|
||||
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
|
||||
VkCalibratedTimestampInfoEXT spec[2] = {
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
|
||||
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
|
||||
};
|
||||
uint64_t ts[2];
|
||||
uint64_t deviation;
|
||||
do
|
||||
{
|
||||
m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation );
|
||||
}
|
||||
while( deviation > m_deviation );
|
||||
|
||||
#if defined _WIN32
|
||||
tGpu = ts[0];
|
||||
tCpu = ts[1] * m_qpcToNs;
|
||||
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
tGpu = ts[0];
|
||||
tCpu = ts[1];
|
||||
#else
|
||||
assert( false );
|
||||
#endif
|
||||
}
|
||||
|
||||
VkDevice m_device;
|
||||
VkQueryPool m_query;
|
||||
VkTimeDomainEXT m_timeDomain;
|
||||
uint64_t m_deviation;
|
||||
int64_t m_qpcToNs;
|
||||
int64_t m_prevCalibration;
|
||||
uint8_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
@@ -329,26 +191,23 @@ private:
|
||||
unsigned int m_queryCount;
|
||||
|
||||
int64_t* m_res;
|
||||
|
||||
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
|
||||
};
|
||||
|
||||
class VkCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active )
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf )
|
||||
: m_cmdbuf( cmdbuf )
|
||||
, m_ctx( ctx )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
, m_active( GetProfiler().IsConnected() )
|
||||
#endif
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
#endif
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
|
||||
@@ -360,21 +219,21 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active )
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth )
|
||||
: m_cmdbuf( cmdbuf )
|
||||
, m_ctx( ctx )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
, m_active( GetProfiler().IsConnected() )
|
||||
#endif
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
#endif
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
||||
@@ -382,64 +241,17 @@ public:
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
m_cmdbuf = cmdbuf;
|
||||
m_ctx = ctx;
|
||||
|
||||
const auto queryId = ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
|
||||
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
|
||||
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
||||
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
|
||||
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
||||
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
||||
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
|
||||
Profiler::QueueSerialFinish();
|
||||
GetProfiler().SendCallstack( depth );
|
||||
}
|
||||
|
||||
tracy_force_inline ~VkCtxScope()
|
||||
{
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !m_active ) return;
|
||||
|
||||
#endif
|
||||
const auto queryId = m_ctx->NextQueryId();
|
||||
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId );
|
||||
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_ctx->m_query, queryId );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
|
||||
@@ -451,16 +263,18 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
VkCommandBuffer m_cmdbuf;
|
||||
VkCtx* m_ctx;
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
const bool m_active;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
|
||||
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
|
||||
{
|
||||
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
|
||||
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
new(ctx) VkCtx( physdev, device, queue, cmdbuf );
|
||||
return ctx;
|
||||
}
|
||||
|
||||
@@ -474,37 +288,31 @@ static inline void DestroyVkContext( VkCtx* ctx )
|
||||
|
||||
using TracyVkCtx = tracy::VkCtx*;
|
||||
|
||||
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
|
||||
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
|
||||
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf );
|
||||
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
|
||||
#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );
|
||||
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
|
||||
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active )
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK )
|
||||
#else
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
|
||||
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), cmdbuf, active );
|
||||
# define TracyVkNamedZone( ctx, varname, cmdbuf, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf );
|
||||
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf );
|
||||
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name )
|
||||
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color )
|
||||
#endif
|
||||
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
|
||||
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), cmdbuf, depth, active );
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth );
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth );
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth )
|
||||
#else
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active )
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active )
|
||||
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth ) TracyVkNamedZone( ctx, varname, cmdbuf, name )
|
||||
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color )
|
||||
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name )
|
||||
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color )
|
||||
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active )
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
all: release
|
||||
all: debug
|
||||
|
||||
debug:
|
||||
@+make -f debug.mk all
|
||||
@@ -9,8 +9,4 @@ release:
|
||||
clean:
|
||||
@+make -f build.mk clean
|
||||
|
||||
db: clean
|
||||
@bear -- $(MAKE) -f debug.mk all
|
||||
@mv -f compile_commands.json ../../../
|
||||
|
||||
.PHONY: all clean debug release db
|
||||
.PHONY: all clean debug release
|
||||
|
||||
@@ -1,12 +1,60 @@
|
||||
CFLAGS +=
|
||||
CXXFLAGS := $(CFLAGS) -std=gnu++17
|
||||
DEFINES += -DTRACY_NO_STATISTICS
|
||||
INCLUDES := $(shell pkg-config --cflags capstone)
|
||||
LIBS += $(shell pkg-config --libs capstone) -lpthread
|
||||
INCLUDES :=
|
||||
LIBS := -lpthread
|
||||
PROJECT := capture
|
||||
IMAGE := $(PROJECT)-$(BUILD)
|
||||
|
||||
FILTER := ../../../getopt/getopt.c
|
||||
include ../../../common/src-from-vcxproj.mk
|
||||
FILTER :=
|
||||
|
||||
include ../../../common/unix.mk
|
||||
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
|
||||
SRC := $(filter-out $(FILTER),$(BASE))
|
||||
SRC2 := $(filter-out $(FILTER),$(BASE2))
|
||||
|
||||
TBB := $(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?)
|
||||
ifeq ($(TBB),0)
|
||||
LIBS += -ltbb
|
||||
endif
|
||||
|
||||
OBJDIRBASE := obj/$(BUILD)
|
||||
OBJDIR := $(OBJDIRBASE)/o/o/o
|
||||
|
||||
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
|
||||
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
|
||||
|
||||
all: $(IMAGE)
|
||||
|
||||
$(OBJDIR)/%.o: %.cpp
|
||||
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.cpp
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.c
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.c
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(IMAGE): $(OBJ) $(OBJ2)
|
||||
$(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -o $@
|
||||
|
||||
ifneq "$(MAKECMDGOALS)" "clean"
|
||||
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) %(addprefix $(OBJDIR)/,$(SRC2:.c=.d))
|
||||
endif
|
||||
|
||||
clean:
|
||||
rm -rf $(OBJDIRBASE) $(IMAGE)*
|
||||
|
||||
.PHONY: clean all
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
ARCH := $(shell uname -m)
|
||||
|
||||
CFLAGS := -g3 -Wall
|
||||
DEFINES := -DDEBUG
|
||||
BUILD := debug
|
||||
|
||||
include ../../../common/unix-debug.mk
|
||||
ifeq ($(ARCH),x86_64)
|
||||
CFLAGS += -msse4.1
|
||||
endif
|
||||
|
||||
include build.mk
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
CFLAGS := -O3
|
||||
ifndef TRACY_NO_LTO
|
||||
CFLAGS += -flto
|
||||
endif
|
||||
ARCH := $(shell uname -m)
|
||||
|
||||
CFLAGS := -O3 -s -fomit-frame-pointer
|
||||
DEFINES := -DNDEBUG
|
||||
BUILD := release
|
||||
|
||||
include ../../../common/unix-release.mk
|
||||
ifeq ($(ARCH),x86_64)
|
||||
CFLAGS += -msse4.1
|
||||
endif
|
||||
|
||||
include build.mk
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30907.101
|
||||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.27428.2002
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "capture", "capture.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}"
|
||||
EndProject
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
@@ -15,19 +23,31 @@
|
||||
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
|
||||
<RootNamespace>capture</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
<VcpkgTriplet>x64-windows-static</VcpkgTriplet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
@@ -36,6 +56,12 @@
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
@@ -44,9 +70,14 @@
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<PropertyGroup Label="Vcpkg">
|
||||
<VcpkgEnableManifest>true</VcpkgEnableManifest>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
@@ -57,12 +88,24 @@
|
||||
<PreprocessorDefinitions>TRACY_NO_STATISTICS;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpplatest</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\debug\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
@@ -77,60 +120,26 @@
|
||||
<PreprocessorDefinitions>TRACY_NO_STATISTICS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpplatest</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\TracySocket.cpp" />
|
||||
<ClCompile Include="..\..\..\common\TracyStackFrames.cpp" />
|
||||
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
|
||||
<ClCompile Include="..\..\..\getopt\getopt.c" />
|
||||
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyMmap.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyPrint.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
|
||||
<ClCompile Include="..\..\..\zstd\common\debug.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\entropy_common.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\error_private.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\fse_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\pool.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\threading.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\xxhash.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\zstd_common.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\fse_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\hist.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\huf_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_double_fast.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_lazy.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_ldm.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_opt.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\huf_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\src\capture.cpp" />
|
||||
<ClCompile Include="..\..\src\getopt.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
|
||||
@@ -140,65 +149,24 @@
|
||||
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyStackFrames.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_benaphore.h" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
|
||||
<ClInclude Include="..\..\..\getopt\getopt.h" />
|
||||
<ClInclude Include="..\..\..\common\tracy_sema.h" />
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileRead.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMmap.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPrint.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
|
||||
<ClInclude Include="..\..\..\zstd\common\bitstream.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\compiler.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\cpu.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\debug.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\error_private.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\fse.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\huf.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\mem.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\pool.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\portability_macros.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\threading.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\xxhash.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_deps.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\clevels.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\hist.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_lazy.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm_geartab.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_opt.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S" />
|
||||
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp" />
|
||||
<ClInclude Include="..\..\src\getopt.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
|
||||
@@ -10,24 +10,6 @@
|
||||
<Filter Include="common">
|
||||
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="getopt">
|
||||
<UniqueIdentifier>{ee9737d2-69c7-44da-b9c7-539d18f9d4b4}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd">
|
||||
<UniqueIdentifier>{f201463b-5e69-46fe-bdfc-1b5eed86c7f7}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\common">
|
||||
<UniqueIdentifier>{7e93ae33-6543-4bca-b05b-50818dbf24cc}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\compress">
|
||||
<UniqueIdentifier>{3b0c32f5-9efb-4503-9394-5ab95909fb1c}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\decompress">
|
||||
<UniqueIdentifier>{c1f99170-d904-4af1-8010-0a3ded5736c8}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\dictBuilder">
|
||||
<UniqueIdentifier>{456e6786-ea57-42b8-ae38-829cd2d918bd}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
@@ -48,6 +30,9 @@
|
||||
<ClCompile Include="..\..\src\capture.cpp">
|
||||
<Filter>src</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\src\getopt.c">
|
||||
<Filter>src</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
@@ -60,105 +45,6 @@
|
||||
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyMmap.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\getopt\getopt.c">
|
||||
<Filter>getopt</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\debug.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\entropy_common.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\error_private.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\fse_decompress.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\pool.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\threading.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\xxhash.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\zstd_common.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\fse_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\hist.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\huf_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_literals.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_sequences.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_superblock.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_double_fast.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_fast.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_lazy.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_ldm.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_opt.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\decompress\huf_decompress.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\TracyStackFrames.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
@@ -185,6 +71,9 @@
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
@@ -209,9 +98,18 @@
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\src\getopt.h">
|
||||
<Filter>src</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\tracy_benaphore.h">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\tracy_sema.h">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
@@ -224,136 +122,5 @@
|
||||
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyFileRead.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyMmap.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\getopt\getopt.h">
|
||||
<Filter>getopt</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\bitstream.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\compiler.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\cpu.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\debug.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\error_private.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\fse.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\huf.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\mem.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\pool.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\threading.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\xxhash.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_deps.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_internal.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\hist.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_internal.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_literals.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_sequences.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_superblock.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_cwksp.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_double_fast.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_fast.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_lazy.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm_geartab.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_opt.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyStackFrames.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\portability_macros.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\clevels.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -1,96 +1,36 @@
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
# include <io.h>
|
||||
#else
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <inttypes.h>
|
||||
#include <mutex>
|
||||
#include <signal.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "../../common/TracyProtocol.hpp"
|
||||
#include "../../common/TracyStackFrames.hpp"
|
||||
#include "../../server/TracyFileWrite.hpp"
|
||||
#include "../../server/TracyMemory.hpp"
|
||||
#include "../../server/TracyPrint.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
#include "getopt.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
# include "../../getopt/getopt.h"
|
||||
#endif
|
||||
|
||||
|
||||
// This atomic is written by a signal handler (SigInt). Traditionally that would
|
||||
// have had to be `volatile sig_atomic_t`, and annoyingly, `bool` was
|
||||
// technically not allowed there, even though in practice it would work.
|
||||
// The good thing with C++11 atomics is that we can use atomic<bool> instead
|
||||
// here and be on the actually supported path.
|
||||
static std::atomic<bool> s_disconnect { false };
|
||||
#ifndef _MSC_VER
|
||||
struct sigaction oldsigint;
|
||||
bool disconnect = false;
|
||||
|
||||
void SigInt( int )
|
||||
{
|
||||
// Relaxed order is closest to a traditional `volatile` write.
|
||||
// We don't need stronger ordering since this signal handler doesn't do
|
||||
// anything else that would need to be ordered relatively to this.
|
||||
s_disconnect.store(true, std::memory_order_relaxed);
|
||||
disconnect = true;
|
||||
}
|
||||
|
||||
static bool s_isStdoutATerminal = false;
|
||||
|
||||
void InitIsStdoutATerminal() {
|
||||
#ifdef _WIN32
|
||||
s_isStdoutATerminal = _isatty( fileno( stdout ) );
|
||||
#else
|
||||
s_isStdoutATerminal = isatty( fileno( stdout ) );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool IsStdoutATerminal() { return s_isStdoutATerminal; }
|
||||
|
||||
#define ANSI_RESET "\033[0m"
|
||||
#define ANSI_BOLD "\033[1m"
|
||||
#define ANSI_BLACK "\033[30m"
|
||||
#define ANSI_RED "\033[31m"
|
||||
#define ANSI_GREEN "\033[32m"
|
||||
#define ANSI_YELLOW "\033[33m"
|
||||
#define ANSI_MAGENTA "\033[35m"
|
||||
#define ANSI_CYAN "\033[36m"
|
||||
#define ANSI_ERASE_LINE "\033[2K"
|
||||
|
||||
// Like printf, but if stdout is a terminal, prepends the output with
|
||||
// the given `ansiEscape` and appends ANSI_RESET.
|
||||
void AnsiPrintf( const char* ansiEscape, const char* format, ... ) {
|
||||
if( IsStdoutATerminal() )
|
||||
{
|
||||
// Prepend ansiEscape and append ANSI_RESET.
|
||||
char buf[256];
|
||||
va_list args;
|
||||
va_start( args, format );
|
||||
vsnprintf( buf, sizeof buf, format, args );
|
||||
va_end( args );
|
||||
printf( "%s%s" ANSI_RESET, ansiEscape, buf );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Just a normal printf.
|
||||
va_list args;
|
||||
va_start( args, format );
|
||||
vfprintf( stdout, format, args );
|
||||
va_end( args );
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] void Usage()
|
||||
void Usage()
|
||||
{
|
||||
printf( "Usage: capture -o output.tracy [-a address] [-p port] [-f] [-s seconds]\n" );
|
||||
printf( "Usage: capture -a address -o output.tracy [-p port]\n" );
|
||||
exit( 1 );
|
||||
}
|
||||
|
||||
@@ -104,16 +44,12 @@ int main( int argc, char** argv )
|
||||
}
|
||||
#endif
|
||||
|
||||
InitIsStdoutATerminal();
|
||||
|
||||
bool overwrite = false;
|
||||
const char* address = "127.0.0.1";
|
||||
const char* address = nullptr;
|
||||
const char* output = nullptr;
|
||||
int port = 8086;
|
||||
int seconds = -1;
|
||||
|
||||
int c;
|
||||
while( ( c = getopt( argc, argv, "a:o:p:fs:" ) ) != -1 )
|
||||
while( ( c = getopt( argc, argv, "a:o:p:" ) ) != -1 )
|
||||
{
|
||||
switch( c )
|
||||
{
|
||||
@@ -126,12 +62,6 @@ int main( int argc, char** argv )
|
||||
case 'p':
|
||||
port = atoi( optarg );
|
||||
break;
|
||||
case 'f':
|
||||
overwrite = true;
|
||||
break;
|
||||
case 's':
|
||||
seconds = atoi (optarg);
|
||||
break;
|
||||
default:
|
||||
Usage();
|
||||
break;
|
||||
@@ -140,22 +70,6 @@ int main( int argc, char** argv )
|
||||
|
||||
if( !address || !output ) Usage();
|
||||
|
||||
struct stat st;
|
||||
if( stat( output, &st ) == 0 && !overwrite )
|
||||
{
|
||||
printf( "Output file %s already exists! Use -f to force overwrite.\n", output );
|
||||
return 4;
|
||||
}
|
||||
|
||||
FILE* test = fopen( output, "wb" );
|
||||
if( !test )
|
||||
{
|
||||
printf( "Cannot open output file %s for writing!\n", output );
|
||||
return 5;
|
||||
}
|
||||
fclose( test );
|
||||
unlink( output );
|
||||
|
||||
printf( "Connecting to %s:%i...", address, port );
|
||||
fflush( stdout );
|
||||
tracy::Worker worker( address, port );
|
||||
@@ -181,10 +95,8 @@ int main( int argc, char** argv )
|
||||
while( !worker.HasData() ) std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );
|
||||
|
||||
#ifdef _WIN32
|
||||
signal( SIGINT, SigInt );
|
||||
#else
|
||||
struct sigaction sigint, oldsigint;
|
||||
#ifndef _MSC_VER
|
||||
struct sigaction sigint;
|
||||
memset( &sigint, 0, sizeof( sigint ) );
|
||||
sigint.sa_handler = SigInt;
|
||||
sigaction( SIGINT, &sigint, &oldsigint );
|
||||
@@ -195,17 +107,13 @@ int main( int argc, char** argv )
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
while( worker.IsConnected() )
|
||||
{
|
||||
// Relaxed order is sufficient here because `s_disconnect` is only ever
|
||||
// set by this thread or by the SigInt handler, and that handler does
|
||||
// nothing else than storing `s_disconnect`.
|
||||
if( s_disconnect.load( std::memory_order_relaxed ) )
|
||||
#ifndef _MSC_VER
|
||||
if( disconnect )
|
||||
{
|
||||
worker.Disconnect();
|
||||
// Relaxed order is sufficient because only this thread ever reads
|
||||
// this value.
|
||||
s_disconnect.store(false, std::memory_order_relaxed );
|
||||
break;
|
||||
disconnect = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
lock.lock();
|
||||
const auto mbps = worker.GetMbpsData().back();
|
||||
@@ -213,142 +121,45 @@ int main( int argc, char** argv )
|
||||
const auto netTotal = worker.GetDataTransferred();
|
||||
lock.unlock();
|
||||
|
||||
// Output progress info only if destination is a TTY to avoid bloating
|
||||
// log files (so this is not just about usage of ANSI color codes).
|
||||
if( IsStdoutATerminal() )
|
||||
if( mbps < 0.1f )
|
||||
{
|
||||
const char* unit = "Mbps";
|
||||
float unitsPerMbps = 1.f;
|
||||
if( mbps < 0.1f )
|
||||
{
|
||||
unit = "Kbps";
|
||||
unitsPerMbps = 1000.f;
|
||||
}
|
||||
AnsiPrintf( ANSI_ERASE_LINE ANSI_CYAN ANSI_BOLD, "\r%7.2f %s", mbps * unitsPerMbps, unit );
|
||||
printf( " /");
|
||||
AnsiPrintf( ANSI_CYAN ANSI_BOLD, "%5.1f%%", compRatio * 100.f );
|
||||
printf( " =");
|
||||
AnsiPrintf( ANSI_YELLOW ANSI_BOLD, "%7.2f Mbps", mbps / compRatio );
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_YELLOW, "Tx: ");
|
||||
AnsiPrintf( ANSI_GREEN, "%s", tracy::MemSizeToString( netTotal ) );
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, "%s", tracy::MemSizeToString( tracy::memUsage ) );
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_RED, "%s", tracy::TimeToString( worker.GetLastTime() ) );
|
||||
fflush( stdout );
|
||||
printf( "\33[2K\r\033[36;1m%7.2f Kbps", mbps * 1000.f );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\33[2K\r\033[36;1m%7.2f Mbps", mbps );
|
||||
}
|
||||
printf( " \033[0m /\033[36;1m%5.1f%% \033[0m=\033[33;1m%7.2f Mbps \033[0m| \033[33mNet: \033[32m%s \033[0m| \033[33mMem: \033[31;1m%s\033[0m | \033[33mTime: %s\033[0m",
|
||||
compRatio * 100.f,
|
||||
mbps / compRatio,
|
||||
tracy::MemSizeToString( netTotal ),
|
||||
tracy::MemSizeToString( tracy::memUsage ),
|
||||
tracy::TimeToString( worker.GetLastTime() ) );
|
||||
fflush( stdout );
|
||||
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
if( seconds != -1 )
|
||||
{
|
||||
const auto dur = std::chrono::high_resolution_clock::now() - t0;
|
||||
if( std::chrono::duration_cast<std::chrono::seconds>(dur).count() >= seconds )
|
||||
{
|
||||
// Relaxed order is sufficient because only this thread ever reads
|
||||
// this value.
|
||||
s_disconnect.store(true, std::memory_order_relaxed );
|
||||
}
|
||||
}
|
||||
}
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
const auto& failure = worker.GetFailureType();
|
||||
if( failure != tracy::Worker::Failure::None )
|
||||
{
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, "\nInstrumentation failure: %s", tracy::Worker::GetFailureString( failure ) );
|
||||
auto& fd = worker.GetFailureData();
|
||||
if( !fd.message.empty() )
|
||||
{
|
||||
printf( "\nContext: %s", fd.message.c_str() );
|
||||
}
|
||||
if( fd.callstack != 0 )
|
||||
{
|
||||
AnsiPrintf( ANSI_BOLD, "\n%sFailure callstack:%s\n" );
|
||||
auto& cs = worker.GetCallstack( fd.callstack );
|
||||
int fidx = 0;
|
||||
int bidx = 0;
|
||||
for( auto& entry : cs )
|
||||
{
|
||||
auto frameData = worker.GetCallstackFrame( entry );
|
||||
if( !frameData )
|
||||
{
|
||||
printf( "%3i. %p\n", fidx++, (void*)worker.GetCanonicalPointer( entry ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto fsz = frameData->size;
|
||||
for( uint8_t f=0; f<fsz; f++ )
|
||||
{
|
||||
const auto& frame = frameData->data[f];
|
||||
auto txt = worker.GetString( frame.name );
|
||||
|
||||
if( fidx == 0 && f != fsz-1 )
|
||||
{
|
||||
auto test = tracy::s_tracyStackFrames;
|
||||
bool match = false;
|
||||
do
|
||||
{
|
||||
if( strcmp( txt, *test ) == 0 )
|
||||
{
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
while( *++test );
|
||||
if( match ) continue;
|
||||
}
|
||||
|
||||
bidx++;
|
||||
|
||||
if( f == fsz-1 )
|
||||
{
|
||||
printf( "%3i. ", fidx++ );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_BLACK ANSI_BOLD, "inl. " );
|
||||
}
|
||||
AnsiPrintf( ANSI_CYAN, "%s ", txt );
|
||||
txt = worker.GetString( frame.file );
|
||||
if( frame.line == 0 )
|
||||
{
|
||||
AnsiPrintf( ANSI_YELLOW, "(%s)", txt );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_YELLOW, "(%s:%" PRIu32 ")", txt, frame.line );
|
||||
}
|
||||
if( frameData->imageName.Active() )
|
||||
{
|
||||
AnsiPrintf( ANSI_MAGENTA, " %s\n", worker.GetString( frameData->imageName ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\n" );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
printf( "\n\033[31;1mInstrumentation failure: %s\033[0m", tracy::Worker::GetFailureString( failure ) );
|
||||
}
|
||||
|
||||
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
|
||||
worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() ), tracy::RealToString( worker.GetZoneCount() ),
|
||||
worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() ), tracy::RealToString( worker.GetZoneCount(), true ),
|
||||
tracy::TimeToString( std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count() ) );
|
||||
fflush( stdout );
|
||||
auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output ) );
|
||||
if( f )
|
||||
{
|
||||
worker.Write( *f, false );
|
||||
AnsiPrintf( ANSI_GREEN ANSI_BOLD, " done!\n" );
|
||||
f->Finish();
|
||||
const auto stats = f->GetCompressionStatistics();
|
||||
printf( "Trace size %s (%.2f%% ratio)\n", tracy::MemSizeToString( stats.second ), 100.f * stats.second / stats.first );
|
||||
worker.Write( *f );
|
||||
printf( " \033[32;1mdone!\033[0m\n" );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, " failed!\n");
|
||||
printf( " \033[31;1failed!\033[0m\n" );
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
#ifdef TRACY_ENABLE
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
#include "../common/TracyYield.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
extern thread_local bool RpThreadInitDone;
|
||||
extern std::atomic<int> RpInitDone;
|
||||
extern std::atomic<int> RpInitLock;
|
||||
|
||||
tracy_no_inline static void InitRpmallocPlumbing()
|
||||
{
|
||||
const auto done = RpInitDone.load( std::memory_order_acquire );
|
||||
if( !done )
|
||||
{
|
||||
int expected = 0;
|
||||
while( !RpInitLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); }
|
||||
const auto done = RpInitDone.load( std::memory_order_acquire );
|
||||
if( !done )
|
||||
{
|
||||
rpmalloc_initialize();
|
||||
RpInitDone.store( 1, std::memory_order_release );
|
||||
}
|
||||
RpInitLock.store( 0, std::memory_order_release );
|
||||
}
|
||||
rpmalloc_thread_initialize();
|
||||
RpThreadInitDone = true;
|
||||
}
|
||||
|
||||
TRACY_API void InitRpmalloc()
|
||||
{
|
||||
if( !RpThreadInitDone ) InitRpmallocPlumbing();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,8 +1,6 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined __linux__ && defined __ARM_ARCH
|
||||
|
||||
static const char* DecodeArmImplementer( uint32_t v )
|
||||
{
|
||||
static char buf[16];
|
||||
@@ -14,7 +12,6 @@ static const char* DecodeArmImplementer( uint32_t v )
|
||||
case 0x44: return "DEC";
|
||||
case 0x46: return "Fujitsu";
|
||||
case 0x48: return "HiSilicon";
|
||||
case 0x49: return "Infineon";
|
||||
case 0x4d: return "Motorola";
|
||||
case 0x4e: return "Nvidia";
|
||||
case 0x50: return "Applied Micro";
|
||||
@@ -26,7 +23,6 @@ static const char* DecodeArmImplementer( uint32_t v )
|
||||
case 0x66: return "Faraday";
|
||||
case 0x68: return "HXT";
|
||||
case 0x69: return "Intel";
|
||||
case 0xc0: return "Ampere Computing";
|
||||
default: break;
|
||||
}
|
||||
sprintf( buf, "0x%x", v );
|
||||
@@ -38,7 +34,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
static char buf[16];
|
||||
switch( impl )
|
||||
{
|
||||
case 0x41: // ARM
|
||||
case 0x41:
|
||||
switch( part )
|
||||
{
|
||||
case 0x810: return "810";
|
||||
@@ -61,8 +57,8 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0xc09: return " Cortex-A9";
|
||||
case 0xc0c: return " Cortex-A12";
|
||||
case 0xc0d: return " Rockchip RK3288";
|
||||
case 0xc0e: return " Cortex-A17";
|
||||
case 0xc0f: return " Cortex-A15";
|
||||
case 0xc0e: return " Cortex-A17";
|
||||
case 0xc14: return " Cortex-R4";
|
||||
case 0xc15: return " Cortex-R5";
|
||||
case 0xc17: return " Cortex-R7";
|
||||
@@ -75,7 +71,6 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0xc60: return " Cortex-M0+";
|
||||
case 0xd00: return " AArch64 simulator";
|
||||
case 0xd01: return " Cortex-A32";
|
||||
case 0xd02: return " Cortex-A34";
|
||||
case 0xd03: return " Cortex-A53";
|
||||
case 0xd04: return " Cortex-A35";
|
||||
case 0xd05: return " Cortex-A55";
|
||||
@@ -92,21 +87,10 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0xd13: return " Cortex-R52";
|
||||
case 0xd20: return " Cortex-M23";
|
||||
case 0xd21: return " Cortex-M33";
|
||||
case 0xd22: return " Cortex-M55";
|
||||
case 0xd40: return " Neoverse V1";
|
||||
case 0xd41: return " Cortex-A78";
|
||||
case 0xd42: return " Cortex-A78AE";
|
||||
case 0xd43: return " Cortex-A65AE";
|
||||
case 0xd44: return " Cortex-X1";
|
||||
case 0xd47: return " Cortex-A710";
|
||||
case 0xd48: return " Cortex-X2";
|
||||
case 0xd49: return " Neoverse N2";
|
||||
case 0xd4a: return " Neoverse E1";
|
||||
case 0xd4b: return " Cortex-A78C";
|
||||
case 0xd4c: return " Cortex-X1C";
|
||||
default: break;
|
||||
}
|
||||
case 0x42: // Broadcom
|
||||
case 0x42:
|
||||
switch( part )
|
||||
{
|
||||
case 0xf: return " Brahma B15";
|
||||
@@ -114,7 +98,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0x516: return " ThunderX2";
|
||||
default: break;
|
||||
}
|
||||
case 0x43: // Cavium
|
||||
case 0x43:
|
||||
switch( part )
|
||||
{
|
||||
case 0xa0: return " ThunderX";
|
||||
@@ -122,37 +106,29 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0xa2: return " ThunderX 81XX";
|
||||
case 0xa3: return " ThunderX 83XX";
|
||||
case 0xaf: return " ThunderX2 99xx";
|
||||
case 0xb0: return " OcteonTX2";
|
||||
case 0xb1: return " OcteonTX2 T98";
|
||||
case 0xb2: return " OcteonTX2 T96";
|
||||
case 0xb3: return " OcteonTX2 F95";
|
||||
case 0xb4: return " OcteonTX2 F95N";
|
||||
case 0xb5: return " OcteonTX2 F95MM";
|
||||
case 0xb6: return " OcteonTX2 F95O";
|
||||
case 0xb8: return " ThunderX3 T110";
|
||||
default: break;
|
||||
}
|
||||
case 0x44: // DEC
|
||||
case 0x44:
|
||||
switch( part )
|
||||
{
|
||||
case 0xa10: return " SA110";
|
||||
case 0xa11: return " SA1100";
|
||||
default: break;
|
||||
}
|
||||
case 0x46: // Fujitsu
|
||||
case 0x46:
|
||||
switch( part )
|
||||
{
|
||||
case 0x1: return " A64FX";
|
||||
default: break;
|
||||
}
|
||||
case 0x48: // HiSilicon
|
||||
case 0x48:
|
||||
switch( part )
|
||||
{
|
||||
case 0xd01: return " TSV100";
|
||||
case 0xd40: return " Kirin 980";
|
||||
default: break;
|
||||
}
|
||||
case 0x4e: // Nvidia
|
||||
case 0x4e:
|
||||
switch( part )
|
||||
{
|
||||
case 0x0: return " Denver";
|
||||
@@ -160,13 +136,13 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0x4: return " Carmel";
|
||||
default: break;
|
||||
}
|
||||
case 0x50: // Applied Micro
|
||||
case 0x50:
|
||||
switch( part )
|
||||
{
|
||||
case 0x0: return " X-Gene";
|
||||
default: break;
|
||||
}
|
||||
case 0x51: // Qualcomm
|
||||
case 0x51:
|
||||
switch( part )
|
||||
{
|
||||
case 0xf: return " Scorpion";
|
||||
@@ -182,27 +158,18 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0x802: return " Kryo 385 Gold";
|
||||
case 0x803: return " Kryo 385 Silver";
|
||||
case 0x804: return " Kryo 485 Gold";
|
||||
case 0x805: return " Kryo 4xx/5xx Silver";
|
||||
case 0xc00: return " Falkor";
|
||||
case 0xc01: return " Saphira";
|
||||
default: break;
|
||||
}
|
||||
case 0x53: // Samsung
|
||||
case 0x53:
|
||||
switch( part )
|
||||
{
|
||||
case 0x1: return " Exynos M1/M2";
|
||||
case 0x2: return " Exynos M3";
|
||||
case 0x3: return " Exynos M4";
|
||||
case 0x4: return " Exynos M5";
|
||||
default: break;
|
||||
}
|
||||
case 0x54: // Texas Instruments
|
||||
switch( part )
|
||||
{
|
||||
case 0x925: return " TI925";
|
||||
default: break;
|
||||
}
|
||||
case 0x56: // Marvell
|
||||
case 0x56:
|
||||
switch( part )
|
||||
{
|
||||
case 0x131: return " Feroceon 88FR131";
|
||||
@@ -210,7 +177,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0x584: return " PJ4B-MP / PJ4C";
|
||||
default: break;
|
||||
}
|
||||
case 0x61: // Apple
|
||||
case 0x61:
|
||||
switch( part )
|
||||
{
|
||||
case 0x1: return " Cyclone";
|
||||
@@ -220,41 +187,27 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
|
||||
case 0x5: return " Twister/Elba/Malta";
|
||||
case 0x6: return " Hurricane";
|
||||
case 0x7: return " Hurricane/Myst";
|
||||
case 0x22: return " M1 Icestorm";
|
||||
case 0x23: return " M1 Firestorm";
|
||||
case 0x24: return " M1 Icestorm Pro";
|
||||
case 0x25: return " M1 Firestorm Pro";
|
||||
case 0x28: return " M1 Icestorm Max";
|
||||
case 0x29: return " M1 Firestorm Max";
|
||||
default: break;
|
||||
}
|
||||
case 0x66: // Faraday
|
||||
case 0x66:
|
||||
switch( part )
|
||||
{
|
||||
case 0x526: return " FA526";
|
||||
case 0x626: return " FA626";
|
||||
default: break;
|
||||
}
|
||||
case 0x68: // HXT
|
||||
case 0x68:
|
||||
switch( part )
|
||||
{
|
||||
case 0x0: return " Phecda";
|
||||
default: break;
|
||||
}
|
||||
case 0xc0: // Ampere Computing
|
||||
switch( part )
|
||||
{
|
||||
case 0xac3: return " Ampere1";
|
||||
default: break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
sprintf( buf, " 0x%x", part );
|
||||
return buf;
|
||||
}
|
||||
|
||||
#elif defined __APPLE__ && TARGET_OS_IPHONE == 1
|
||||
|
||||
static const char* DecodeIosDevice( const char* id )
|
||||
{
|
||||
static const char* DeviceTable[] = {
|
||||
@@ -292,19 +245,6 @@ static const char* DecodeIosDevice( const char* id )
|
||||
"iPhone11,4", "iPhone XS Max",
|
||||
"iPhone11,6", "iPhone XS Max China",
|
||||
"iPhone11,8", "iPhone XR",
|
||||
"iPhone12,1", "iPhone 11",
|
||||
"iPhone12,3", "iPhone 11 Pro",
|
||||
"iPhone12,5", "iPhone 11 Pro Max",
|
||||
"iPhone12,8", "iPhone SE 2nd Gen",
|
||||
"iPhone13,1", "iPhone 12 Mini",
|
||||
"iPhone13,2", "iPhone 12",
|
||||
"iPhone13,3", "iPhone 12 Pro",
|
||||
"iPhone13,4", "iPhone 12 Pro Max",
|
||||
"iPhone14,2", "iPhone 13 Pro",
|
||||
"iPhone14,3", "iPhone 13 Pro Max",
|
||||
"iPhone14,4", "iPhone 13 Mini",
|
||||
"iPhone14,5", "iPhone 13",
|
||||
"iPhone14,6", "iPhone SE 3rd Gen",
|
||||
"iPad1,1", "iPad (A1219/A1337)",
|
||||
"iPad2,1", "iPad 2 (A1395)",
|
||||
"iPad2,2", "iPad 2 (A1396)",
|
||||
@@ -345,8 +285,6 @@ static const char* DecodeIosDevice( const char* id )
|
||||
"iPad7,4", "iPad Pro 10.5\" (A1709)",
|
||||
"iPad7,5", "iPad 6th gen (A1893)",
|
||||
"iPad7,6", "iPad 6th gen (A1954)",
|
||||
"iPad7,11", "iPad 7th gen 10.2\" (Wifi)",
|
||||
"iPad7,12", "iPad 7th gen 10.2\" (Wifi+Cellular)",
|
||||
"iPad8,1", "iPad Pro 11\" (A1980)",
|
||||
"iPad8,2", "iPad Pro 11\" (A1980)",
|
||||
"iPad8,3", "iPad Pro 11\" (A1934/A1979/A2013)",
|
||||
@@ -355,28 +293,10 @@ static const char* DecodeIosDevice( const char* id )
|
||||
"iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)",
|
||||
"iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
|
||||
"iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
|
||||
"iPad8,9", "iPad Pro 11\" 2nd gen (Wifi)",
|
||||
"iPad8,10", "iPad Pro 11\" 2nd gen (Wifi+Cellular)",
|
||||
"iPad8,11", "iPad Pro 12.9\" 4th gen (Wifi)",
|
||||
"iPad8,12", "iPad Pro 12.9\" 4th gen (Wifi+Cellular)",
|
||||
"iPad11,1", "iPad Mini 5th gen (A2133)",
|
||||
"iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)",
|
||||
"iPad11,3", "iPad Air 3rd gen (A2152)",
|
||||
"iPad11,4", "iPad Air 3rd gen (A2123/A2153/A2154)",
|
||||
"iPad11,6", "iPad 8th gen (WiFi)",
|
||||
"iPad11,7", "iPad 8th gen (WiFi+Cellular)",
|
||||
"iPad13,1", "iPad Air 4th gen (WiFi)",
|
||||
"iPad13,2", "iPad Air 4th gen (WiFi+Cellular)",
|
||||
"iPad13,4", "iPad Pro 11\" 3rd gen",
|
||||
"iPad13,5", "iPad Pro 11\" 3rd gen",
|
||||
"iPad13,6", "iPad Pro 11\" 3rd gen",
|
||||
"iPad13,7", "iPad Pro 11\" 3rd gen",
|
||||
"iPad13,8", "iPad Pro 12.9\" 5th gen",
|
||||
"iPad13,9", "iPad Pro 12.9\" 5th gen",
|
||||
"iPad13,10", "iPad Pro 12.9\" 5th gen",
|
||||
"iPad13,11", "iPad Pro 12.9\" 5th gen",
|
||||
"iPad13,16", "iPad Air 5th Gen (WiFi)",
|
||||
"iPad13,17", "iPad Air 5th Gen (WiFi+Cellular)",
|
||||
"iPod1,1", "iPod Touch",
|
||||
"iPod2,1", "iPod Touch 2nd gen",
|
||||
"iPod3,1", "iPod Touch 3rd gen",
|
||||
@@ -396,6 +316,4 @@ static const char* DecodeIosDevice( const char* id )
|
||||
return id;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,35 +1,28 @@
|
||||
#ifndef __TRACYCALLSTACK_H__
|
||||
#define __TRACYCALLSTACK_H__
|
||||
|
||||
#ifndef TRACY_NO_CALLSTACK
|
||||
#if !defined _WIN32 && !defined __CYGWIN__
|
||||
# include <sys/param.h>
|
||||
#endif
|
||||
|
||||
# if !defined _WIN32
|
||||
# include <sys/param.h>
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# define TRACY_HAS_CALLSTACK 1
|
||||
#elif defined __ANDROID__
|
||||
# if !defined __arm__ || __ANDROID_API__ >= 21
|
||||
# define TRACY_HAS_CALLSTACK 2
|
||||
# else
|
||||
# define TRACY_HAS_CALLSTACK 5
|
||||
# endif
|
||||
|
||||
# if defined _WIN32
|
||||
# include "../common/TracyUwp.hpp"
|
||||
# ifndef TRACY_UWP
|
||||
# define TRACY_HAS_CALLSTACK 1
|
||||
# endif
|
||||
# elif defined __ANDROID__
|
||||
# if !defined __arm__ || __ANDROID_API__ >= 21
|
||||
# define TRACY_HAS_CALLSTACK 2
|
||||
# else
|
||||
# define TRACY_HAS_CALLSTACK 5
|
||||
# endif
|
||||
# elif defined __linux
|
||||
# if defined _GNU_SOURCE && defined __GLIBC__
|
||||
# define TRACY_HAS_CALLSTACK 3
|
||||
# else
|
||||
# define TRACY_HAS_CALLSTACK 2
|
||||
# endif
|
||||
# elif defined __APPLE__
|
||||
# define TRACY_HAS_CALLSTACK 4
|
||||
# elif defined BSD
|
||||
# define TRACY_HAS_CALLSTACK 6
|
||||
#elif defined __linux
|
||||
# if defined _GNU_SOURCE && defined __GLIBC__
|
||||
# define TRACY_HAS_CALLSTACK 3
|
||||
# else
|
||||
# define TRACY_HAS_CALLSTACK 2
|
||||
# endif
|
||||
|
||||
#elif defined __APPLE__
|
||||
# define TRACY_HAS_CALLSTACK 4
|
||||
#elif defined BSD
|
||||
# define TRACY_HAS_CALLSTACK 6
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
#ifndef __TRACYCALLSTACK_HPP__
|
||||
#define __TRACYCALLSTACK_HPP__
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
#include "TracyCallstack.h"
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
|
||||
#if TRACY_HAS_CALLSTACK == 1
|
||||
extern "C"
|
||||
{
|
||||
typedef unsigned long (__stdcall *t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
extern t_RtlWalkFrameChain RtlWalkFrameChain;
|
||||
}
|
||||
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
|
||||
# include <unwind.h>
|
||||
#elif TRACY_HAS_CALLSTACK >= 3
|
||||
# include <execinfo.h>
|
||||
@@ -13,10 +18,6 @@
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
|
||||
#ifdef TRACY_DEBUGINFOD
|
||||
# include <elfutils/debuginfod.h>
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
@@ -26,57 +27,33 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct CallstackSymbolData
|
||||
{
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
bool needFree;
|
||||
uint64_t symAddr;
|
||||
};
|
||||
|
||||
struct CallstackEntry
|
||||
{
|
||||
const char* name;
|
||||
const char* file;
|
||||
uint32_t line;
|
||||
uint32_t symLen;
|
||||
uint64_t symAddr;
|
||||
};
|
||||
|
||||
struct CallstackEntryData
|
||||
{
|
||||
const CallstackEntry* data;
|
||||
uint8_t size;
|
||||
const char* imageName;
|
||||
};
|
||||
|
||||
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr );
|
||||
CallstackSymbolData DecodeCodeAddress( uint64_t ptr );
|
||||
const char* DecodeCallstackPtrFast( uint64_t ptr );
|
||||
CallstackEntryData DecodeCallstackPtr( uint64_t ptr );
|
||||
void InitCallstack();
|
||||
void EndCallstack();
|
||||
const char* GetKernelModulePath( uint64_t addr );
|
||||
|
||||
#ifdef TRACY_DEBUGINFOD
|
||||
const uint8_t* GetBuildIdForImage( const char* image, size_t& size );
|
||||
debuginfod_client* GetDebuginfodClient();
|
||||
#endif
|
||||
|
||||
#if TRACY_HAS_CALLSTACK == 1
|
||||
|
||||
extern "C"
|
||||
{
|
||||
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
|
||||
TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain;
|
||||
}
|
||||
|
||||
static tracy_force_inline void* Callstack( int depth )
|
||||
{
|
||||
assert( depth >= 1 && depth < 63 );
|
||||
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
|
||||
const auto num = ___tracy_RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 );
|
||||
const auto num = RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 );
|
||||
*trace = num;
|
||||
|
||||
return trace;
|
||||
}
|
||||
|
||||
@@ -119,8 +96,8 @@ static tracy_force_inline void* Callstack( int depth )
|
||||
{
|
||||
assert( depth >= 1 );
|
||||
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + (size_t)depth ) * sizeof( uintptr_t ) );
|
||||
const auto num = (size_t)backtrace( (void**)(trace+1), depth );
|
||||
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
|
||||
const auto num = backtrace( (void**)(trace+1), depth );
|
||||
*trace = num;
|
||||
|
||||
return trace;
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
#ifndef __TRACYPRINT_HPP__
|
||||
#define __TRACYPRINT_HPP__
|
||||
|
||||
#ifdef TRACY_VERBOSE
|
||||
# include <stdio.h>
|
||||
# define TracyDebug(...) fprintf( stderr, __VA_ARGS__ );
|
||||
#else
|
||||
# define TracyDebug(...)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -90,8 +90,56 @@ static const uint16_t DivTable[255*3+1] = {
|
||||
0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
|
||||
0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
|
||||
};
|
||||
|
||||
#if defined __ARM_NEON && defined __aarch64__
|
||||
static const uint16_t DivTableAVX[255*3+1] = {
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x38e3, 0x35e5, 0x3333, 0x30c3, 0x2e8b, 0x2c85, 0x2aaa, 0x28f5, 0x2762, 0x25ed, 0x2492, 0x234f, 0x2222, 0x2108, 0x2000,
|
||||
0x1f07, 0x1e1e, 0x1d41, 0x1c71, 0x1bac, 0x1af2, 0x1a41, 0x1999, 0x18f9, 0x1861, 0x17d0, 0x1745, 0x16c1, 0x1642, 0x15c9, 0x1555,
|
||||
0x14e5, 0x147a, 0x1414, 0x13b1, 0x1352, 0x12f6, 0x129e, 0x1249, 0x11f7, 0x11a7, 0x115b, 0x1111, 0x10c9, 0x1084, 0x1041, 0x1000,
|
||||
0x0fc0, 0x0f83, 0x0f48, 0x0f0f, 0x0ed7, 0x0ea0, 0x0e6c, 0x0e38, 0x0e07, 0x0dd6, 0x0da7, 0x0d79, 0x0d4c, 0x0d20, 0x0cf6, 0x0ccc,
|
||||
0x0ca4, 0x0c7c, 0x0c56, 0x0c30, 0x0c0c, 0x0be8, 0x0bc5, 0x0ba2, 0x0b81, 0x0b60, 0x0b40, 0x0b21, 0x0b02, 0x0ae4, 0x0ac7, 0x0aaa,
|
||||
0x0a8e, 0x0a72, 0x0a57, 0x0a3d, 0x0a23, 0x0a0a, 0x09f1, 0x09d8, 0x09c0, 0x09a9, 0x0991, 0x097b, 0x0964, 0x094f, 0x0939, 0x0924,
|
||||
0x090f, 0x08fb, 0x08e7, 0x08d3, 0x08c0, 0x08ad, 0x089a, 0x0888, 0x0876, 0x0864, 0x0853, 0x0842, 0x0831, 0x0820, 0x0810, 0x0800,
|
||||
0x07f0, 0x07e0, 0x07d1, 0x07c1, 0x07b3, 0x07a4, 0x0795, 0x0787, 0x0779, 0x076b, 0x075d, 0x0750, 0x0743, 0x0736, 0x0729, 0x071c,
|
||||
0x070f, 0x0703, 0x06f7, 0x06eb, 0x06df, 0x06d3, 0x06c8, 0x06bc, 0x06b1, 0x06a6, 0x069b, 0x0690, 0x0685, 0x067b, 0x0670, 0x0666,
|
||||
0x065c, 0x0652, 0x0648, 0x063e, 0x0634, 0x062b, 0x0621, 0x0618, 0x060f, 0x0606, 0x05fd, 0x05f4, 0x05eb, 0x05e2, 0x05d9, 0x05d1,
|
||||
0x05c9, 0x05c0, 0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0581, 0x0579, 0x0572, 0x056b, 0x0563, 0x055c, 0x0555,
|
||||
0x054e, 0x0547, 0x0540, 0x0539, 0x0532, 0x052b, 0x0525, 0x051e, 0x0518, 0x0511, 0x050b, 0x0505, 0x04fe, 0x04f8, 0x04f2, 0x04ec,
|
||||
0x04e6, 0x04e0, 0x04da, 0x04d4, 0x04ce, 0x04c8, 0x04c3, 0x04bd, 0x04b8, 0x04b2, 0x04ad, 0x04a7, 0x04a2, 0x049c, 0x0497, 0x0492,
|
||||
0x048d, 0x0487, 0x0482, 0x047d, 0x0478, 0x0473, 0x046e, 0x0469, 0x0465, 0x0460, 0x045b, 0x0456, 0x0452, 0x044d, 0x0448, 0x0444,
|
||||
0x043f, 0x043b, 0x0436, 0x0432, 0x042d, 0x0429, 0x0425, 0x0421, 0x041c, 0x0418, 0x0414, 0x0410, 0x040c, 0x0408, 0x0404, 0x0400,
|
||||
0x03fc, 0x03f8, 0x03f4, 0x03f0, 0x03ec, 0x03e8, 0x03e4, 0x03e0, 0x03dd, 0x03d9, 0x03d5, 0x03d2, 0x03ce, 0x03ca, 0x03c7, 0x03c3,
|
||||
0x03c0, 0x03bc, 0x03b9, 0x03b5, 0x03b2, 0x03ae, 0x03ab, 0x03a8, 0x03a4, 0x03a1, 0x039e, 0x039b, 0x0397, 0x0394, 0x0391, 0x038e,
|
||||
0x038b, 0x0387, 0x0384, 0x0381, 0x037e, 0x037b, 0x0378, 0x0375, 0x0372, 0x036f, 0x036c, 0x0369, 0x0366, 0x0364, 0x0361, 0x035e,
|
||||
0x035b, 0x0358, 0x0355, 0x0353, 0x0350, 0x034d, 0x034a, 0x0348, 0x0345, 0x0342, 0x0340, 0x033d, 0x033a, 0x0338, 0x0335, 0x0333,
|
||||
0x0330, 0x032e, 0x032b, 0x0329, 0x0326, 0x0324, 0x0321, 0x031f, 0x031c, 0x031a, 0x0317, 0x0315, 0x0313, 0x0310, 0x030e, 0x030c,
|
||||
0x0309, 0x0307, 0x0305, 0x0303, 0x0300, 0x02fe, 0x02fc, 0x02fa, 0x02f7, 0x02f5, 0x02f3, 0x02f1, 0x02ef, 0x02ec, 0x02ea, 0x02e8,
|
||||
0x02e6, 0x02e4, 0x02e2, 0x02e0, 0x02de, 0x02dc, 0x02da, 0x02d8, 0x02d6, 0x02d4, 0x02d2, 0x02d0, 0x02ce, 0x02cc, 0x02ca, 0x02c8,
|
||||
0x02c6, 0x02c4, 0x02c2, 0x02c0, 0x02be, 0x02bc, 0x02bb, 0x02b9, 0x02b7, 0x02b5, 0x02b3, 0x02b1, 0x02b0, 0x02ae, 0x02ac, 0x02aa,
|
||||
0x02a8, 0x02a7, 0x02a5, 0x02a3, 0x02a1, 0x02a0, 0x029e, 0x029c, 0x029b, 0x0299, 0x0297, 0x0295, 0x0294, 0x0292, 0x0291, 0x028f,
|
||||
0x028d, 0x028c, 0x028a, 0x0288, 0x0287, 0x0285, 0x0284, 0x0282, 0x0280, 0x027f, 0x027d, 0x027c, 0x027a, 0x0279, 0x0277, 0x0276,
|
||||
0x0274, 0x0273, 0x0271, 0x0270, 0x026e, 0x026d, 0x026b, 0x026a, 0x0268, 0x0267, 0x0265, 0x0264, 0x0263, 0x0261, 0x0260, 0x025e,
|
||||
0x025d, 0x025c, 0x025a, 0x0259, 0x0257, 0x0256, 0x0255, 0x0253, 0x0252, 0x0251, 0x024f, 0x024e, 0x024d, 0x024b, 0x024a, 0x0249,
|
||||
0x0247, 0x0246, 0x0245, 0x0243, 0x0242, 0x0241, 0x0240, 0x023e, 0x023d, 0x023c, 0x023b, 0x0239, 0x0238, 0x0237, 0x0236, 0x0234,
|
||||
0x0233, 0x0232, 0x0231, 0x0230, 0x022e, 0x022d, 0x022c, 0x022b, 0x022a, 0x0229, 0x0227, 0x0226, 0x0225, 0x0224, 0x0223, 0x0222,
|
||||
0x0220, 0x021f, 0x021e, 0x021d, 0x021c, 0x021b, 0x021a, 0x0219, 0x0218, 0x0216, 0x0215, 0x0214, 0x0213, 0x0212, 0x0211, 0x0210,
|
||||
0x020f, 0x020e, 0x020d, 0x020c, 0x020b, 0x020a, 0x0209, 0x0208, 0x0207, 0x0206, 0x0205, 0x0204, 0x0203, 0x0202, 0x0201, 0x0200,
|
||||
0x01ff, 0x01fe, 0x01fd, 0x01fc, 0x01fb, 0x01fa, 0x01f9, 0x01f8, 0x01f7, 0x01f6, 0x01f5, 0x01f4, 0x01f3, 0x01f2, 0x01f1, 0x01f0,
|
||||
0x01ef, 0x01ee, 0x01ed, 0x01ec, 0x01eb, 0x01ea, 0x01e9, 0x01e9, 0x01e8, 0x01e7, 0x01e6, 0x01e5, 0x01e4, 0x01e3, 0x01e2, 0x01e1,
|
||||
0x01e0, 0x01e0, 0x01df, 0x01de, 0x01dd, 0x01dc, 0x01db, 0x01da, 0x01da, 0x01d9, 0x01d8, 0x01d7, 0x01d6, 0x01d5, 0x01d4, 0x01d4,
|
||||
0x01d3, 0x01d2, 0x01d1, 0x01d0, 0x01cf, 0x01cf, 0x01ce, 0x01cd, 0x01cc, 0x01cb, 0x01cb, 0x01ca, 0x01c9, 0x01c8, 0x01c7, 0x01c7,
|
||||
0x01c6, 0x01c5, 0x01c4, 0x01c3, 0x01c3, 0x01c2, 0x01c1, 0x01c0, 0x01c0, 0x01bf, 0x01be, 0x01bd, 0x01bd, 0x01bc, 0x01bb, 0x01ba,
|
||||
0x01ba, 0x01b9, 0x01b8, 0x01b7, 0x01b7, 0x01b6, 0x01b5, 0x01b4, 0x01b4, 0x01b3, 0x01b2, 0x01b2, 0x01b1, 0x01b0, 0x01af, 0x01af,
|
||||
0x01ae, 0x01ad, 0x01ad, 0x01ac, 0x01ab, 0x01aa, 0x01aa, 0x01a9, 0x01a8, 0x01a8, 0x01a7, 0x01a6, 0x01a6, 0x01a5, 0x01a4, 0x01a4,
|
||||
0x01a3, 0x01a2, 0x01a2, 0x01a1, 0x01a0, 0x01a0, 0x019f, 0x019e, 0x019e, 0x019d, 0x019c, 0x019c, 0x019b, 0x019a, 0x019a, 0x0199,
|
||||
0x0198, 0x0198, 0x0197, 0x0197, 0x0196, 0x0195, 0x0195, 0x0194, 0x0193, 0x0193, 0x0192, 0x0192, 0x0191, 0x0190, 0x0190, 0x018f,
|
||||
0x018f, 0x018e, 0x018d, 0x018d, 0x018c, 0x018b, 0x018b, 0x018a, 0x018a, 0x0189, 0x0189, 0x0188, 0x0187, 0x0187, 0x0186, 0x0186,
|
||||
0x0185, 0x0184, 0x0184, 0x0183, 0x0183, 0x0182, 0x0182, 0x0181, 0x0180, 0x0180, 0x017f, 0x017f, 0x017e, 0x017e, 0x017d, 0x017d,
|
||||
0x017c, 0x017b, 0x017b, 0x017a, 0x017a, 0x0179, 0x0179, 0x0178, 0x0178, 0x0177, 0x0177, 0x0176, 0x0175, 0x0175, 0x0174, 0x0174,
|
||||
0x0173, 0x0173, 0x0172, 0x0172, 0x0171, 0x0171, 0x0170, 0x0170, 0x016f, 0x016f, 0x016e, 0x016e, 0x016d, 0x016d, 0x016c, 0x016c,
|
||||
0x016b, 0x016b, 0x016a, 0x016a, 0x0169, 0x0169, 0x0168, 0x0168, 0x0167, 0x0167, 0x0166, 0x0166, 0x0165, 0x0165, 0x0164, 0x0164,
|
||||
0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
|
||||
0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
|
||||
};
|
||||
static const uint16_t DivTableNEON[255*3+1] = {
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x1c71, 0x1af2, 0x1999, 0x1861, 0x1745, 0x1642, 0x1555, 0x147a, 0x13b1, 0x12f6, 0x1249, 0x11a7, 0x1111, 0x1084, 0x1000,
|
||||
@@ -142,7 +190,6 @@ static const uint16_t DivTableNEON[255*3+1] = {
|
||||
0x00b1, 0x00b1, 0x00b1, 0x00b1, 0x00b0, 0x00b0, 0x00b0, 0x00b0, 0x00af, 0x00af, 0x00af, 0x00af, 0x00ae, 0x00ae, 0x00ae, 0x00ae,
|
||||
0x00ae, 0x00ad, 0x00ad, 0x00ad, 0x00ad, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ab, 0x00ab, 0x00ab, 0x00ab,
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
|
||||
@@ -175,12 +222,6 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
|
||||
return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
|
||||
}
|
||||
|
||||
__m128i amask = _mm_set1_epi32( 0xFFFFFF );
|
||||
px0 = _mm_and_si128( px0, amask );
|
||||
px1 = _mm_and_si128( px1, amask );
|
||||
px2 = _mm_and_si128( px2, amask );
|
||||
px3 = _mm_and_si128( px3, amask );
|
||||
|
||||
__m128i min0 = _mm_min_epu8( px0, px1 );
|
||||
__m128i min1 = _mm_min_epu8( px2, px3 );
|
||||
__m128i min2 = _mm_min_epu8( min0, min1 );
|
||||
@@ -412,20 +453,19 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
|
||||
return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
|
||||
# endif
|
||||
#else
|
||||
uint32_t ref;
|
||||
memcpy( &ref, src, 4 );
|
||||
uint32_t refMask = ref & 0xF8FCF8;
|
||||
const auto ref = to565( src[0], src[1], src[2] );
|
||||
auto stmp = src + 4;
|
||||
for( int i=1; i<16; i++ )
|
||||
{
|
||||
uint32_t px;
|
||||
memcpy( &px, stmp, 4 );
|
||||
if( ( px & 0xF8FCF8 ) != refMask ) break;
|
||||
if( to565( stmp[0], stmp[1], stmp[2] ) != ref )
|
||||
{
|
||||
break;
|
||||
}
|
||||
stmp += 4;
|
||||
}
|
||||
if( stmp == src + 64 )
|
||||
{
|
||||
return uint64_t( to565( ref ) ) << 16;
|
||||
return uint64_t( ref ) << 16;
|
||||
}
|
||||
|
||||
uint8_t min[3] = { src[0], src[1], src[2] };
|
||||
@@ -471,42 +511,6 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
|
||||
__m256i px2 = _mm256_loadu_si256(((__m256i*)src) + 2);
|
||||
__m256i px3 = _mm256_loadu_si256(((__m256i*)src) + 3);
|
||||
|
||||
__m256i smask = _mm256_set1_epi32( 0xF8FCF8 );
|
||||
__m256i sd0 = _mm256_and_si256( px0, smask );
|
||||
__m256i sd1 = _mm256_and_si256( px1, smask );
|
||||
__m256i sd2 = _mm256_and_si256( px2, smask );
|
||||
__m256i sd3 = _mm256_and_si256( px3, smask );
|
||||
|
||||
__m256i sc = _mm256_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
__m256i sc0 = _mm256_cmpeq_epi8( sd0, sc );
|
||||
__m256i sc1 = _mm256_cmpeq_epi8( sd1, sc );
|
||||
__m256i sc2 = _mm256_cmpeq_epi8( sd2, sc );
|
||||
__m256i sc3 = _mm256_cmpeq_epi8( sd3, sc );
|
||||
|
||||
__m256i sm0 = _mm256_and_si256( sc0, sc1 );
|
||||
__m256i sm1 = _mm256_and_si256( sc2, sc3 );
|
||||
__m256i sm = _mm256_and_si256( sm0, sm1 );
|
||||
|
||||
const int64_t solid0 = 1 - _mm_testc_si128( _mm256_castsi256_si128( sm ), _mm_set1_epi32( -1 ) );
|
||||
const int64_t solid1 = 1 - _mm_testc_si128( _mm256_extracti128_si256( sm, 1 ), _mm_set1_epi32( -1 ) );
|
||||
|
||||
if( solid0 + solid1 == 0 )
|
||||
{
|
||||
const auto c0 = uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
|
||||
const auto c1 = uint64_t( to565( src[16], src[17], src[18] ) ) << 16;
|
||||
memcpy( dst, &c0, 8 );
|
||||
memcpy( dst+8, &c1, 8 );
|
||||
dst += 16;
|
||||
return;
|
||||
}
|
||||
|
||||
__m256i amask = _mm256_set1_epi32( 0xFFFFFF );
|
||||
px0 = _mm256_and_si256( px0, amask );
|
||||
px1 = _mm256_and_si256( px1, amask );
|
||||
px2 = _mm256_and_si256( px2, amask );
|
||||
px3 = _mm256_and_si256( px3, amask );
|
||||
|
||||
__m256i min0 = _mm256_min_epu8( px0, px1 );
|
||||
__m256i min1 = _mm256_min_epu8( px2, px3 );
|
||||
__m256i min2 = _mm256_min_epu8( min0, min1 );
|
||||
@@ -528,8 +532,8 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
|
||||
__m256i range1 = _mm256_subs_epu8( rmax, rmin );
|
||||
__m256i range2 = _mm256_sad_epu8( rmax, rmin );
|
||||
|
||||
uint16_t vrange0 = DivTable[_mm256_cvtsi256_si32( range2 ) >> 1];
|
||||
uint16_t vrange1 = DivTable[_mm256_extract_epi16( range2, 8 ) >> 1];
|
||||
uint16_t vrange0 = DivTableAVX[_mm256_cvtsi256_si32( range2 ) >> 1];
|
||||
uint16_t vrange1 = DivTableAVX[_mm256_extract_epi16( range2, 8 ) >> 1];
|
||||
__m256i range00 = _mm256_set1_epi16( vrange0 );
|
||||
__m256i range = _mm256_inserti128_si256( range00, _mm_set1_epi16( vrange1 ), 1 );
|
||||
|
||||
@@ -573,11 +577,7 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
|
||||
|
||||
__m256i d0 = _mm256_unpacklo_epi32( mm5, p );
|
||||
__m256i d1 = _mm256_permute4x64_epi64( d0, _MM_SHUFFLE( 3, 2, 2, 0 ) );
|
||||
__m128i d2 = _mm256_castsi256_si128( d1 );
|
||||
|
||||
__m128i mask = _mm_set_epi64x( 0xFFFF0000 | -solid1, 0xFFFF0000 | -solid0 );
|
||||
__m128i d3 = _mm_and_si128( d2, mask );
|
||||
_mm_storeu_si128( (__m128i*)dst, d3 );
|
||||
_mm_storeu_si128( (__m128i*)dst, _mm256_castsi256_si128( d1 ) );
|
||||
dst += 16;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#ifndef __TRACYFASTVECTOR_HPP__
|
||||
#define __TRACYFASTVECTOR_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
@@ -22,7 +21,6 @@ public:
|
||||
, m_write( m_ptr )
|
||||
, m_end( m_ptr + capacity )
|
||||
{
|
||||
assert( capacity != 0 );
|
||||
}
|
||||
|
||||
FastVector( const FastVector& ) = delete;
|
||||
@@ -98,11 +96,11 @@ public:
|
||||
private:
|
||||
tracy_no_inline void AllocMore()
|
||||
{
|
||||
const auto cap = size_t( m_end - m_ptr ) * 2;
|
||||
const auto size = size_t( m_write - m_ptr );
|
||||
const auto cap = ( m_end - m_ptr ) * 2;
|
||||
const auto size = m_write - m_ptr;
|
||||
T* ptr = (T*)tracy_malloc( sizeof( T ) * cap );
|
||||
memcpy( ptr, m_ptr, size * sizeof( T ) );
|
||||
tracy_free_fast( m_ptr );
|
||||
tracy_free( m_ptr );
|
||||
m_ptr = ptr;
|
||||
m_write = m_ptr + size;
|
||||
m_end = m_ptr + cap;
|
||||
|
||||
20
client/TracyLfq.cpp
Normal file
20
client/TracyLfq.cpp
Normal file
@@ -0,0 +1,20 @@
|
||||
#include "TracyLfq.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Slow path of PrepareNext(): switches this producer to a fresh block and hands
// the block it was writing to over to the queue.
// Returns the new block; the caller reads its write cursor from blk->tail.
LfqBlock* LfqProducerImpl::NextBlock()
{
    LfqBlock* const fresh = m_queue->GetFreeBlock();
    assert( fresh );
    assert( fresh->next.load( std::memory_order_relaxed ) == nullptr );

    // Tag the block with the owning thread and retarget the thread-local
    // fast-path state (end of storage, write cursor) at it.
    fresh->thread = m_thread;
    lfq.dataEnd = fresh->dataEnd;
    lfq.tail = &fresh->tail;

    // Publish the new block first, then append the previous one to the
    // queue's list of released blocks.
    LfqBlock* const retired = m_block.load( std::memory_order_relaxed );
    m_block.store( fresh, std::memory_order_release );
    m_queue->ReleaseBlock( retired );

    return fresh;
}
|
||||
|
||||
}
|
||||
482
client/TracyLfq.hpp
Normal file
482
client/TracyLfq.hpp
Normal file
@@ -0,0 +1,482 @@
|
||||
#ifndef __TRACYLFQ_HPP__
|
||||
#define __TRACYLFQ_HPP__
|
||||
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <thread>
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
#include "../common/TracyQueue.hpp"
|
||||
#include "../common/TracySystem.hpp"
|
||||
#include "../common/TracyYield.hpp"
|
||||
|
||||
|
||||
#define TracyLfqPrepare( type ) \
|
||||
char* __nextPtr; \
|
||||
QueueItem* item; \
|
||||
auto& __tail = LfqProducer::PrepareNext( item, __nextPtr, type );
|
||||
|
||||
#define TracyLfqCommit \
|
||||
LfqProducer::CommitNext( __tail, __nextPtr );
|
||||
|
||||
#define TracyLfqPrepareC( type ) \
|
||||
char* nextPtr; \
|
||||
tracy::QueueItem* item; \
|
||||
auto& tail = tracy::LfqProducer::PrepareNext( item, nextPtr, type );
|
||||
|
||||
#define TracyLfqCommitC \
|
||||
tracy::LfqProducer::CommitNext( tail, nextPtr );
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
|
||||
class LockFreeQueue;
|
||||
class LfqProducer;
|
||||
|
||||
TRACY_API LfqProducer& GetProducer();
|
||||
|
||||
|
||||
class LfqBlock
|
||||
{
|
||||
public:
|
||||
enum { BlockSize = 64*1024 };
|
||||
|
||||
tracy_force_inline LfqBlock()
|
||||
: head( nullptr )
|
||||
, tail( nullptr )
|
||||
, next( nullptr )
|
||||
, thread( 0 )
|
||||
{
|
||||
dataEnd = data + BlockSize;
|
||||
head.store( data, std::memory_order_relaxed );
|
||||
tail.store( data, std::memory_order_release );
|
||||
}
|
||||
|
||||
tracy_force_inline void Reset()
|
||||
{
|
||||
head.store( data, std::memory_order_relaxed );
|
||||
tail.store( data, std::memory_order_release );
|
||||
}
|
||||
|
||||
LfqBlock( const LfqBlock& ) = delete;
|
||||
LfqBlock( LfqBlock&& ) = delete;
|
||||
|
||||
LfqBlock& operator=( const LfqBlock& ) = delete;
|
||||
LfqBlock& operator=( LfqBlock&& ) = delete;
|
||||
|
||||
alignas(64) std::atomic<char*> head;
|
||||
alignas(64) std::atomic<char*> tail;
|
||||
alignas(64) std::atomic<LfqBlock*> next;
|
||||
alignas(64) const char* dataEnd;
|
||||
uint64_t thread;
|
||||
char data[BlockSize];
|
||||
};
|
||||
|
||||
|
||||
// Cached description of the block a thread is currently writing to:
// where the block's storage ends and where its write cursor lives.
struct LfqData
{
    const char* dataEnd;        // one past the last usable byte of the current block
    std::atomic<char*>* tail;   // write cursor of the current block
};
|
||||
|
||||
extern thread_local LfqData lfq;
|
||||
|
||||
|
||||
class LfqProducerImpl
|
||||
{
|
||||
public:
|
||||
tracy_force_inline LfqProducerImpl( LockFreeQueue* queue )
|
||||
: m_block( nullptr )
|
||||
, m_active( false )
|
||||
, m_available( true )
|
||||
, m_queue( queue )
|
||||
{
|
||||
assert( m_queue );
|
||||
}
|
||||
|
||||
tracy_force_inline void PrepareThread();
|
||||
tracy_force_inline void CleanupThread();
|
||||
|
||||
tracy_force_inline std::atomic<char*>& PrepareNext( char*& ptr, char*& nextPtr, size_t sz )
|
||||
{
|
||||
auto blk = NextBlock();
|
||||
auto& tail = blk->tail;
|
||||
ptr = tail.load( std::memory_order_relaxed );
|
||||
nextPtr = ptr + sz;
|
||||
return tail;
|
||||
}
|
||||
|
||||
tracy_no_inline LfqBlock* NextBlock();
|
||||
|
||||
inline void FlushDataImpl();
|
||||
|
||||
alignas(64) std::atomic<LfqProducerImpl*> m_next;
|
||||
alignas(64) std::atomic<bool> m_active;
|
||||
alignas(64) std::atomic<bool> m_available;
|
||||
alignas(64) std::atomic<LfqBlock*> m_block;
|
||||
|
||||
|
||||
LfqProducerImpl( const LfqProducerImpl& ) = delete;
|
||||
LfqProducerImpl( LfqProducerImpl&& ) = delete;
|
||||
|
||||
LfqProducerImpl& operator=( const LfqProducerImpl& ) = delete;
|
||||
LfqProducerImpl& operator=( LfqProducerImpl&& ) = delete;
|
||||
|
||||
private:
|
||||
uint64_t m_thread;
|
||||
LockFreeQueue* m_queue;
|
||||
};
|
||||
|
||||
|
||||
class LfqProducer
|
||||
{
|
||||
public:
|
||||
inline LfqProducer( LockFreeQueue& queue );
|
||||
inline ~LfqProducer();
|
||||
|
||||
inline LfqProducer& operator=( LfqProducer&& ) noexcept;
|
||||
|
||||
static tracy_force_inline std::atomic<char*>& PrepareNext( QueueItem*& item, char*& nextPtr, QueueType type )
|
||||
{
|
||||
char* ptr;
|
||||
auto& ret = PrepareNext( ptr, nextPtr, QueueDataSize[(uint8_t)type] );
|
||||
item = (QueueItem*)ptr;
|
||||
MemWrite( &item->hdr.type, type );
|
||||
return ret;
|
||||
}
|
||||
|
||||
static tracy_force_inline std::atomic<char*>& PrepareNext( char*& ptr, char*& nextPtr, size_t sz )
|
||||
{
|
||||
auto& tail = *lfq.tail;
|
||||
ptr = tail.load( std::memory_order_relaxed );
|
||||
auto np = ptr + sz;
|
||||
if( np <= lfq.dataEnd )
|
||||
{
|
||||
nextPtr = np;
|
||||
return tail;
|
||||
}
|
||||
else
|
||||
{
|
||||
return GetProducer().m_prod->PrepareNext( ptr, nextPtr, sz );
|
||||
}
|
||||
}
|
||||
|
||||
static tracy_force_inline void CommitNext( std::atomic<char*>& tail, char* nextPtr )
|
||||
{
|
||||
tail.store( nextPtr, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void FlushData()
|
||||
{
|
||||
GetProducer().m_prod->FlushDataImpl();
|
||||
}
|
||||
|
||||
|
||||
LfqProducer( const LfqProducer& ) = delete;
|
||||
LfqProducer( LfqProducer&& ) = delete;
|
||||
|
||||
LfqProducer& operator=( const LfqProducer& ) = delete;
|
||||
|
||||
private:
|
||||
LfqProducerImpl* m_prod;
|
||||
LockFreeQueue* m_queue;
|
||||
};
|
||||
|
||||
|
||||
class LockFreeQueue
|
||||
{
|
||||
public:
|
||||
LockFreeQueue()
|
||||
: m_freeBlocks( nullptr )
|
||||
, m_blocksHead( nullptr )
|
||||
, m_blocksTail( nullptr )
|
||||
, m_producers( nullptr )
|
||||
, m_currentProducer( nullptr )
|
||||
{
|
||||
const auto numCpus = std::thread::hardware_concurrency();
|
||||
|
||||
LfqBlock* prev = nullptr;
|
||||
for( unsigned int i=0; i<numCpus; i++ )
|
||||
{
|
||||
auto blk = AllocNewBlock();
|
||||
blk->next.store( prev, std::memory_order_relaxed );
|
||||
prev = blk;
|
||||
}
|
||||
m_freeBlocks.store( prev, std::memory_order_release );
|
||||
|
||||
LfqProducerImpl* prevProd = nullptr;
|
||||
for( unsigned int i=0; i<numCpus; i++ )
|
||||
{
|
||||
auto prod = AllocNewProducer();
|
||||
prod->m_next.store( prevProd, std::memory_order_relaxed );
|
||||
prevProd = prod;
|
||||
}
|
||||
m_producers.store( prevProd, std::memory_order_release );
|
||||
}
|
||||
|
||||
// Don't free anything, application is shutting down anyway
|
||||
~LockFreeQueue()
|
||||
{
|
||||
}
|
||||
|
||||
LfqBlock* GetFreeBlock()
|
||||
{
|
||||
LfqBlock* ptr = m_freeBlocks.load( std::memory_order_acquire );
|
||||
for(;;)
|
||||
{
|
||||
if( !ptr ) return AllocNewBlock();
|
||||
auto next = ptr->next.load( std::memory_order_acquire );
|
||||
if( m_freeBlocks.compare_exchange_strong( ptr, next, std::memory_order_release, std::memory_order_relaxed ) )
|
||||
{
|
||||
ptr->next.store( nullptr, std::memory_order_relaxed );
|
||||
ptr->Reset();
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ReleaseBlock( LfqBlock* blk )
|
||||
{
|
||||
assert( blk );
|
||||
assert( blk->next.load( std::memory_order_relaxed ) == nullptr );
|
||||
auto tail = m_blocksTail.load( std::memory_order_acquire );
|
||||
for(;;)
|
||||
{
|
||||
if( !tail )
|
||||
{
|
||||
auto head = m_blocksHead.load( std::memory_order_acquire );
|
||||
if( !head )
|
||||
{
|
||||
if( m_blocksHead.compare_exchange_strong( head, blk, std::memory_order_release, std::memory_order_relaxed ) )
|
||||
{
|
||||
assert( m_blocksTail.load( std::memory_order_relaxed ) == nullptr );
|
||||
m_blocksTail.store( blk, std::memory_order_release );
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto next = tail->next.load( std::memory_order_acquire );
|
||||
if( !next )
|
||||
{
|
||||
if( tail->next.compare_exchange_strong( next, blk, std::memory_order_release, std::memory_order_relaxed ) )
|
||||
{
|
||||
m_blocksTail.store( blk, std::memory_order_release );
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FreeBlock( LfqBlock* blk )
|
||||
{
|
||||
assert( blk );
|
||||
auto head = m_freeBlocks.load( std::memory_order_relaxed );
|
||||
blk->next.store( head, std::memory_order_relaxed );
|
||||
while( !m_freeBlocks.compare_exchange_weak( head, blk, std::memory_order_release, std::memory_order_relaxed ) ) { blk->next.store( head, std::memory_order_relaxed ); YieldThread(); }
|
||||
}
|
||||
|
||||
LfqProducerImpl* GetIdleProducer()
|
||||
{
|
||||
LfqProducerImpl* prod = m_producers.load( std::memory_order_acquire );
|
||||
assert( prod );
|
||||
for(;;)
|
||||
{
|
||||
bool available = prod->m_available.load( std::memory_order_acquire );
|
||||
if( available )
|
||||
{
|
||||
if( prod->m_available.compare_exchange_strong( available, false, std::memory_order_release, std::memory_order_relaxed ) ) return prod;
|
||||
}
|
||||
prod = prod->m_next.load( std::memory_order_acquire );
|
||||
if( !prod )
|
||||
{
|
||||
prod = AllocNewProducer();
|
||||
prod->m_available.store( false, std::memory_order_release );
|
||||
auto head = m_producers.load( std::memory_order_relaxed );
|
||||
prod->m_next.store( head, std::memory_order_relaxed );
|
||||
while( !m_producers.compare_exchange_weak( head, prod, std::memory_order_release, std::memory_order_relaxed ) ) { prod->m_next.store( head, std::memory_order_relaxed ); YieldThread(); }
|
||||
return prod;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ReleaseProducer( LfqProducerImpl* prod )
|
||||
{
|
||||
assert( prod->m_available.load( std::memory_order_relaxed ) == false );
|
||||
prod->m_available.store( true, std::memory_order_release );
|
||||
}
|
||||
|
||||
size_t Dequeue( char* ptr, size_t sz, uint64_t& thread )
|
||||
{
|
||||
{
|
||||
auto blk = m_blocksHead.load( std::memory_order_acquire );
|
||||
if( blk != nullptr )
|
||||
{
|
||||
auto next = blk->next.load( std::memory_order_acquire );
|
||||
if( m_blocksHead.compare_exchange_strong( blk, next, std::memory_order_release, std::memory_order_relaxed ) )
|
||||
{
|
||||
if( next == nullptr )
|
||||
{
|
||||
m_blocksTail.store( nullptr, std::memory_order_release );
|
||||
}
|
||||
auto head = blk->head.load( std::memory_order_relaxed );
|
||||
auto tail = blk->tail.load( std::memory_order_acquire );
|
||||
const auto datasz = tail - head;
|
||||
if( datasz > 0 )
|
||||
{
|
||||
thread = blk->thread;
|
||||
memcpy( ptr, head, datasz );
|
||||
FreeBlock( blk );
|
||||
return datasz;
|
||||
}
|
||||
FreeBlock( blk );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
LfqBlock* blk = nullptr;
|
||||
char* head;
|
||||
char* tail;
|
||||
auto prod = m_currentProducer;
|
||||
if( !prod ) prod = m_producers.load( std::memory_order_acquire );
|
||||
while( prod )
|
||||
{
|
||||
if( prod->m_active.load( std::memory_order_acquire ) == true )
|
||||
{
|
||||
blk = prod->m_block.load( std::memory_order_acquire );
|
||||
head = blk->head.load( std::memory_order_relaxed );
|
||||
tail = blk->tail.load( std::memory_order_acquire );
|
||||
if( tail - head != 0 )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
prod = prod->m_next.load( std::memory_order_acquire );
|
||||
}
|
||||
m_currentProducer = prod;
|
||||
|
||||
if( prod )
|
||||
{
|
||||
const auto datasz = tail - head;
|
||||
assert( datasz != 0 );
|
||||
thread = blk->thread;
|
||||
memcpy( ptr, head, datasz );
|
||||
blk->head.store( tail, std::memory_order_release );
|
||||
return datasz;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
LockFreeQueue( const LockFreeQueue& ) = delete;
|
||||
LockFreeQueue( LockFreeQueue&& ) = delete;
|
||||
|
||||
LockFreeQueue& operator=( const LockFreeQueue& ) = delete;
|
||||
LockFreeQueue& operator=( LockFreeQueue&& ) = delete;
|
||||
|
||||
private:
|
||||
LfqBlock* AllocNewBlock()
|
||||
{
|
||||
auto blk = (LfqBlock*)tracy_malloc( sizeof( LfqBlock ) );
|
||||
new(blk) LfqBlock();
|
||||
return blk;
|
||||
}
|
||||
|
||||
LfqProducerImpl* AllocNewProducer()
|
||||
{
|
||||
auto prod = (LfqProducerImpl*)tracy_malloc( sizeof( LfqProducerImpl ) );
|
||||
new(prod) LfqProducerImpl( this );
|
||||
return prod;
|
||||
}
|
||||
|
||||
alignas(64) std::atomic<LfqBlock*> m_freeBlocks;
|
||||
alignas(64) std::atomic<LfqBlock*> m_blocksHead;
|
||||
alignas(64) std::atomic<LfqBlock*> m_blocksTail;
|
||||
alignas(64) std::atomic<LfqProducerImpl*> m_producers;
|
||||
alignas(64) LfqProducerImpl* m_currentProducer;
|
||||
};
|
||||
|
||||
|
||||
// Claims a producer from `queue`, prepares it for the calling thread and only
// then flags it as active.
inline LfqProducer::LfqProducer( LockFreeQueue& queue )
    : m_prod( queue.GetIdleProducer() )
    , m_queue( &queue )
{
    assert( m_queue );

    // The producer gets its block before it is flagged as active.
    m_prod->PrepareThread();

    auto& active = m_prod->m_active;
    assert( active.load( std::memory_order_relaxed ) == false );
    active.store( true, std::memory_order_release );
}
|
||||
|
||||
// Deactivates the attached producer, returns its block to the queue and makes
// the producer available again. A moved-from handle has nothing to release.
inline LfqProducer::~LfqProducer()
{
    if( !m_prod ) return;

    auto& active = m_prod->m_active;
    assert( active.load( std::memory_order_relaxed ) == true );
    active.store( false, std::memory_order_release );

    m_prod->CleanupThread();
    m_queue->ReleaseProducer( m_prod );
}
|
||||
|
||||
// Move assignment. Takes over the producer held by `other`.
//
// The state is exchanged rather than overwritten: the producer this handle
// held before ends up in `other` and is released by other's destructor.
// Overwriting m_prod directly left that producer active and unavailable for
// the rest of the program, and assigning a handle to itself turned it into an
// empty handle.
inline LfqProducer& LfqProducer::operator=( LfqProducer&& other ) noexcept
{
    LfqProducerImpl* const prevProd = m_prod;
    LockFreeQueue* const prevQueue = m_queue;

    m_prod = other.m_prod;
    m_queue = other.m_queue;

    other.m_prod = prevProd;
    other.m_queue = prevQueue;

    return *this;
}
|
||||
|
||||
|
||||
// Binds the producer to the calling thread: records the thread handle, fetches
// a block, points the thread-local fast-path state at it and publishes the
// block as the one being written.
tracy_force_inline void LfqProducerImpl::PrepareThread()
{
    m_thread = detail::GetThreadHandleImpl();

    LfqBlock* const fresh = m_queue->GetFreeBlock();
    assert( fresh );
    assert( fresh->next.load( std::memory_order_relaxed ) == nullptr );

    fresh->thread = m_thread;
    lfq.dataEnd = fresh->dataEnd;
    lfq.tail = &fresh->tail;

    m_block.store( fresh, std::memory_order_release );
}
|
||||
|
||||
// Detaches the current block from the producer and returns it to the queue:
// to the released list when it still holds unread data, to the free list
// otherwise.
tracy_force_inline void LfqProducerImpl::CleanupThread()
{
    LfqBlock* blk = m_block.load( std::memory_order_relaxed );
    assert( blk );

    // Clear m_block; on failure `blk` is refreshed with the current value.
    while( !m_block.compare_exchange_weak( blk, nullptr, std::memory_order_release, std::memory_order_relaxed ) ) { YieldThread(); }

    const auto readPos = blk->head.load( std::memory_order_relaxed );
    const auto writePos = blk->tail.load( std::memory_order_acquire );
    if( readPos != writePos )
    {
        m_queue->ReleaseBlock( blk );
        return;
    }
    m_queue->FreeBlock( blk );
}
|
||||
|
||||
void LfqProducerImpl::FlushDataImpl()
|
||||
{
|
||||
LfqBlock* blk = m_block.load( std::memory_order_acquire );
|
||||
m_block.store( nullptr, std::memory_order_release );
|
||||
if( blk ) m_queue->FreeBlock( blk );
|
||||
PrepareThread();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -23,8 +23,7 @@ public:
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
|
||||
TracyLfqPrepare( QueueType::LockAnnounce );
|
||||
MemWrite( &item->lockAnnounce.id, m_id );
|
||||
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
|
||||
@@ -32,7 +31,7 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
LockableCtx( const LockableCtx& ) = delete;
|
||||
@@ -40,14 +39,14 @@ public:
|
||||
|
||||
tracy_force_inline ~LockableCtx()
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockTerminate );
|
||||
TracyLfqPrepare( QueueType::LockTerminate );
|
||||
MemWrite( &item->lockTerminate.id, m_id );
|
||||
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockTerminate.type, LockType::Lockable );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline bool BeforeLock()
|
||||
@@ -70,6 +69,7 @@ public:
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockWait.type, LockType::Lockable );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
@@ -153,22 +153,6 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
// Attaches a caller-supplied name to this lock.
//
// The first `size` bytes of `name` are copied into a tracy_malloc'd buffer (no
// terminator is appended); the buffer address and length are then queued as a
// QueueType::LockName item on the serial queue.
// NOTE(review): nothing here frees the copy - presumably the queue consumer
// takes ownership; confirm.
tracy_force_inline void CustomName( const char* name, size_t size )
{
    // The length is transmitted as a uint16_t, so it has to fit.
    assert( size < std::numeric_limits<uint16_t>::max() );

    char* const nameCopy = static_cast<char*>( tracy_malloc( size ) );
    memcpy( nameCopy, name, size );

    auto entry = Profiler::QueueSerial();
    MemWrite( &entry->hdr.type, QueueType::LockName );
    MemWrite( &entry->lockNameFat.id, m_id );
    MemWrite( &entry->lockNameFat.name, reinterpret_cast<uint64_t>( nameCopy ) );
    MemWrite( &entry->lockNameFat.size, static_cast<uint16_t>( size ) );
#ifdef TRACY_ON_DEMAND
    // On-demand builds additionally hand the item to DeferItem().
    GetProfiler().DeferItem( *entry );
#endif
    Profiler::QueueSerialFinish();
}
|
||||
|
||||
private:
|
||||
uint32_t m_id;
|
||||
|
||||
@@ -215,11 +199,6 @@ public:
|
||||
m_ctx.Mark( srcloc );
|
||||
}
|
||||
|
||||
// Forwards the custom lock name (pointer + byte count) to the embedded context.
tracy_force_inline void CustomName( const char* name, size_t size )
{
    m_ctx.CustomName( name, size );
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
LockableCtx m_ctx;
|
||||
@@ -238,16 +217,17 @@ public:
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
|
||||
TracyLfqPrepare( QueueType::LockAnnounce );
|
||||
MemWrite( &item->lockAnnounce.id, m_id );
|
||||
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
|
||||
MemWrite( &item->lockAnnounce.type, LockType::SharedLockable );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
SharedLockableCtx( const SharedLockableCtx& ) = delete;
|
||||
@@ -255,14 +235,16 @@ public:
|
||||
|
||||
tracy_force_inline ~SharedLockableCtx()
|
||||
{
|
||||
auto item = Profiler::QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::LockTerminate );
|
||||
TracyLfqPrepare( QueueType::LockTerminate );
|
||||
MemWrite( &item->lockTerminate.id, m_id );
|
||||
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockTerminate.type, LockType::SharedLockable );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
Profiler::QueueSerialFinish();
|
||||
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline bool BeforeLock()
|
||||
@@ -285,6 +267,7 @@ public:
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockWait.type, LockType::SharedLockable );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
@@ -367,6 +350,7 @@ public:
|
||||
MemWrite( &item->lockWait.thread, GetThreadHandle() );
|
||||
MemWrite( &item->lockWait.id, m_id );
|
||||
MemWrite( &item->lockWait.time, Profiler::GetTime() );
|
||||
MemWrite( &item->lockWait.type, LockType::SharedLockable );
|
||||
Profiler::QueueSerialFinish();
|
||||
return true;
|
||||
}
|
||||
@@ -450,22 +434,6 @@ public:
|
||||
Profiler::QueueSerialFinish();
|
||||
}
|
||||
|
||||
// Attaches a caller-supplied name to this shared lock.
//
// `size` bytes of `name` are duplicated into a tracy_malloc'd buffer (without a
// terminator) and a QueueType::LockName item carrying the lock id, the buffer
// address and the length is pushed through the serial queue.
// NOTE(review): the copy is not released here - presumably the consumer of the
// queue item owns it; confirm.
tracy_force_inline void CustomName( const char* name, size_t size )
{
    // The length field of the queue item is 16 bits wide.
    assert( size < std::numeric_limits<uint16_t>::max() );

    char* const nameCopy = static_cast<char*>( tracy_malloc( size ) );
    memcpy( nameCopy, name, size );

    auto entry = Profiler::QueueSerial();
    MemWrite( &entry->hdr.type, QueueType::LockName );
    MemWrite( &entry->lockNameFat.id, m_id );
    MemWrite( &entry->lockNameFat.name, reinterpret_cast<uint64_t>( nameCopy ) );
    MemWrite( &entry->lockNameFat.size, static_cast<uint16_t>( size ) );
#ifdef TRACY_ON_DEMAND
    // On-demand builds additionally hand the item to DeferItem().
    GetProfiler().DeferItem( *entry );
#endif
    Profiler::QueueSerialFinish();
}
|
||||
|
||||
private:
|
||||
uint32_t m_id;
|
||||
|
||||
@@ -532,17 +500,12 @@ public:
|
||||
m_ctx.Mark( srcloc );
|
||||
}
|
||||
|
||||
// Passes the custom lock name (pointer + byte count) on to the embedded
// shared-lock context.
tracy_force_inline void CustomName( const char* name, size_t size )
{
    m_ctx.CustomName( name, size );
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
SharedLockableCtx m_ctx;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,20 +5,17 @@
|
||||
#include <atomic>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "tracy_concurrentqueue.h"
|
||||
#include "tracy_SPSCQueue.h"
|
||||
#include "TracyCallstack.hpp"
|
||||
#include "TracySysTime.hpp"
|
||||
#include "TracyFastVector.hpp"
|
||||
#include "TracyLfq.hpp"
|
||||
#include "../common/TracyQueue.hpp"
|
||||
#include "../common/TracyAlign.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracyMutex.hpp"
|
||||
#include "../common/TracyProtocol.hpp"
|
||||
|
||||
#if defined _WIN32
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
#ifdef __APPLE__
|
||||
@@ -26,16 +23,12 @@
|
||||
# include <mach/mach_time.h>
|
||||
#endif
|
||||
|
||||
#if ( defined _WIN32 || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) )
|
||||
#if defined _WIN32 || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ ) || __ARM_ARCH >= 6
|
||||
# define TRACY_HW_TIMER
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
# include <signal.h>
|
||||
#endif
|
||||
|
||||
#if defined TRACY_TIMER_FALLBACK || !defined TRACY_HW_TIMER
|
||||
# include <chrono>
|
||||
#if !defined TRACY_HW_TIMER || ( __ARM_ARCH >= 6 && !defined CLOCK_MONOTONIC_RAW )
|
||||
#include <chrono>
|
||||
#endif
|
||||
|
||||
#ifndef TracyConcat
|
||||
@@ -47,10 +40,6 @@
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
#if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME)
|
||||
TRACY_API void StartupProfiler();
|
||||
TRACY_API void ShutdownProfiler();
|
||||
#endif
|
||||
|
||||
class GpuCtx;
|
||||
class Profiler;
|
||||
@@ -62,32 +51,14 @@ struct GpuCtxWrapper
|
||||
GpuCtx* ptr;
|
||||
};
|
||||
|
||||
TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken();
|
||||
TRACY_API LfqProducer& GetProducer();
|
||||
TRACY_API Profiler& GetProfiler();
|
||||
TRACY_API std::atomic<uint32_t>& GetLockCounter();
|
||||
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
|
||||
TRACY_API GpuCtxWrapper& GetGpuCtx();
|
||||
TRACY_API uint32_t GetThreadHandle();
|
||||
TRACY_API bool ProfilerAvailable();
|
||||
TRACY_API bool ProfilerAllocatorAvailable();
|
||||
TRACY_API int64_t GetFrequencyQpc();
|
||||
|
||||
#if defined TRACY_TIMER_FALLBACK && defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
|
||||
TRACY_API bool HardwareSupportsInvariantTSC(); // check, if we need fallback scenario
|
||||
#else
|
||||
# if defined TRACY_HW_TIMER
|
||||
tracy_force_inline bool HardwareSupportsInvariantTSC()
|
||||
{
|
||||
return true; // this is checked at startup
|
||||
}
|
||||
# else
|
||||
tracy_force_inline bool HardwareSupportsInvariantTSC()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
TRACY_API uint64_t GetThreadHandle();
|
||||
|
||||
TRACY_API void InitRPMallocThread();
|
||||
|
||||
struct SourceLocationData
|
||||
{
|
||||
@@ -107,48 +78,6 @@ struct LuaZoneState
|
||||
#endif
|
||||
|
||||
|
||||
// Lock-free queue helpers.
//
// TracyLfqPrepare( type ) expands to several statements that declare __magic,
// __token, __tail and `item` in the enclosing scope, start an enqueue on the
// calling thread's producer token and write `type` into the item header.
// TracyLfqCommit publishes the item by storing __magic + 1 into the tail index
// with release ordering, so it must be expanded in the same scope as the
// matching TracyLfqPrepare. Because the expansion is more than one statement,
// neither macro may be used as the unbraced body of an if/else/loop.
#define TracyLfqPrepare( _type ) \
    moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
    auto __token = GetToken(); \
    auto& __tail = __token->get_tail_index(); \
    auto item = __token->enqueue_begin( __magic ); \
    MemWrite( &item->hdr.type, _type );

#define TracyLfqCommit \
    __tail.store( __magic + 1, std::memory_order_release );

// Same pair with every name qualified by tracy::, for expansion sites outside
// the tracy namespace.
#define TracyLfqPrepareC( _type ) \
    tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \
    auto __token = tracy::GetToken(); \
    auto& __tail = __token->get_tail_index(); \
    auto item = __token->enqueue_begin( __magic ); \
    tracy::MemWrite( &item->hdr.type, _type );

#define TracyLfqCommitC \
    __tail.store( __magic + 1, std::memory_order_release );
|
||||
|
||||
|
||||
// Queue selection for events that carry a thread id.
//
// With TRACY_FIBERS the item is obtained from the serial queue and the commit
// macro writes GetThreadHandle() into the `thread` field of the member named by
// `_name` before finishing the serial queue. Without TRACY_FIBERS the macros
// map straight onto the TracyLfq* macros and `_name` is ignored.
// The *C variants qualify every name with tracy:: for use outside the namespace.
#ifdef TRACY_FIBERS
#  define TracyQueuePrepare( _type ) \
    auto item = Profiler::QueueSerial(); \
    MemWrite( &item->hdr.type, _type );
#  define TracyQueueCommit( _name ) \
    MemWrite( &item->_name.thread, GetThreadHandle() ); \
    Profiler::QueueSerialFinish();
#  define TracyQueuePrepareC( _type ) \
    auto item = tracy::Profiler::QueueSerial(); \
    tracy::MemWrite( &item->hdr.type, _type );
#  define TracyQueueCommitC( _name ) \
    tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \
    tracy::Profiler::QueueSerialFinish();
#else
#  define TracyQueuePrepare( _type ) TracyLfqPrepare( _type )
#  define TracyQueueCommit( _name ) TracyLfqCommit
#  define TracyQueuePrepareC( _type ) TracyLfqPrepareC( _type )
#  define TracyQueueCommitC( _name ) TracyLfqCommitC
#endif
|
||||
|
||||
|
||||
typedef void(*ParameterCallback)( uint32_t idx, int32_t val );
|
||||
|
||||
class Profiler
|
||||
@@ -156,76 +85,43 @@ class Profiler
|
||||
struct FrameImageQueueItem
|
||||
{
|
||||
void* image;
|
||||
uint32_t frame;
|
||||
uint64_t frame;
|
||||
uint16_t w;
|
||||
uint16_t h;
|
||||
uint8_t offset;
|
||||
bool flip;
|
||||
};
|
||||
|
||||
// Discriminator stored in SymbolQueueItem::type; selects the kind of request
// a symbol queue entry represents.
enum class SymbolQueueItemType
{
    CallstackFrame,
    SymbolQuery,
    CodeLocation,
    ExternalName,
    KernelCode
};
|
||||
|
||||
struct SymbolQueueItem
|
||||
{
|
||||
SymbolQueueItemType type;
|
||||
uint64_t ptr;
|
||||
uint32_t extra;
|
||||
};
|
||||
|
||||
public:
|
||||
Profiler();
|
||||
~Profiler();
|
||||
|
||||
void SpawnWorkerThreads();
|
||||
|
||||
static tracy_force_inline int64_t GetTime()
|
||||
{
|
||||
#ifdef TRACY_HW_TIMER
|
||||
# if defined TARGET_OS_IOS && TARGET_OS_IOS == 1
|
||||
if( HardwareSupportsInvariantTSC() ) return mach_absolute_time();
|
||||
# elif defined _WIN32
|
||||
# ifdef TRACY_TIMER_QPC
|
||||
return GetTimeQpc();
|
||||
# else
|
||||
if( HardwareSupportsInvariantTSC() ) return int64_t( __rdtsc() );
|
||||
# endif
|
||||
# elif defined __i386 || defined _M_IX86
|
||||
if( HardwareSupportsInvariantTSC() )
|
||||
{
|
||||
uint32_t eax, edx;
|
||||
asm volatile ( "rdtsc" : "=a" (eax), "=d" (edx) );
|
||||
return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
|
||||
}
|
||||
# elif defined __x86_64__ || defined _M_X64
|
||||
if( HardwareSupportsInvariantTSC() )
|
||||
{
|
||||
uint64_t rax, rdx;
|
||||
asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
|
||||
return (int64_t)(( rdx << 32 ) + rax);
|
||||
}
|
||||
# else
|
||||
# error "TRACY_HW_TIMER detection logic needs fixing"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if !defined TRACY_HW_TIMER || defined TRACY_TIMER_FALLBACK
|
||||
# if defined __linux__ && defined CLOCK_MONOTONIC_RAW
|
||||
# if TARGET_OS_IOS == 1
|
||||
return mach_absolute_time();
|
||||
# elif __ARM_ARCH >= 6
|
||||
# ifdef CLOCK_MONOTONIC_RAW
|
||||
struct timespec ts;
|
||||
clock_gettime( CLOCK_MONOTONIC_RAW, &ts );
|
||||
return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec );
|
||||
# else
|
||||
# else
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||
# endif
|
||||
# elif defined _WIN32 || defined __CYGWIN__
|
||||
return int64_t( __rdtsc() );
|
||||
# elif defined __i386 || defined _M_IX86
|
||||
uint32_t eax, edx;
|
||||
asm volatile ( "rdtsc" : "=a" (eax), "=d" (edx) );
|
||||
return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
|
||||
# elif defined __x86_64__ || defined _M_X64
|
||||
uint64_t rax, rdx;
|
||||
asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
|
||||
return ( rdx << 32 ) + rax;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if !defined TRACY_TIMER_FALLBACK
|
||||
return 0; // unreachable branch
|
||||
#else
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -241,14 +137,6 @@ public:
|
||||
return p.m_serialQueue.prepare_next();
|
||||
}
|
||||
|
||||
// Starts a serial-queue item that is preceded by a call stack.
//
// Takes the serial lock, emits the call stack `ptr` via SendCallstackSerial()
// and returns the next free slot of the serial queue. The lock is still held
// on return.
// NOTE(review): presumably the caller must finish with QueueSerialFinish() to
// commit the slot and drop the lock - confirm.
static tracy_force_inline QueueItem* QueueSerialCallstack( void* ptr )
{
    auto& profiler = GetProfiler();
    profiler.m_serialLock.lock();
    profiler.SendCallstackSerial( ptr );

    QueueItem* const slot = profiler.m_serialQueue.prepare_next();
    return slot;
}
|
||||
|
||||
static tracy_force_inline void QueueSerialFinish()
|
||||
{
|
||||
auto& p = GetProfiler();
|
||||
@@ -262,11 +150,10 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
auto item = QueueSerial();
|
||||
MemWrite( &item->hdr.type, QueueType::FrameMarkMsg );
|
||||
TracyLfqPrepare( QueueType::FrameMarkMsg );
|
||||
MemWrite( &item->frameMark.time, GetTime() );
|
||||
MemWrite( &item->frameMark.name, uint64_t( name ) );
|
||||
QueueSerialFinish();
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendFrameMark( const char* name, QueueType type )
|
||||
@@ -284,12 +171,10 @@ public:
|
||||
|
||||
static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip )
|
||||
{
|
||||
#ifndef TRACY_NO_FRAME_IMAGE
|
||||
auto& profiler = GetProfiler();
|
||||
assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits<uint32_t>::max() );
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
#endif
|
||||
const auto sz = size_t( w ) * size_t( h ) * 4;
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, image, sz );
|
||||
@@ -297,13 +182,12 @@ public:
|
||||
profiler.m_fiLock.lock();
|
||||
auto fi = profiler.m_fiQueue.prepare_next();
|
||||
fi->image = ptr;
|
||||
fi->frame = uint32_t( profiler.m_frameCount.load( std::memory_order_relaxed ) - offset );
|
||||
fi->frame = profiler.m_frameCount.load( std::memory_order_relaxed ) - offset;
|
||||
fi->w = w;
|
||||
fi->h = h;
|
||||
fi->flip = flip;
|
||||
profiler.m_fiQueue.commit_next();
|
||||
profiler.m_fiLock.unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, int64_t val )
|
||||
@@ -360,23 +244,18 @@ public:
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
|
||||
MemWrite( &item->message.time, GetTime() );
|
||||
MemWrite( &item->message.text, (uint64_t)ptr );
|
||||
TracyLfqCommit;
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
|
||||
MemWrite( &item->messageFat.time, GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( messageFatThread );
|
||||
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, int callstack )
|
||||
@@ -384,39 +263,31 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
|
||||
MemWrite( &item->message.time, GetTime() );
|
||||
MemWrite( &item->message.text, (uint64_t)txt );
|
||||
TracyLfqCommit;
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
|
||||
MemWrite( &item->messageLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
|
||||
TracyQueueCommit( messageLiteralThread );
|
||||
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
|
||||
MemWrite( &item->messageColor.time, GetTime() );
|
||||
MemWrite( &item->messageColor.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
TracyLfqCommit;
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
|
||||
MemWrite( &item->messageColorFat.time, GetTime() );
|
||||
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageColorFat.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( messageColorFatThread );
|
||||
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
|
||||
@@ -424,29 +295,25 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
if( callstack != 0 )
|
||||
{
|
||||
tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
|
||||
MemWrite( &item->messageColor.time, GetTime() );
|
||||
MemWrite( &item->messageColor.text, (uint64_t)txt );
|
||||
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
TracyLfqCommit;
|
||||
|
||||
TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
|
||||
MemWrite( &item->messageColorLiteral.time, GetTime() );
|
||||
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
|
||||
MemWrite( &item->messageColorLiteral.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorLiteral.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
TracyQueueCommit( messageColorLiteralThread );
|
||||
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
|
||||
}
|
||||
|
||||
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
TracyLfqPrepare( QueueType::MessageAppInfo );
|
||||
MemWrite( &item->messageFat.time, GetTime() );
|
||||
MemWrite( &item->messageFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->messageFat.size, (uint16_t)size );
|
||||
MemWrite( &item->message.time, GetTime() );
|
||||
MemWrite( &item->message.text, (uint64_t)ptr );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
@@ -455,9 +322,8 @@ public:
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAlloc( const void* ptr, size_t size, bool secure )
|
||||
static tracy_force_inline void MemAlloc( const void* ptr, size_t size )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
@@ -468,9 +334,8 @@ public:
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFree( const void* ptr, bool secure )
|
||||
static tracy_force_inline void MemFree( const void* ptr )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( !GetProfiler().IsConnected() ) return;
|
||||
#endif
|
||||
@@ -481,9 +346,8 @@ public:
|
||||
GetProfiler().m_serialLock.unlock();
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
|
||||
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
@@ -491,26 +355,20 @@ public:
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
rpmalloc_thread_initialize();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
|
||||
SendCallstackMemory( callstack );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemAlloc( ptr, size, secure );
|
||||
MemAlloc( ptr, size );
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
|
||||
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
if( !ProfilerAllocatorAvailable() )
|
||||
{
|
||||
MemFree( ptr, secure );
|
||||
return;
|
||||
}
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
@@ -518,91 +376,15 @@ public:
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
rpmalloc_thread_initialize();
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
|
||||
SendCallstackMemory( callstack );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
MemFree( ptr, secure );
|
||||
#endif
|
||||
}
|
||||
|
||||
// Reports an allocation of `size` bytes at `ptr`, tagged with `name`.
//
// With `secure` set the call is a no-op while ProfilerAvailable() is false; in
// on-demand builds it is also a no-op while no client is connected. The name
// and the allocation event are sent back to back under the serial lock.
static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() )
    {
        return;
    }

    auto& profiler = GetProfiler();
#ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() )
    {
        return;
    }
#endif
    const auto tid = GetThreadHandle();

    profiler.m_serialLock.lock();
    SendMemName( name );
    SendMemAlloc( QueueType::MemAllocNamed, tid, ptr, size );
    profiler.m_serialLock.unlock();
}
|
||||
|
||||
// Reports that the allocation at `ptr`, tagged with `name`, was released.
//
// With `secure` set the call is a no-op while ProfilerAvailable() is false; in
// on-demand builds it is also a no-op while no client is connected. The name
// and the free event are sent back to back under the serial lock.
static tracy_force_inline void MemFreeNamed( const void* ptr, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() )
    {
        return;
    }

    auto& profiler = GetProfiler();
#ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() )
    {
        return;
    }
#endif
    const auto tid = GetThreadHandle();

    profiler.m_serialLock.lock();
    SendMemName( name );
    SendMemFree( QueueType::MemFreeNamed, tid, ptr );
    profiler.m_serialLock.unlock();
}
|
||||
|
||||
// Reports an allocation of `size` bytes at `ptr`, tagged with `name`, together
// with a call stack of up to `depth` frames.
//
// With `secure` set the call is a no-op while ProfilerAvailable() is false; in
// on-demand builds it is also a no-op while no client is connected.
//
// When call stacks are not available (TRACY_HAS_CALLSTACK undefined) the event
// degrades to a plain *named* allocation. The previous fallback called
// MemAlloc(), which discarded `name` and recorded the allocation as unnamed.
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
    auto& profiler = GetProfiler();
#  ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() ) return;
#  endif
    const auto thread = GetThreadHandle();

    // The stack is captured before the serial lock is taken.
    auto callstack = Callstack( depth );

    // Call stack, name and allocation event are emitted as one group under the lock.
    profiler.m_serialLock.lock();
    SendCallstackSerial( callstack );
    SendMemName( name );
    SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
    profiler.m_serialLock.unlock();
#else
    static_cast<void>(depth); // unused
    // Keep the name: only the call stack is lost in this configuration.
    MemAllocNamed( ptr, size, secure, name );
#endif
}
|
||||
|
||||
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name )
|
||||
{
|
||||
if( secure && !ProfilerAvailable() ) return;
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto& profiler = GetProfiler();
|
||||
# ifdef TRACY_ON_DEMAND
|
||||
if( !profiler.IsConnected() ) return;
|
||||
# endif
|
||||
const auto thread = GetThreadHandle();
|
||||
|
||||
auto callstack = Callstack( depth );
|
||||
|
||||
profiler.m_serialLock.lock();
|
||||
SendCallstackSerial( callstack );
|
||||
SendMemName( name );
|
||||
SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
|
||||
profiler.m_serialLock.unlock();
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
static_cast<void>(name); // unused
|
||||
MemFree( ptr, secure );
|
||||
MemFree( ptr );
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -610,58 +392,26 @@ public:
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto ptr = Callstack( depth );
|
||||
TracyQueuePrepare( QueueType::Callstack );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
TracyQueueCommit( callstackFatThread );
|
||||
#else
|
||||
static_cast<void>(depth); // unused
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void ParameterRegister( ParameterCallback cb ) { GetProfiler().m_paramCallback = cb; }
|
||||
static tracy_force_inline void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val )
|
||||
{
|
||||
TracyLfqPrepare( QueueType::ParamSetup );
|
||||
tracy::MemWrite( &item->paramSetup.idx, idx );
|
||||
tracy::MemWrite( &item->paramSetup.name, (uint64_t)name );
|
||||
tracy::MemWrite( &item->paramSetup.isBool, (uint8_t)isBool );
|
||||
tracy::MemWrite( &item->paramSetup.val, val );
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
GetProfiler().DeferItem( *item );
|
||||
#endif
|
||||
|
||||
TracyLfqPrepare( QueueType::Callstack );
|
||||
MemWrite( &item->callstack.ptr, ptr );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
#ifdef TRACY_FIBERS
|
||||
// Emits a QueueType::FiberEnter event for the fiber identified by `fiber`.
//
// The event carries the current time and the address of `fiber` (only the
// pointer is stored; the string is not copied here). TracyQueueCommit fills in
// the thread field of the fiberEnter payload.
static tracy_force_inline void EnterFiber( const char* fiber )
{
    TracyQueuePrepare( QueueType::FiberEnter );
    MemWrite( &item->fiberEnter.time, GetTime() );
    MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber );
    TracyQueueCommit( fiberEnter );
}
|
||||
|
||||
// Emits a QueueType::FiberLeave event stamped with the current time.
// TracyQueueCommit fills in the thread field of the fiberLeave payload.
static tracy_force_inline void LeaveFiber()
{
    TracyQueuePrepare( QueueType::FiberLeave );
    MemWrite( &item->fiberLeave.time, GetTime() );
    TracyQueueCommit( fiberLeave );
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void ParameterRegister( ParameterCallback cb ) { GetProfiler().m_paramCallback = cb; }
|
||||
static void ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val );
|
||||
|
||||
void SendCallstack( int depth, const char* skipBefore );
|
||||
static void CutCallstack( void* callstack, const char* skipBefore );
|
||||
|
||||
static bool ShouldExit();
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
tracy_force_inline bool IsConnected() const
|
||||
{
|
||||
return m_isConnected.load( std::memory_order_acquire );
|
||||
}
|
||||
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
tracy_force_inline uint64_t ConnectionId() const
|
||||
{
|
||||
return m_connectionId.load( std::memory_order_acquire );
|
||||
@@ -679,16 +429,11 @@ public:
|
||||
// Raises the shutdown flag and the manual-shutdown flag (both relaxed stores).
void RequestShutdown()
{
    m_shutdown.store( true, std::memory_order_relaxed );
    m_shutdownManual.store( true, std::memory_order_relaxed );
}
|
||||
// Returns the current value of the shutdown-finished flag (relaxed load).
bool HasShutdownFinished() const
{
    return m_shutdownFinished.load( std::memory_order_relaxed );
}
|
||||
|
||||
void SendString( uint64_t str, const char* ptr, QueueType type ) { SendString( str, ptr, strlen( ptr ), type ); }
|
||||
void SendString( uint64_t str, const char* ptr, size_t len, QueueType type );
|
||||
void SendSingleString( const char* ptr ) { SendSingleString( ptr, strlen( ptr ) ); }
|
||||
void SendSingleString( const char* ptr, size_t len );
|
||||
void SendSecondString( const char* ptr ) { SendSecondString( ptr, strlen( ptr ) ); }
|
||||
void SendSecondString( const char* ptr, size_t len );
|
||||
void SendString( uint64_t ptr, const char* str, QueueType type );
|
||||
|
||||
|
||||
// Allocated source location data layout:
|
||||
// 2b payload size
|
||||
// 4b payload size
|
||||
// 4b color
|
||||
// 4b source line
|
||||
// fsz function name
|
||||
@@ -699,82 +444,50 @@ public:
|
||||
|
||||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function )
|
||||
{
|
||||
return AllocSourceLocation( line, source, function, nullptr, 0 );
|
||||
const auto fsz = strlen( function );
|
||||
const auto ssz = strlen( source );
|
||||
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 );
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, &sz, 4 );
|
||||
memset( ptr + 4, 0, 4 );
|
||||
memcpy( ptr + 8, &line, 4 );
|
||||
memcpy( ptr + 12, function, fsz+1 );
|
||||
memcpy( ptr + 12 + fsz + 1, source, ssz + 1 );
|
||||
return uint64_t( ptr );
|
||||
}
|
||||
|
||||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz )
|
||||
{
|
||||
return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz );
|
||||
}
|
||||
|
||||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz )
|
||||
{
|
||||
return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 );
|
||||
}
|
||||
|
||||
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz )
|
||||
{
|
||||
const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz );
|
||||
assert( sz32 <= std::numeric_limits<uint16_t>::max() );
|
||||
const auto sz = uint16_t( sz32 );
|
||||
const auto fsz = strlen( function );
|
||||
const auto ssz = strlen( source );
|
||||
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nameSz );
|
||||
auto ptr = (char*)tracy_malloc( sz );
|
||||
memcpy( ptr, &sz, 2 );
|
||||
memset( ptr + 2, 0, 4 );
|
||||
memcpy( ptr + 6, &line, 4 );
|
||||
memcpy( ptr + 10, function, functionSz );
|
||||
ptr[10 + functionSz] = '\0';
|
||||
memcpy( ptr + 10 + functionSz + 1, source, sourceSz );
|
||||
ptr[10 + functionSz + 1 + sourceSz] = '\0';
|
||||
if( nameSz != 0 )
|
||||
{
|
||||
memcpy( ptr + 10 + functionSz + 1 + sourceSz + 1, name, nameSz );
|
||||
}
|
||||
memcpy( ptr, &sz, 4 );
|
||||
memset( ptr + 4, 0, 4 );
|
||||
memcpy( ptr + 8, &line, 4 );
|
||||
memcpy( ptr + 12, function, fsz+1 );
|
||||
memcpy( ptr + 12 + fsz + 1, source, ssz + 1 );
|
||||
memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nameSz );
|
||||
return uint64_t( ptr );
|
||||
}
|
||||
|
||||
private:
|
||||
enum class DequeueStatus { DataDequeued, ConnectionLost, QueueEmpty };
|
||||
enum class ThreadCtxStatus { Same, Changed, ConnectionLost };
|
||||
|
||||
static void LaunchWorker( void* ptr ) { ((Profiler*)ptr)->Worker(); }
|
||||
void Worker();
|
||||
|
||||
#ifndef TRACY_NO_FRAME_IMAGE
|
||||
static void LaunchCompressWorker( void* ptr ) { ((Profiler*)ptr)->CompressWorker(); }
|
||||
void CompressWorker();
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
static void LaunchSymbolWorker( void* ptr ) { ((Profiler*)ptr)->SymbolWorker(); }
|
||||
void SymbolWorker();
|
||||
void HandleSymbolQueueItem( const SymbolQueueItem& si );
|
||||
#endif
|
||||
|
||||
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
|
||||
void ClearQueues();
|
||||
void ClearSerial();
|
||||
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
|
||||
DequeueStatus DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop );
|
||||
DequeueStatus Dequeue();
|
||||
DequeueStatus DequeueContextSwitches( int64_t& timeStop );
|
||||
DequeueStatus DequeueSerial();
|
||||
ThreadCtxStatus ThreadCtxCheck( uint32_t threadId );
|
||||
bool AppendData( const void* data, size_t len );
|
||||
bool CommitData();
|
||||
|
||||
// Makes room for `len` bytes via NeedDataSize(), then appends them with
// AppendDataUnsafe(). The append happens regardless of the NeedDataSize()
// result; that result is what gets returned.
tracy_force_inline bool AppendData( const void* data, size_t len )
{
    const bool roomOk = NeedDataSize( len );
    AppendDataUnsafe( data, len );
    return roomOk;
}
|
||||
|
||||
// Ensures `len` more bytes fit in the current frame. If the bytes already
// buffered (m_bufferOffset - m_bufferStart) plus `len` would exceed
// TargetFrameSize, CommitData() is called and its result returned; otherwise
// returns true. `len` itself must not exceed TargetFrameSize (asserted).
tracy_force_inline bool NeedDataSize( size_t len )
{
    assert( len <= TargetFrameSize );
    const bool overflows = m_bufferOffset - m_bufferStart + (int)len > TargetFrameSize;
    return overflows ? CommitData() : true;
}
|
||||
bool NeedDataSize( size_t len );
|
||||
|
||||
tracy_force_inline void AppendDataUnsafe( const void* data, size_t len )
|
||||
{
|
||||
@@ -787,51 +500,37 @@ private:
|
||||
void SendSourceLocation( uint64_t ptr );
|
||||
void SendSourceLocationPayload( uint64_t ptr );
|
||||
void SendCallstackPayload( uint64_t ptr );
|
||||
void SendCallstackPayload64( uint64_t ptr );
|
||||
void SendCallstackAlloc( uint64_t ptr );
|
||||
|
||||
void QueueCallstackFrame( uint64_t ptr );
|
||||
void QueueSymbolQuery( uint64_t symbol );
|
||||
void QueueCodeLocation( uint64_t ptr );
|
||||
void QueueExternalName( uint64_t ptr );
|
||||
void QueueKernelCode( uint64_t symbol, uint32_t size );
|
||||
void SendCallstackFrame( uint64_t ptr );
|
||||
|
||||
bool HandleServerQuery();
|
||||
void HandleDisconnect();
|
||||
void HandleParameter( uint64_t payload );
|
||||
void HandleSymbolCodeQuery( uint64_t symbol, uint32_t size );
|
||||
void HandleSourceCodeQuery();
|
||||
|
||||
void AckServerQuery();
|
||||
void AckSourceCodeNotAvailable();
|
||||
void AckSymbolCodeNotAvailable();
|
||||
|
||||
void CalibrateTimer();
|
||||
void CalibrateDelay();
|
||||
void ReportTopology();
|
||||
|
||||
static tracy_force_inline void SendCallstackSerial( void* ptr )
|
||||
static tracy_force_inline void SendCallstackMemory( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_HAS_CALLSTACK
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackSerial );
|
||||
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
|
||||
MemWrite( &item->hdr.type, QueueType::CallstackMemory );
|
||||
MemWrite( &item->callstackMemory.ptr, (uint64_t)ptr );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
#else
|
||||
static_cast<void>(ptr); // unused
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size )
|
||||
static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size )
|
||||
{
|
||||
assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed );
|
||||
assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack );
|
||||
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, type );
|
||||
MemWrite( &item->memAlloc.time, GetTime() );
|
||||
MemWrite( &item->memAlloc.thread, thread );
|
||||
MemWrite( &item->memAlloc.ptr, (uint64_t)ptr );
|
||||
if( compile_time_condition<sizeof( size ) == 4>::value )
|
||||
if( sizeof( size ) == 4 )
|
||||
{
|
||||
memcpy( &item->memAlloc.size, &size, 4 );
|
||||
memset( &item->memAlloc.size + 4, 0, 2 );
|
||||
@@ -839,15 +538,14 @@ private:
|
||||
else
|
||||
{
|
||||
assert( sizeof( size ) == 8 );
|
||||
memcpy( &item->memAlloc.size, &size, 4 );
|
||||
memcpy( ((char*)&item->memAlloc.size)+4, ((char*)&size)+4, 2 );
|
||||
memcpy( &item->memAlloc.size, &size, 6 );
|
||||
}
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
static tracy_force_inline void SendMemFree( QueueType type, const uint32_t thread, const void* ptr )
|
||||
static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr )
|
||||
{
|
||||
assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed );
|
||||
assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack );
|
||||
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, type );
|
||||
@@ -857,36 +555,21 @@ private:
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
// Appends a MemNamePayload item to the profiler's serial queue.
// The item stores the address of `name` (cast to uint64_t), not a copy of the
// characters. `name` must be non-null (asserted).
// NOTE(review): presumably `name` has to stay valid until the item is consumed - confirm.
static tracy_force_inline void SendMemName( const char* name )
|
||||
{
|
||||
assert( name );
|
||||
auto item = GetProfiler().m_serialQueue.prepare_next();
|
||||
MemWrite( &item->hdr.type, QueueType::MemNamePayload );
|
||||
MemWrite( &item->memName.name, (uint64_t)name );
|
||||
GetProfiler().m_serialQueue.commit_next();
|
||||
}
|
||||
|
||||
#if defined _WIN32 && defined TRACY_TIMER_QPC
|
||||
static int64_t GetTimeQpc();
|
||||
#endif
|
||||
|
||||
double m_timerMul;
|
||||
uint64_t m_resolution;
|
||||
uint64_t m_delay;
|
||||
std::atomic<int64_t> m_timeBegin;
|
||||
uint32_t m_mainThread;
|
||||
uint64_t m_epoch, m_exectime;
|
||||
uint64_t m_mainThread;
|
||||
uint64_t m_epoch;
|
||||
std::atomic<bool> m_shutdown;
|
||||
std::atomic<bool> m_shutdownManual;
|
||||
std::atomic<bool> m_shutdownFinished;
|
||||
Socket* m_sock;
|
||||
UdpBroadcast* m_broadcast;
|
||||
bool m_noExit;
|
||||
uint32_t m_userPort;
|
||||
std::atomic<uint32_t> m_zoneId;
|
||||
int64_t m_samplingPeriod;
|
||||
|
||||
uint32_t m_threadCtx;
|
||||
uint64_t m_threadCtx;
|
||||
int64_t m_refTimeThread;
|
||||
int64_t m_refTimeSerial;
|
||||
int64_t m_refTimeCtx;
|
||||
@@ -897,21 +580,18 @@ private:
|
||||
int m_bufferOffset;
|
||||
int m_bufferStart;
|
||||
|
||||
char* m_itemBuf;
|
||||
char* m_lz4Buf;
|
||||
|
||||
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
|
||||
TracyMutex m_serialLock;
|
||||
|
||||
#ifndef TRACY_NO_FRAME_IMAGE
|
||||
FastVector<FrameImageQueueItem> m_fiQueue, m_fiDequeue;
|
||||
TracyMutex m_fiLock;
|
||||
#endif
|
||||
|
||||
SPSCQueue<SymbolQueueItem> m_symbolQueue;
|
||||
|
||||
std::atomic<uint64_t> m_frameCount;
|
||||
std::atomic<bool> m_isConnected;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
std::atomic<bool> m_isConnected;
|
||||
std::atomic<uint64_t> m_connectionId;
|
||||
|
||||
TracyMutex m_deferredLock;
|
||||
@@ -928,22 +608,8 @@ private:
|
||||
#endif
|
||||
|
||||
ParameterCallback m_paramCallback;
|
||||
|
||||
char* m_queryImage;
|
||||
char* m_queryData;
|
||||
char* m_queryDataPtr;
|
||||
|
||||
#if defined _WIN32
|
||||
void* m_exceptionHandler;
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
struct {
|
||||
struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt;
|
||||
} m_prevSignal;
|
||||
#endif
|
||||
bool m_crashHandlerInstalled;
|
||||
};
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,141 +0,0 @@
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "TracyDebug.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class RingBuffer
|
||||
{
|
||||
public:
|
||||
RingBuffer( unsigned int size, int fd, int id, int cpu = -1 )
|
||||
: m_size( size )
|
||||
, m_id( id )
|
||||
, m_cpu( cpu )
|
||||
, m_fd( fd )
|
||||
{
|
||||
const auto pageSize = uint32_t( getpagesize() );
|
||||
assert( size >= pageSize );
|
||||
assert( __builtin_popcount( size ) == 1 );
|
||||
m_mapSize = size + pageSize;
|
||||
auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
|
||||
if( mapAddr == MAP_FAILED )
|
||||
{
|
||||
TracyDebug( "mmap failed: errno %i (%s)\n", errno, strerror( errno ) );
|
||||
m_fd = 0;
|
||||
m_metadata = nullptr;
|
||||
close( fd );
|
||||
return;
|
||||
}
|
||||
m_metadata = (perf_event_mmap_page*)mapAddr;
|
||||
assert( m_metadata->data_offset == pageSize );
|
||||
m_buffer = ((char*)mapAddr) + pageSize;
|
||||
m_tail = m_metadata->data_tail;
|
||||
}
|
||||
|
||||
~RingBuffer()
|
||||
{
|
||||
if( m_metadata ) munmap( m_metadata, m_mapSize );
|
||||
if( m_fd ) close( m_fd );
|
||||
}
|
||||
|
||||
RingBuffer( const RingBuffer& ) = delete;
|
||||
RingBuffer& operator=( const RingBuffer& ) = delete;
|
||||
|
||||
RingBuffer( RingBuffer&& other )
|
||||
{
|
||||
memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) );
|
||||
m_metadata = nullptr;
|
||||
m_fd = 0;
|
||||
}
|
||||
|
||||
RingBuffer& operator=( RingBuffer&& other )
|
||||
{
|
||||
memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) );
|
||||
m_metadata = nullptr;
|
||||
m_fd = 0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool IsValid() const { return m_metadata != nullptr; }
|
||||
int GetId() const { return m_id; }
|
||||
int GetCpu() const { return m_cpu; }
|
||||
|
||||
void Enable()
|
||||
{
|
||||
ioctl( m_fd, PERF_EVENT_IOC_ENABLE, 0 );
|
||||
}
|
||||
|
||||
void Read( void* dst, uint64_t offset, uint64_t cnt )
|
||||
{
|
||||
const auto size = m_size;
|
||||
auto src = ( m_tail + offset ) % size;
|
||||
if( src + cnt <= size )
|
||||
{
|
||||
memcpy( dst, m_buffer + src, cnt );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto s0 = size - src;
|
||||
const auto buf = m_buffer;
|
||||
memcpy( dst, buf + src, s0 );
|
||||
memcpy( (char*)dst + s0, buf, cnt - s0 );
|
||||
}
|
||||
}
|
||||
|
||||
void Advance( uint64_t cnt )
|
||||
{
|
||||
m_tail += cnt;
|
||||
StoreTail();
|
||||
}
|
||||
|
||||
bool CheckTscCaps() const
|
||||
{
|
||||
return m_metadata->cap_user_time_zero;
|
||||
}
|
||||
|
||||
int64_t ConvertTimeToTsc( int64_t timestamp ) const
|
||||
{
|
||||
if( !m_metadata->cap_user_time_zero ) return 0;
|
||||
const auto time = timestamp - m_metadata->time_zero;
|
||||
const auto quot = time / m_metadata->time_mult;
|
||||
const auto rem = time % m_metadata->time_mult;
|
||||
return ( quot << m_metadata->time_shift ) + ( rem << m_metadata->time_shift ) / m_metadata->time_mult;
|
||||
}
|
||||
|
||||
uint64_t LoadHead() const
|
||||
{
|
||||
return std::atomic_load_explicit( (const volatile std::atomic<uint64_t>*)&m_metadata->data_head, std::memory_order_acquire );
|
||||
}
|
||||
|
||||
uint64_t GetTail() const
|
||||
{
|
||||
return m_tail;
|
||||
}
|
||||
|
||||
private:
|
||||
void StoreTail()
|
||||
{
|
||||
std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, m_tail, std::memory_order_release );
|
||||
}
|
||||
|
||||
unsigned int m_size;
|
||||
uint64_t m_tail;
|
||||
char* m_buffer;
|
||||
int m_id;
|
||||
int m_cpu;
|
||||
perf_event_mmap_page* m_metadata;
|
||||
|
||||
size_t m_mapSize;
|
||||
int m_fd;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
#ifndef __TRACYSCOPED_HPP__
|
||||
#define __TRACYSCOPED_HPP__
|
||||
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
@@ -16,83 +15,36 @@ namespace tracy
|
||||
class ScopedZone
|
||||
{
|
||||
public:
|
||||
ScopedZone( const ScopedZone& ) = delete;
|
||||
ScopedZone( ScopedZone&& ) = delete;
|
||||
ScopedZone& operator=( const ScopedZone& ) = delete;
|
||||
ScopedZone& operator=( ScopedZone&& ) = delete;
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
, m_connectionId( GetProfiler().ConnectionId() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneBegin );
|
||||
TracyLfqPrepare( QueueType::ZoneBegin );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
, m_connectionId( GetProfiler().ConnectionId() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
GetProfiler().SendCallstack( depth );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginCallstack );
|
||||
TracyLfqPrepare( QueueType::ZoneBeginCallstack );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
TracyLfqCommit;
|
||||
|
||||
// Opens a zone whose source location is built at run time from `line`,
// `source`, `function` and `name` (each string passed with an explicit length)
// via Profiler::AllocSourceLocation(), and emits a ZoneBeginAllocSrcLoc item.
// Nothing is emitted when the zone is inactive; with TRACY_ON_DEMAND the zone
// is active only if is_active is true and the profiler reports a connection.
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
// Remember the connection id current at zone begin.
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
// NOTE(review): `item` below is presumably declared by TracyQueuePrepare - confirm against the macro.
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
// Callstack-capturing variant of the allocated-source-location constructor:
// calls GetProfiler().SendCallstack( depth ) first, then emits a
// ZoneBeginAllocSrcLocCallstack item carrying the begin time and the source
// location returned by Profiler::AllocSourceLocation().
// Nothing is sent when the zone is inactive.
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true )
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
: m_active( is_active && GetProfiler().IsConnected() )
|
||||
#else
|
||||
: m_active( is_active )
|
||||
#endif
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
// Remember the connection id current at zone begin.
m_connectionId = GetProfiler().ConnectionId();
|
||||
#endif
|
||||
// The callstack is sent before the zone-begin item is prepared.
GetProfiler().SendCallstack( depth );
|
||||
|
||||
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
|
||||
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
||||
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
|
||||
MemWrite( &item->zoneBegin.srcloc, srcloc );
|
||||
TracyQueueCommit( zoneBeginThread );
|
||||
}
|
||||
|
||||
tracy_force_inline ~ScopedZone()
|
||||
@@ -101,67 +53,39 @@ public:
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneEnd );
|
||||
TracyLfqPrepare( QueueType::ZoneEnd );
|
||||
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
|
||||
TracyQueueCommit( zoneEndThread );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline void Text( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyQueuePrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
ptr[size] = '\0';
|
||||
TracyLfqPrepare( QueueType::ZoneText );
|
||||
MemWrite( &item->zoneText.text, (uint64_t)ptr );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
tracy_force_inline void Name( const char* txt, size_t size )
|
||||
{
|
||||
assert( size < std::numeric_limits<uint16_t>::max() );
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
auto ptr = (char*)tracy_malloc( size );
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
TracyQueuePrepare( QueueType::ZoneName );
|
||||
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
|
||||
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
|
||||
TracyQueueCommit( zoneTextFatThread );
|
||||
ptr[size] = '\0';
|
||||
TracyLfqPrepare( QueueType::ZoneName );
|
||||
MemWrite( &item->zoneText.text, (uint64_t)ptr );
|
||||
TracyLfqCommit;
|
||||
}
|
||||
|
||||
// Emits a ZoneColor item for this zone. The low three bytes of `color` are
// written separately: bits 0-7 to `r`, bits 8-15 to `g`, bits 16-23 to `b`;
// bits 24-31 are ignored. Does nothing when the zone is inactive or, with
// TRACY_ON_DEMAND, when the profiler's connection id no longer matches the
// one stored in m_connectionId.
tracy_force_inline void Color( uint32_t color )
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneColor );
|
||||
MemWrite( &item->zoneColor.r, uint8_t( ( color ) & 0xFF ) );
|
||||
MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
|
||||
MemWrite( &item->zoneColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
|
||||
TracyQueueCommit( zoneColorThread );
|
||||
}
|
||||
|
||||
// Emits a ZoneValue item carrying `value` for this zone. Does nothing when the
// zone is inactive or, with TRACY_ON_DEMAND, when the profiler's connection id
// no longer matches the one stored in m_connectionId.
tracy_force_inline void Value( uint64_t value )
|
||||
{
|
||||
if( !m_active ) return;
|
||||
#ifdef TRACY_ON_DEMAND
|
||||
if( GetProfiler().ConnectionId() != m_connectionId ) return;
|
||||
#endif
|
||||
TracyQueuePrepare( QueueType::ZoneValue );
|
||||
MemWrite( &item->zoneValue.value, value );
|
||||
TracyQueueCommit( zoneValueThread );
|
||||
}
|
||||
|
||||
tracy_force_inline bool IsActive() const { return m_active; }
|
||||
|
||||
private:
|
||||
const bool m_active;
|
||||
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
#ifndef __TRACYSTRINGHELPERS_HPP__
|
||||
#define __TRACYSTRINGHELPERS_HPP__
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static inline char* CopyString( const char* src, size_t sz )
|
||||
{
|
||||
assert( strlen( src ) == sz );
|
||||
auto dst = (char*)tracy_malloc( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
static inline char* CopyString( const char* src )
|
||||
{
|
||||
const auto sz = strlen( src );
|
||||
auto dst = (char*)tracy_malloc( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
static inline char* CopyStringFast( const char* src, size_t sz )
|
||||
{
|
||||
assert( strlen( src ) == sz );
|
||||
auto dst = (char*)tracy_malloc_fast( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
static inline char* CopyStringFast( const char* src )
|
||||
{
|
||||
const auto sz = strlen( src );
|
||||
auto dst = (char*)tracy_malloc_fast( sz + 1 );
|
||||
memcpy( dst, src, sz );
|
||||
dst[sz] = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#ifdef TRACY_HAS_SYSTIME
|
||||
|
||||
# if defined _WIN32
|
||||
# if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
# elif defined __linux__
|
||||
# include <stdio.h>
|
||||
@@ -18,7 +18,7 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
# if defined _WIN32
|
||||
# if defined _WIN32 || defined __CYGWIN__
|
||||
|
||||
static inline uint64_t ConvertTime( const FILETIME& t )
|
||||
{
|
||||
@@ -47,12 +47,9 @@ void SysTime::ReadTimes()
|
||||
FILE* f = fopen( "/proc/stat", "r" );
|
||||
if( f )
|
||||
{
|
||||
int read = fscanf( f, "cpu %" PRIu64 " %" PRIu64 " %" PRIu64" %" PRIu64, &user, &nice, &system, &idle );
|
||||
fscanf( f, "cpu %" PRIu64 " %" PRIu64 " %" PRIu64" %" PRIu64, &user, &nice, &system, &idle );
|
||||
fclose( f );
|
||||
if (read == 4)
|
||||
{
|
||||
used = user + nice + system;
|
||||
}
|
||||
used = user + nice + system;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,7 +59,7 @@ void SysTime::ReadTimes()
|
||||
{
|
||||
host_cpu_load_info_data_t info;
|
||||
mach_msg_type_number_t cnt = HOST_CPU_LOAD_INFO_COUNT;
|
||||
host_statistics( mach_host_self(), HOST_CPU_LOAD_INFO, reinterpret_cast<host_info_t>( &info ), &cnt );
|
||||
host_statistics( mach_host_self(), HOST_CPU_LOAD_INFO, reinterpret_cast<host_info_t>( &info ), &cnt );
|
||||
used = info.cpu_ticks[CPU_STATE_USER] + info.cpu_ticks[CPU_STATE_NICE] + info.cpu_ticks[CPU_STATE_SYSTEM];
|
||||
idle = info.cpu_ticks[CPU_STATE_IDLE];
|
||||
}
|
||||
@@ -95,7 +92,7 @@ float SysTime::Get()
|
||||
const auto diffIdle = idle - oldIdle;
|
||||
const auto diffUsed = used - oldUsed;
|
||||
|
||||
#if defined _WIN32
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
return diffUsed == 0 ? -1 : ( diffUsed - diffIdle ) * 100.f / diffUsed;
|
||||
#elif defined __linux__ || defined __APPLE__ || defined BSD
|
||||
const auto total = diffUsed + diffIdle;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef __TRACYSYSTIME_HPP__
|
||||
#define __TRACYSYSTIME_HPP__
|
||||
|
||||
#if defined _WIN32 || defined __linux__ || defined __APPLE__
|
||||
#if defined _WIN32 || defined __CYGWIN__ || defined __linux__ || defined __APPLE__
|
||||
# define TRACY_HAS_SYSTIME
|
||||
#else
|
||||
# include <sys/param.h>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,11 +1,8 @@
|
||||
#ifndef __TRACYSYSTRACE_HPP__
|
||||
#define __TRACYSYSTRACE_HPP__
|
||||
|
||||
#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __linux__ )
|
||||
# include "../common/TracyUwp.hpp"
|
||||
# ifndef TRACY_UWP
|
||||
# define TRACY_HAS_SYSTEM_TRACING
|
||||
# endif
|
||||
#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __CYGWIN__ || defined __linux__ )
|
||||
# define TRACY_HAS_SYSTEM_TRACING
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_HAS_SYSTEM_TRACING
|
||||
@@ -15,11 +12,11 @@
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
bool SysTraceStart( int64_t& samplingPeriod );
|
||||
bool SysTraceStart();
|
||||
void SysTraceStop();
|
||||
void SysTraceWorker( void* ptr );
|
||||
|
||||
void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name );
|
||||
void SysTraceSendExternalName( uint64_t thread );
|
||||
|
||||
}
|
||||
|
||||
|
||||
80
client/TracySysTracePayload.hpp
Normal file
80
client/TracySysTracePayload.hpp
Normal file
@@ -0,0 +1,80 @@
|
||||
// File: '/home/wolf/desktop/tracy_systrace.armv7' (1210 bytes)
|
||||
// File: '/home/wolf/desktop/tracy_systrace.aarch64' (1650 bytes)
|
||||
|
||||
// Exported using binary_to_compressed_c.cpp
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static const unsigned int tracy_systrace_armv7_size = 1210;
|
||||
static const unsigned int tracy_systrace_armv7_data[1212/4] =
|
||||
{
|
||||
0x464c457f, 0x00010101, 0x00000000, 0x00000000, 0x00280003, 0x00000001, 0x00000208, 0x00000034, 0x00000000, 0x05000200, 0x00200034, 0x00280007,
|
||||
0x00000000, 0x00000006, 0x00000034, 0x00000034, 0x00000034, 0x000000e0, 0x000000e0, 0x00000004, 0x00000004, 0x00000003, 0x00000114, 0x00000114,
|
||||
0x00000114, 0x00000013, 0x00000013, 0x00000004, 0x00000001, 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x000003ed, 0x000003ed, 0x00000005,
|
||||
0x00001000, 0x00000001, 0x000003ed, 0x000013ed, 0x000013ed, 0x000000cd, 0x000000cf, 0x00000006, 0x00001000, 0x00000002, 0x000003f0, 0x000013f0,
|
||||
0x000013f0, 0x000000b8, 0x000000b8, 0x00000006, 0x00000004, 0x6474e551, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000006,
|
||||
0x00000010, 0x70000001, 0x00000394, 0x00000394, 0x00000394, 0x00000008, 0x00000008, 0x00000004, 0x00000004, 0x7379732f, 0x2f6d6574, 0x2f6e6962,
|
||||
0x6b6e696c, 0x00007265, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000, 0x00000012, 0x00000016, 0x00000000,
|
||||
0x00000000, 0x00000012, 0x6f6c6400, 0x006e6570, 0x4342494c, 0x62696c00, 0x732e6c64, 0x6c64006f, 0x006d7973, 0x00000001, 0x00000003, 0x00000001,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000003, 0x00000002, 0x00000000, 0x00000000, 0x00000001, 0x00020000, 0x00000002, 0x00010001,
|
||||
0x0000000d, 0x00000010, 0x00000000, 0x00050d63, 0x00020000, 0x00000008, 0x00000000, 0x000014b4, 0x00000116, 0x000014b8, 0x00000216, 0xe52de004,
|
||||
0xe59fe004, 0xe08fe00e, 0xe5bef008, 0x000012bc, 0xe28fc600, 0xe28cca01, 0xe5bcf2bc, 0xe28fc600, 0xe28cca01, 0xe5bcf2b4, 0xe92d4ff0, 0xe28db01c,
|
||||
0xe24dd01c, 0xe24dd801, 0xe59f0154, 0xe3a01001, 0xe08f0000, 0xebfffff1, 0xe59f1148, 0xe1a07000, 0xe08f1001, 0xebfffff0, 0xe59f113c, 0xe1a09000,
|
||||
0xe1a00007, 0xe08f1001, 0xebffffeb, 0xe59f112c, 0xe1a04000, 0xe1a00007, 0xe08f1001, 0xebffffe6, 0xe59f111c, 0xe1a05000, 0xe1a00007, 0xe08f1001,
|
||||
0xebffffe1, 0xe59f110c, 0xe1a06000, 0xe1a00007, 0xe08f1001, 0xebffffdc, 0xe58d0004, 0xe1a00007, 0xe59f10f4, 0xe08f1001, 0xebffffd7, 0xe1a0a000,
|
||||
0xe59f00e8, 0xe3a01000, 0xe3a08000, 0xe08f0000, 0xe12fff39, 0xe1a07000, 0xe3700001, 0xca000001, 0xe3a00000, 0xe12fff34, 0xe3a00009, 0xe58d4000,
|
||||
0xe1cd01b4, 0xe3090680, 0xe3400098, 0xe28d4010, 0xe58d000c, 0xe28d9018, 0xe58d8008, 0xe28d8008, 0xe58d7010, 0xea000003, 0xe1a02000, 0xe3a00001,
|
||||
0xe1a01009, 0xe12fff3a, 0xe1a00004, 0xe3a01001, 0xe3a02000, 0xe12fff35, 0xe3500000, 0xca000008, 0xe1a00008, 0xe3a01000, 0xe12fff36, 0xe1a00004,
|
||||
0xe3a01001, 0xe3a02000, 0xe12fff35, 0xe3500001, 0xbafffff6, 0xe59d3004, 0xe1a00007, 0xe1a01009, 0xe3a02801, 0xe12fff33, 0xe3500001, 0xaaffffe5,
|
||||
0xe59d1000, 0xe3a00000, 0xe12fff31, 0xe24bd01c, 0xe8bd8ff0, 0x00000174, 0x0000016c, 0x0000015d, 0x0000014e, 0x0000013f, 0x00000135, 0x00000126,
|
||||
0x00000114, 0x7ffffe74, 0x00000001, 0x6362696c, 0x006f732e, 0x6e65706f, 0x69786500, 0x6f700074, 0x6e006c6c, 0x736f6e61, 0x7065656c, 0x61657200,
|
||||
0x72770064, 0x00657469, 0x7379732f, 0x72656b2f, 0x2f6c656e, 0x75626564, 0x72742f67, 0x6e696361, 0x72742f67, 0x5f656361, 0x65706970, 0x00000000,
|
||||
0x00000003, 0x000014a8, 0x00000002, 0x00000010, 0x00000017, 0x000001cc, 0x00000014, 0x00000011, 0x00000015, 0x00000000, 0x00000006, 0x00000128,
|
||||
0x0000000b, 0x00000010, 0x00000005, 0x00000158, 0x0000000a, 0x0000001c, 0x6ffffef5, 0x00000174, 0x00000004, 0x0000018c, 0x00000001, 0x0000000d,
|
||||
0x0000001e, 0x00000008, 0x6ffffffb, 0x00000001, 0x6ffffff0, 0x000001a4, 0x6ffffffe, 0x000001ac, 0x6fffffff, 0x00000001, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x000001dc, 0x000001dc,
|
||||
};
|
||||
|
||||
static const unsigned int tracy_systrace_aarch64_size = 1650;
|
||||
static const unsigned int tracy_systrace_aarch64_data[1652/4] =
|
||||
{
|
||||
0x464c457f, 0x00010102, 0x00000000, 0x00000000, 0x00b70003, 0x00000001, 0x00000300, 0x00000000, 0x00000040, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00380040, 0x00400006, 0x00000000, 0x00000006, 0x00000005, 0x00000040, 0x00000000, 0x00000040, 0x00000000, 0x00000040, 0x00000000,
|
||||
0x00000150, 0x00000000, 0x00000150, 0x00000000, 0x00000008, 0x00000000, 0x00000003, 0x00000004, 0x00000190, 0x00000000, 0x00000190, 0x00000000,
|
||||
0x00000190, 0x00000000, 0x00000015, 0x00000000, 0x00000015, 0x00000000, 0x00000001, 0x00000000, 0x00000001, 0x00000005, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000004d1, 0x00000000, 0x000004d1, 0x00000000, 0x00010000, 0x00000000, 0x00000001, 0x00000006,
|
||||
0x000004d8, 0x00000000, 0x000104d8, 0x00000000, 0x000104d8, 0x00000000, 0x0000019a, 0x00000000, 0x000001a0, 0x00000000, 0x00010000, 0x00000000,
|
||||
0x00000002, 0x00000006, 0x000004d8, 0x00000000, 0x000104d8, 0x00000000, 0x000104d8, 0x00000000, 0x00000170, 0x00000000, 0x00000170, 0x00000000,
|
||||
0x00000008, 0x00000000, 0x6474e551, 0x00000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000010, 0x00000000, 0x7379732f, 0x2f6d6574, 0x2f6e6962, 0x6b6e696c, 0x34367265, 0x00000000, 0x00000001, 0x00000004,
|
||||
0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000001, 0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000a0003, 0x00000300, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x0000000a, 0x00000012, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x00000012, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x62696c00, 0x732e6c64, 0x6c64006f, 0x6e65706f, 0x736c6400, 0x4c006d79, 0x00434249, 0x00000000, 0x00020002, 0x00000000,
|
||||
0x00010001, 0x00000001, 0x00000010, 0x00000000, 0x00050d63, 0x00020000, 0x00000017, 0x00000000, 0x00010668, 0x00000000, 0x00000402, 0x00000002,
|
||||
0x00000000, 0x00000000, 0x00010670, 0x00000000, 0x00000402, 0x00000003, 0x00000000, 0x00000000, 0xa9bf7bf0, 0x90000090, 0xf9433211, 0x91198210,
|
||||
0xd61f0220, 0xd503201f, 0xd503201f, 0xd503201f, 0x90000090, 0xf9433611, 0x9119a210, 0xd61f0220, 0x90000090, 0xf9433a11, 0x9119c210, 0xd61f0220,
|
||||
0xf81b0ffc, 0xa9015ff8, 0xa90257f6, 0xa9034ff4, 0xa9047bfd, 0x910103fd, 0xd14043ff, 0xd10043ff, 0x90000000, 0x91120000, 0x320003e1, 0x97ffffed,
|
||||
0x90000001, 0x91122021, 0xaa0003f7, 0x97ffffed, 0x90000001, 0xaa0003f8, 0x91123421, 0xaa1703e0, 0x97ffffe8, 0x90000001, 0xaa0003f3, 0x91124821,
|
||||
0xaa1703e0, 0x97ffffe3, 0x90000001, 0xaa0003f4, 0x91125c21, 0xaa1703e0, 0x97ffffde, 0x90000001, 0xaa0003f5, 0x91128421, 0xaa1703e0, 0x97ffffd9,
|
||||
0x90000001, 0xaa0003f6, 0x91129821, 0xaa1703e0, 0x97ffffd4, 0xaa0003f7, 0x90000000, 0x9112b000, 0x2a1f03e1, 0xd63f0300, 0x2a0003f8, 0x36f80060,
|
||||
0x2a1f03e0, 0xd63f0260, 0x90000008, 0x3dc11d00, 0x52800128, 0xb81c83b8, 0x781cc3a8, 0x3d8003e0, 0x14000005, 0x93407c02, 0x320003e0, 0x910043e1,
|
||||
0xd63f02e0, 0xd100e3a0, 0x320003e1, 0x2a1f03e2, 0xd63f0280, 0x7100001f, 0x5400014c, 0x910003e0, 0xaa1f03e1, 0xd63f02a0, 0xd100e3a0, 0x320003e1,
|
||||
0x2a1f03e2, 0xd63f0280, 0x7100041f, 0x54ffff0b, 0x910043e1, 0x321003e2, 0x2a1803e0, 0xd63f02c0, 0x7100041f, 0x54fffd0a, 0x2a1f03e0, 0xd63f0260,
|
||||
0x914043ff, 0x910043ff, 0xa9447bfd, 0xa9434ff4, 0xa94257f6, 0xa9415ff8, 0xf84507fc, 0xd65f03c0, 0x00000000, 0x00000000, 0x00989680, 0x00000000,
|
||||
0x6362696c, 0x006f732e, 0x6e65706f, 0x69786500, 0x6f700074, 0x6e006c6c, 0x736f6e61, 0x7065656c, 0x61657200, 0x72770064, 0x00657469, 0x7379732f,
|
||||
0x72656b2f, 0x2f6c656e, 0x75626564, 0x72742f67, 0x6e696361, 0x72742f67, 0x5f656361, 0x65706970, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
|
||||
0x00000001, 0x00000000, 0x00000004, 0x00000000, 0x000001a8, 0x00000000, 0x6ffffef5, 0x00000000, 0x000001c8, 0x00000000, 0x00000005, 0x00000000,
|
||||
0x00000248, 0x00000000, 0x00000006, 0x00000000, 0x000001e8, 0x00000000, 0x0000000a, 0x00000000, 0x0000001c, 0x00000000, 0x0000000b, 0x00000000,
|
||||
0x00000018, 0x00000000, 0x00000015, 0x00000000, 0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00010650, 0x00000000, 0x00000002, 0x00000000,
|
||||
0x00000030, 0x00000000, 0x00000014, 0x00000000, 0x00000007, 0x00000000, 0x00000017, 0x00000000, 0x00000290, 0x00000000, 0x0000001e, 0x00000000,
|
||||
0x00000008, 0x00000000, 0x6ffffffb, 0x00000000, 0x00000001, 0x00000000, 0x6ffffffe, 0x00000000, 0x00000270, 0x00000000, 0x6fffffff, 0x00000000,
|
||||
0x00000001, 0x00000000, 0x6ffffff0, 0x00000000, 0x00000264, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000104d8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x000002c0, 0x00000000, 0x000002c0,
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,36 +1,16 @@
|
||||
#ifndef __TRACYTHREAD_HPP__
|
||||
#define __TRACYTHREAD_HPP__
|
||||
|
||||
#if defined _WIN32
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
#else
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
# include "tracy_rpmalloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
extern thread_local bool RpThreadInitDone;
|
||||
#endif
|
||||
|
||||
// Helper whose only member is a destructor. When TRACY_MANUAL_LIFETIME is
// defined, destroying an instance finalizes rpmalloc for the calling thread and
// clears the thread_local RpThreadInitDone flag; without that macro the
// destructor body is empty.
// NOTE(review): presumably instantiated once per thread so that the destructor
// runs at thread exit - confirm at the point of use.
class ThreadExitHandler
|
||||
{
|
||||
public:
|
||||
~ThreadExitHandler()
|
||||
{
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
// Release this thread's rpmalloc state, then mark the thread as no longer initialized.
rpmalloc_thread_finalize();
|
||||
RpThreadInitDone = false;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
#if defined _WIN32
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
|
||||
class Thread
|
||||
{
|
||||
|
||||
@@ -1,148 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2020 Erik Rigtorp <erik@rigtorp.se>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <stdexcept>
|
||||
#include <type_traits> // std::enable_if, std::is_*_constructible
|
||||
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
|
||||
#if defined (_MSC_VER)
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable:4324)
|
||||
#endif
|
||||
|
||||
namespace tracy {
|
||||
|
||||
// Fixed-capacity ring-buffer queue of T backed by storage obtained from
// tracy_malloc(). Elements are constructed in place by emplace() and destroyed
// by pop(); front() exposes the oldest element without removing it.
// NOTE(review): readIdxCache_ is only touched by emplace() and writeIdxCache_
// only by front(), and both are plain (non-atomic) size_t, so this appears to
// rely on exactly one thread producing and exactly one thread consuming -
// confirm with the callers.
template <typename T> class SPSCQueue {
|
||||
public:
|
||||
// Creates a queue able to hold `capacity` elements. One extra slot is
// allocated so that "full" and "empty" can be told apart, plus kPadding
// unused slots on each side of the element array.
// The result of tracy_malloc() is not checked for null here.
explicit SPSCQueue(const size_t capacity)
|
||||
: capacity_(capacity) {
|
||||
capacity_++; // Needs one slack element
|
||||
slots_ = (T*)tracy_malloc(sizeof(T) * (capacity_ + 2 * kPadding));
|
||||
|
||||
// Layout sanity checks: the object is cache-line aligned, spans at least
// three cache lines, and readIdx_ lies at least one cache line after writeIdx_.
static_assert(alignof(SPSCQueue<T>) == kCacheLineSize, "");
|
||||
static_assert(sizeof(SPSCQueue<T>) >= 3 * kCacheLineSize, "");
|
||||
assert(reinterpret_cast<char *>(&readIdx_) -
|
||||
reinterpret_cast<char *>(&writeIdx_) >=
|
||||
static_cast<std::ptrdiff_t>(kCacheLineSize));
|
||||
}
|
||||
|
||||
// Destroys every element still queued, then releases the storage.
~SPSCQueue() {
|
||||
while (front()) {
|
||||
pop();
|
||||
}
|
||||
tracy_free(slots_);
|
||||
}
|
||||
|
||||
// non-copyable and non-movable
|
||||
SPSCQueue(const SPSCQueue &) = delete;
|
||||
SPSCQueue &operator=(const SPSCQueue &) = delete;
|
||||
|
||||
// Constructs a new T at the write position from `args`.
// If the queue is full this busy-waits (no sleep or yield) until the read
// index moves, so it never fails but may spin indefinitely.
template <typename... Args>
|
||||
void emplace(Args &&...args) noexcept(
|
||||
std::is_nothrow_constructible<T, Args &&...>::value) {
|
||||
static_assert(std::is_constructible<T, Args &&...>::value,
|
||||
"T must be constructible with Args&&...");
|
||||
auto const writeIdx = writeIdx_.load(std::memory_order_relaxed);
|
||||
// Next write position, wrapping around at the end of the ring.
auto nextWriteIdx = writeIdx + 1;
|
||||
if (nextWriteIdx == capacity_) {
|
||||
nextWriteIdx = 0;
|
||||
}
|
||||
// Full when the next write position equals the read position. The cached
// read index is consulted first; readIdx_ is reloaded only while it still
// indicates a full queue.
while (nextWriteIdx == readIdxCache_) {
|
||||
readIdxCache_ = readIdx_.load(std::memory_order_acquire);
|
||||
}
|
||||
// Element storage starts kPadding slots into the allocation.
new (&slots_[writeIdx + kPadding]) T(std::forward<Args>(args)...);
|
||||
// Release store publishes the constructed element together with the new index.
writeIdx_.store(nextWriteIdx, std::memory_order_release);
|
||||
}
|
||||
|
||||
// Returns a pointer to the oldest element, or nullptr when the queue is
// empty. The element stays in the queue until pop() is called.
T *front() noexcept {
|
||||
auto const readIdx = readIdx_.load(std::memory_order_relaxed);
|
||||
// Reload writeIdx_ only when the cached value suggests the queue is empty.
if (readIdx == writeIdxCache_) {
|
||||
writeIdxCache_ = writeIdx_.load(std::memory_order_acquire);
|
||||
if (writeIdxCache_ == readIdx) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
return &slots_[readIdx + kPadding];
|
||||
}
|
||||
|
||||
// Destroys the oldest element and advances the read index.
// Precondition: the queue is not empty (checked only by assert).
void pop() noexcept {
|
||||
static_assert(std::is_nothrow_destructible<T>::value,
|
||||
"T must be nothrow destructible");
|
||||
auto const readIdx = readIdx_.load(std::memory_order_relaxed);
|
||||
assert(writeIdx_.load(std::memory_order_acquire) != readIdx);
|
||||
slots_[readIdx + kPadding].~T();
|
||||
// Next read position, wrapping around at the end of the ring.
auto nextReadIdx = readIdx + 1;
|
||||
if (nextReadIdx == capacity_) {
|
||||
nextReadIdx = 0;
|
||||
}
|
||||
readIdx_.store(nextReadIdx, std::memory_order_release);
|
||||
}
|
||||
|
||||
// Returns the number of queued elements, computed from two separate loads
// of the write and read indices.
size_t size() const noexcept {
|
||||
std::ptrdiff_t diff = writeIdx_.load(std::memory_order_acquire) -
|
||||
readIdx_.load(std::memory_order_acquire);
|
||||
// A negative difference means the write index has wrapped past the end.
if (diff < 0) {
|
||||
diff += capacity_;
|
||||
}
|
||||
return static_cast<size_t>(diff);
|
||||
}
|
||||
|
||||
// Returns true when the write and read indices are equal.
bool empty() const noexcept {
|
||||
return writeIdx_.load(std::memory_order_acquire) ==
|
||||
readIdx_.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
// Returns the usable capacity, i.e. the allocated slot count minus the slack slot.
size_t capacity() const noexcept { return capacity_ - 1; }
|
||||
|
||||
private:
|
||||
// Cache line size assumed for alignment and padding, in bytes.
static constexpr size_t kCacheLineSize = 64;
|
||||
|
||||
// Padding to avoid false sharing between slots_ and adjacent allocations
|
||||
// (number of T-sized slots needed to cover at least one cache line).
static constexpr size_t kPadding = (kCacheLineSize - 1) / sizeof(T) + 1;
|
||||
|
||||
private:
|
||||
// Number of ring slots, including the one slack slot.
size_t capacity_;
|
||||
// Allocation of capacity_ + 2 * kPadding slots; elements live at [kPadding, kPadding + capacity_).
T *slots_;
|
||||
|
||||
// Align to cache line size in order to avoid false sharing.
|
||||
// readIdxCache_ and writeIdxCache_ are used to reduce the amount of cache
|
||||
// coherency traffic.
|
||||
alignas(kCacheLineSize) std::atomic<size_t> writeIdx_ = {0};
|
||||
alignas(kCacheLineSize) size_t readIdxCache_ = 0;
|
||||
alignas(kCacheLineSize) std::atomic<size_t> readIdx_ = {0};
|
||||
alignas(kCacheLineSize) size_t writeIdxCache_ = 0;
|
||||
|
||||
// Padding to prevent adjacent allocations from sharing a cache line with
|
||||
// writeIdxCache_
|
||||
char padding_[kCacheLineSize - sizeof(SPSCQueue<T>::writeIdxCache_)];
|
||||
};
|
||||
} // namespace tracy
|
||||
|
||||
#if defined (_MSC_VER)
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,9 +1,9 @@
|
||||
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson
|
||||
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson / Rampant Pixels
|
||||
*
|
||||
* This library provides a cross-platform lock free thread caching malloc implementation in C11.
|
||||
* The latest source code is always available at
|
||||
*
|
||||
* https://github.com/mjansson/rpmalloc
|
||||
* https://github.com/rampantpixels/rpmalloc
|
||||
*
|
||||
* This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
|
||||
*
|
||||
@@ -12,113 +12,53 @@
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "../common/TracyApi.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
# define RPMALLOC_EXPORT __attribute__((visibility("default")))
|
||||
# define RPMALLOC_ALLOCATOR
|
||||
# define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
|
||||
# if defined(__clang_major__) && (__clang_major__ < 4)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
|
||||
# else
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) __attribute__((alloc_size(count, size)))
|
||||
# endif
|
||||
# define RPMALLOC_ATTRIBUTE __attribute__((__malloc__))
|
||||
# define RPMALLOC_RESTRICT
|
||||
# define RPMALLOC_CDECL
|
||||
#elif defined(_MSC_VER)
|
||||
# define RPMALLOC_EXPORT
|
||||
# define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
|
||||
# define RPMALLOC_ATTRIB_MALLOC
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
|
||||
# define RPMALLOC_ATTRIBUTE
|
||||
# define RPMALLOC_RESTRICT __declspec(restrict)
|
||||
# define RPMALLOC_CDECL __cdecl
|
||||
#else
|
||||
# define RPMALLOC_EXPORT
|
||||
# define RPMALLOC_ALLOCATOR
|
||||
# define RPMALLOC_ATTRIB_MALLOC
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
|
||||
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
|
||||
# define RPMALLOC_ATTRIBUTE
|
||||
# define RPMALLOC_RESTRICT
|
||||
# define RPMALLOC_CDECL
|
||||
#endif
|
||||
|
||||
//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes
|
||||
#ifndef RPMALLOC_CONFIGURABLE
|
||||
#define RPMALLOC_CONFIGURABLE 0
|
||||
#endif
|
||||
|
||||
//! Flag to rpaligned_realloc to not preserve content in reallocation
|
||||
#define RPMALLOC_NO_PRESERVE 1
|
||||
|
||||
typedef struct rpmalloc_global_statistics_t {
|
||||
//! Current amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
|
||||
//! Current amount of virtual memory mapped (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped;
|
||||
//! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped_peak;
|
||||
//! Current amount of memory in global caches for small and medium sizes (<32KiB)
|
||||
//! Current amount of memory in global caches for small and medium sizes (<64KiB)
|
||||
size_t cached;
|
||||
//! Current amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
|
||||
size_t huge_alloc;
|
||||
//! Peak amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
|
||||
size_t huge_alloc_peak;
|
||||
//! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
|
||||
//! Total amount of memory mapped (only if ENABLE_STATISTICS=1)
|
||||
size_t mapped_total;
|
||||
//! Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1)
|
||||
//! Total amount of memory unmapped (only if ENABLE_STATISTICS=1)
|
||||
size_t unmapped_total;
|
||||
} rpmalloc_global_statistics_t;
|
||||
|
||||
typedef struct rpmalloc_thread_statistics_t {
|
||||
//! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
|
||||
//! Current number of bytes available for allocation from active spans
|
||||
size_t active;
|
||||
//! Current number of bytes available in thread size class caches
|
||||
size_t sizecache;
|
||||
//! Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
|
||||
//! Current number of bytes available in thread span caches
|
||||
size_t spancache;
|
||||
//! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
|
||||
//! Current number of bytes in pending deferred deallocations
|
||||
size_t deferred;
|
||||
//! Total number of bytes transitioned from thread cache to global cache
|
||||
size_t thread_to_global;
|
||||
//! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
|
||||
//! Total number of bytes transitioned from global cache to thread cache
|
||||
size_t global_to_thread;
|
||||
//! Per span count statistics (only if ENABLE_STATISTICS=1)
|
||||
struct {
|
||||
//! Currently used number of spans
|
||||
size_t current;
|
||||
//! High water mark of spans used
|
||||
size_t peak;
|
||||
//! Number of spans transitioned to global cache
|
||||
size_t to_global;
|
||||
//! Number of spans transitioned from global cache
|
||||
size_t from_global;
|
||||
//! Number of spans transitioned to thread cache
|
||||
size_t to_cache;
|
||||
//! Number of spans transitioned from thread cache
|
||||
size_t from_cache;
|
||||
//! Number of spans transitioned to reserved state
|
||||
size_t to_reserved;
|
||||
//! Number of spans transitioned from reserved state
|
||||
size_t from_reserved;
|
||||
//! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
|
||||
size_t map_calls;
|
||||
} span_use[32];
|
||||
//! Per size class statistics (only if ENABLE_STATISTICS=1)
|
||||
struct {
|
||||
//! Current number of allocations
|
||||
size_t alloc_current;
|
||||
//! Peak number of allocations
|
||||
size_t alloc_peak;
|
||||
//! Total number of allocations
|
||||
size_t alloc_total;
|
||||
//! Total number of frees
|
||||
size_t free_total;
|
||||
//! Number of spans transitioned to cache
|
||||
size_t spans_to_cache;
|
||||
//! Number of spans transitioned from cache
|
||||
size_t spans_from_cache;
|
||||
//! Number of spans transitioned from reserved state
|
||||
size_t spans_from_reserved;
|
||||
//! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
|
||||
size_t map_calls;
|
||||
} size_use[128];
|
||||
} rpmalloc_thread_statistics_t;
|
||||
|
||||
typedef struct rpmalloc_config_t {
|
||||
@@ -129,133 +69,85 @@ typedef struct rpmalloc_config_t {
|
||||
// actual start of the memory region due to this alignment. The alignment offset
|
||||
// will be passed to the memory unmap function. The alignment offset MUST NOT be
|
||||
// larger than 65535 (storable in an uint16_t), if it is you must use natural
|
||||
// alignment to shift it into 16 bits. If you set a memory_map function, you
|
||||
// must also set a memory_unmap function or else the default implementation will
|
||||
// be used for both.
|
||||
// alignment to shift it into 16 bits.
|
||||
void* (*memory_map)(size_t size, size_t* offset);
|
||||
//! Unmap the memory pages starting at address and spanning the given number of bytes.
|
||||
// If release is set to non-zero, the unmap is for an entire span range as returned by
|
||||
// a previous call to memory_map and that the entire range should be released. The
|
||||
// release argument holds the size of the entire span range. If release is set to 0,
|
||||
// the unmap is a partial decommit of a subset of the mapped memory range.
|
||||
// If you set a memory_unmap function, you must also set a memory_map function or
|
||||
// else the default implementation will be used for both.
|
||||
void (*memory_unmap)(void* address, size_t size, size_t offset, size_t release);
|
||||
//! Size of memory pages. The page size MUST be a power of two. All memory mapping
|
||||
// If release is set to 1, the unmap is for an entire span range as returned by
|
||||
// a previous call to memory_map and that the entire range should be released.
|
||||
// If release is set to 0, the unmap is a partial decommit of a subset of the mapped
|
||||
// memory range.
|
||||
void (*memory_unmap)(void* address, size_t size, size_t offset, int release);
|
||||
//! Size of memory pages. The page size MUST be a power of two in [512,16384] range
|
||||
// (2^9 to 2^14) unless 0 - set to 0 to use system page size. All memory mapping
|
||||
// requests to memory_map will be made with size set to a multiple of the page size.
|
||||
// Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system page size is used.
|
||||
size_t page_size;
|
||||
//! Size of a span of memory blocks. MUST be a power of two, and in [4096,262144]
|
||||
// range (unless 0 - set to 0 to use the default span size). Used if RPMALLOC_CONFIGURABLE
|
||||
// is defined to 1.
|
||||
//! Size of a span of memory pages. MUST be a multiple of page size, and in [4096,262144]
|
||||
// range (unless 0 - set to 0 to use the default span size).
|
||||
size_t span_size;
|
||||
//! Number of spans to map at each request to map new virtual memory blocks. This can
|
||||
// be used to minimize the system call overhead at the cost of virtual memory address
|
||||
// space. The extra mapped pages will not be written until actually used, so physical
|
||||
// committed memory should not be affected in the default implementation. Will be
|
||||
// aligned to a multiple of spans that match memory page size in case of huge pages.
|
||||
// committed memory should not be affected in the default implementation.
|
||||
size_t span_map_count;
|
||||
//! Enable use of large/huge pages. If this flag is set to non-zero and page size is
|
||||
// zero, the allocator will try to enable huge pages and auto detect the configuration.
|
||||
// If this is set to non-zero and page_size is also non-zero, the allocator will
|
||||
// assume huge pages have been configured and enabled prior to initializing the
|
||||
// allocator.
|
||||
// For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
|
||||
// For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
|
||||
int enable_huge_pages;
|
||||
//! Debug callback if memory guards are enabled. Called if a memory overwrite is detected
|
||||
void (*memory_overwrite)(void* address);
|
||||
} rpmalloc_config_t;
|
||||
|
||||
//! Initialize allocator with default configuration
|
||||
TRACY_API int
|
||||
extern int
|
||||
rpmalloc_initialize(void);
|
||||
|
||||
//! Initialize allocator with given configuration
|
||||
RPMALLOC_EXPORT int
|
||||
extern int
|
||||
rpmalloc_initialize_config(const rpmalloc_config_t* config);
|
||||
|
||||
//! Get allocator configuration
|
||||
RPMALLOC_EXPORT const rpmalloc_config_t*
|
||||
extern const rpmalloc_config_t*
|
||||
rpmalloc_config(void);
|
||||
|
||||
//! Finalize allocator
|
||||
TRACY_API void
|
||||
extern void
|
||||
rpmalloc_finalize(void);
|
||||
|
||||
//! Initialize allocator for calling thread
|
||||
TRACY_API void
|
||||
void
|
||||
rpmalloc_thread_initialize(void);
|
||||
|
||||
//! Finalize allocator for calling thread
|
||||
TRACY_API void
|
||||
extern void
|
||||
rpmalloc_thread_finalize(void);
|
||||
|
||||
//! Perform deferred deallocations pending for the calling thread heap
|
||||
RPMALLOC_EXPORT void
|
||||
extern void
|
||||
rpmalloc_thread_collect(void);
|
||||
|
||||
//! Query if allocator is initialized for calling thread
|
||||
RPMALLOC_EXPORT int
|
||||
extern int
|
||||
rpmalloc_is_thread_initialized(void);
|
||||
|
||||
//! Get per-thread statistics
|
||||
RPMALLOC_EXPORT void
|
||||
extern void
|
||||
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats);
|
||||
|
||||
//! Get global statistics
|
||||
RPMALLOC_EXPORT void
|
||||
extern void
|
||||
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats);
|
||||
|
||||
//! Dump all statistics in human readable format to file (should be a FILE*)
|
||||
RPMALLOC_EXPORT void
|
||||
rpmalloc_dump_statistics(void* file);
|
||||
TRACY_API RPMALLOC_RESTRICT void*
|
||||
rpmalloc(size_t size) RPMALLOC_ATTRIBUTE;
|
||||
|
||||
//! Allocate a memory block of at least the given size
|
||||
TRACY_API RPMALLOC_ALLOCATOR void*
|
||||
rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
|
||||
|
||||
//! Free the given memory block
|
||||
TRACY_API void
|
||||
rpfree(void* ptr);
|
||||
|
||||
//! Allocate a memory block of at least the given size and zero initialize it
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
|
||||
//! Reallocate the given block to at least the given size
|
||||
TRACY_API RPMALLOC_ALLOCATOR void*
|
||||
rprealloc(void* ptr, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
extern void*
|
||||
rprealloc(void* ptr, size_t size);
|
||||
|
||||
//! Reallocate the given block to at least the given size and alignment,
|
||||
// with optional control flags (see RPMALLOC_NO_PRESERVE).
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);
|
||||
extern void*
|
||||
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags);
|
||||
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
|
||||
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
|
||||
//! Allocate a memory block of at least the given size and alignment.
|
||||
// Alignment must be a power of two and a multiple of sizeof(void*),
|
||||
// and should ideally be less than memory page size. A caveat of rpmalloc
|
||||
// internals is that this must also be strictly less than the span size (default 64KiB)
|
||||
RPMALLOC_EXPORT int
|
||||
extern int
|
||||
rpposix_memalign(void **memptr, size_t alignment, size_t size);
|
||||
|
||||
//! Query the usable size of the given memory block (from given pointer to the end of block)
|
||||
RPMALLOC_EXPORT size_t
|
||||
extern size_t
|
||||
rpmalloc_usable_size(void* ptr);
|
||||
|
||||
}
|
||||
|
||||
@@ -4,30 +4,14 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# include "TracyApi.h"
|
||||
# include "TracyForceInline.hpp"
|
||||
# include "../client/tracy_rpmalloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
TRACY_API void InitRpmalloc();
|
||||
#endif
|
||||
|
||||
// Allocates `size` bytes. With TRACY_ENABLE defined this calls InitRpmalloc()
// and then allocates through rpmalloc(); otherwise it forwards to malloc().
// NOTE(review): InitRpmalloc() presumably makes sure rpmalloc is initialized
// before first use - confirm against its definition.
static inline void* tracy_malloc( size_t size )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
InitRpmalloc();
|
||||
return rpmalloc( size );
|
||||
#else
|
||||
return malloc( size );
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void* tracy_malloc_fast( size_t size )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
return rpmalloc( size );
|
||||
#else
|
||||
@@ -38,32 +22,12 @@ static inline void* tracy_malloc_fast( size_t size )
|
||||
// Releases `ptr`. With TRACY_ENABLE defined this calls InitRpmalloc() and then
// frees through rpfree(); otherwise it forwards to free().
// NOTE(review): InitRpmalloc() presumably makes sure rpmalloc is initialized
// before first use - confirm against its definition.
static inline void tracy_free( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
InitRpmalloc();
|
||||
rpfree( ptr );
|
||||
#else
|
||||
free( ptr );
|
||||
#endif
|
||||
}
|
||||
|
||||
// Releases `ptr` through rpfree() when TRACY_ENABLE is defined, otherwise
// through free(). Unlike tracy_free(), no InitRpmalloc() call is made first.
// NOTE(review): presumably only valid where rpmalloc is already initialized
// for the calling thread - confirm with callers.
static inline void tracy_free_fast( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
rpfree( ptr );
|
||||
#else
|
||||
free( ptr );
|
||||
#endif
|
||||
}
|
||||
|
||||
// Resizes the block at `ptr` to `size` bytes and returns the resulting pointer.
// With TRACY_ENABLE defined this calls InitRpmalloc() and then rprealloc();
// otherwise it forwards to realloc().
// NOTE(review): InitRpmalloc() presumably makes sure rpmalloc is initialized
// before first use - confirm against its definition.
static inline void* tracy_realloc( void* ptr, size_t size )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
InitRpmalloc();
|
||||
return rprealloc( ptr, size );
|
||||
#else
|
||||
return realloc( ptr, size );
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,16 +1,14 @@
|
||||
#ifndef __TRACYAPI_H__
|
||||
#define __TRACYAPI_H__
|
||||
|
||||
#if defined _WIN32
|
||||
# if defined TRACY_EXPORTS
|
||||
# define TRACY_API __declspec(dllexport)
|
||||
# elif defined TRACY_IMPORTS
|
||||
# define TRACY_API __declspec(dllimport)
|
||||
# else
|
||||
# define TRACY_API
|
||||
# endif
|
||||
#else
|
||||
# define TRACY_API __attribute__((visibility("default")))
|
||||
#endif
|
||||
|
||||
#endif // __TRACYAPI_H__
|
||||
#ifndef __TRACYAPI_H__
|
||||
#define __TRACYAPI_H__
|
||||
|
||||
#ifdef _WIN32
|
||||
# if defined TRACY_IMPORTS
|
||||
# define TRACY_API __declspec(dllimport)
|
||||
# else
|
||||
# define TRACY_API __declspec(dllexport)
|
||||
# endif
|
||||
#else
|
||||
# define TRACY_API __attribute__((visibility("default")))
|
||||
#endif
|
||||
|
||||
#endif // __TRACYAPI_H__
|
||||
|
||||
@@ -10,6 +10,15 @@ namespace tracy
|
||||
using TracyMutex = std::shared_mutex;
|
||||
}
|
||||
|
||||
#elif defined __CYGWIN__
|
||||
|
||||
#include "tracy_benaphore.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
using TracyMutex = NonRecursiveBenaphore;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <mutex>
|
||||
|
||||
@@ -9,8 +9,8 @@ namespace tracy
|
||||
|
||||
// Returns isize plus one extra byte per 255 input bytes plus a constant 16.
// NOTE(review): this is the same formula as LZ4's LZ4_COMPRESSBOUND, so it is
// presumably the worst-case compressed size for an input of isize bytes - confirm.
constexpr unsigned Lz4CompressBound( unsigned isize )
{
    return 16 + isize + isize / 255;
}
|
||||
|
||||
enum : uint32_t { ProtocolVersion = 57 };
|
||||
enum : uint16_t { BroadcastVersion = 2 };
|
||||
enum : uint32_t { ProtocolVersion = 25 };
|
||||
enum : uint32_t { BroadcastVersion = 0 };
|
||||
|
||||
using lz4sz_t = uint32_t;
|
||||
|
||||
@@ -36,7 +36,6 @@ enum { WelcomeMessageHostInfoSize = 1024 };
|
||||
|
||||
#pragma pack( 1 )
|
||||
|
||||
// Must increase left query space after handling!
|
||||
enum ServerQuery : uint8_t
|
||||
{
|
||||
ServerQueryTerminate,
|
||||
@@ -44,53 +43,22 @@ enum ServerQuery : uint8_t
|
||||
ServerQueryThreadString,
|
||||
ServerQuerySourceLocation,
|
||||
ServerQueryPlotName,
|
||||
ServerQueryFrameName,
|
||||
ServerQueryParameter,
|
||||
ServerQueryFiberName,
|
||||
// Items above are high priority. Split order must be preserved. See IsQueryPrio().
|
||||
ServerQueryDisconnect,
|
||||
ServerQueryCallstackFrame,
|
||||
ServerQueryFrameName,
|
||||
ServerQueryDisconnect,
|
||||
ServerQueryExternalName,
|
||||
ServerQuerySymbol,
|
||||
ServerQuerySymbolCode,
|
||||
ServerQueryCodeLocation,
|
||||
ServerQuerySourceCode,
|
||||
ServerQueryDataTransfer,
|
||||
ServerQueryDataTransferPart
|
||||
ServerQueryParameter
|
||||
};
|
||||
|
||||
struct ServerQueryPacket
|
||||
{
|
||||
ServerQuery type;
|
||||
uint64_t ptr;
|
||||
uint32_t extra;
|
||||
};
|
||||
|
||||
enum { ServerQueryPacketSize = sizeof( ServerQueryPacket ) };
|
||||
|
||||
|
||||
enum CpuArchitecture : uint8_t
|
||||
{
|
||||
CpuArchUnknown,
|
||||
CpuArchX86,
|
||||
CpuArchX64,
|
||||
CpuArchArm32,
|
||||
CpuArchArm64
|
||||
};
|
||||
|
||||
|
||||
struct WelcomeFlag
|
||||
{
|
||||
enum _t : uint8_t
|
||||
{
|
||||
OnDemand = 1 << 0,
|
||||
IsApple = 1 << 1,
|
||||
CodeTransfer = 1 << 2,
|
||||
CombineSamples = 1 << 3,
|
||||
IdentifySamples = 1 << 4,
|
||||
};
|
||||
};
|
||||
|
||||
struct WelcomeMessage
|
||||
{
|
||||
double timerMul;
|
||||
@@ -99,13 +67,9 @@ struct WelcomeMessage
|
||||
uint64_t delay;
|
||||
uint64_t resolution;
|
||||
uint64_t epoch;
|
||||
uint64_t exectime;
|
||||
uint64_t pid;
|
||||
int64_t samplingPeriod;
|
||||
uint8_t flags;
|
||||
uint8_t cpuArch;
|
||||
char cpuManufacturer[12];
|
||||
uint32_t cpuId;
|
||||
uint8_t onDemand;
|
||||
uint8_t isApple;
|
||||
char programName[WelcomeMessageProgramNameSize];
|
||||
char hostInfo[WelcomeMessageHostInfoSize];
|
||||
};
|
||||
@@ -124,10 +88,9 @@ enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) };
|
||||
|
||||
struct BroadcastMessage
|
||||
{
|
||||
uint16_t broadcastVersion;
|
||||
uint16_t listenPort;
|
||||
uint32_t broadcastVersion;
|
||||
uint32_t protocolVersion;
|
||||
int32_t activeTime; // in seconds
|
||||
uint32_t activeTime; // in seconds
|
||||
char programName[WelcomeMessageProgramNameSize];
|
||||
};
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#ifndef __TRACYQUEUE_HPP__
|
||||
#define __TRACYQUEUE_HPP__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
@@ -18,11 +17,9 @@ enum class QueueType : uint8_t
|
||||
MessageAppInfo,
|
||||
ZoneBeginAllocSrcLoc,
|
||||
ZoneBeginAllocSrcLocCallstack,
|
||||
CallstackSerial,
|
||||
CallstackMemory,
|
||||
Callstack,
|
||||
CallstackAlloc,
|
||||
CallstackSample,
|
||||
CallstackSampleContextSwitch,
|
||||
FrameImage,
|
||||
ZoneBegin,
|
||||
ZoneBeginCallstack,
|
||||
@@ -33,46 +30,26 @@ enum class QueueType : uint8_t
|
||||
LockSharedWait,
|
||||
LockSharedObtain,
|
||||
LockSharedRelease,
|
||||
LockName,
|
||||
MemAlloc,
|
||||
MemAllocNamed,
|
||||
MemFree,
|
||||
MemFreeNamed,
|
||||
MemAllocCallstack,
|
||||
MemAllocCallstackNamed,
|
||||
MemFreeCallstack,
|
||||
MemFreeCallstackNamed,
|
||||
GpuZoneBegin,
|
||||
GpuZoneBeginCallstack,
|
||||
GpuZoneBeginAllocSrcLoc,
|
||||
GpuZoneBeginAllocSrcLocCallstack,
|
||||
GpuZoneEnd,
|
||||
GpuZoneBeginSerial,
|
||||
GpuZoneBeginCallstackSerial,
|
||||
GpuZoneBeginAllocSrcLocSerial,
|
||||
GpuZoneBeginAllocSrcLocCallstackSerial,
|
||||
GpuZoneEndSerial,
|
||||
PlotData,
|
||||
ContextSwitch,
|
||||
ThreadWakeup,
|
||||
GpuTime,
|
||||
GpuContextName,
|
||||
CallstackFrameSize,
|
||||
SymbolInformation,
|
||||
CodeInformation,
|
||||
ExternalNameMetadata,
|
||||
SymbolCodeMetadata,
|
||||
FiberEnter,
|
||||
FiberLeave,
|
||||
Terminate,
|
||||
KeepAlive,
|
||||
ThreadContext,
|
||||
GpuCalibration,
|
||||
Crash,
|
||||
CrashReport,
|
||||
ZoneValidation,
|
||||
ZoneColor,
|
||||
ZoneValue,
|
||||
FrameMarkMsg,
|
||||
FrameMarkMsgStart,
|
||||
FrameMarkMsgEnd,
|
||||
@@ -85,26 +62,16 @@ enum class QueueType : uint8_t
|
||||
MessageLiteralCallstack,
|
||||
MessageLiteralColorCallstack,
|
||||
GpuNewContext,
|
||||
CallstackFrameSize,
|
||||
CallstackFrame,
|
||||
SysTimeReport,
|
||||
TidToPid,
|
||||
HwSampleCpuCycle,
|
||||
HwSampleInstructionRetired,
|
||||
HwSampleCacheReference,
|
||||
HwSampleCacheMiss,
|
||||
HwSampleBranchRetired,
|
||||
HwSampleBranchMiss,
|
||||
PlotConfig,
|
||||
ParamSetup,
|
||||
AckServerQueryNoop,
|
||||
AckSourceCodeNotAvailable,
|
||||
AckSymbolCodeNotAvailable,
|
||||
CpuTopology,
|
||||
SingleStringData,
|
||||
SecondStringData,
|
||||
MemNamePayload,
|
||||
StringData,
|
||||
ThreadName,
|
||||
CustomStringData,
|
||||
PlotName,
|
||||
SourceLocationPayload,
|
||||
CallstackPayload,
|
||||
@@ -113,9 +80,6 @@ enum class QueueType : uint8_t
|
||||
FrameImageData,
|
||||
ExternalName,
|
||||
ExternalThreadName,
|
||||
SymbolCode,
|
||||
SourceCode,
|
||||
FiberName,
|
||||
NUM_TYPES
|
||||
};
|
||||
|
||||
@@ -123,66 +87,25 @@ enum class QueueType : uint8_t
|
||||
|
||||
struct QueueThreadContext
|
||||
{
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueZoneBeginLean
|
||||
struct QueueZoneBegin
|
||||
{
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueZoneBegin : public QueueZoneBeginLean
|
||||
{
|
||||
uint64_t srcloc; // ptr
|
||||
};
|
||||
|
||||
struct QueueZoneBeginThread : public QueueZoneBegin
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueZoneEnd
|
||||
{
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueZoneEndThread : public QueueZoneEnd
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueZoneValidation
|
||||
{
|
||||
uint32_t id;
|
||||
};
|
||||
|
||||
struct QueueZoneValidationThread : public QueueZoneValidation
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueZoneColor
|
||||
{
|
||||
uint8_t r;
|
||||
uint8_t g;
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueZoneColorThread : public QueueZoneColor
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueZoneValue
|
||||
{
|
||||
uint64_t value;
|
||||
};
|
||||
|
||||
struct QueueZoneValueThread : public QueueZoneValue
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueStringTransfer
|
||||
{
|
||||
uint64_t ptr;
|
||||
@@ -196,17 +119,13 @@ struct QueueFrameMark
|
||||
|
||||
struct QueueFrameImage
|
||||
{
|
||||
uint32_t frame;
|
||||
uint64_t image; // ptr
|
||||
uint64_t frame;
|
||||
uint16_t w;
|
||||
uint16_t h;
|
||||
uint8_t flip;
|
||||
};
|
||||
|
||||
struct QueueFrameImageFat : public QueueFrameImage
|
||||
{
|
||||
uint64_t image; // ptr
|
||||
};
|
||||
|
||||
struct QueueSourceLocation
|
||||
{
|
||||
uint64_t name;
|
||||
@@ -218,15 +137,9 @@ struct QueueSourceLocation
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueZoneTextFat
|
||||
struct QueueZoneText
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueZoneTextFatThread : public QueueZoneTextFat
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
enum class LockType : uint8_t
|
||||
@@ -243,64 +156,42 @@ struct QueueLockAnnounce
|
||||
LockType type;
|
||||
};
|
||||
|
||||
struct QueueFiberEnter
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t fiber; // ptr
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueFiberLeave
|
||||
{
|
||||
int64_t time;
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueLockTerminate
|
||||
{
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
LockType type;
|
||||
};
|
||||
|
||||
struct QueueLockWait
|
||||
{
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
LockType type;
|
||||
};
|
||||
|
||||
struct QueueLockObtain
|
||||
{
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueLockRelease
|
||||
{
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueLockMark
|
||||
{
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
uint32_t id;
|
||||
uint64_t srcloc; // ptr
|
||||
};
|
||||
|
||||
struct QueueLockName
|
||||
{
|
||||
uint32_t id;
|
||||
};
|
||||
|
||||
struct QueueLockNameFat : public QueueLockName
|
||||
{
|
||||
uint64_t name; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
enum class PlotDataType : uint8_t
|
||||
{
|
||||
Float,
|
||||
@@ -324,6 +215,7 @@ struct QueuePlotData
|
||||
struct QueueMessage
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueMessageColor : public QueueMessage
|
||||
@@ -333,92 +225,29 @@ struct QueueMessageColor : public QueueMessage
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueMessageLiteral : public QueueMessage
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueMessageLiteralThread : public QueueMessageLiteral
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueMessageColorLiteral : public QueueMessageColor
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueMessageColorLiteralThread : public QueueMessageColorLiteral
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueMessageFat : public QueueMessage
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueMessageFatThread : public QueueMessageFat
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueMessageColorFat : public QueueMessageColor
|
||||
{
|
||||
uint64_t text; // ptr
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueMessageColorFatThread : public QueueMessageColorFat
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
// Don't change order, only add new entries at the end, this is also used on trace dumps!
|
||||
enum class GpuContextType : uint8_t
|
||||
{
|
||||
Invalid,
|
||||
OpenGl,
|
||||
Vulkan,
|
||||
OpenCL,
|
||||
Direct3D12,
|
||||
Direct3D11
|
||||
};
|
||||
|
||||
enum GpuContextFlags : uint8_t
|
||||
{
|
||||
GpuContextCalibration = 1 << 0
|
||||
};
|
||||
|
||||
struct QueueGpuNewContext
|
||||
{
|
||||
int64_t cpuTime;
|
||||
int64_t gpuTime;
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
float period;
|
||||
uint8_t context;
|
||||
GpuContextFlags flags;
|
||||
GpuContextType type;
|
||||
uint8_t accuracyBits;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneBeginLean
|
||||
struct QueueGpuZoneBegin
|
||||
{
|
||||
int64_t cpuTime;
|
||||
uint32_t thread;
|
||||
uint64_t srcloc;
|
||||
uint64_t thread;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean
|
||||
{
|
||||
uint64_t srcloc;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneEnd
|
||||
{
|
||||
int64_t cpuTime;
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
uint16_t queryId;
|
||||
uint8_t context;
|
||||
};
|
||||
@@ -430,34 +259,10 @@ struct QueueGpuTime
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuCalibration
|
||||
{
|
||||
int64_t gpuTime;
|
||||
int64_t cpuTime;
|
||||
int64_t cpuDelta;
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuContextName
|
||||
{
|
||||
uint8_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuContextNameFat : public QueueGpuContextName
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint16_t size;
|
||||
};
|
||||
|
||||
struct QueueMemNamePayload
|
||||
{
|
||||
uint64_t name;
|
||||
};
|
||||
|
||||
struct QueueMemAlloc
|
||||
{
|
||||
int64_t time;
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
uint64_t ptr;
|
||||
char size[6];
|
||||
};
|
||||
@@ -465,84 +270,37 @@ struct QueueMemAlloc
|
||||
struct QueueMemFree
|
||||
{
|
||||
int64_t time;
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackFat
|
||||
struct QueueCallstackMemory
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackFatThread : public QueueCallstackFat
|
||||
struct QueueCallstack
|
||||
{
|
||||
uint32_t thread;
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackAllocFat
|
||||
struct QueueCallstackAlloc
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint64_t nativePtr;
|
||||
};
|
||||
|
||||
struct QueueCallstackAllocFatThread : public QueueCallstackAllocFat
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueCallstackSample
|
||||
{
|
||||
int64_t time;
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueCallstackSampleFat : public QueueCallstackSample
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueCallstackFrameSize
|
||||
{
|
||||
uint64_t ptr;
|
||||
uint8_t size;
|
||||
};
|
||||
|
||||
struct QueueCallstackFrameSizeFat : public QueueCallstackFrameSize
|
||||
{
|
||||
uint64_t data;
|
||||
uint64_t imageName;
|
||||
};
|
||||
|
||||
struct QueueCallstackFrame
|
||||
{
|
||||
uint64_t name;
|
||||
uint64_t file;
|
||||
uint32_t line;
|
||||
uint64_t symAddr;
|
||||
uint32_t symLen;
|
||||
};
|
||||
|
||||
struct QueueSymbolInformation
|
||||
{
|
||||
uint32_t line;
|
||||
uint64_t symAddr;
|
||||
};
|
||||
|
||||
struct QueueSymbolInformationFat : public QueueSymbolInformation
|
||||
{
|
||||
uint64_t fileString;
|
||||
uint8_t needFree;
|
||||
};
|
||||
|
||||
struct QueueCodeInformation
|
||||
{
|
||||
uint64_t symAddr;
|
||||
uint32_t line;
|
||||
uint64_t ptrOffset;
|
||||
};
|
||||
|
||||
struct QueueCodeInformationFat : public QueueCodeInformation
|
||||
{
|
||||
uint64_t fileString;
|
||||
uint8_t needFree;
|
||||
};
|
||||
|
||||
struct QueueCrashReport
|
||||
@@ -551,11 +309,6 @@ struct QueueCrashReport
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueCrashReportThread
|
||||
{
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueSysTime
|
||||
{
|
||||
int64_t time;
|
||||
@@ -565,8 +318,8 @@ struct QueueSysTime
|
||||
struct QueueContextSwitch
|
||||
{
|
||||
int64_t time;
|
||||
uint32_t oldThread;
|
||||
uint32_t newThread;
|
||||
uint64_t oldThread;
|
||||
uint64_t newThread;
|
||||
uint8_t cpu;
|
||||
uint8_t reason;
|
||||
uint8_t state;
|
||||
@@ -575,7 +328,7 @@ struct QueueContextSwitch
|
||||
struct QueueThreadWakeup
|
||||
{
|
||||
int64_t time;
|
||||
uint32_t thread;
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueTidToPid
|
||||
@@ -584,12 +337,6 @@ struct QueueTidToPid
|
||||
uint64_t pid;
|
||||
};
|
||||
|
||||
struct QueueHwSample
|
||||
{
|
||||
uint64_t ip;
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
enum class PlotFormatType : uint8_t
|
||||
{
|
||||
Number,
|
||||
@@ -618,20 +365,6 @@ struct QueueCpuTopology
|
||||
uint32_t thread;
|
||||
};
|
||||
|
||||
struct QueueExternalNameMetadata
|
||||
{
|
||||
uint64_t thread;
|
||||
uint64_t name;
|
||||
uint64_t threadName;
|
||||
};
|
||||
|
||||
struct QueueSymbolCodeMetadata
|
||||
{
|
||||
uint64_t symbol;
|
||||
uint64_t ptr;
|
||||
uint32_t size;
|
||||
};
|
||||
|
||||
struct QueueHeader
|
||||
{
|
||||
union
|
||||
@@ -648,80 +381,41 @@ struct QueueItem
|
||||
{
|
||||
QueueThreadContext threadCtx;
|
||||
QueueZoneBegin zoneBegin;
|
||||
QueueZoneBeginLean zoneBeginLean;
|
||||
QueueZoneBeginThread zoneBeginThread;
|
||||
QueueZoneEnd zoneEnd;
|
||||
QueueZoneEndThread zoneEndThread;
|
||||
QueueZoneValidation zoneValidation;
|
||||
QueueZoneValidationThread zoneValidationThread;
|
||||
QueueZoneColor zoneColor;
|
||||
QueueZoneColorThread zoneColorThread;
|
||||
QueueZoneValue zoneValue;
|
||||
QueueZoneValueThread zoneValueThread;
|
||||
QueueStringTransfer stringTransfer;
|
||||
QueueFrameMark frameMark;
|
||||
QueueFrameImage frameImage;
|
||||
QueueFrameImageFat frameImageFat;
|
||||
QueueSourceLocation srcloc;
|
||||
QueueZoneTextFat zoneTextFat;
|
||||
QueueZoneTextFatThread zoneTextFatThread;
|
||||
QueueZoneText zoneText;
|
||||
QueueLockAnnounce lockAnnounce;
|
||||
QueueLockTerminate lockTerminate;
|
||||
QueueLockWait lockWait;
|
||||
QueueLockObtain lockObtain;
|
||||
QueueLockRelease lockRelease;
|
||||
QueueLockMark lockMark;
|
||||
QueueLockName lockName;
|
||||
QueueLockNameFat lockNameFat;
|
||||
QueuePlotData plotData;
|
||||
QueueMessage message;
|
||||
QueueMessageColor messageColor;
|
||||
QueueMessageLiteral messageLiteral;
|
||||
QueueMessageLiteralThread messageLiteralThread;
|
||||
QueueMessageColorLiteral messageColorLiteral;
|
||||
QueueMessageColorLiteralThread messageColorLiteralThread;
|
||||
QueueMessageFat messageFat;
|
||||
QueueMessageFatThread messageFatThread;
|
||||
QueueMessageColorFat messageColorFat;
|
||||
QueueMessageColorFatThread messageColorFatThread;
|
||||
QueueGpuNewContext gpuNewContext;
|
||||
QueueGpuZoneBegin gpuZoneBegin;
|
||||
QueueGpuZoneBeginLean gpuZoneBeginLean;
|
||||
QueueGpuZoneEnd gpuZoneEnd;
|
||||
QueueGpuTime gpuTime;
|
||||
QueueGpuCalibration gpuCalibration;
|
||||
QueueGpuContextName gpuContextName;
|
||||
QueueGpuContextNameFat gpuContextNameFat;
|
||||
QueueMemAlloc memAlloc;
|
||||
QueueMemFree memFree;
|
||||
QueueMemNamePayload memName;
|
||||
QueueCallstackFat callstackFat;
|
||||
QueueCallstackFatThread callstackFatThread;
|
||||
QueueCallstackAllocFat callstackAllocFat;
|
||||
QueueCallstackAllocFatThread callstackAllocFatThread;
|
||||
QueueCallstackSample callstackSample;
|
||||
QueueCallstackSampleFat callstackSampleFat;
|
||||
QueueCallstackMemory callstackMemory;
|
||||
QueueCallstack callstack;
|
||||
QueueCallstackAlloc callstackAlloc;
|
||||
QueueCallstackFrameSize callstackFrameSize;
|
||||
QueueCallstackFrameSizeFat callstackFrameSizeFat;
|
||||
QueueCallstackFrame callstackFrame;
|
||||
QueueSymbolInformation symbolInformation;
|
||||
QueueSymbolInformationFat symbolInformationFat;
|
||||
QueueCodeInformation codeInformation;
|
||||
QueueCodeInformationFat codeInformationFat;
|
||||
QueueCrashReport crashReport;
|
||||
QueueCrashReportThread crashReportThread;
|
||||
QueueSysTime sysTime;
|
||||
QueueContextSwitch contextSwitch;
|
||||
QueueThreadWakeup threadWakeup;
|
||||
QueueTidToPid tidToPid;
|
||||
QueueHwSample hwSample;
|
||||
QueuePlotConfig plotConfig;
|
||||
QueueParamSetup paramSetup;
|
||||
QueueCpuTopology cpuTopology;
|
||||
QueueExternalNameMetadata externalNameMetadata;
|
||||
QueueSymbolCodeMetadata symbolCodeMetadata;
|
||||
QueueFiberEnter fiberEnter;
|
||||
QueueFiberLeave fiberLeave;
|
||||
};
|
||||
};
|
||||
#pragma pack()
|
||||
@@ -730,20 +424,18 @@ struct QueueItem
|
||||
enum { QueueItemSize = sizeof( QueueItem ) };
|
||||
|
||||
static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ), // zone text
|
||||
sizeof( QueueHeader ), // zone name
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneText ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneText ), // zone name
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack
|
||||
sizeof( QueueHeader ), // callstack memory
|
||||
sizeof( QueueHeader ), // callstack
|
||||
sizeof( QueueHeader ), // callstack alloc
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), // context switch
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackMemory ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstack ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackAlloc ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameImage ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
|
||||
@@ -754,47 +446,27 @@ static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockName ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial
|
||||
sizeof( QueueHeader ) + sizeof( QueuePlotData ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueContextSwitch ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuContextName ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCodeInformation ),
|
||||
sizeof( QueueHeader ), // ExternalNameMetadata - not for wire transfer
|
||||
sizeof( QueueHeader ), // SymbolCodeMetadata - not for wire transfer
|
||||
sizeof( QueueHeader ) + sizeof( QueueFiberEnter ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueFiberLeave ),
|
||||
// above items must be first
|
||||
sizeof( QueueHeader ), // terminate
|
||||
sizeof( QueueHeader ), // keep alive
|
||||
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
|
||||
sizeof( QueueHeader ), // crash
|
||||
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneColor ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneValue ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // end
|
||||
@@ -802,32 +474,22 @@ static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockTerminate ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockMark ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal, callstack
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSysTime ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle
|
||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired
|
||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache reference
|
||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache miss
|
||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch retired
|
||||
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch miss
|
||||
sizeof( QueueHeader ) + sizeof( QueuePlotConfig ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
|
||||
sizeof( QueueHeader ), // server query acknowledgement
|
||||
sizeof( QueueHeader ), // source code not available
|
||||
sizeof( QueueHeader ), // symbol code not available
|
||||
sizeof( QueueHeader ) + sizeof( QueueCpuTopology ),
|
||||
sizeof( QueueHeader ), // single string data
|
||||
sizeof( QueueHeader ), // second string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ),
|
||||
// keep all QueueStringTransfer below
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // custom string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload
|
||||
@@ -836,9 +498,6 @@ static constexpr size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame image data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external thread name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // symbol code
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // source code
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // fiber name
|
||||
};
|
||||
|
||||
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
|
||||
@@ -846,6 +505,6 @@ static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType:
|
||||
static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" );
|
||||
static_assert( sizeof( void* ) == sizeof( uintptr_t ), "Pointer size != uintptr_t" );
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
#include <new>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -8,7 +7,6 @@
|
||||
|
||||
#include "TracyAlloc.hpp"
|
||||
#include "TracySocket.hpp"
|
||||
#include "TracySystem.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifndef NOMINMAX
|
||||
@@ -25,8 +23,6 @@
|
||||
# include <arpa/inet.h>
|
||||
# include <sys/socket.h>
|
||||
# include <sys/param.h>
|
||||
# include <errno.h>
|
||||
# include <fcntl.h>
|
||||
# include <netinet/in.h>
|
||||
# include <netdb.h>
|
||||
# include <unistd.h>
|
||||
@@ -74,7 +70,6 @@ Socket::Socket()
|
||||
, m_bufPtr( nullptr )
|
||||
, m_sock( -1 )
|
||||
, m_bufLeft( 0 )
|
||||
, m_ptr( nullptr )
|
||||
{
|
||||
#ifdef _WIN32
|
||||
InitWinSock();
|
||||
@@ -86,72 +81,21 @@ Socket::Socket( int sock )
|
||||
, m_bufPtr( nullptr )
|
||||
, m_sock( sock )
|
||||
, m_bufLeft( 0 )
|
||||
, m_ptr( nullptr )
|
||||
{
|
||||
}
|
||||
|
||||
Socket::~Socket()
|
||||
{
|
||||
tracy_free( m_buf );
|
||||
if( m_sock.load( std::memory_order_relaxed ) != -1 )
|
||||
if( m_sock != -1 )
|
||||
{
|
||||
Close();
|
||||
}
|
||||
if( m_ptr )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
#ifdef _WIN32
|
||||
closesocket( m_connSock );
|
||||
#else
|
||||
close( m_connSock );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
bool Socket::Connect( const char* addr, uint16_t port )
|
||||
bool Socket::Connect( const char* addr, int port )
|
||||
{
|
||||
assert( !IsValid() );
|
||||
|
||||
if( m_ptr )
|
||||
{
|
||||
const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen );
|
||||
if( c == -1 )
|
||||
{
|
||||
#if defined _WIN32
|
||||
const auto err = WSAGetLastError();
|
||||
if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
|
||||
if( err != WSAEISCONN )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
closesocket( m_connSock );
|
||||
m_ptr = nullptr;
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
const auto err = errno;
|
||||
if( err == EALREADY || err == EINPROGRESS ) return false;
|
||||
if( err != EISCONN )
|
||||
{
|
||||
freeaddrinfo( m_res );
|
||||
close( m_connSock );
|
||||
m_ptr = nullptr;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined _WIN32
|
||||
u_long nonblocking = 0;
|
||||
ioctlsocket( m_connSock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
int flags = fcntl( m_connSock, F_GETFL, 0 );
|
||||
fcntl( m_connSock, F_SETFL, flags & ~O_NONBLOCK );
|
||||
#endif
|
||||
m_sock.store( m_connSock, std::memory_order_relaxed );
|
||||
freeaddrinfo( m_res );
|
||||
m_ptr = nullptr;
|
||||
return true;
|
||||
}
|
||||
assert( m_sock == -1 );
|
||||
|
||||
struct addrinfo hints;
|
||||
struct addrinfo *res, *ptr;
|
||||
@@ -161,79 +105,7 @@ bool Socket::Connect( const char* addr, uint16_t port )
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%" PRIu16, port );
|
||||
|
||||
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
int sock = 0;
|
||||
for( ptr = res; ptr; ptr = ptr->ai_next )
|
||||
{
|
||||
if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
|
||||
#if defined __APPLE__
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
u_long nonblocking = 1;
|
||||
ioctlsocket( sock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
int flags = fcntl( sock, F_GETFL, 0 );
|
||||
fcntl( sock, F_SETFL, flags | O_NONBLOCK );
|
||||
#endif
|
||||
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == 0 )
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined _WIN32
|
||||
const auto err = WSAGetLastError();
|
||||
if( err != WSAEWOULDBLOCK )
|
||||
{
|
||||
closesocket( sock );
|
||||
continue;
|
||||
}
|
||||
#else
|
||||
if( errno != EINPROGRESS )
|
||||
{
|
||||
close( sock );
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
m_res = res;
|
||||
m_ptr = ptr;
|
||||
m_connSock = sock;
|
||||
return false;
|
||||
}
|
||||
freeaddrinfo( res );
|
||||
if( !ptr ) return false;
|
||||
|
||||
#if defined _WIN32
|
||||
u_long nonblocking = 0;
|
||||
ioctlsocket( sock, FIONBIO, &nonblocking );
|
||||
#else
|
||||
int flags = fcntl( sock, F_GETFL, 0 );
|
||||
fcntl( sock, F_SETFL, flags & ~O_NONBLOCK );
|
||||
#endif
|
||||
|
||||
m_sock.store( sock, std::memory_order_relaxed );
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Socket::ConnectBlocking( const char* addr, uint16_t port )
|
||||
{
|
||||
assert( !IsValid() );
|
||||
assert( !m_ptr );
|
||||
|
||||
struct addrinfo hints;
|
||||
struct addrinfo *res, *ptr;
|
||||
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = AF_UNSPEC;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%" PRIu16, port );
|
||||
sprintf( portbuf, "%i", port );
|
||||
|
||||
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
int sock = 0;
|
||||
@@ -258,31 +130,29 @@ bool Socket::ConnectBlocking( const char* addr, uint16_t port )
|
||||
freeaddrinfo( res );
|
||||
if( !ptr ) return false;
|
||||
|
||||
m_sock.store( sock, std::memory_order_relaxed );
|
||||
m_sock = sock;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Socket::Close()
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
assert( sock != -1 );
|
||||
assert( m_sock != -1 );
|
||||
#ifdef _WIN32
|
||||
closesocket( sock );
|
||||
closesocket( m_sock );
|
||||
#else
|
||||
close( sock );
|
||||
close( m_sock );
|
||||
#endif
|
||||
m_sock.store( -1, std::memory_order_relaxed );
|
||||
m_sock = -1;
|
||||
}
|
||||
|
||||
int Socket::Send( const void* _buf, int len )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (const char*)_buf;
|
||||
assert( sock != -1 );
|
||||
assert( m_sock != -1 );
|
||||
auto start = buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
auto ret = send( sock, buf, len, MSG_NOSIGNAL );
|
||||
auto ret = send( m_sock, buf, len, MSG_NOSIGNAL );
|
||||
if( ret == -1 ) return -1;
|
||||
len -= ret;
|
||||
buf += ret;
|
||||
@@ -292,14 +162,13 @@ int Socket::Send( const void* _buf, int len )
|
||||
|
||||
int Socket::GetSendBufSize()
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
int bufSize;
|
||||
#if defined _WIN32
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
int sz = sizeof( bufSize );
|
||||
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
|
||||
getsockopt( m_sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
|
||||
#else
|
||||
socklen_t sz = sizeof( bufSize );
|
||||
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz );
|
||||
getsockopt( m_sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz );
|
||||
#endif
|
||||
return bufSize;
|
||||
}
|
||||
@@ -336,16 +205,15 @@ int Socket::RecvBuffered( void* buf, int len, int timeout )
|
||||
|
||||
int Socket::Recv( void* _buf, int len, int timeout )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (char*)_buf;
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)sock;
|
||||
fd.fd = (socket_t)m_sock;
|
||||
fd.events = POLLIN;
|
||||
|
||||
if( poll( &fd, 1, timeout ) > 0 )
|
||||
{
|
||||
return recv( sock, buf, len, 0 );
|
||||
return recv( m_sock, buf, len, 0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -353,54 +221,33 @@ int Socket::Recv( void* _buf, int len, int timeout )
|
||||
}
|
||||
}
|
||||
|
||||
int Socket::ReadUpTo( void* _buf, int len, int timeout )
|
||||
bool Socket::Read( void* _buf, int len, int timeout, std::function<bool()> exitCb )
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
auto buf = (char*)_buf;
|
||||
|
||||
int rd = 0;
|
||||
while( len > 0 )
|
||||
{
|
||||
const auto res = recv( sock, buf, len, 0 );
|
||||
if( res == 0 ) break;
|
||||
if( res == -1 ) return -1;
|
||||
len -= res;
|
||||
rd += res;
|
||||
buf += res;
|
||||
}
|
||||
return rd;
|
||||
}
|
||||
|
||||
bool Socket::Read( void* buf, int len, int timeout )
|
||||
{
|
||||
auto cbuf = (char*)buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
if( !ReadImpl( cbuf, len, timeout ) ) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Socket::ReadImpl( char*& buf, int& len, int timeout )
|
||||
{
|
||||
const auto sz = RecvBuffered( buf, len, timeout );
|
||||
switch( sz )
|
||||
{
|
||||
case 0:
|
||||
return false;
|
||||
case -1:
|
||||
if( exitCb() ) return false;
|
||||
const auto sz = RecvBuffered( buf, len, timeout );
|
||||
switch( sz )
|
||||
{
|
||||
case 0:
|
||||
return false;
|
||||
case -1:
|
||||
#ifdef _WIN32
|
||||
{
|
||||
auto err = WSAGetLastError();
|
||||
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
|
||||
}
|
||||
{
|
||||
auto err = WSAGetLastError();
|
||||
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
len -= sz;
|
||||
buf += sz;
|
||||
break;
|
||||
break;
|
||||
default:
|
||||
len -= sz;
|
||||
buf += sz;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -419,11 +266,10 @@ bool Socket::ReadRaw( void* _buf, int len, int timeout )
|
||||
|
||||
bool Socket::HasData()
|
||||
{
|
||||
const auto sock = m_sock.load( std::memory_order_relaxed );
|
||||
if( m_bufLeft > 0 ) return true;
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)sock;
|
||||
fd.fd = (socket_t)m_sock;
|
||||
fd.events = POLLIN;
|
||||
|
||||
return poll( &fd, 1, 0 ) > 0;
|
||||
@@ -431,7 +277,7 @@ bool Socket::HasData()
|
||||
|
||||
bool Socket::IsValid() const
|
||||
{
|
||||
return m_sock.load( std::memory_order_relaxed ) >= 0;
|
||||
return m_sock >= 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -448,48 +294,25 @@ ListenSocket::~ListenSocket()
|
||||
if( m_sock != -1 ) Close();
|
||||
}
|
||||
|
||||
static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res )
|
||||
{
|
||||
struct addrinfo hints;
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = ai_family;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
#ifndef TRACY_ONLY_LOCALHOST
|
||||
const char* onlyLocalhost = GetEnvVar( "TRACY_ONLY_LOCALHOST" );
|
||||
if( !onlyLocalhost || onlyLocalhost[0] != '1' )
|
||||
{
|
||||
hints.ai_flags = AI_PASSIVE;
|
||||
}
|
||||
#endif
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%" PRIu16, port );
|
||||
if( getaddrinfo( nullptr, portbuf, &hints, res ) != 0 ) return -1;
|
||||
int sock = socket( (*res)->ai_family, (*res)->ai_socktype, (*res)->ai_protocol );
|
||||
if (sock == -1) freeaddrinfo( *res );
|
||||
return sock;
|
||||
}
|
||||
|
||||
bool ListenSocket::Listen( uint16_t port, int backlog )
|
||||
bool ListenSocket::Listen( int port, int backlog )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
|
||||
struct addrinfo* res = nullptr;
|
||||
struct addrinfo* res;
|
||||
struct addrinfo hints;
|
||||
|
||||
#if !defined TRACY_ONLY_IPV4 && !defined TRACY_ONLY_LOCALHOST
|
||||
const char* onlyIPv4 = GetEnvVar( "TRACY_ONLY_IPV4" );
|
||||
if( !onlyIPv4 || onlyIPv4[0] != '1' )
|
||||
{
|
||||
m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res );
|
||||
}
|
||||
#endif
|
||||
if (m_sock == -1)
|
||||
{
|
||||
// IPV6 protocol may not be available/is disabled. Try to create a socket
|
||||
// with the IPV4 protocol
|
||||
m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res );
|
||||
if( m_sock == -1 ) return false;
|
||||
}
|
||||
#if defined _WIN32
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = AF_INET6;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
hints.ai_flags = AI_PASSIVE;
|
||||
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%i", port );
|
||||
|
||||
if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
|
||||
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
unsigned long val = 0;
|
||||
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
|
||||
#elif defined BSD
|
||||
@@ -501,8 +324,8 @@ bool ListenSocket::Listen( uint16_t port, int backlog )
|
||||
int val = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
|
||||
#endif
|
||||
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
|
||||
if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
|
||||
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); return false; }
|
||||
if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); return false; }
|
||||
freeaddrinfo( res );
|
||||
return true;
|
||||
}
|
||||
@@ -560,7 +383,7 @@ UdpBroadcast::~UdpBroadcast()
|
||||
if( m_sock != -1 ) Close();
|
||||
}
|
||||
|
||||
bool UdpBroadcast::Open( const char* addr, uint16_t port )
|
||||
bool UdpBroadcast::Open( const char* addr, int port )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
|
||||
@@ -572,7 +395,7 @@ bool UdpBroadcast::Open( const char* addr, uint16_t port )
|
||||
hints.ai_socktype = SOCK_DGRAM;
|
||||
|
||||
char portbuf[32];
|
||||
sprintf( portbuf, "%" PRIu16, port );
|
||||
sprintf( portbuf, "%i", port );
|
||||
|
||||
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
|
||||
int sock = 0;
|
||||
@@ -583,7 +406,7 @@ bool UdpBroadcast::Open( const char* addr, uint16_t port )
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
unsigned long broadcast = 1;
|
||||
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
|
||||
#else
|
||||
@@ -604,7 +427,6 @@ bool UdpBroadcast::Open( const char* addr, uint16_t port )
|
||||
if( !ptr ) return false;
|
||||
|
||||
m_sock = sock;
|
||||
inet_pton( AF_INET, addr, &m_addr );
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -619,13 +441,13 @@ void UdpBroadcast::Close()
|
||||
m_sock = -1;
|
||||
}
|
||||
|
||||
int UdpBroadcast::Send( uint16_t port, const void* data, int len )
|
||||
int UdpBroadcast::Send( int port, const void* data, int len )
|
||||
{
|
||||
assert( m_sock != -1 );
|
||||
struct sockaddr_in addr;
|
||||
addr.sin_family = AF_INET;
|
||||
addr.sin_port = htons( port );
|
||||
addr.sin_addr.s_addr = m_addr;
|
||||
addr.sin_addr.s_addr = INADDR_BROADCAST;
|
||||
return sendto( m_sock, (const char*)data, len, MSG_NOSIGNAL, (sockaddr*)&addr, sizeof( addr ) );
|
||||
}
|
||||
|
||||
@@ -641,10 +463,8 @@ IpAddress::~IpAddress()
|
||||
|
||||
void IpAddress::Set( const struct sockaddr& addr )
|
||||
{
|
||||
#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 )
|
||||
struct sockaddr_in tmp;
|
||||
memcpy( &tmp, &addr, sizeof( tmp ) );
|
||||
auto ai = &tmp;
|
||||
#if __MINGW32__
|
||||
auto ai = (struct sockaddr_in*)&addr;
|
||||
#else
|
||||
auto ai = (const struct sockaddr_in*)&addr;
|
||||
#endif
|
||||
@@ -665,7 +485,7 @@ UdpListen::~UdpListen()
|
||||
if( m_sock != -1 ) Close();
|
||||
}
|
||||
|
||||
bool UdpListen::Listen( uint16_t port )
|
||||
bool UdpListen::Listen( int port )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
|
||||
@@ -676,14 +496,14 @@ bool UdpListen::Listen( uint16_t port )
|
||||
int val = 1;
|
||||
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
unsigned long reuse = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) );
|
||||
#else
|
||||
int reuse = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) );
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
unsigned long broadcast = 1;
|
||||
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
|
||||
#else
|
||||
@@ -729,14 +549,14 @@ void UdpListen::Close()
|
||||
m_sock = -1;
|
||||
}
|
||||
|
||||
const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout )
|
||||
const char* UdpListen::Read( size_t& len, IpAddress& addr )
|
||||
{
|
||||
static char buf[2048];
|
||||
|
||||
struct pollfd fd;
|
||||
fd.fd = (socket_t)m_sock;
|
||||
fd.events = POLLIN;
|
||||
if( poll( &fd, 1, timeout ) <= 0 ) return nullptr;
|
||||
if( poll( &fd, 1, 10 ) <= 0 ) return nullptr;
|
||||
|
||||
sockaddr sa;
|
||||
socklen_t salen = sizeof( struct sockaddr );
|
||||
|
||||
@@ -1,13 +1,8 @@
|
||||
#ifndef __TRACYSOCKET_HPP__
|
||||
#define __TRACYSOCKET_HPP__
|
||||
|
||||
#include <atomic>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <functional>
|
||||
|
||||
#include "TracyForceInline.hpp"
|
||||
|
||||
struct addrinfo;
|
||||
struct sockaddr;
|
||||
|
||||
namespace tracy
|
||||
@@ -24,28 +19,13 @@ public:
|
||||
Socket( int sock );
|
||||
~Socket();
|
||||
|
||||
bool Connect( const char* addr, uint16_t port );
|
||||
bool ConnectBlocking( const char* addr, uint16_t port );
|
||||
bool Connect( const char* addr, int port );
|
||||
void Close();
|
||||
|
||||
int Send( const void* buf, int len );
|
||||
int GetSendBufSize();
|
||||
|
||||
int ReadUpTo( void* buf, int len, int timeout );
|
||||
bool Read( void* buf, int len, int timeout );
|
||||
|
||||
template<typename ShouldExit>
|
||||
bool Read( void* buf, int len, int timeout, ShouldExit exitCb )
|
||||
{
|
||||
auto cbuf = (char*)buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
if( exitCb() ) return false;
|
||||
if( !ReadImpl( cbuf, len, timeout ) ) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Read( void* buf, int len, int timeout, std::function<bool()> exitCb );
|
||||
bool ReadRaw( void* buf, int len, int timeout );
|
||||
bool HasData();
|
||||
bool IsValid() const;
|
||||
@@ -59,16 +39,10 @@ private:
|
||||
int RecvBuffered( void* buf, int len, int timeout );
|
||||
int Recv( void* buf, int len, int timeout );
|
||||
|
||||
bool ReadImpl( char*& buf, int& len, int timeout );
|
||||
|
||||
char* m_buf;
|
||||
char* m_bufPtr;
|
||||
std::atomic<int> m_sock;
|
||||
int m_sock;
|
||||
int m_bufLeft;
|
||||
|
||||
struct addrinfo *m_res;
|
||||
struct addrinfo *m_ptr;
|
||||
int m_connSock;
|
||||
};
|
||||
|
||||
class ListenSocket
|
||||
@@ -77,7 +51,7 @@ public:
|
||||
ListenSocket();
|
||||
~ListenSocket();
|
||||
|
||||
bool Listen( uint16_t port, int backlog );
|
||||
bool Listen( int port, int backlog );
|
||||
Socket* Accept();
|
||||
void Close();
|
||||
|
||||
@@ -96,10 +70,10 @@ public:
|
||||
UdpBroadcast();
|
||||
~UdpBroadcast();
|
||||
|
||||
bool Open( const char* addr, uint16_t port );
|
||||
bool Open( const char* addr, int port );
|
||||
void Close();
|
||||
|
||||
int Send( uint16_t port, const void* data, int len );
|
||||
int Send( int port, const void* data, int len );
|
||||
|
||||
UdpBroadcast( const UdpBroadcast& ) = delete;
|
||||
UdpBroadcast( UdpBroadcast&& ) = delete;
|
||||
@@ -108,7 +82,6 @@ public:
|
||||
|
||||
private:
|
||||
int m_sock;
|
||||
uint32_t m_addr;
|
||||
};
|
||||
|
||||
class IpAddress
|
||||
@@ -138,10 +111,10 @@ public:
|
||||
UdpListen();
|
||||
~UdpListen();
|
||||
|
||||
bool Listen( uint16_t port );
|
||||
bool Listen( int port );
|
||||
void Close();
|
||||
|
||||
const char* Read( size_t& len, IpAddress& addr, int timeout );
|
||||
const char* Read( size_t& len, IpAddress& addr );
|
||||
|
||||
UdpListen( const UdpListen& ) = delete;
|
||||
UdpListen( UdpListen&& ) = delete;
|
||||
|
||||
@@ -1,122 +0,0 @@
|
||||
#include "TracyStackFrames.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
const char* s_tracyStackFrames_[] = {
|
||||
"tracy::Callstack",
|
||||
"tracy::Callstack(int)",
|
||||
"tracy::GpuCtxScope::{ctor}",
|
||||
"tracy::Profiler::SendCallstack",
|
||||
"tracy::Profiler::SendCallstack(int)",
|
||||
"tracy::Profiler::SendCallstack(int, unsigned long)",
|
||||
"tracy::Profiler::MemAllocCallstack",
|
||||
"tracy::Profiler::MemAllocCallstack(void const*, unsigned long, int)",
|
||||
"tracy::Profiler::MemFreeCallstack",
|
||||
"tracy::Profiler::MemFreeCallstack(void const*, int)",
|
||||
"tracy::ScopedZone::{ctor}",
|
||||
"tracy::ScopedZone::ScopedZone(tracy::SourceLocationData const*, int, bool)",
|
||||
"tracy::Profiler::Message",
|
||||
nullptr
|
||||
};
|
||||
|
||||
const char** s_tracyStackFrames = s_tracyStackFrames_;
|
||||
|
||||
const StringMatch s_tracySkipSubframes_[] = {
|
||||
{ "/include/arm_neon.h", 19 },
|
||||
{ "/include/adxintrin.h", 20 },
|
||||
{ "/include/ammintrin.h", 20 },
|
||||
{ "/include/amxbf16intrin.h", 24 },
|
||||
{ "/include/amxint8intrin.h", 24 },
|
||||
{ "/include/amxtileintrin.h", 24 },
|
||||
{ "/include/avx2intrin.h", 21 },
|
||||
{ "/include/avx5124fmapsintrin.h", 29 },
|
||||
{ "/include/avx5124vnniwintrin.h", 29 },
|
||||
{ "/include/avx512bf16intrin.h", 27 },
|
||||
{ "/include/avx512bf16vlintrin.h", 29 },
|
||||
{ "/include/avx512bitalgintrin.h", 29 },
|
||||
{ "/include/avx512bwintrin.h", 25 },
|
||||
{ "/include/avx512cdintrin.h", 25 },
|
||||
{ "/include/avx512dqintrin.h", 25 },
|
||||
{ "/include/avx512erintrin.h", 25 },
|
||||
{ "/include/avx512fintrin.h", 24 },
|
||||
{ "/include/avx512ifmaintrin.h", 27 },
|
||||
{ "/include/avx512ifmavlintrin.h", 29 },
|
||||
{ "/include/avx512pfintrin.h", 25 },
|
||||
{ "/include/avx512vbmi2intrin.h", 28 },
|
||||
{ "/include/avx512vbmi2vlintrin.h", 30 },
|
||||
{ "/include/avx512vbmiintrin.h", 27 },
|
||||
{ "/include/avx512vbmivlintrin.h", 29 },
|
||||
{ "/include/avx512vlbwintrin.h", 27 },
|
||||
{ "/include/avx512vldqintrin.h", 27 },
|
||||
{ "/include/avx512vlintrin.h", 25 },
|
||||
{ "/include/avx512vnniintrin.h", 27 },
|
||||
{ "/include/avx512vnnivlintrin.h", 29 },
|
||||
{ "/include/avx512vp2intersectintrin.h", 35 },
|
||||
{ "/include/avx512vp2intersectvlintrin.h", 37 },
|
||||
{ "/include/avx512vpopcntdqintrin.h", 32 },
|
||||
{ "/include/avx512vpopcntdqvlintrin.h", 34 },
|
||||
{ "/include/avxintrin.h", 20 },
|
||||
{ "/include/avxvnniintrin.h", 24 },
|
||||
{ "/include/bmi2intrin.h", 21 },
|
||||
{ "/include/bmiintrin.h", 20 },
|
||||
{ "/include/bmmintrin.h", 20 },
|
||||
{ "/include/cetintrin.h", 20 },
|
||||
{ "/include/cldemoteintrin.h", 25 },
|
||||
{ "/include/clflushoptintrin.h", 27 },
|
||||
{ "/include/clwbintrin.h", 21 },
|
||||
{ "/include/clzerointrin.h", 23 },
|
||||
{ "/include/emmintrin.h", 20 },
|
||||
{ "/include/enqcmdintrin.h", 23 },
|
||||
{ "/include/f16cintrin.h", 21 },
|
||||
{ "/include/fma4intrin.h", 21 },
|
||||
{ "/include/fmaintrin.h", 20 },
|
||||
{ "/include/fxsrintrin.h", 21 },
|
||||
{ "/include/gfniintrin.h", 21 },
|
||||
{ "/include/hresetintrin.h", 23 },
|
||||
{ "/include/ia32intrin.h", 21 },
|
||||
{ "/include/immintrin.h", 20 },
|
||||
{ "/include/keylockerintrin.h", 26 },
|
||||
{ "/include/lwpintrin.h", 20 },
|
||||
{ "/include/lzcntintrin.h", 22 },
|
||||
{ "/include/mmintrin.h", 19 },
|
||||
{ "/include/movdirintrin.h", 23 },
|
||||
{ "/include/mwaitxintrin.h", 23 },
|
||||
{ "/include/nmmintrin.h", 20 },
|
||||
{ "/include/pconfigintrin.h", 24 },
|
||||
{ "/include/pkuintrin.h", 20 },
|
||||
{ "/include/pmmintrin.h", 20 },
|
||||
{ "/include/popcntintrin.h", 23 },
|
||||
{ "/include/prfchwintrin.h", 23 },
|
||||
{ "/include/rdseedintrin.h", 23 },
|
||||
{ "/include/rtmintrin.h", 20 },
|
||||
{ "/include/serializeintrin.h", 26 },
|
||||
{ "/include/sgxintrin.h", 20 },
|
||||
{ "/include/shaintrin.h", 20 },
|
||||
{ "/include/smmintrin.h", 20 },
|
||||
{ "/include/tbmintrin.h", 20 },
|
||||
{ "/include/tmmintrin.h", 20 },
|
||||
{ "/include/tsxldtrkintrin.h", 25 },
|
||||
{ "/include/uintrintrin.h", 22 },
|
||||
{ "/include/vaesintrin.h", 21 },
|
||||
{ "/include/vpclmulqdqintrin.h", 27 },
|
||||
{ "/include/waitpkgintrin.h", 24 },
|
||||
{ "/include/wbnoinvdintrin.h", 25 },
|
||||
{ "/include/wmmintrin.h", 20 },
|
||||
{ "/include/x86gprintrin.h", 23 },
|
||||
{ "/include/x86intrin.h", 20 },
|
||||
{ "/include/xmmintrin.h", 20 },
|
||||
{ "/include/xopintrin.h", 20 },
|
||||
{ "/include/xsavecintrin.h", 23 },
|
||||
{ "/include/xsaveintrin.h", 22 },
|
||||
{ "/include/xsaveoptintrin.h", 25 },
|
||||
{ "/include/xsavesintrin.h", 23 },
|
||||
{ "/include/xtestintrin.h", 22 },
|
||||
{ "/bits/atomic_base.h", 19 },
|
||||
{ "/atomic", 7 },
|
||||
{}
|
||||
};
|
||||
|
||||
const StringMatch* s_tracySkipSubframes = s_tracySkipSubframes_;
|
||||
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
#ifndef __TRACYSTACKFRAMES_HPP__
|
||||
#define __TRACYSTACKFRAMES_HPP__
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct StringMatch
|
||||
{
|
||||
const char* str;
|
||||
size_t len;
|
||||
};
|
||||
|
||||
extern const char** s_tracyStackFrames;
|
||||
extern const StringMatch* s_tracySkipSubframes;
|
||||
|
||||
static constexpr int s_tracySkipSubframesMinLen = 7;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,16 +1,13 @@
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable:4996)
|
||||
#if defined _MSC_VER || defined __CYGWIN__ || defined _WIN32
|
||||
# ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# endif
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
#endif
|
||||
#if defined _WIN32
|
||||
# ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# endif
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# include <windows.h>
|
||||
# include <malloc.h>
|
||||
# include "TracyUwp.hpp"
|
||||
#else
|
||||
# include <pthread.h>
|
||||
# include <string.h>
|
||||
@@ -18,16 +15,10 @@
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
# ifdef __ANDROID__
|
||||
# include <sys/types.h>
|
||||
# else
|
||||
# include <sys/syscall.h>
|
||||
# ifndef __ANDROID__
|
||||
# include <syscall.h>
|
||||
# endif
|
||||
# include <fcntl.h>
|
||||
#elif defined __FreeBSD__
|
||||
# include <sys/thr.h>
|
||||
#elif defined __NetBSD__ || defined __DragonFly__
|
||||
# include <sys/lwp.h>
|
||||
#endif
|
||||
|
||||
#ifdef __MINGW32__
|
||||
@@ -39,11 +30,6 @@
|
||||
|
||||
#include "TracySystem.hpp"
|
||||
|
||||
#if defined _WIN32
|
||||
extern "C" typedef HRESULT (WINAPI *t_SetThreadDescription)( HANDLE, PCWSTR );
|
||||
extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# include <atomic>
|
||||
# include "TracyAlloc.hpp"
|
||||
@@ -52,135 +38,75 @@ extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
TRACY_API uint32_t GetThreadHandleImpl()
|
||||
{
|
||||
#if defined _WIN32
|
||||
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint32_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint32_t( GetCurrentThreadId() );
|
||||
#elif defined __APPLE__
|
||||
uint64_t id;
|
||||
pthread_threadid_np( pthread_self(), &id );
|
||||
return uint32_t( id );
|
||||
#elif defined __ANDROID__
|
||||
return (uint32_t)gettid();
|
||||
#elif defined __linux__
|
||||
return (uint32_t)syscall( SYS_gettid );
|
||||
#elif defined __FreeBSD__
|
||||
long id;
|
||||
thr_self( &id );
|
||||
return id;
|
||||
#elif defined __NetBSD__
|
||||
return _lwp_self();
|
||||
#elif defined __DragonFly__
|
||||
return lwp_gettid();
|
||||
#elif defined __OpenBSD__
|
||||
return getthrid();
|
||||
#else
|
||||
// To add support for a platform, retrieve and return the kernel thread identifier here.
|
||||
//
|
||||
// Note that pthread_t (as for example returned by pthread_self()) is *not* a kernel
|
||||
// thread identifier. It is a pointer to a library-allocated data structure instead.
|
||||
// Such pointers will be reused heavily, making the pthread_t non-unique. Additionally
|
||||
// a 64-bit pointer cannot be reliably truncated to 32 bits.
|
||||
#error "Unsupported platform!"
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
struct ThreadNameData
|
||||
{
|
||||
uint32_t id;
|
||||
uint64_t id;
|
||||
const char* name;
|
||||
ThreadNameData* next;
|
||||
};
|
||||
std::atomic<ThreadNameData*>& GetThreadNameData();
|
||||
TRACY_API std::atomic<ThreadNameData*>& GetThreadNameData();
|
||||
TRACY_API void InitRPMallocThread();
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma pack( push, 8 )
|
||||
struct THREADNAME_INFO
|
||||
void SetThreadName( const char* name )
|
||||
{
|
||||
DWORD dwType;
|
||||
LPCSTR szName;
|
||||
DWORD dwThreadID;
|
||||
DWORD dwFlags;
|
||||
};
|
||||
# pragma pack(pop)
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# if defined NTDDI_WIN10_RS2 && NTDDI_VERSION >= NTDDI_WIN10_RS2
|
||||
wchar_t buf[256];
|
||||
mbstowcs( buf, name, 256 );
|
||||
SetThreadDescription( GetCurrentThread(), buf );
|
||||
# elif defined _MSC_VER
|
||||
const DWORD MS_VC_EXCEPTION=0x406D1388;
|
||||
# pragma pack( push, 8 )
|
||||
struct THREADNAME_INFO
|
||||
{
|
||||
DWORD dwType;
|
||||
LPCSTR szName;
|
||||
DWORD dwThreadID;
|
||||
DWORD dwFlags;
|
||||
};
|
||||
# pragma pack(pop)
|
||||
|
||||
DWORD ThreadId = GetCurrentThreadId();
|
||||
THREADNAME_INFO info;
|
||||
info.dwType = 0x1000;
|
||||
info.szName = name;
|
||||
info.dwThreadID = ThreadId;
|
||||
info.dwFlags = 0;
|
||||
|
||||
void ThreadNameMsvcMagic( const THREADNAME_INFO& info )
|
||||
{
|
||||
__try
|
||||
{
|
||||
RaiseException( 0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
|
||||
RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
|
||||
}
|
||||
__except(EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
TRACY_API void SetThreadName( const char* name )
|
||||
{
|
||||
#if defined _WIN32
|
||||
# ifdef TRACY_UWP
|
||||
static auto _SetThreadDescription = &::SetThreadDescription;
|
||||
# else
|
||||
static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" );
|
||||
# endif
|
||||
if( _SetThreadDescription )
|
||||
{
|
||||
wchar_t buf[256];
|
||||
mbstowcs( buf, name, 256 );
|
||||
_SetThreadDescription( GetCurrentThread(), buf );
|
||||
}
|
||||
else
|
||||
{
|
||||
# if defined _MSC_VER
|
||||
THREADNAME_INFO info;
|
||||
info.dwType = 0x1000;
|
||||
info.szName = name;
|
||||
info.dwThreadID = GetCurrentThreadId();
|
||||
info.dwFlags = 0;
|
||||
ThreadNameMsvcMagic( info );
|
||||
# endif
|
||||
}
|
||||
#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__
|
||||
#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__ && !defined __CYGWIN__
|
||||
{
|
||||
const auto sz = strlen( name );
|
||||
if( sz <= 15 )
|
||||
{
|
||||
#if defined __APPLE__
|
||||
pthread_setname_np( name );
|
||||
#else
|
||||
pthread_setname_np( pthread_self(), name );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
char buf[16];
|
||||
memcpy( buf, name, 15 );
|
||||
buf[15] = '\0';
|
||||
#if defined __APPLE__
|
||||
pthread_setname_np( buf );
|
||||
#else
|
||||
pthread_setname_np( pthread_self(), buf );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef TRACY_ENABLE
|
||||
{
|
||||
InitRPMallocThread();
|
||||
const auto sz = strlen( name );
|
||||
char* buf = (char*)tracy_malloc( sz+1 );
|
||||
memcpy( buf, name, sz );
|
||||
buf[sz] = '\0';
|
||||
auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) );
|
||||
buf[sz+1] = '\0';
|
||||
auto data = (ThreadNameData*)tracy_malloc( sizeof( ThreadNameData ) );
|
||||
data->id = detail::GetThreadHandleImpl();
|
||||
data->name = buf;
|
||||
data->next = GetThreadNameData().load( std::memory_order_relaxed );
|
||||
@@ -189,7 +115,7 @@ TRACY_API void SetThreadName( const char* name )
|
||||
#endif
|
||||
}
|
||||
|
||||
TRACY_API const char* GetThreadName( uint32_t id )
|
||||
const char* GetThreadName( uint64_t id )
|
||||
{
|
||||
static char buf[256];
|
||||
#ifdef TRACY_ENABLE
|
||||
@@ -203,27 +129,21 @@ TRACY_API const char* GetThreadName( uint32_t id )
|
||||
ptr = ptr->next;
|
||||
}
|
||||
#else
|
||||
# if defined _WIN32
|
||||
# ifdef TRACY_UWP
|
||||
static auto _GetThreadDescription = &::GetThreadDescription;
|
||||
# else
|
||||
static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
|
||||
# endif
|
||||
if( _GetThreadDescription )
|
||||
# if defined _WIN32 || defined __CYGWIN__
|
||||
# if defined NTDDI_WIN10_RS2 && NTDDI_VERSION >= NTDDI_WIN10_RS2
|
||||
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
|
||||
if( hnd != 0 )
|
||||
{
|
||||
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
|
||||
if( hnd != 0 )
|
||||
PWSTR tmp;
|
||||
GetThreadDescription( hnd, &tmp );
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
CloseHandle( hnd );
|
||||
if( ret != 0 )
|
||||
{
|
||||
PWSTR tmp;
|
||||
_GetThreadDescription( hnd, &tmp );
|
||||
auto ret = wcstombs( buf, tmp, 256 );
|
||||
CloseHandle( hnd );
|
||||
if( ret != 0 )
|
||||
{
|
||||
return buf;
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
# elif defined __linux__
|
||||
int cs, fd;
|
||||
char path[32];
|
||||
@@ -233,7 +153,7 @@ TRACY_API const char* GetThreadName( uint32_t id )
|
||||
int tid = (int) syscall( SYS_gettid );
|
||||
# endif
|
||||
snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid );
|
||||
sprintf( buf, "%" PRIu32, id );
|
||||
sprintf( buf, "%" PRIu64, id );
|
||||
# ifndef __ANDROID__
|
||||
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs );
|
||||
# endif
|
||||
@@ -255,50 +175,8 @@ TRACY_API const char* GetThreadName( uint32_t id )
|
||||
return buf;
|
||||
# endif
|
||||
#endif
|
||||
sprintf( buf, "%" PRIu32, id );
|
||||
sprintf( buf, "%" PRIu64, id );
|
||||
return buf;
|
||||
}
|
||||
|
||||
// Returns the current value of the environment variable `name`, or nullptr
// when it cannot be retrieved.
//
// On Windows the returned pointer refers to a function-local static buffer of
// 1024 bytes: longer values are truncated to fit, and the contents are
// overwritten by the next call. On other platforms this forwards to getenv().
TRACY_API const char* GetEnvVar( const char* name )
{
#if defined _WIN32
    // unfortunately getenv() on Windows is just fundamentally broken.  It caches the entire
    // environment block once on startup, then never refreshes it again.  If any environment
    // strings are added or modified after startup of the CRT, those changes will not be
    // seen by getenv().  This removes the possibility of an app using this SDK from
    // programmatically setting any of the behaviour controlling envvars here.
    //
    // To work around this, we'll instead go directly to the Win32 environment strings APIs
    // to get the current value.
    static char buffer[1024];
    DWORD const kBufferSize = DWORD(sizeof(buffer) / sizeof(buffer[0]));
    DWORD count = GetEnvironmentVariableA(name, buffer, kBufferSize);

    if( count == 0 )
        return nullptr;

    if( count >= kBufferSize )
    {
        // The value did not fit: `count` is the size the API asked for.
        // Fetch the whole value into a stack scratch buffer, then keep as
        // much of it as the static buffer can hold.
        DWORD const required = count;
        char* buf = reinterpret_cast<char*>(_alloca(required + 1));
        count = GetEnvironmentVariableA(name, buf, required + 1);

        // The variable may have been removed or enlarged between the two
        // calls; in either case `buf` holds no usable data.
        if( count == 0 || count > required )
            return nullptr;

        // Copy only the characters that were actually written.
        DWORD const copyLen = count < kBufferSize - 1 ? count : kBufferSize - 1;
        memcpy(buffer, buf, copyLen);
        buffer[copyLen] = 0;
    }

    return buffer;
#else
    return getenv(name);
#endif
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); }
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,6 +1,29 @@
|
||||
#ifndef __TRACYSYSTEM_HPP__
|
||||
#define __TRACYSYSTEM_HPP__
|
||||
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
# ifndef _WINDOWS_
|
||||
extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
|
||||
# endif
|
||||
#elif defined __APPLE__ || ( !defined __ANDROID__ && !defined __linux__ )
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
# include <unistd.h>
|
||||
# ifdef __ANDROID__
|
||||
# include <sys/types.h>
|
||||
# else
|
||||
# include <sys/syscall.h>
|
||||
# endif
|
||||
#elif defined __FreeBSD__
|
||||
# include <sys/thr.h>
|
||||
#elif defined __NetBSD__ || defined __DragonFly__
|
||||
# include <sys/lwp.h>
|
||||
#elif defined __OpenBSD__
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "TracyApi.h"
|
||||
@@ -10,22 +33,47 @@ namespace tracy
|
||||
|
||||
namespace detail
|
||||
{
|
||||
TRACY_API uint32_t GetThreadHandleImpl();
|
||||
static inline uint64_t GetThreadHandleImpl()
|
||||
{
|
||||
#if defined _WIN32 || defined __CYGWIN__
|
||||
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint64_t( GetCurrentThreadId() );
|
||||
#elif defined __APPLE__
|
||||
uint64_t id;
|
||||
pthread_threadid_np( pthread_self(), &id );
|
||||
return id;
|
||||
#elif defined __ANDROID__
|
||||
return (uint64_t)gettid();
|
||||
#elif defined __linux__
|
||||
return (uint64_t)syscall( SYS_gettid );
|
||||
#elif defined __FreeBSD__
|
||||
long id;
|
||||
thr_self( &id );
|
||||
return id;
|
||||
#elif defined __NetBSD__
|
||||
return _lwp_self();
|
||||
#elif defined __DragonFly__
|
||||
return lwp_gettid();
|
||||
#elif defined __OpenBSD__
|
||||
return getthrid();
|
||||
#else
|
||||
static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
|
||||
return uint64_t( pthread_self() );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
TRACY_API uint32_t GetThreadHandle();
|
||||
TRACY_API uint64_t GetThreadHandle();
|
||||
#else
|
||||
static inline uint32_t GetThreadHandle()
|
||||
static inline uint64_t GetThreadHandle()
|
||||
{
|
||||
return detail::GetThreadHandleImpl();
|
||||
}
|
||||
#endif
|
||||
|
||||
TRACY_API void SetThreadName( const char* name );
|
||||
TRACY_API const char* GetThreadName( uint32_t id );
|
||||
|
||||
TRACY_API const char* GetEnvVar(const char* name);
|
||||
void SetThreadName( const char* name );
|
||||
const char* GetThreadName( uint64_t id );
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
#ifndef __TRACYUWP_HPP__
|
||||
#define __TRACYUWP_HPP__
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <winapifamily.h>
|
||||
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
||||
# define TRACY_UWP
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -7,7 +7,7 @@
|
||||
# include <thread>
|
||||
#endif
|
||||
|
||||
#include "TracyForceInline.hpp"
|
||||
#include "../common/TracyForceInline.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
@@ -16,8 +16,6 @@ static tracy_force_inline void YieldThread()
|
||||
{
|
||||
#if defined __SSE2__ || defined _M_AMD64 || _M_IX86_FP == 2
|
||||
_mm_pause();
|
||||
#elif defined __aarch64__
|
||||
asm volatile( "isb" : : );
|
||||
#else
|
||||
std::this_thread::yield();
|
||||
#endif
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
# Extract the actual list of source files from a sibling Visual Studio project.
|
||||
|
||||
# Ensure these are simply-substituted variables, without changing their values.
|
||||
SRC := $(SRC)
|
||||
SRC2 := $(SRC2)
|
||||
SRC3 := $(SRC3)
|
||||
SRC4 := $(SRC4)
|
||||
|
||||
# Paths here are relative to the directory in which make was invoked, not to
|
||||
# this file, so ../win32/$(PROJECT).vcxproj refers to the Visual Studio project
|
||||
# of whichever tool is including this makefile fragment.
|
||||
|
||||
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
BASE4 := $(shell egrep 'None.*S"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
|
||||
|
||||
# The tool-specific makefile may request that certain files be omitted.
|
||||
SRC += $(filter-out $(FILTER),$(BASE))
|
||||
SRC2 += $(filter-out $(FILTER),$(BASE2))
|
||||
SRC3 += $(filter-out $(FILTER),$(BASE3))
|
||||
SRC4 += $(filter-out $(FILTER),$(BASE4))
|
||||
68
common/tracy_benaphore.h
Normal file
68
common/tracy_benaphore.h
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright (c) 2015 Jeff Preshing
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#ifndef __TRACY_CPP11OM_BENAPHORE_H__
|
||||
#define __TRACY_CPP11OM_BENAPHORE_H__
|
||||
|
||||
#include <cassert>
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include "tracy_sema.h"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class NonRecursiveBenaphore
|
||||
{
|
||||
private:
|
||||
std::atomic<int> m_contentionCount;
|
||||
DefaultSemaphoreType m_sema;
|
||||
|
||||
public:
|
||||
NonRecursiveBenaphore() : m_contentionCount(0) {}
|
||||
|
||||
void lock()
|
||||
{
|
||||
if (m_contentionCount.fetch_add(1, std::memory_order_acquire) > 0)
|
||||
{
|
||||
m_sema.wait();
|
||||
}
|
||||
}
|
||||
|
||||
bool try_lock()
|
||||
{
|
||||
if (m_contentionCount.load(std::memory_order_relaxed) != 0)
|
||||
return false;
|
||||
int expected = 0;
|
||||
return m_contentionCount.compare_exchange_strong(expected, 1, std::memory_order_acquire);
|
||||
}
|
||||
|
||||
void unlock()
|
||||
{
|
||||
int oldCount = m_contentionCount.fetch_sub(1, std::memory_order_release);
|
||||
assert(oldCount > 0);
|
||||
if (oldCount > 1)
|
||||
{
|
||||
m_sema.signal();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // __TRACY_CPP11OM_BENAPHORE_H__
|
||||
File diff suppressed because it is too large
Load Diff
@@ -33,6 +33,7 @@
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
*/
|
||||
|
||||
|
||||
#ifndef TRACY_LZ4_H_2983827168210
|
||||
#define TRACY_LZ4_H_2983827168210
|
||||
|
||||
@@ -40,11 +41,13 @@
|
||||
#include <stddef.h> /* size_t */
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/**
|
||||
Introduction
|
||||
|
||||
LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
|
||||
LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core,
|
||||
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
|
||||
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
|
||||
|
||||
@@ -56,19 +59,16 @@
|
||||
- unbounded multiple steps (described as Streaming compression)
|
||||
|
||||
lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
|
||||
Decompressing such a compressed block requires additional metadata.
|
||||
Exact metadata depends on exact decompression function.
|
||||
For the typical case of LZ4_decompress_safe(),
|
||||
metadata includes block's compressed size, and maximum bound of decompressed size.
|
||||
Decompressing a block requires additional metadata, such as its compressed size.
|
||||
Each application is free to encode and pass such metadata in whichever way it wants.
|
||||
|
||||
lz4.h only handles blocks; it cannot generate Frames.
|
||||
|
||||
Blocks are different from Frames (doc/lz4_Frame_format.md).
|
||||
Frames bundle both blocks and metadata in a specified manner.
|
||||
Embedding metadata is required for compressed data to be self-contained and portable.
|
||||
This is required for compressed data to be self-contained and portable.
|
||||
Frame format is delivered through a companion API, declared in lz4frame.h.
|
||||
The `lz4` CLI can only manage frames.
|
||||
Note that the `lz4` CLI can only manage frames.
|
||||
*/
|
||||
|
||||
/*^***************************************************************
|
||||
@@ -98,7 +98,7 @@
|
||||
/*------ Version ------*/
|
||||
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
|
||||
#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
|
||||
#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
|
||||
#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
|
||||
|
||||
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
|
||||
|
||||
@@ -107,9 +107,6 @@
|
||||
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
|
||||
#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */
|
||||
LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */
|
||||
|
||||
@@ -125,7 +122,7 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string;
|
||||
* Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
|
||||
*/
|
||||
#ifndef LZ4_MEMORY_USAGE
|
||||
# define LZ4_MEMORY_USAGE 14
|
||||
# define LZ4_MEMORY_USAGE 12
|
||||
#endif
|
||||
|
||||
|
||||
@@ -133,35 +130,29 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string;
|
||||
* Simple Functions
|
||||
**************************************/
|
||||
/*! LZ4_compress_default() :
|
||||
* Compresses 'srcSize' bytes from buffer 'src'
|
||||
* into already allocated 'dst' buffer of size 'dstCapacity'.
|
||||
* Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
|
||||
* It also runs faster, so it's a recommended setting.
|
||||
* If the function cannot compress 'src' into a more limited 'dst' budget,
|
||||
* compression stops *immediately*, and the function result is zero.
|
||||
* In which case, 'dst' content is undefined (invalid).
|
||||
* srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
|
||||
* dstCapacity : size of buffer 'dst' (which must be already allocated)
|
||||
* @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
|
||||
* or 0 if compression fails
|
||||
* Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
|
||||
*/
|
||||
Compresses 'srcSize' bytes from buffer 'src'
|
||||
into already allocated 'dst' buffer of size 'dstCapacity'.
|
||||
Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
|
||||
It also runs faster, so it's a recommended setting.
|
||||
If the function cannot compress 'src' into a more limited 'dst' budget,
|
||||
compression stops *immediately*, and the function result is zero.
|
||||
In which case, 'dst' content is undefined (invalid).
|
||||
srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
|
||||
dstCapacity : size of buffer 'dst' (which must be already allocated)
|
||||
@return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
|
||||
or 0 if compression fails
|
||||
Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
|
||||
|
||||
/*! LZ4_decompress_safe() :
|
||||
* compressedSize : is the exact complete size of the compressed block.
|
||||
* dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
|
||||
* @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
|
||||
* If destination buffer is not large enough, decoding will stop and output an error code (negative value).
|
||||
* If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
* Note 1 : This function is protected against malicious data packets :
|
||||
* it will never write outside 'dst' buffer, nor read outside 'source' buffer,
|
||||
* even if the compressed block is maliciously modified to order the decoder to do these actions.
|
||||
* In such case, the decoder stops immediately, and considers the compressed block malformed.
|
||||
* Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
|
||||
* The implementation is free to send / store / derive this information in whichever way is most beneficial.
|
||||
* If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
|
||||
*/
|
||||
compressedSize : is the exact complete size of the compressed block.
|
||||
dstCapacity : is the size of destination buffer, which must be already allocated.
|
||||
@return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
|
||||
If destination buffer is not large enough, decoding will stop and output an error code (negative value).
|
||||
If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
Note : This function is protected against malicious data packets (never writes outside 'dst' buffer, nor read outside 'source' buffer).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
|
||||
|
||||
|
||||
@@ -187,8 +178,7 @@ LZ4LIB_API int LZ4_compressBound(int inputSize);
|
||||
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
|
||||
It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
|
||||
An acceleration value of "1" is the same as regular LZ4_compress_default()
|
||||
Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
|
||||
Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
|
||||
Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
|
||||
|
||||
@@ -214,18 +204,7 @@ LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* d
|
||||
* New value is necessarily <= input value.
|
||||
* @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
|
||||
* or 0 if compression fails.
|
||||
*
|
||||
* Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
|
||||
* the produced compressed content could, in specific circumstances,
|
||||
* require to be decompressed into a destination buffer larger
|
||||
* by at least 1 byte than the content to decompress.
|
||||
* If an application uses `LZ4_compress_destSize()`,
|
||||
* it's highly recommended to update liblz4 to v1.9.2 or better.
|
||||
* If this can't be done or ensured,
|
||||
* the receiving decompression function should provide
|
||||
* a dstCapacity which is > decompressedSize, by at least 1 byte.
|
||||
* See https://github.com/lz4/lz4/issues/859 for details
|
||||
*/
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
|
||||
|
||||
|
||||
@@ -233,35 +212,25 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt
|
||||
* Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
|
||||
* into destination buffer 'dst' of size 'dstCapacity'.
|
||||
* Up to 'targetOutputSize' bytes will be decoded.
|
||||
* The function stops decoding on reaching this objective.
|
||||
* This can be useful to boost performance
|
||||
* whenever only the beginning of a block is required.
|
||||
* The function stops decoding on reaching this objective,
|
||||
* which can boost performance when only the beginning of a block is required.
|
||||
*
|
||||
* @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
|
||||
* @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
|
||||
* If source stream is detected malformed, function returns a negative result.
|
||||
*
|
||||
* Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
|
||||
* Note : @return can be < targetOutputSize, if compressed block contains less data.
|
||||
*
|
||||
* Note 2 : targetOutputSize must be <= dstCapacity
|
||||
*
|
||||
* Note 3 : this function effectively stops decoding on reaching targetOutputSize,
|
||||
* Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
|
||||
* and expects targetOutputSize <= dstCapacity.
|
||||
* It effectively stops decoding on reaching targetOutputSize,
|
||||
* so dstCapacity is kind of redundant.
|
||||
* This is because in older versions of this function,
|
||||
* decoding operation would still write complete sequences.
|
||||
* Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
|
||||
* This is because in a previous version of this function,
|
||||
* decoding operation would not "break" a sequence in the middle.
|
||||
* As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
|
||||
* it could write more bytes, though only up to dstCapacity.
|
||||
* Some "margin" used to be required for this operation to work properly.
|
||||
* Thankfully, this is no longer necessary.
|
||||
* The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
|
||||
*
|
||||
* Note 4 : If srcSize is the exact size of the block,
|
||||
* then targetOutputSize can be any value,
|
||||
* including larger than the block's decompressed size.
|
||||
* The function will, at most, generate block's decompressed size.
|
||||
*
|
||||
* Note 5 : If srcSize is _larger_ than block's compressed size,
|
||||
* then targetOutputSize **MUST** be <= block's decompressed size.
|
||||
* Otherwise, *silent corruption will occur*.
|
||||
* This is no longer necessary.
|
||||
* The function nonetheless keeps its signature, in an effort to not break API.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
|
||||
|
||||
@@ -420,10 +389,6 @@ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecod
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_H_2983827168210 */
|
||||
|
||||
|
||||
/*^*************************************
|
||||
* !!!!!! STATIC LINKING ONLY !!!!!!
|
||||
@@ -449,19 +414,14 @@ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int sr
|
||||
* define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
|
||||
******************************************************************************/
|
||||
|
||||
#ifdef LZ4_STATIC_LINKING_ONLY
|
||||
|
||||
#ifndef TRACY_LZ4_STATIC_3504398509
|
||||
#define TRACY_LZ4_STATIC_3504398509
|
||||
|
||||
#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
|
||||
#define LZ4LIB_STATIC_API LZ4LIB_API
|
||||
#else
|
||||
#define LZ4LIB_STATIC_API
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
#ifdef LZ4_STATIC_LINKING_ONLY
|
||||
|
||||
|
||||
/*! LZ4_compress_fast_extState_fastReset() :
|
||||
* A variant of LZ4_compress_fast_extState().
|
||||
@@ -503,137 +463,78 @@ LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const c
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
|
||||
|
||||
|
||||
/*! In-place compression and decompression
|
||||
*
|
||||
* It's possible to have input and output sharing the same buffer,
|
||||
* for highly constrained memory environments.
|
||||
* In both cases, it requires input to lay at the end of the buffer,
|
||||
* and decompression to start at beginning of the buffer.
|
||||
* Buffer size must feature some margin, hence be larger than final size.
|
||||
*
|
||||
* |<------------------------buffer--------------------------------->|
|
||||
* |<-----------compressed data--------->|
|
||||
* |<-----------decompressed size------------------>|
|
||||
* |<----margin---->|
|
||||
*
|
||||
* This technique is more useful for decompression,
|
||||
* since decompressed size is typically larger,
|
||||
* and margin is short.
|
||||
*
|
||||
* In-place decompression will work inside any buffer
|
||||
* which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
|
||||
* This presumes that decompressedSize > compressedSize.
|
||||
* Otherwise, it means compression actually expanded data,
|
||||
* and it would be more efficient to store such data with a flag indicating it's not compressed.
|
||||
* This can happen when data is not compressible (already compressed, or encrypted).
|
||||
*
|
||||
* For in-place compression, margin is larger, as it must be able to cope with both
|
||||
* history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
|
||||
* and data expansion, which can happen when input is not compressible.
|
||||
* As a consequence, buffer size requirements are much higher,
|
||||
* and memory savings offered by in-place compression are more limited.
|
||||
*
|
||||
* There are ways to limit this cost for compression :
|
||||
* - Reduce history size, by modifying LZ4_DISTANCE_MAX.
|
||||
* Note that it is a compile-time constant, so all compressions will apply this limit.
|
||||
* Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
|
||||
* so it's a reasonable trick when inputs are known to be small.
|
||||
* - Require the compressor to deliver a "maximum compressed size".
|
||||
* This is the `dstCapacity` parameter in `LZ4_compress*()`.
|
||||
* When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
|
||||
* in which case, the return code will be 0 (zero).
|
||||
* The caller must be ready for these cases to happen,
|
||||
* and typically design a backup scheme to send data uncompressed.
|
||||
* The combination of both techniques can significantly reduce
|
||||
* the amount of margin required for in-place compression.
|
||||
*
|
||||
* In-place compression can work in any buffer
|
||||
* which size is >= (maxCompressedSize)
|
||||
* with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
|
||||
* LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
|
||||
* so it's possible to reduce memory requirements by playing with them.
|
||||
*/
|
||||
|
||||
#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
|
||||
#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
|
||||
|
||||
#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
|
||||
# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
|
||||
#endif
|
||||
|
||||
#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
|
||||
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_STATIC_3504398509 */
|
||||
#endif /* LZ4_STATIC_LINKING_ONLY */
|
||||
|
||||
#ifndef TRACY_LZ4_H_98237428734687
|
||||
#define TRACY_LZ4_H_98237428734687
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/*-************************************************************
|
||||
* Private Definitions
|
||||
* PRIVATE DEFINITIONS
|
||||
**************************************************************
|
||||
* Do not use these definitions directly.
|
||||
* They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
|
||||
* Accessing members will expose user code to API and/or ABI break in future versions of the library.
|
||||
* Accessing members will expose code to API and/or ABI break in future versions of the library.
|
||||
**************************************************************/
|
||||
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
|
||||
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
|
||||
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
|
||||
|
||||
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
typedef int8_t LZ4_i8;
|
||||
typedef uint8_t LZ4_byte;
|
||||
typedef uint16_t LZ4_u16;
|
||||
typedef uint32_t LZ4_u32;
|
||||
#else
|
||||
typedef signed char LZ4_i8;
|
||||
typedef unsigned char LZ4_byte;
|
||||
typedef unsigned short LZ4_u16;
|
||||
typedef unsigned int LZ4_u32;
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
|
||||
struct LZ4_stream_t_internal {
|
||||
LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
|
||||
LZ4_u32 currentOffset;
|
||||
LZ4_u32 tableType;
|
||||
const LZ4_byte* dictionary;
|
||||
uint32_t hashTable[LZ4_HASH_SIZE_U32];
|
||||
uint32_t currentOffset;
|
||||
uint16_t dirty;
|
||||
uint16_t tableType;
|
||||
const uint8_t* dictionary;
|
||||
const LZ4_stream_t_internal* dictCtx;
|
||||
LZ4_u32 dictSize;
|
||||
uint32_t dictSize;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
const LZ4_byte* externalDict;
|
||||
const uint8_t* externalDict;
|
||||
size_t extDictSize;
|
||||
const LZ4_byte* prefixEnd;
|
||||
const uint8_t* prefixEnd;
|
||||
size_t prefixSize;
|
||||
} LZ4_streamDecode_t_internal;
|
||||
|
||||
#else
|
||||
|
||||
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
|
||||
struct LZ4_stream_t_internal {
|
||||
unsigned int hashTable[LZ4_HASH_SIZE_U32];
|
||||
unsigned int currentOffset;
|
||||
unsigned short dirty;
|
||||
unsigned short tableType;
|
||||
const unsigned char* dictionary;
|
||||
const LZ4_stream_t_internal* dictCtx;
|
||||
unsigned int dictSize;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
const unsigned char* externalDict;
|
||||
const unsigned char* prefixEnd;
|
||||
size_t extDictSize;
|
||||
size_t prefixSize;
|
||||
} LZ4_streamDecode_t_internal;
|
||||
|
||||
#endif
|
||||
|
||||
/*! LZ4_stream_t :
|
||||
* Do not use below internal definitions directly !
|
||||
* Declare or allocate an LZ4_stream_t instead.
|
||||
* information structure to track an LZ4 stream.
|
||||
* LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
|
||||
* The structure definition can be convenient for static allocation
|
||||
* (on stack, or as part of larger structure).
|
||||
* Init this structure with LZ4_initStream() before first use.
|
||||
* note : only use this definition in association with static linking !
|
||||
* this definition is not API/ABI safe, and may change in future versions.
|
||||
* this definition is not API/ABI safe, and may change in a future version.
|
||||
*/
|
||||
#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */
|
||||
#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
|
||||
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) /*AS-400*/ )
|
||||
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
|
||||
union LZ4_stream_u {
|
||||
void* table[LZ4_STREAMSIZE_VOIDP];
|
||||
unsigned long long table[LZ4_STREAMSIZE_U64];
|
||||
LZ4_stream_t_internal internal_donotuse;
|
||||
}; /* previously typedef'd to LZ4_stream_t */
|
||||
|
||||
} ; /* previously typedef'd to LZ4_stream_t */
|
||||
|
||||
/*! LZ4_initStream() : v1.9.0+
|
||||
* An LZ4_stream_t structure must be initialized at least once.
|
||||
@@ -667,7 +568,6 @@ union LZ4_streamDecode_u {
|
||||
} ; /* previously typedef'd to LZ4_streamDecode_t */
|
||||
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Obsolete Functions
|
||||
**************************************/
|
||||
@@ -686,34 +586,34 @@ union LZ4_streamDecode_u {
|
||||
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
|
||||
# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
|
||||
#else
|
||||
# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
||||
# define LZ4_DEPRECATED(message) [[deprecated(message)]]
|
||||
# elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
# elif (LZ4_GCC_VERSION >= 301)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
|
||||
# elif defined(_MSC_VER)
|
||||
# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
|
||||
# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
|
||||
# else
|
||||
# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
|
||||
# define LZ4_DEPRECATED(message) /* disabled */
|
||||
# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
|
||||
# define LZ4_DEPRECATED(message)
|
||||
# endif
|
||||
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
|
||||
|
||||
/*! Obsolete compression functions (since v1.7.3) */
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
|
||||
/* Obsolete compression functions */
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* source, char* dest, int sourceSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
|
||||
/*! Obsolete decompression functions (since v1.8.0) */
|
||||
/* Obsolete decompression functions */
|
||||
LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
|
||||
LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
|
||||
|
||||
/* Obsolete streaming functions (since v1.7.0)
|
||||
* degraded functionality; do not use!
|
||||
/* Obsolete streaming functions; degraded functionality; do not use!
|
||||
*
|
||||
* In order to perform streaming compression, these functions depended on data
|
||||
* that is no longer tracked in the state. They have been preserved as well as
|
||||
@@ -727,22 +627,23 @@ LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStre
|
||||
LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer);
|
||||
LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
|
||||
|
||||
/*! Obsolete streaming decoding functions (since v1.7.0) */
|
||||
/* Obsolete streaming decoding functions */
|
||||
LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
|
||||
LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
|
||||
|
||||
/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
|
||||
/*! LZ4_decompress_fast() : **unsafe!**
|
||||
* These functions used to be faster than LZ4_decompress_safe(),
|
||||
* but this is no longer the case. They are now slower.
|
||||
* but it has changed, and they are now slower than LZ4_decompress_safe().
|
||||
* This is because LZ4_decompress_fast() doesn't know the input size,
|
||||
* and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
|
||||
* and therefore must progress more cautiously in the input buffer to not read beyond the end of block.
|
||||
* On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
|
||||
* As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
|
||||
*
|
||||
* The last remaining LZ4_decompress_fast() specificity is that
|
||||
* it can decompress a block without knowing its compressed size.
|
||||
* Such functionality can be achieved in a more secure manner
|
||||
* by employing LZ4_decompress_safe_partial().
|
||||
* Such functionality could be achieved in a more secure manner,
|
||||
* by also providing the maximum size of input buffer,
|
||||
* but it would require new prototypes, and adaptation of the implementation to this new use case.
|
||||
*
|
||||
* Parameters:
|
||||
* originalSize : is the uncompressed size to regenerate.
|
||||
@@ -757,6 +658,7 @@ LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4
|
||||
* But they may happen if input data is invalid (error or intentional tampering).
|
||||
* As a consequence, use these functions in trusted environments with trusted data **only**.
|
||||
*/
|
||||
|
||||
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
|
||||
LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
|
||||
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
|
||||
@@ -774,4 +676,4 @@ LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_H_98237428734687 */
|
||||
#endif /* LZ4_H_2983827168210 */
|
||||
|
||||
@@ -53,7 +53,7 @@
|
||||
#include "tracy_lz4hc.hpp"
|
||||
|
||||
|
||||
/*=== Common definitions ===*/
|
||||
/*=== Common LZ4 definitions ===*/
|
||||
#if defined(__GNUC__)
|
||||
# pragma GCC diagnostic ignored "-Wunused-function"
|
||||
#endif
|
||||
@@ -61,15 +61,21 @@
|
||||
# pragma clang diagnostic ignored "-Wunused-function"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/*=== Enums ===*/
|
||||
typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
|
||||
|
||||
}
|
||||
|
||||
#define LZ4_COMMONDEFS_ONLY
|
||||
#ifndef LZ4_SRC_INCLUDED
|
||||
#include "tracy_lz4.cpp" /* LZ4_count, constants, mem */
|
||||
#endif
|
||||
|
||||
|
||||
/*=== Enums ===*/
|
||||
typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/*=== Constants ===*/
|
||||
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
|
||||
@@ -85,9 +91,6 @@ typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
|
||||
/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
|
||||
#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/* LZ4HC_hashPtr() :
 * Reads a 32-bit value at `ptr` via LZ4_read32() and returns it passed
 * through HASH_FUNCTION(). */
static U32 LZ4HC_hashPtr(const void* ptr)
{
    U32 const sequence = LZ4_read32(ptr);
    return HASH_FUNCTION(sequence);
}
|
||||
|
||||
|
||||
@@ -96,7 +99,7 @@ static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)
|
||||
**************************************/
|
||||
static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
|
||||
{
|
||||
MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable));
|
||||
MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
|
||||
MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
|
||||
}
|
||||
|
||||
@@ -155,28 +158,13 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
|
||||
return back;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# define LZ4HC_rotl32(x,r) _rotl(x,r)
|
||||
#else
|
||||
# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
#endif
|
||||
|
||||
|
||||
/* LZ4HC_rotatePattern() :
 * Rotates `pattern` left by a whole number of bytes.
 * Only the low bits of `rotate` are used (rotate modulo sizeof(pattern)),
 * converted from a byte count to a bit count. */
static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
{
    size_t const byteShift = rotate & (sizeof(pattern) - 1);
    size_t const bitShift  = byteShift << 3;
    /* A zero rotation is returned as-is : the generic LZ4HC_rotl32() form
     * would otherwise shift by the full width of the value. */
    return (bitShift != 0) ? LZ4HC_rotl32(pattern, (int)bitShift) : pattern;
}
|
||||
|
||||
/* LZ4HC_countPattern() :
|
||||
* pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
|
||||
static unsigned
|
||||
LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
|
||||
{
|
||||
const BYTE* const iStart = ip;
|
||||
reg_t const pattern = (sizeof(pattern)==8) ?
|
||||
(reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32;
|
||||
reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
|
||||
|
||||
while (likely(ip < iEnd-(sizeof(pattern)-1))) {
|
||||
reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
|
||||
@@ -222,16 +210,6 @@ LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
|
||||
return (unsigned)(iStart - ip);
|
||||
}
|
||||
|
||||
/* LZ4HC_protectDictEnd() :
|
||||
* Checks if the match is in the last 3 bytes of the dictionary, so reading the
|
||||
* 4 byte MINMATCH would overflow.
|
||||
* @returns true if the match index is okay.
|
||||
*/
|
||||
static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
|
||||
{
|
||||
return ((U32)((dictLimit - 1) - matchIndex) >= 3);
|
||||
}
|
||||
|
||||
typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
|
||||
typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
|
||||
|
||||
@@ -257,7 +235,7 @@ LZ4HC_InsertAndGetWiderMatch (
|
||||
const U32 dictLimit = hc4->dictLimit;
|
||||
const BYTE* const lowPrefixPtr = base + dictLimit;
|
||||
const U32 ipIndex = (U32)(ip - base);
|
||||
const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
|
||||
const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
|
||||
const BYTE* const dictBase = hc4->dictBase;
|
||||
int const lookBackLength = (int)(ip-iLowLimit);
|
||||
int nbAttempts = maxNbAttempts;
|
||||
@@ -274,7 +252,7 @@ LZ4HC_InsertAndGetWiderMatch (
|
||||
DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
|
||||
matchIndex, lowestMatchIndex);
|
||||
|
||||
while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
|
||||
while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
|
||||
int matchLength=0;
|
||||
nbAttempts--;
|
||||
assert(matchIndex < ipIndex);
|
||||
@@ -316,21 +294,14 @@ LZ4HC_InsertAndGetWiderMatch (
|
||||
if (chainSwap && matchLength==longest) { /* better match => select a better chain */
|
||||
assert(lookBackLength==0); /* search forward only */
|
||||
if (matchIndex + (U32)longest <= ipIndex) {
|
||||
int const kTrigger = 4;
|
||||
U32 distanceToNextMatch = 1;
|
||||
int const end = longest - MINMATCH + 1;
|
||||
int step = 1;
|
||||
int accel = 1 << kTrigger;
|
||||
int pos;
|
||||
for (pos = 0; pos < end; pos += step) {
|
||||
for (pos = 0; pos <= longest - MINMATCH; pos++) {
|
||||
U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
|
||||
step = (accel++ >> kTrigger);
|
||||
if (candidateDist > distanceToNextMatch) {
|
||||
distanceToNextMatch = candidateDist;
|
||||
matchChainPos = (U32)pos;
|
||||
accel = 1 << kTrigger;
|
||||
}
|
||||
}
|
||||
} }
|
||||
if (distanceToNextMatch > 1) {
|
||||
if (distanceToNextMatch > matchIndex) break; /* avoid overflow */
|
||||
matchIndex -= distanceToNextMatch;
|
||||
@@ -349,61 +320,34 @@ LZ4HC_InsertAndGetWiderMatch (
|
||||
} else {
|
||||
repeat = rep_not;
|
||||
} }
|
||||
if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
|
||||
&& LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) {
|
||||
const int extDict = matchCandidateIdx < dictLimit;
|
||||
const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx;
|
||||
if ( (repeat == rep_confirmed)
|
||||
&& (matchCandidateIdx >= dictLimit) ) { /* same segment only */
|
||||
const BYTE* const matchPtr = base + matchCandidateIdx;
|
||||
if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
|
||||
const BYTE* const dictStart = dictBase + hc4->lowLimit;
|
||||
const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit;
|
||||
size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
|
||||
if (extDict && matchPtr + forwardPatternLength == iLimit) {
|
||||
U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
|
||||
forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
|
||||
}
|
||||
{ const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
|
||||
size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
|
||||
size_t currentSegmentLength;
|
||||
if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) {
|
||||
U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
|
||||
backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
|
||||
}
|
||||
/* Limit backLength not go further than lowestMatchIndex */
|
||||
backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
|
||||
assert(matchCandidateIdx - backLength >= lowestMatchIndex);
|
||||
currentSegmentLength = backLength + forwardPatternLength;
|
||||
/* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
|
||||
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
|
||||
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
|
||||
U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
|
||||
if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex))
|
||||
matchIndex = newMatchIndex;
|
||||
else {
|
||||
/* Can only happen if started in the prefix */
|
||||
assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
|
||||
matchIndex = dictLimit;
|
||||
size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
|
||||
const BYTE* const lowestMatchPtr = (lowPrefixPtr + LZ4_DISTANCE_MAX >= ip) ? lowPrefixPtr : ip - LZ4_DISTANCE_MAX;
|
||||
size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
|
||||
size_t const currentSegmentLength = backLength + forwardPatternLength;
|
||||
|
||||
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
|
||||
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
|
||||
matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
|
||||
} else {
|
||||
matchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
|
||||
if (lookBackLength==0) { /* no back possible */
|
||||
size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
|
||||
if ((size_t)longest < maxML) {
|
||||
assert(base + matchIndex < ip);
|
||||
if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
|
||||
assert(maxML < 2 GB);
|
||||
longest = (int)maxML;
|
||||
*matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
|
||||
*startpos = ip;
|
||||
}
|
||||
} else {
|
||||
U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
|
||||
if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) {
|
||||
assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
|
||||
matchIndex = dictLimit;
|
||||
} else {
|
||||
matchIndex = newMatchIndex;
|
||||
if (lookBackLength==0) { /* no back possible */
|
||||
size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
|
||||
if ((size_t)longest < maxML) {
|
||||
assert(base + matchIndex != ip);
|
||||
if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break;
|
||||
assert(maxML < 2 GB);
|
||||
longest = (int)maxML;
|
||||
*matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
|
||||
*startpos = ip;
|
||||
}
|
||||
{ U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
|
||||
if (distToNextPattern > matchIndex) break; /* avoid overflow */
|
||||
matchIndex -= distToNextPattern;
|
||||
} } } } }
|
||||
{ U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
|
||||
if (distToNextPattern > matchIndex) break; /* avoid overflow */
|
||||
matchIndex -= distToNextPattern;
|
||||
} } }
|
||||
continue;
|
||||
} }
|
||||
} } /* PA optimization */
|
||||
@@ -414,7 +358,7 @@ LZ4HC_InsertAndGetWiderMatch (
|
||||
} /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
|
||||
|
||||
if ( dict == usingDictCtxHc
|
||||
&& nbAttempts > 0
|
||||
&& nbAttempts
|
||||
&& ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
|
||||
size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
|
||||
U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
|
||||
@@ -464,90 +408,74 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl
|
||||
* @return : 0 if ok,
|
||||
* 1 if buffer issue detected */
|
||||
LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
|
||||
const BYTE** _ip,
|
||||
BYTE** _op,
|
||||
const BYTE** _anchor,
|
||||
const BYTE** ip,
|
||||
BYTE** op,
|
||||
const BYTE** anchor,
|
||||
int matchLength,
|
||||
const BYTE* const match,
|
||||
limitedOutput_directive limit,
|
||||
BYTE* oend)
|
||||
{
|
||||
#define ip (*_ip)
|
||||
#define op (*_op)
|
||||
#define anchor (*_anchor)
|
||||
|
||||
size_t length;
|
||||
BYTE* const token = op++;
|
||||
BYTE* const token = (*op)++;
|
||||
|
||||
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
|
||||
static const BYTE* start = NULL;
|
||||
static U32 totalCost = 0;
|
||||
U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
|
||||
U32 const ll = (U32)(ip - anchor);
|
||||
U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
|
||||
U32 const ll = (U32)(*ip - *anchor);
|
||||
U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
|
||||
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
|
||||
U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
|
||||
if (start==NULL) start = anchor; /* only works for single segment */
|
||||
if (start==NULL) start = *anchor; /* only works for single segment */
|
||||
/* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
|
||||
DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
|
||||
DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
|
||||
pos,
|
||||
(U32)(ip - anchor), matchLength, (U32)(ip-match),
|
||||
(U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
|
||||
cost, totalCost);
|
||||
totalCost += cost;
|
||||
#endif
|
||||
|
||||
/* Encode Literal length */
|
||||
length = (size_t)(ip - anchor);
|
||||
LZ4_STATIC_ASSERT(notLimited == 0);
|
||||
/* Check output limit */
|
||||
if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
|
||||
DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
|
||||
(int)length, (int)(oend - op));
|
||||
return 1;
|
||||
}
|
||||
length = (size_t)(*ip - *anchor);
|
||||
if ((limit) && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */
|
||||
if (length >= RUN_MASK) {
|
||||
size_t len = length - RUN_MASK;
|
||||
*token = (RUN_MASK << ML_BITS);
|
||||
for(; len >= 255 ; len -= 255) *op++ = 255;
|
||||
*op++ = (BYTE)len;
|
||||
for(; len >= 255 ; len -= 255) *(*op)++ = 255;
|
||||
*(*op)++ = (BYTE)len;
|
||||
} else {
|
||||
*token = (BYTE)(length << ML_BITS);
|
||||
}
|
||||
|
||||
/* Copy Literals */
|
||||
LZ4_wildCopy8(op, anchor, op + length);
|
||||
op += length;
|
||||
LZ4_wildCopy8(*op, *anchor, (*op) + length);
|
||||
*op += length;
|
||||
|
||||
/* Encode Offset */
|
||||
assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
|
||||
LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
|
||||
assert( (*ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
|
||||
LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
|
||||
|
||||
/* Encode MatchLength */
|
||||
assert(matchLength >= MINMATCH);
|
||||
length = (size_t)matchLength - MINMATCH;
|
||||
if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
|
||||
DEBUGLOG(6, "Not enough room to write match length");
|
||||
return 1; /* Check output limit */
|
||||
}
|
||||
if ((limit) && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
|
||||
if (length >= ML_MASK) {
|
||||
*token += ML_MASK;
|
||||
length -= ML_MASK;
|
||||
for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
|
||||
if (length >= 255) { length -= 255; *op++ = 255; }
|
||||
*op++ = (BYTE)length;
|
||||
for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }
|
||||
if (length >= 255) { length -= 255; *(*op)++ = 255; }
|
||||
*(*op)++ = (BYTE)length;
|
||||
} else {
|
||||
*token += (BYTE)(length);
|
||||
}
|
||||
|
||||
/* Prepare next loop */
|
||||
ip += matchLength;
|
||||
anchor = ip;
|
||||
*ip += matchLength;
|
||||
*anchor = *ip;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#undef ip
|
||||
#undef op
|
||||
#undef anchor
|
||||
|
||||
LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
|
||||
LZ4HC_CCtx_internal* const ctx,
|
||||
@@ -555,7 +483,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
|
||||
char* const dest,
|
||||
int* srcSizePtr,
|
||||
int const maxOutputSize,
|
||||
int maxNbAttempts,
|
||||
unsigned maxNbAttempts,
|
||||
const limitedOutput_directive limit,
|
||||
const dictCtx_directive dict
|
||||
)
|
||||
@@ -585,7 +513,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
|
||||
/* init */
|
||||
*srcSizePtr = 0;
|
||||
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
||||
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
|
||||
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
|
||||
|
||||
/* Main Loop */
|
||||
while (ip <= mflimit) {
|
||||
@@ -657,11 +585,7 @@ _Search3:
|
||||
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
|
||||
ip = start2;
|
||||
optr = op;
|
||||
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
|
||||
ml = ml2;
|
||||
ref = ref2;
|
||||
goto _dest_overflow;
|
||||
}
|
||||
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) goto _dest_overflow;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -733,18 +657,17 @@ _Search3:
|
||||
_last_literals:
|
||||
/* Encode Last Literals */
|
||||
{ size_t lastRunSize = (size_t)(iend - anchor); /* literals */
|
||||
size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
|
||||
size_t const totalSize = 1 + llAdd + lastRunSize;
|
||||
size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
|
||||
size_t const totalSize = 1 + litLength + lastRunSize;
|
||||
if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
|
||||
if (limit && (op + totalSize > oend)) {
|
||||
if (limit == limitedOutput) return 0;
|
||||
if (limit == limitedOutput) return 0; /* Check output limit */
|
||||
/* adapt lastRunSize to fill 'dest' */
|
||||
lastRunSize = (size_t)(oend - op) - 1 /*token*/;
|
||||
llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
|
||||
lastRunSize -= llAdd;
|
||||
lastRunSize = (size_t)(oend - op) - 1;
|
||||
litLength = (lastRunSize + 255 - RUN_MASK) / 255;
|
||||
lastRunSize -= litLength;
|
||||
}
|
||||
DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
|
||||
ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
|
||||
ip = anchor + lastRunSize;
|
||||
|
||||
if (lastRunSize >= RUN_MASK) {
|
||||
size_t accumulator = lastRunSize - RUN_MASK;
|
||||
@@ -764,25 +687,9 @@ _last_literals:
|
||||
|
||||
_dest_overflow:
|
||||
if (limit == fillOutput) {
|
||||
/* Assumption : ip, anchor, ml and ref must be set correctly */
|
||||
size_t const ll = (size_t)(ip - anchor);
|
||||
size_t const ll_addbytes = (ll + 240) / 255;
|
||||
size_t const ll_totalCost = 1 + ll_addbytes + ll;
|
||||
BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
|
||||
DEBUGLOG(6, "Last sequence overflowing");
|
||||
op = optr; /* restore correct out pointer */
|
||||
if (op + ll_totalCost <= maxLitPos) {
|
||||
/* ll validated; now adjust match length */
|
||||
size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
|
||||
size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
|
||||
assert(maxMlSize < INT_MAX); assert(ml >= 0);
|
||||
if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
|
||||
if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
|
||||
LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend);
|
||||
} }
|
||||
goto _last_literals;
|
||||
}
|
||||
/* compression failed */
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -793,7 +700,7 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
|
||||
int const nbSearches, size_t sufficient_len,
|
||||
const limitedOutput_directive limit, int const fullUpdate,
|
||||
const dictCtx_directive dict,
|
||||
const HCfavor_e favorDecSpeed);
|
||||
HCfavor_e favorDecSpeed);
|
||||
|
||||
|
||||
LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
|
||||
@@ -810,7 +717,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
|
||||
typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
|
||||
typedef struct {
|
||||
lz4hc_strat_e strat;
|
||||
int nbSearches;
|
||||
U32 nbSearches;
|
||||
U32 targetLength;
|
||||
} cParams_t;
|
||||
static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
|
||||
@@ -829,8 +736,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
|
||||
{ lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
|
||||
};
|
||||
|
||||
DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
|
||||
ctx, src, *srcSizePtr, limit);
|
||||
DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d)", ctx, src, *srcSizePtr);
|
||||
|
||||
if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */
|
||||
if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
|
||||
@@ -850,7 +756,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
|
||||
assert(cParam.strat == lz4opt);
|
||||
result = LZ4HC_compress_optimal(ctx,
|
||||
src, dst, srcSizePtr, dstCapacity,
|
||||
cParam.nbSearches, cParam.targetLength, limit,
|
||||
(int)cParam.nbSearches, cParam.targetLength, limit,
|
||||
cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */
|
||||
dict, favor);
|
||||
}
|
||||
@@ -923,22 +829,27 @@ LZ4HC_compress_generic (
|
||||
|
||||
int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); }
|
||||
|
||||
#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
|
||||
* it reports an aligment of 8-bytes,
|
||||
* while actually aligning LZ4_streamHC_t on 4 bytes. */
|
||||
static size_t LZ4_streamHC_t_alignment(void)
|
||||
{
|
||||
#if LZ4_ALIGN_TEST
|
||||
typedef struct { char c; LZ4_streamHC_t t; } t_a;
|
||||
return sizeof(t_a) - sizeof(LZ4_streamHC_t);
|
||||
#else
|
||||
return 1; /* effectively disabled */
|
||||
#endif
|
||||
struct { char c; LZ4_streamHC_t t; } t_a;
|
||||
return sizeof(t_a) - sizeof(t_a.t);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* state is presumed correctly initialized,
|
||||
* in which case its size and alignment have already been validate */
|
||||
int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
|
||||
{
|
||||
LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
|
||||
if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0;
|
||||
#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
|
||||
* it reports an aligment of 8-bytes,
|
||||
* while actually aligning LZ4_streamHC_t on 4 bytes. */
|
||||
assert(((size_t)state & (LZ4_streamHC_t_alignment() - 1)) == 0); /* check alignment */
|
||||
#endif
|
||||
if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */
|
||||
LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel);
|
||||
LZ4HC_init_internal (ctx, (const BYTE*)src);
|
||||
if (dstCapacity < LZ4_compressBound(srcSize))
|
||||
@@ -987,11 +898,10 @@ int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* s
|
||||
/* allocation */
|
||||
LZ4_streamHC_t* LZ4_createStreamHC(void)
|
||||
{
|
||||
LZ4_streamHC_t* const state =
|
||||
(LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t));
|
||||
if (state == NULL) return NULL;
|
||||
LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT);
|
||||
return state;
|
||||
LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
|
||||
if (LZ4_streamHCPtr==NULL) return NULL;
|
||||
LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); /* full initialization, malloc'ed buffer can be full of garbage */
|
||||
return LZ4_streamHCPtr;
|
||||
}
|
||||
|
||||
int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
|
||||
@@ -1006,16 +916,22 @@ int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
|
||||
LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
|
||||
{
|
||||
LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer;
|
||||
/* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
|
||||
LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
|
||||
DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size);
|
||||
/* check conditions */
|
||||
if (buffer == NULL) return NULL;
|
||||
if (size < sizeof(LZ4_streamHC_t)) return NULL;
|
||||
if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL;
|
||||
/* init */
|
||||
{ LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse);
|
||||
MEM_INIT(hcstate, 0, sizeof(*hcstate)); }
|
||||
#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
|
||||
* it reports an aligment of 8-bytes,
|
||||
* while actually aligning LZ4_streamHC_t on 4 bytes. */
|
||||
if (((size_t)buffer) & (LZ4_streamHC_t_alignment() - 1)) return NULL; /* alignment check */
|
||||
#endif
|
||||
/* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
|
||||
LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
|
||||
DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", LZ4_streamHCPtr, (unsigned)size);
|
||||
/* end-base will trigger a clearTable on starting compression */
|
||||
LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1;
|
||||
LZ4_streamHCPtr->internal_donotuse.base = NULL;
|
||||
LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
|
||||
LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0;
|
||||
LZ4_streamHCPtr->internal_donotuse.dirty = 0;
|
||||
LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT);
|
||||
return LZ4_streamHCPtr;
|
||||
}
|
||||
@@ -1060,7 +976,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
|
||||
const char* dictionary, int dictSize)
|
||||
{
|
||||
LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
|
||||
DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
|
||||
DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize);
|
||||
assert(LZ4_streamHCPtr != NULL);
|
||||
if (dictSize > 64 KB) {
|
||||
dictionary += (size_t)dictSize - 64 KB;
|
||||
@@ -1096,20 +1012,16 @@ static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBl
|
||||
ctxPtr->base = newBlock - ctxPtr->dictLimit;
|
||||
ctxPtr->end = newBlock;
|
||||
ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
|
||||
|
||||
/* cannot reference an extDict and a dictCtx at the same time */
|
||||
ctxPtr->dictCtx = NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
|
||||
const char* src, char* dst,
|
||||
int* srcSizePtr, int dstCapacity,
|
||||
limitedOutput_directive limit)
|
||||
static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
|
||||
const char* src, char* dst,
|
||||
int* srcSizePtr, int dstCapacity,
|
||||
limitedOutput_directive limit)
|
||||
{
|
||||
LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
|
||||
DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
|
||||
LZ4_streamHCPtr, src, *srcSizePtr, limit);
|
||||
DEBUGLOG(4, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d)",
|
||||
LZ4_streamHCPtr, src, *srcSizePtr);
|
||||
assert(ctxPtr != NULL);
|
||||
/* auto-init if forgotten */
|
||||
if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
|
||||
@@ -1133,7 +1045,8 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
|
||||
if (sourceEnd > dictEnd) sourceEnd = dictEnd;
|
||||
ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
|
||||
if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
|
||||
} }
|
||||
}
|
||||
}
|
||||
|
||||
return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
|
||||
}
|
||||
@@ -1153,30 +1066,23 @@ int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const ch
|
||||
|
||||
|
||||
|
||||
/* LZ4_saveDictHC :
|
||||
* save history content
|
||||
* into a user-provided buffer
|
||||
* which is then used to continue compression
|
||||
*/
|
||||
/* dictionary saving */
|
||||
|
||||
int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
|
||||
{
|
||||
LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
|
||||
int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
|
||||
DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
|
||||
assert(prefixSize >= 0);
|
||||
DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
|
||||
if (dictSize > 64 KB) dictSize = 64 KB;
|
||||
if (dictSize < 4) dictSize = 0;
|
||||
if (dictSize > prefixSize) dictSize = prefixSize;
|
||||
if (safeBuffer == NULL) assert(dictSize == 0);
|
||||
if (dictSize > 0)
|
||||
memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
|
||||
memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
|
||||
{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
|
||||
streamPtr->end = (const BYTE*)safeBuffer + dictSize;
|
||||
streamPtr->base = streamPtr->end - endIndex;
|
||||
streamPtr->dictLimit = endIndex - (U32)dictSize;
|
||||
streamPtr->lowLimit = endIndex - (U32)dictSize;
|
||||
if (streamPtr->nextToUpdate < streamPtr->dictLimit)
|
||||
streamPtr->nextToUpdate = streamPtr->dictLimit;
|
||||
if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
|
||||
}
|
||||
return dictSize;
|
||||
}
|
||||
@@ -1326,13 +1232,8 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
|
||||
const dictCtx_directive dict,
|
||||
const HCfavor_e favorDecSpeed)
|
||||
{
|
||||
int retval = 0;
|
||||
#define TRAILING_LITERALS 3
|
||||
#ifdef LZ4HC_HEAPMODE
|
||||
LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
|
||||
#else
|
||||
LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */
|
||||
#endif
|
||||
|
||||
const BYTE* ip = (const BYTE*) source;
|
||||
const BYTE* anchor = ip;
|
||||
@@ -1342,19 +1243,15 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
|
||||
BYTE* op = (BYTE*) dst;
|
||||
BYTE* opSaved = (BYTE*) dst;
|
||||
BYTE* oend = op + dstCapacity;
|
||||
int ovml = MINMATCH; /* overflow - last sequence */
|
||||
const BYTE* ovref = NULL;
|
||||
|
||||
/* init */
|
||||
#ifdef LZ4HC_HEAPMODE
|
||||
if (opt == NULL) goto _return_label;
|
||||
#endif
|
||||
DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
|
||||
*srcSizePtr = 0;
|
||||
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
||||
if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
|
||||
|
||||
/* Main Loop */
|
||||
assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
|
||||
while (ip <= mflimit) {
|
||||
int const llen = (int)(ip - anchor);
|
||||
int best_mlen, best_off;
|
||||
@@ -1368,11 +1265,8 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
|
||||
int const firstML = firstMatch.len;
|
||||
const BYTE* const matchPos = ip - firstMatch.off;
|
||||
opSaved = op;
|
||||
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */
|
||||
ovml = firstML;
|
||||
ovref = matchPos;
|
||||
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */
|
||||
goto _dest_overflow;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1514,7 +1408,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
|
||||
best_off = opt[last_match_pos].off;
|
||||
cur = last_match_pos - best_mlen;
|
||||
|
||||
encode: /* cur, last_match_pos, best_mlen, best_off must be set */
|
||||
encode: /* cur, last_match_pos, best_mlen, best_off must be set */
|
||||
assert(cur < LZ4_OPT_NUM);
|
||||
assert(last_match_pos >= 1); /* == 1 when only one candidate */
|
||||
DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
|
||||
@@ -1544,31 +1438,25 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
|
||||
assert(ml >= MINMATCH);
|
||||
assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
|
||||
opSaved = op;
|
||||
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */
|
||||
ovml = ml;
|
||||
ovref = ip - offset;
|
||||
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */
|
||||
goto _dest_overflow;
|
||||
} } }
|
||||
} }
|
||||
} /* while (ip <= mflimit) */
|
||||
|
||||
_last_literals:
|
||||
_last_literals:
|
||||
/* Encode Last Literals */
|
||||
{ size_t lastRunSize = (size_t)(iend - anchor); /* literals */
|
||||
size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
|
||||
size_t const totalSize = 1 + llAdd + lastRunSize;
|
||||
size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
|
||||
size_t const totalSize = 1 + litLength + lastRunSize;
|
||||
if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
|
||||
if (limit && (op + totalSize > oend)) {
|
||||
if (limit == limitedOutput) { /* Check output limit */
|
||||
retval = 0;
|
||||
goto _return_label;
|
||||
}
|
||||
if (limit == limitedOutput) return 0; /* Check output limit */
|
||||
/* adapt lastRunSize to fill 'dst' */
|
||||
lastRunSize = (size_t)(oend - op) - 1 /*token*/;
|
||||
llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
|
||||
lastRunSize -= llAdd;
|
||||
lastRunSize = (size_t)(oend - op) - 1;
|
||||
litLength = (lastRunSize + 255 - RUN_MASK) / 255;
|
||||
lastRunSize -= litLength;
|
||||
}
|
||||
DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
|
||||
ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
|
||||
ip = anchor + lastRunSize;
|
||||
|
||||
if (lastRunSize >= RUN_MASK) {
|
||||
size_t accumulator = lastRunSize - RUN_MASK;
|
||||
@@ -1584,37 +1472,14 @@ _last_literals:
|
||||
|
||||
/* End */
|
||||
*srcSizePtr = (int) (((const char*)ip) - source);
|
||||
retval = (int) ((char*)op-dst);
|
||||
goto _return_label;
|
||||
return (int) ((char*)op-dst);
|
||||
|
||||
_dest_overflow:
|
||||
if (limit == fillOutput) {
|
||||
/* Assumption : ip, anchor, ovml and ovref must be set correctly */
|
||||
size_t const ll = (size_t)(ip - anchor);
|
||||
size_t const ll_addbytes = (ll + 240) / 255;
|
||||
size_t const ll_totalCost = 1 + ll_addbytes + ll;
|
||||
BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
|
||||
DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved));
|
||||
op = opSaved; /* restore correct out pointer */
|
||||
if (op + ll_totalCost <= maxLitPos) {
|
||||
/* ll validated; now adjust match length */
|
||||
size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
|
||||
size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
|
||||
assert(maxMlSize < INT_MAX); assert(ovml >= 0);
|
||||
if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize;
|
||||
if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
|
||||
DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
|
||||
DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
|
||||
LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
|
||||
DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
|
||||
} }
|
||||
goto _last_literals;
|
||||
}
|
||||
_return_label:
|
||||
#ifdef LZ4HC_HEAPMODE
|
||||
FREEMEM(opt);
|
||||
#endif
|
||||
return retval;
|
||||
}
|
||||
_dest_overflow:
|
||||
if (limit == fillOutput) {
|
||||
op = opSaved; /* restore correct out pointer */
|
||||
goto _last_literals;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,6 +38,8 @@
|
||||
/* note : lz4hc requires lz4.h/lz4.c for compilation */
|
||||
#include "tracy_lz4.hpp" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/* --- Useful constants --- */
|
||||
#define LZ4HC_CLEVEL_MIN 3
|
||||
@@ -45,8 +47,6 @@
|
||||
#define LZ4HC_CLEVEL_OPT_MIN 10
|
||||
#define LZ4HC_CLEVEL_MAX 12
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/*-************************************
|
||||
* Block Compression
|
||||
@@ -196,32 +196,57 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in
|
||||
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
|
||||
|
||||
|
||||
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
|
||||
struct LZ4HC_CCtx_internal
|
||||
{
|
||||
LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE];
|
||||
LZ4_u16 chainTable[LZ4HC_MAXD];
|
||||
const LZ4_byte* end; /* next block here to continue on current prefix */
|
||||
const LZ4_byte* base; /* All index relative to this position */
|
||||
const LZ4_byte* dictBase; /* alternate base for extDict */
|
||||
LZ4_u32 dictLimit; /* below that point, need extDict */
|
||||
LZ4_u32 lowLimit; /* below that point, no more dict */
|
||||
LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */
|
||||
short compressionLevel;
|
||||
LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set,
|
||||
otherwise, favor compression ratio */
|
||||
LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */
|
||||
uint32_t hashTable[LZ4HC_HASHTABLESIZE];
|
||||
uint16_t chainTable[LZ4HC_MAXD];
|
||||
const uint8_t* end; /* next block here to continue on current prefix */
|
||||
const uint8_t* base; /* All index relative to this position */
|
||||
const uint8_t* dictBase; /* alternate base for extDict */
|
||||
uint32_t dictLimit; /* below that point, need extDict */
|
||||
uint32_t lowLimit; /* below that point, no more dict */
|
||||
uint32_t nextToUpdate; /* index from which to continue dictionary update */
|
||||
short compressionLevel;
|
||||
int8_t favorDecSpeed; /* favor decompression speed if this flag set,
|
||||
otherwise, favor compression ratio */
|
||||
int8_t dirty; /* stream has to be fully reset if this flag is set */
|
||||
const LZ4HC_CCtx_internal* dictCtx;
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
|
||||
struct LZ4HC_CCtx_internal
|
||||
{
|
||||
unsigned int hashTable[LZ4HC_HASHTABLESIZE];
|
||||
unsigned short chainTable[LZ4HC_MAXD];
|
||||
const unsigned char* end; /* next block here to continue on current prefix */
|
||||
const unsigned char* base; /* All index relative to this position */
|
||||
const unsigned char* dictBase; /* alternate base for extDict */
|
||||
unsigned int dictLimit; /* below that point, need extDict */
|
||||
unsigned int lowLimit; /* below that point, no more dict */
|
||||
unsigned int nextToUpdate; /* index from which to continue dictionary update */
|
||||
short compressionLevel;
|
||||
char favorDecSpeed; /* favor decompression speed if this flag set,
|
||||
otherwise, favor compression ratio */
|
||||
char dirty; /* stream has to be fully reset if this flag is set */
|
||||
const LZ4HC_CCtx_internal* dictCtx;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* Do not use these definitions directly !
|
||||
* Declare or allocate an LZ4_streamHC_t instead.
|
||||
*/
|
||||
#define LZ4_STREAMHCSIZE 262200 /* static size, for inter-version compatibility */
|
||||
#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*))
|
||||
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56 + ((sizeof(void*)==16) ? 56 : 0) /* AS400*/ ) /* 262200 or 262256*/
|
||||
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
|
||||
union LZ4_streamHC_u {
|
||||
void* table[LZ4_STREAMHCSIZE_VOIDP];
|
||||
size_t table[LZ4_STREAMHCSIZE_SIZET];
|
||||
LZ4HC_CCtx_internal internal_donotuse;
|
||||
}; /* previously typedef'd to LZ4_streamHC_t */
|
||||
|
||||
@@ -289,6 +314,7 @@ LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStre
|
||||
*/
|
||||
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_HC_H_19834876238432 */
|
||||
@@ -303,11 +329,8 @@ LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionL
|
||||
* after successful usage in real-life scenarios.
|
||||
***************************************************/
|
||||
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
|
||||
#ifndef TRACY_LZ4_HC_SLO_098092834
|
||||
#define TRACY_LZ4_HC_SLO_098092834
|
||||
|
||||
#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
|
||||
#include "tracy_lz4.hpp"
|
||||
#ifndef LZ4_HC_SLO_098092834
|
||||
#define LZ4_HC_SLO_098092834
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
255
common/tracy_sema.h
Normal file
255
common/tracy_sema.h
Normal file
@@ -0,0 +1,255 @@
|
||||
// Copyright (c) 2015 Jeff Preshing
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#ifndef __TRACY_CPP11OM_SEMAPHORE_H__
|
||||
#define __TRACY_CPP11OM_SEMAPHORE_H__
|
||||
|
||||
#include <atomic>
#include <cassert>
#include <cerrno>

#if defined(__MACH__)
#include <mach/mach.h>
#elif defined(__unix__)
#include <semaphore.h>
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined(_WIN32)
|
||||
//---------------------------------------------------------
|
||||
// Semaphore (Windows)
|
||||
//---------------------------------------------------------
|
||||
#ifndef MAXLONG
|
||||
enum { MAXLONG = 0x7fffffff };
|
||||
#endif
|
||||
|
||||
#ifndef INFINITE
|
||||
enum { INFINITE = 0xFFFFFFFF };
|
||||
#endif
|
||||
|
||||
#ifndef _WINDOWS_
|
||||
typedef void* HANDLE;
|
||||
|
||||
extern "C" __declspec(dllimport) HANDLE __stdcall CreateSemaphoreA( void*, long, long, const char* );
|
||||
extern "C" __declspec(dllimport) int __stdcall CloseHandle( HANDLE );
|
||||
extern "C" __declspec(dllimport) unsigned long __stdcall WaitForSingleObject( HANDLE, unsigned long );
|
||||
extern "C" __declspec(dllimport) int __stdcall ReleaseSemaphore( HANDLE, long, long* );
|
||||
#endif
|
||||
|
||||
class Semaphore
|
||||
{
|
||||
private:
|
||||
HANDLE m_hSema;
|
||||
|
||||
Semaphore(const Semaphore& other) = delete;
|
||||
Semaphore& operator=(const Semaphore& other) = delete;
|
||||
|
||||
public:
|
||||
Semaphore(int initialCount = 0)
|
||||
{
|
||||
assert(initialCount >= 0);
|
||||
m_hSema = CreateSemaphoreA(NULL, initialCount, MAXLONG, NULL);
|
||||
}
|
||||
|
||||
~Semaphore()
|
||||
{
|
||||
CloseHandle(m_hSema);
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
WaitForSingleObject(m_hSema, INFINITE);
|
||||
}
|
||||
|
||||
void signal(int count = 1)
|
||||
{
|
||||
ReleaseSemaphore(m_hSema, count, NULL);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#elif defined(__MACH__)
|
||||
//---------------------------------------------------------
|
||||
// Semaphore (Apple iOS and OSX)
|
||||
// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
|
||||
//---------------------------------------------------------
|
||||
|
||||
class Semaphore
|
||||
{
|
||||
private:
|
||||
semaphore_t m_sema;
|
||||
|
||||
Semaphore(const Semaphore& other) = delete;
|
||||
Semaphore& operator=(const Semaphore& other) = delete;
|
||||
|
||||
public:
|
||||
Semaphore(int initialCount = 0)
|
||||
{
|
||||
assert(initialCount >= 0);
|
||||
semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
|
||||
}
|
||||
|
||||
~Semaphore()
|
||||
{
|
||||
semaphore_destroy(mach_task_self(), m_sema);
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
semaphore_wait(m_sema);
|
||||
}
|
||||
|
||||
void signal()
|
||||
{
|
||||
semaphore_signal(m_sema);
|
||||
}
|
||||
|
||||
void signal(int count)
|
||||
{
|
||||
while (count-- > 0)
|
||||
{
|
||||
semaphore_signal(m_sema);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#elif defined(__unix__)
|
||||
//---------------------------------------------------------
|
||||
// Semaphore (POSIX, Linux)
|
||||
//---------------------------------------------------------
|
||||
|
||||
// Non-copyable wrapper around an unnamed POSIX semaphore (sem_t).
// The semaphore is initialised in the constructor and destroyed in the
// destructor.
//
// wait() inspects errno/EINTR, so the translation unit must include
// <cerrno>.
class Semaphore
{
private:
    sem_t m_sema;

    // Copying is disabled.
    Semaphore(const Semaphore& other) = delete;
    Semaphore& operator=(const Semaphore& other) = delete;

public:
    // Initialises the semaphore with `initialCount` (must be >= 0) as its
    // starting value. The second argument to sem_init (pshared) is 0.
    Semaphore(int initialCount = 0)
    {
        assert(initialCount >= 0);
        // sem_init takes an unsigned value; the assert above guards the cast.
        sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
    }

    // Destroys the semaphore initialised by the constructor.
    ~Semaphore()
    {
        sem_destroy(&m_sema);
    }

    // Waits on the semaphore, retrying when sem_wait is interrupted.
    void wait()
    {
        // sem_wait can fail with EINTR (e.g. under a debugger), see
        // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
        // Keep retrying in that case; any other result ends the loop.
        int rc;
        do
        {
            rc = sem_wait(&m_sema);
        }
        while (rc == -1 && errno == EINTR);
    }

    // Posts the semaphore once.
    void signal()
    {
        sem_post(&m_sema);
    }

    // Posts the semaphore `count` times, one call per unit.
    // Does nothing when `count` is zero or negative.
    void signal(int count)
    {
        while (count-- > 0)
        {
            sem_post(&m_sema);
        }
    }
};
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#error Unsupported platform!
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
//---------------------------------------------------------
|
||||
// LightweightSemaphore
|
||||
//---------------------------------------------------------
|
||||
class LightweightSemaphore
|
||||
{
|
||||
private:
|
||||
std::atomic<int> m_count;
|
||||
Semaphore m_sema;
|
||||
|
||||
void waitWithPartialSpinning()
|
||||
{
|
||||
int oldCount;
|
||||
// Is there a better way to set the initial spin count?
|
||||
// If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
|
||||
// as threads start hitting the kernel semaphore.
|
||||
int spin = 10000;
|
||||
while (spin--)
|
||||
{
|
||||
oldCount = m_count.load(std::memory_order_relaxed);
|
||||
if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire))
|
||||
return;
|
||||
std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop.
|
||||
}
|
||||
oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
|
||||
if (oldCount <= 0)
|
||||
{
|
||||
m_sema.wait();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
LightweightSemaphore(int initialCount = 0) : m_count(initialCount)
|
||||
{
|
||||
assert(initialCount >= 0);
|
||||
}
|
||||
|
||||
bool tryWait()
|
||||
{
|
||||
int oldCount = m_count.load(std::memory_order_relaxed);
|
||||
return (oldCount > 0 && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire));
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
if (!tryWait())
|
||||
waitWithPartialSpinning();
|
||||
}
|
||||
|
||||
void signal(int count = 1)
|
||||
{
|
||||
int oldCount = m_count.fetch_add(count, std::memory_order_release);
|
||||
int toRelease = -oldCount < count ? -oldCount : count;
|
||||
if (toRelease > 0)
|
||||
{
|
||||
m_sema.signal(toRelease);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
typedef LightweightSemaphore DefaultSemaphoreType;
|
||||
|
||||
}
|
||||
|
||||
#endif // __TRACY_CPP11OM_SEMAPHORE_H__
|
||||
@@ -1,13 +0,0 @@
|
||||
ARCH := $(shell uname -m)
|
||||
|
||||
ifeq (1,$(shell $(CC) --version | grep clang > /dev/null && echo 1 || echo 0))
|
||||
ifeq (1,$(shell ld.mold --version > /dev/null 2> /dev/null && echo 1 || echo 0))
|
||||
LDFLAGS := -fuse-ld=mold
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef TRACY_NO_ISA_EXTENSIONS
|
||||
ifeq ($(ARCH),x86_64)
|
||||
CFLAGS += -msse4.1
|
||||
endif
|
||||
endif
|
||||
@@ -1,19 +0,0 @@
|
||||
ARCH := $(shell uname -m)
|
||||
|
||||
ifeq (0,$(shell $(CC) --version | grep clang > /dev/null && echo 1 || echo 0))
|
||||
CFLAGS += -s
|
||||
else
|
||||
ifeq (1,$(shell ld.mold --version > /dev/null 2> /dev/null && echo 1 || echo 0))
|
||||
LDFLAGS := -s -fuse-ld=mold
|
||||
else
|
||||
LDFLAGS := -s
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef TRACY_NO_ISA_EXTENSIONS
|
||||
ifneq (,$(filter $(ARCH),aarch64 arm64))
|
||||
CFLAGS += -mcpu=native
|
||||
else
|
||||
CFLAGS += -march=native
|
||||
endif
|
||||
endif
|
||||
@@ -1,82 +0,0 @@
|
||||
# Common code needed by most Tracy Unix Makefiles.
|
||||
|
||||
# Ensure these are simply-substituted variables, without changing their values.
|
||||
LIBS := $(LIBS)
|
||||
|
||||
# Tracy does not use TBB directly, but the implementation of parallel algorithms
|
||||
# in some versions of libstdc++ depends on TBB. When it does, you must
|
||||
# explicitly link against -ltbb.
|
||||
#
|
||||
# Some distributions have pkg-config files for TBB, others don't.
|
||||
ifeq (0,$(shell pkg-config --libs tbb >/dev/null 2>&1; echo $$?))
|
||||
LIBS += $(shell pkg-config --libs tbb)
|
||||
else ifeq (0,$(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?))
|
||||
LIBS += -ltbb
|
||||
endif
|
||||
|
||||
OBJDIRBASE := obj/$(BUILD)
|
||||
OBJDIR := $(OBJDIRBASE)/o/o/o
|
||||
|
||||
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
|
||||
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
|
||||
OBJ3 := $(addprefix $(OBJDIR)/,$(SRC3:%.m=%.o))
|
||||
OBJ4 := $(addprefix $(OBJDIR)/,$(SRC4:%.S=%.o))
|
||||
|
||||
all: $(IMAGE)
|
||||
|
||||
$(OBJDIR)/%.o: %.cpp
|
||||
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.cpp
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.c
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.c
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.m
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
$(OBJDIR)/%.d : %.m
|
||||
@echo Resolving dependencies of $<
|
||||
@mkdir -p $(@D)
|
||||
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
|
||||
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \
|
||||
rm -f $@.$$$$
|
||||
|
||||
$(OBJDIR)/%.o: %.S
|
||||
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
|
||||
|
||||
# Generate the dependency file for an assembly (.S) source.
# The compiler's -MM output is rewritten by sed so that both the object file
# and the .d file itself are listed as targets of the discovered
# prerequisites. The substitution must map the .S suffix (not .m) to .o,
# otherwise the emitted target is the source file name and the object never
# picks up these dependencies.
$(OBJDIR)/%.d : %.S
	@echo Resolving dependencies of $<
	@mkdir -p $(@D)
	@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
	sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.S=.o) $@ : ,g' < $@.$$$$ > $@; \
	rm -f $@.$$$$
|
||||
|
||||
ifeq (yes,$(SHARED_LIBRARY))
|
||||
$(IMAGE): $(OBJ) $(OBJ2) $(OBJ4)
|
||||
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ4) $(LIBS) -shared -o $@
|
||||
else
|
||||
$(IMAGE): $(OBJ) $(OBJ2) $(OBJ3) $(OBJ4)
|
||||
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ3) $(OBJ4) $(LIBS) -o $@
|
||||
endif
|
||||
|
||||
ifneq "$(MAKECMDGOALS)" "clean"
|
||||
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d)) $(addprefix $(OBJDIR)/,$(SRC3:.m=.d)) $(addprefix $(OBJDIR)/,$(SRC4:.S=.d))
|
||||
endif
|
||||
|
||||
clean:
|
||||
rm -rf $(OBJDIRBASE) $(IMAGE)*
|
||||
|
||||
.PHONY: clean all
|
||||
@@ -1,16 +0,0 @@
|
||||
# Convenience targets; each one delegates to a per-configuration makefile.
# Sub-makes are invoked through $(MAKE) so the same make program is used for
# the recursive invocation.
all: release

debug:
	@+$(MAKE) -f debug.mk all

release:
	@+$(MAKE) -f release.mk all

clean:
	@+$(MAKE) -f build.mk clean

# Run a debug build from a clean tree under bear, then move the generated
# compile_commands.json three directories up.
db: clean
	@bear -- $(MAKE) -f debug.mk all
	@mv -f compile_commands.json ../../../

.PHONY: all clean debug release db
|
||||
@@ -1,12 +0,0 @@
|
||||
CFLAGS +=
|
||||
CXXFLAGS := $(CFLAGS) -std=gnu++17
|
||||
# DEFINES += -DTRACY_NO_STATISTICS
|
||||
INCLUDES := $(shell pkg-config --cflags capstone)
|
||||
LIBS := $(shell pkg-config --libs capstone) -lpthread
|
||||
PROJECT := csvexport
|
||||
IMAGE := $(PROJECT)-$(BUILD)
|
||||
|
||||
FILTER :=
|
||||
include ../../../common/src-from-vcxproj.mk
|
||||
|
||||
include ../../../common/unix.mk
|
||||
@@ -1,6 +0,0 @@
|
||||
CFLAGS := -g3 -Wall
|
||||
DEFINES := -DDEBUG
|
||||
BUILD := debug
|
||||
|
||||
include ../../../common/unix-debug.mk
|
||||
include build.mk
|
||||
@@ -1,9 +0,0 @@
|
||||
CFLAGS := -O3
|
||||
ifndef TRACY_NO_LTO
|
||||
CFLAGS += -flto
|
||||
endif
|
||||
DEFINES := -DNDEBUG
|
||||
BUILD := release
|
||||
|
||||
include ../../../common/unix-release.mk
|
||||
include build.mk
|
||||
@@ -1,25 +0,0 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30907.101
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csvexport", "csvexport.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.Build.0 = Debug|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.ActiveCfg = Release|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {3E51386C-43EA-44AC-9F24-AFAFE4D63ADE}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
@@ -1,206 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
|
||||
<RootNamespace>capture</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
<VcpkgTriplet>x64-windows-static</VcpkgTriplet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<PropertyGroup Label="Vcpkg">
|
||||
<VcpkgEnableManifest>true</VcpkgEnableManifest>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<PreprocessorDefinitions>_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpplatest</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\debug\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
<LanguageStandard>stdcpplatest</LanguageStandard>
|
||||
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\TracySocket.cpp" />
|
||||
<ClCompile Include="..\..\..\common\TracyStackFrames.cpp" />
|
||||
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
|
||||
<ClCompile Include="..\..\..\getopt\getopt.c" />
|
||||
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyMmap.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyPrint.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
|
||||
<ClCompile Include="..\..\..\zstd\common\debug.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\entropy_common.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\error_private.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\fse_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\pool.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\threading.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\xxhash.c" />
|
||||
<ClCompile Include="..\..\..\zstd\common\zstd_common.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\fse_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\hist.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\huf_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_literals.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_sequences.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_superblock.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_double_fast.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_fast.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_lazy.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_ldm.c" />
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_opt.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\huf_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\src\csvexport.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyAlloc.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyColor.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyStackFrames.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
|
||||
<ClInclude Include="..\..\..\getopt\getopt.h" />
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileRead.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMmap.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPrint.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
|
||||
<ClInclude Include="..\..\..\zstd\common\bitstream.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\compiler.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\cpu.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\debug.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\error_private.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\fse.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\huf.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\mem.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\pool.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\portability_macros.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\threading.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\xxhash.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_deps.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\clevels.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\hist.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_literals.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_sequences.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_superblock.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_cwksp.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_double_fast.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_fast.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_lazy.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm_geartab.h" />
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_opt.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h" />
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
@@ -1,359 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="src">
|
||||
<UniqueIdentifier>{729c80ee-4d26-4a5e-8f1f-6c075783eb56}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="server">
|
||||
<UniqueIdentifier>{cf23ef7b-7694-4154-830b-00cf053350ea}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="common">
|
||||
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="getopt">
|
||||
<UniqueIdentifier>{ee9737d2-69c7-44da-b9c7-539d18f9d4b4}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd">
|
||||
<UniqueIdentifier>{44ea1742-0fd9-40ba-879c-031868509600}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\common">
|
||||
<UniqueIdentifier>{2d065bba-d78e-4e44-87f7-b44cf3248260}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\compress">
|
||||
<UniqueIdentifier>{a19ca3bc-2a17-49c5-a0d9-5916213de774}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\decompress">
|
||||
<UniqueIdentifier>{d4181058-2198-4931-ae31-b7eda0312458}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="zstd\dictBuilder">
|
||||
<UniqueIdentifier>{873c22fe-b4d7-480d-ad67-48271296f4c1}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\TracySocket.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\TracySystem.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyMemory.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyWorker.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyPrint.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyMmap.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\getopt\getopt.c">
|
||||
<Filter>getopt</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\src\csvexport.cpp">
|
||||
<Filter>src</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\debug.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\entropy_common.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\error_private.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\fse_decompress.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\pool.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\threading.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\xxhash.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\common\zstd_common.c">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\decompress\huf_decompress.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\fse_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\hist.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\huf_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_literals.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_sequences.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_superblock.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_double_fast.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_fast.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_lazy.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_ldm.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstd_opt.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\TracyStackFrames.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyAlloc.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyColor.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyForceInline.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyProtocol.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracySocket.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyEvent.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyMemory.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracySlab.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyVector.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyPrint.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyFileRead.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyMmap.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\getopt\getopt.h">
|
||||
<Filter>getopt</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zstd_errors.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\bitstream.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\compiler.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\cpu.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\debug.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\error_private.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\fse.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\huf.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\mem.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\pool.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\threading.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\xxhash.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_deps.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_internal.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\hist.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_internal.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_literals.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_sequences.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_superblock.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_cwksp.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_double_fast.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_fast.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_lazy.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm_geartab.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstd_opt.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\zdict.h">
|
||||
<Filter>zstd</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
|
||||
<Filter>zstd\dictBuilder</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyStackFrames.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\common\portability_macros.h">
|
||||
<Filter>zstd\common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\zstd\compress\clevels.h">
|
||||
<Filter>zstd\compress</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S">
|
||||
<Filter>zstd\decompress</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -1,311 +0,0 @@
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../../server/TracyFileRead.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
#include "../../getopt/getopt.h"
|
||||
|
||||
/// Prints the command-line help text to stderr and terminates the process.
///
/// @param e Exit status passed to exit(). Callers in this file use 0 when
///          help was requested and 1 for usage errors.
void print_usage_exit(int e)
{
    fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, " extract [OPTION...] <trace file>\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " -h, --help Print usage\n");
    // The quotes are escaped on purpose: a bare "" would close this literal
    // and open a new one, so the help text would read "(default: )".
    fprintf(stderr, " -f, --filter arg Filter zone names (default: \"\")\n");
    fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
    fprintf(stderr, " -c, --case Case sensitive filtering\n");
    fprintf(stderr, " -e, --self Get self times\n");
    fprintf(stderr, " -u, --unwrap Report each zone event\n");

    exit(e);
}
|
||||
|
||||
/// Settings collected from the command line.
struct Args
{
    // String options; the pointers refer to storage owned by the caller
    // (string literals or argv entries).
    const char* filter;      // zone-name filter; empty string selects every zone
    const char* separator;   // text placed between CSV columns
    const char* trace_file;  // path of the trace to read

    // Flags, all false unless the matching switch is given.
    bool case_sensitive;     // -c: compare the filter case-sensitively
    bool self_time;          // -e: report self times
    bool unwrap;             // -u: emit one row per zone event
};
|
||||
|
||||
/// Parses the command line into an Args value.
///
/// Prints the usage text and exits (status 1) when no arguments are given,
/// when an unknown option is seen, or when the number of positional
/// arguments is not exactly one. Exits with status 0 on -h / --help.
///
/// @param argc Argument count as received by main().
/// @param argv Argument vector as received by main(); the returned string
///             pointers refer into it.
/// @return The parsed settings; trace_file is the single positional argument.
Args parse_args(int argc, char** argv)
{
    if (argc == 1)
    {
        print_usage_exit(1);
    }

    Args args = { "", ",", "", false, false, false };

    // --filter and --sep require a value, exactly like their short forms
    // ("f:" and "s:") and as the usage text advertises. Declaring them as
    // optional_argument lets a bare "--filter" set optarg to NULL, which
    // would then be stored in args and dereferenced by the caller.
    struct option long_opts[] = {
        { "help", no_argument, NULL, 'h' },
        { "filter", required_argument, NULL, 'f' },
        { "sep", required_argument, NULL, 's' },
        { "case", no_argument, NULL, 'c' },
        { "self", no_argument, NULL, 'e' },
        { "unwrap", no_argument, NULL, 'u' },
        { NULL, 0, NULL, 0 }
    };

    int c;
    while ((c = getopt_long(argc, argv, "hf:s:ceu", long_opts, NULL)) != -1)
    {
        switch (c)
        {
        case 'h':
            print_usage_exit(0);
            break;
        case 'f':
            args.filter = optarg;
            break;
        case 's':
            args.separator = optarg;
            break;
        case 'c':
            args.case_sensitive = true;
            break;
        case 'e':
            args.self_time = true;
            break;
        case 'u':
            args.unwrap = true;
            break;
        default:
            print_usage_exit(1);
            break;
        }
    }

    // Exactly one positional argument (the trace file) must remain.
    if (argc != optind + 1)
    {
        print_usage_exit(1);
    }

    args.trace_file = argv[optind];

    return args;
}
|
||||
|
||||
/// Reports whether `term` occurs anywhere inside `s`.
///
/// @param term           Text to look for; an empty term always matches.
/// @param s              Text to search in.
/// @param case_sensitive When false (the default) both strings are lowered
///                       with std::tolower before the comparison.
/// @return true if `term` is found in `s`.
bool is_substring(const char* term, const char* s, bool case_sensitive = false)
{
    std::string needle(term);
    std::string haystack(s);

    if (case_sensitive)
    {
        return haystack.find(needle) != std::string::npos;
    }

    // Lower both working copies in place; the inputs themselves are untouched.
    const auto to_lower = [](std::string& text) {
        for (char& ch : text)
        {
            const unsigned char uc = ch;
            ch = std::tolower(uc);
        }
    };
    to_lower(needle);
    to_lower(haystack);

    return haystack.find(needle) != std::string::npos;
}
|
||||
|
||||
/// Returns the display name of a source location.
///
/// Uses the location's explicit name when its `active` flag is set and the
/// function name otherwise.
///
/// @param id     Source location id understood by worker.GetSourceLocation().
/// @param worker Worker that owns the source location and string tables.
/// @return The string returned by worker.GetString() for the chosen reference.
const char* get_name(int32_t id, const tracy::Worker& worker)
{
    const auto& location = worker.GetSourceLocation(id);
    if (location.name.active)
    {
        return worker.GetString(location.name);
    }
    return worker.GetString(location.function);
}
|
||||
|
||||
/// Concatenates the elements of a container, placing `sep` between
/// consecutive elements.
///
/// Works with any container usable in a range-based for loop whose elements
/// can be streamed with operator<<; random access is not required.
///
/// @param v   Container of elements to join.
/// @param sep Separator written between elements (not before the first or
///            after the last).
/// @return The joined text; empty when `v` has no elements.
template <typename T>
std::string join(const T& v, const char* sep)
{
    std::ostringstream s;
    // Track the first element with a flag instead of comparing addresses
    // against v[0], so containers without operator[] are accepted too.
    bool first = true;
    for (const auto& i : v)
    {
        if (!first)
        {
            s << sep;
        }
        first = false;
        s << i;
    }
    return s.str();
}
|
||||
|
||||
// From TracyView.cpp
|
||||
int64_t GetZoneChildTimeFast(
|
||||
const tracy::Worker& worker,
|
||||
const tracy::ZoneEvent& zone
|
||||
){
|
||||
int64_t time = 0;
|
||||
if( zone.HasChildren() )
|
||||
{
|
||||
auto& children = worker.GetZoneChildren( zone.Child() );
|
||||
if( children.is_magic() )
|
||||
{
|
||||
auto& vec = *(tracy::Vector<tracy::ZoneEvent>*)&children;
|
||||
for( auto& v : vec )
|
||||
{
|
||||
assert( v.IsEndValid() );
|
||||
time += v.End() - v.Start();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( auto& v : children )
|
||||
{
|
||||
assert( v->IsEndValid() );
|
||||
time += v->End() - v->Start();
|
||||
}
|
||||
}
|
||||
}
|
||||
return time;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (!AttachConsole(ATTACH_PARENT_PROCESS))
|
||||
{
|
||||
AllocConsole();
|
||||
SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
|
||||
}
|
||||
#endif
|
||||
|
||||
Args args = parse_args(argc, argv);
|
||||
|
||||
auto f = std::unique_ptr<tracy::FileRead>(
|
||||
tracy::FileRead::Open(args.trace_file)
|
||||
);
|
||||
if (!f)
|
||||
{
|
||||
fprintf(stderr, "Could not open file %s\n", args.trace_file);
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto worker = tracy::Worker(*f);
|
||||
|
||||
while (!worker.AreSourceLocationZonesReady())
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
|
||||
auto& slz = worker.GetSourceLocationZones();
|
||||
tracy::Vector<decltype(slz.begin())> slz_selected;
|
||||
slz_selected.reserve(slz.size());
|
||||
|
||||
uint32_t total_cnt = 0;
|
||||
for(auto it = slz.begin(); it != slz.end(); ++it)
|
||||
{
|
||||
if(it->second.total != 0)
|
||||
{
|
||||
++total_cnt;
|
||||
if(args.filter[0] == '\0')
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto name = get_name(it->first, worker);
|
||||
if(is_substring(args.filter, name, args.case_sensitive))
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char*> columns;
|
||||
if (args.unwrap)
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "ns_since_start", "exec_time_ns"
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "total_ns", "total_perc",
|
||||
"counts", "mean_ns", "min_ns", "max_ns", "std_ns"
|
||||
};
|
||||
}
|
||||
std::string header = join(columns, args.separator);
|
||||
printf("%s\n", header.data());
|
||||
|
||||
const auto last_time = worker.GetLastTime();
|
||||
for(auto& it : slz_selected)
|
||||
{
|
||||
std::vector<std::string> values(columns.size());
|
||||
|
||||
values[0] = get_name(it->first, worker);
|
||||
|
||||
const auto& srcloc = worker.GetSourceLocation(it->first);
|
||||
values[1] = worker.GetString(srcloc.file);
|
||||
values[2] = std::to_string(srcloc.line);
|
||||
|
||||
const auto& zone_data = it->second;
|
||||
|
||||
if (args.unwrap)
|
||||
{
|
||||
int i = 0;
|
||||
for (const auto& zone_thread_data : zone_data.zones) {
|
||||
const auto zone_event = zone_thread_data.Zone();
|
||||
const auto start = zone_event->Start();
|
||||
const auto end = zone_event->End();
|
||||
|
||||
values[3] = std::to_string(start);
|
||||
|
||||
auto timespan = end - start;
|
||||
if (args.self_time) {
|
||||
timespan -= GetZoneChildTimeFast(worker, *zone_event);
|
||||
}
|
||||
values[4] = std::to_string(timespan);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
|
||||
values[3] = std::to_string(time);
|
||||
values[4] = std::to_string(100. * time / last_time);
|
||||
|
||||
values[5] = std::to_string(zone_data.zones.size());
|
||||
|
||||
const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
|
||||
/ zone_data.zones.size();
|
||||
values[6] = std::to_string(avg);
|
||||
|
||||
const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
|
||||
const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
|
||||
values[7] = std::to_string(tmin);
|
||||
values[8] = std::to_string(tmax);
|
||||
|
||||
const auto sz = zone_data.zones.size();
|
||||
const auto ss = zone_data.sumSq
|
||||
- 2. * zone_data.total * avg
|
||||
+ avg * avg * sz;
|
||||
const auto std = sqrt(ss / (sz - 1));
|
||||
values[9] = std::to_string(std);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
BIN
doc/profiler.png
BIN
doc/profiler.png
Binary file not shown.
|
Before Width: | Height: | Size: 261 KiB After Width: | Height: | Size: 284 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 156 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 100 KiB |
@@ -1,17 +0,0 @@
|
||||
# Build script for the OpenCLVectorAdd example: a static TracyClient library
# with profiling enabled, linked into the example executable together with
# OpenCL and the platform thread library.
cmake_minimum_required(VERSION 3.10)

project(OpenCLVectorAdd)

# External dependencies.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
find_package(OpenCL REQUIRED)

# Tracy client, built from the sources two directories up.
add_library(TracyClient STATIC
    ../../TracyClient.cpp
    ../../TracyOpenCL.hpp)
target_include_directories(TracyClient PUBLIC ../../)
target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE=1)

# The example itself.
add_executable(OpenCLVectorAdd OpenCLVectorAdd.cpp)
target_link_libraries(OpenCLVectorAdd PUBLIC OpenCL::OpenCL TracyClient ${CMAKE_DL_LIBS} Threads::Threads)
|
||||
@@ -1,220 +0,0 @@
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <Tracy.hpp>
|
||||
#include <TracyOpenCL.hpp>
|
||||
|
||||
// Checks the status of an OpenCL call: when it is not CL_SUCCESS the code is
// printed to std::cerr and assert(false) fires.
//
// The argument is evaluated exactly once and stored, so a failing call is not
// issued a second time just to print its result. The do/while(false) wrapper
// makes the macro a single statement, so it is safe inside an unbraced
// if/else. Use it with a trailing semicolon: CL_ASSERT(call(...));
#define CL_ASSERT(err) \
    do \
    { \
        const auto cl_assert_status_ = (err); \
        if(cl_assert_status_ != CL_SUCCESS) \
        { \
            std::cerr << "OpenCL Call Returned " << cl_assert_status_ << std::endl; \
            assert(false); \
        } \
    } while(false)
|
||||
|
||||
// OpenCL C source of the vectorAdd kernel: each work item i below N stores
// A[i] + B[i] into C[i]. The fragments are joined by the compiler through
// adjacent string-literal concatenation.
const char kernelSource[] =
    " void __kernel vectorAdd(global float* C, global float* A, global float* B, int N) "
    " { " " int i = get_global_id(0); "
    " if (i < N) { " " C[i] = A[i] + B[i]; " " } "
    " } ";
|
||||
|
||||
int main()
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id device;
|
||||
cl_context context;
|
||||
cl_command_queue commandQueue;
|
||||
cl_kernel vectorAddKernel;
|
||||
cl_program program;
|
||||
cl_int err;
|
||||
cl_mem bufferA, bufferB, bufferC;
|
||||
|
||||
TracyCLCtx tracyCLCtx;
|
||||
|
||||
{
|
||||
ZoneScopedN("OpenCL Init");
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
CL_ASSERT(clGetPlatformIDs(0, nullptr, &numPlatforms));
|
||||
|
||||
if (numPlatforms == 0)
|
||||
{
|
||||
std::cerr << "Cannot find OpenCL platform to run this application" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
CL_ASSERT(clGetPlatformIDs(1, &platform, nullptr));
|
||||
|
||||
size_t platformNameBufferSize = 0;
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, nullptr, &platformNameBufferSize));
|
||||
std::string platformName(platformNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformNameBufferSize, &platformName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Platform: " << platformName << std::endl;
|
||||
|
||||
CL_ASSERT(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr));
|
||||
size_t deviceNameBufferSize = 0;
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &deviceNameBufferSize));
|
||||
std::string deviceName(deviceNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, deviceNameBufferSize, &deviceName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Device: " << deviceName << std::endl;
|
||||
|
||||
err = CL_SUCCESS;
|
||||
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
size_t kernelSourceLength = sizeof(kernelSource);
|
||||
const char* kernelSourceArray = { kernelSource };
|
||||
program = clCreateProgramWithSource(context, 1, &kernelSourceArray, &kernelSourceLength, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
if (clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr) != CL_SUCCESS)
|
||||
{
|
||||
size_t programBuildLogBufferSize = 0;
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &programBuildLogBufferSize));
|
||||
std::string programBuildLog(programBuildLogBufferSize, '\0');
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, programBuildLogBufferSize, &programBuildLog[0], nullptr));
|
||||
std::clog << programBuildLog << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vectorAddKernel = clCreateKernel(program, "vectorAdd", &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
CL_ASSERT(err);
|
||||
}
|
||||
|
||||
tracyCLCtx = TracyCLContext(context, device);
|
||||
|
||||
size_t N = 10 * 1024 * 1024 / sizeof(float); // 10MB of floats
|
||||
std::vector<float> hostA, hostB, hostC;
|
||||
|
||||
{
|
||||
ZoneScopedN("Host Data Init");
|
||||
hostA.resize(N);
|
||||
hostB.resize(N);
|
||||
hostC.resize(N);
|
||||
|
||||
std::iota(std::begin(hostA), std::end(hostA), 0.0f);
|
||||
std::iota(std::begin(hostB), std::end(hostB), 0.0f);
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Host to Device Memory Copy");
|
||||
|
||||
bufferA = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferB = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferC = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
cl_event writeBufferAEvent, writeBufferBEvent;
|
||||
{
|
||||
ZoneScopedN("Write Buffer A");
|
||||
TracyCLZoneS(tracyCLCtx, "Write BufferA", 5);
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_FALSE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferAEvent);
|
||||
}
|
||||
{
|
||||
ZoneScopedN("Write Buffer B");
|
||||
TracyCLZone(tracyCLCtx, "Write BufferB");
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_FALSE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferBEvent);
|
||||
}
|
||||
}
|
||||
|
||||
cl_int clN = static_cast<cl_int>(N);
|
||||
const int numFrames = 10;
|
||||
const int launchsPerFrame = 10;
|
||||
constexpr int numLaunchs = numFrames * launchsPerFrame;
|
||||
std::vector<cl_event> kernelLaunchEvts;
|
||||
kernelLaunchEvts.reserve(numLaunchs);
|
||||
for (int i = 0; i < numFrames; ++i)
|
||||
{
|
||||
FrameMark;
|
||||
for (int j = 0; j < launchsPerFrame; ++j) {
|
||||
ZoneScopedN("VectorAdd Kernel Launch");
|
||||
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
|
||||
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(cl_int), &clN));
|
||||
|
||||
cl_event vectorAddKernelEvent;
|
||||
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
|
||||
TracyCLZoneSetEvent(vectorAddKernelEvent);
|
||||
CL_ASSERT(clRetainEvent(vectorAddKernelEvent));
|
||||
kernelLaunchEvts.push_back(vectorAddKernelEvent);
|
||||
std::cout << "VectorAdd Kernel Enqueued" << std::endl;
|
||||
}
|
||||
{
|
||||
// Wait frame events to be finished
|
||||
ZoneScopedN("clFinish");
|
||||
CL_ASSERT(clFinish(commandQueue));
|
||||
}
|
||||
// You should collect on each 'frame' ends, so that streaming can be achieved.
|
||||
TracyCLCollect(tracyCLCtx);
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Device to Host Memory Copy");
|
||||
TracyCLZone(tracyCLCtx, "Read Buffer C");
|
||||
|
||||
cl_event readbufferCEvent;
|
||||
CL_ASSERT(clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0, N * sizeof(float), hostC.data(), 0, nullptr, &readbufferCEvent));
|
||||
TracyCLZoneSetEvent(readbufferCEvent);
|
||||
}
|
||||
|
||||
CL_ASSERT(clFinish(commandQueue));
|
||||
std::vector<float> durations(kernelLaunchEvts.size());
|
||||
for (int i=0; i<kernelLaunchEvts.size(); i++) {
|
||||
cl_event evt = kernelLaunchEvts[i];
|
||||
cl_ulong start;
|
||||
cl_ulong end;
|
||||
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));
|
||||
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr));
|
||||
CL_ASSERT(clReleaseEvent(evt));
|
||||
durations[i] = (end - start) * 0.001f;
|
||||
std::cout << "VectorAdd Kernel " << i << " tooks " << static_cast<int>(durations[i]) << "us" << std::endl;
|
||||
};
|
||||
float avg = std::accumulate(durations.cbegin(), durations.cend(), 0.0f) / durations.size();
|
||||
float stddev2 = std::accumulate(durations.cbegin(), durations.cend(), 0.0f, [avg](const float& acc, const float& v) {
|
||||
auto d = v - avg;
|
||||
return acc + d*d;
|
||||
}) / (durations.size() - 1.0f);
|
||||
std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl;
|
||||
|
||||
// User should ensure all events are finished, in this case, collect after the clFinish will do the trick.
|
||||
TracyCLCollect(tracyCLCtx);
|
||||
|
||||
{
|
||||
ZoneScopedN("Checking results");
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
assert(hostC[i] == hostA[i] + hostB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Results are correct!" << std::endl;
|
||||
|
||||
TracyCLDestroy(tracyCLCtx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
https://github.com/aras-p/ToyPathTracer
|
||||
|
||||
Modified to render only 10 frames. Client part requires 12 GB, server part
|
||||
requires 6.4 GB.
|
||||
Modified to render only 10 frames. Client part requires 8 GB, server part
|
||||
requires 7 GB.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30907.101
|
||||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.27130.2036
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TestCpu", "TestCpu.vcxproj", "{4F84B756-87F5-4B92-827B-DA087DAE1900}"
|
||||
EndProject
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user