Compare commits

..

7 Commits

Author SHA1 Message Date
Bartosz Taudul
7d475c3022 Reverse drawing order. 2019-08-06 17:08:54 +02:00
Bartosz Taudul
e7acf05a71 Better layout. 2019-08-06 16:29:01 +02:00
Bartosz Taudul
41c45e2d2b Draw mesh debug. 2019-08-06 16:20:40 +02:00
Bartosz Taudul
d4db8e2512 Save/load mesh data. 2019-08-06 11:44:05 +02:00
Bartosz Taudul
a25f541297 Associate mesh data with frames. 2019-08-05 16:58:20 +02:00
Bartosz Taudul
abacf6302b Push mesh triangles into staging vector. 2019-08-05 16:49:45 +02:00
Bartosz Taudul
5d673eb8f2 Add 2D mesh debug interface. 2019-08-05 16:47:20 +02:00
380 changed files with 53417 additions and 365865 deletions

23
.appveyor.yml Normal file
View File

@@ -0,0 +1,23 @@
version: '{build}'
platform:
- x64
image:
- Visual Studio 2019
- Ubuntu1804
install:
- cmd: cd c:\tools\vcpkg
- cmd: git pull
- cmd: bootstrap-vcpkg.bat
- cmd: vcpkg install freetype glfw3 --triplet x64-windows-static
- cmd: vcpkg integrate install
- cmd: cd %APPVEYOR_BUILD_FOLDER%
build_script:
- cmd: msbuild .\update\build\win32\update.vcxproj
- cmd: msbuild .\profiler\build\win32\Tracy.vcxproj
- cmd: msbuild .\capture\build\win32\capture.vcxproj
- sh: sudo apt-get update && sudo apt-get -y install libglfw3-dev libgtk2.0-dev
- sh: make -C update/build/unix debug release
- sh: make -C profiler/build/unix debug release
- sh: make -C capture/build/unix debug release
- sh: make -C test
test: off

1
.github/FUNDING.yml vendored
View File

@@ -1 +0,0 @@
github: wolfpld

BIN
.github/sponsor.png vendored

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

View File

@@ -1,44 +0,0 @@
name: gcc
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04, macOS-latest]
steps:
- uses: actions/checkout@v2
- name: Install linux libraries
if: ${{ matrix.os == 'ubuntu-22.04' }}
run: sudo apt-get update && sudo apt-get -y install libglfw3-dev libdbus-1-dev libcapstone-dev libtbb-dev libdebuginfod-dev
- name: Install macos libraries
if: ${{ matrix.os == 'macOS-latest' }}
run: brew install capstone tbb pkg-config glfw
- name: Profiler GUI
run: make -j -C profiler/build/unix debug release
- name: Update utility
run: make -j -C update/build/unix debug release
- name: Capture utility
run: make -j -C capture/build/unix debug release
- name: Csvexport utility
run: make -j -C csvexport/build/unix debug release
- name: Import-chrome utility
run: make -j -C import-chrome/build/unix debug release
- name: Library
run: make -j -C library/unix debug release
- name: Test application
run: |
make -j -C test
make -j -C test clean
make -j -C test TRACYFLAGS=-DTRACY_ON_DEMAND
make -j -C test clean
make -j -C test TRACYFLAGS="-DTRACY_DELAYED_INIT -DTRACY_MANUAL_LIFETIME"

View File

@@ -1,28 +0,0 @@
name: Manual
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Fix stupidity
run: |
cp AUTHORS AUTHORS.
cp LICENSE LICENSE.
- name: Compile LaTeX
uses: xu-cheng/latex-action@v2
with:
working_directory: manual
root_file: tracy.tex
- uses: actions/upload-artifact@v2
with:
name: manual
path: manual/tracy.pdf

View File

@@ -1,72 +0,0 @@
name: MSVC
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build-with-vcpkg-integration:
runs-on: windows-2022
steps:
- uses: actions/checkout@v2
- uses: microsoft/setup-msbuild@v1.0.2
- name: Integrate vcpkg
run: vcpkg integrate install
- name: Profiler GUI Debug
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Debug /property:Platform=x64
- name: Profiler GUI Release
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Update utility Debug
run: msbuild .\update\build\win32\update.vcxproj /property:Configuration=Debug /property:Platform=x64
- name: Update utility Release
run: msbuild .\update\build\win32\update.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Capture utility Debug
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Debug /property:Platform=x64
- name: Capture utility Release
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Csvexport utility Debug
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Debug /property:Platform=x64
- name: Csvexport utility Release
run: msbuild .\csvexport\build\win32\csvexport.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Import-chrome utility Debug
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Debug /property:Platform=x64
- name: Import-chrome utility Release
run: msbuild .\import-chrome\build\win32\import-chrome.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Library
run: msbuild .\library\win32\TracyProfiler.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Package binaries
run: |
mkdir bin
mkdir bin\dev
copy profiler\build\win32\x64\Release\Tracy.exe bin
copy update\build\win32\x64\Release\update.exe bin
copy capture\build\win32\x64\Release\capture.exe bin
copy import-chrome\build\win32\x64\Release\import-chrome.exe bin
copy csvexport\build\win32\x64\Release\csvexport.exe bin
copy library\win32\x64\Release\TracyProfiler.dll bin\dev
copy library\win32\x64\Release\TracyProfiler.lib bin\dev
7z a Tracy.7z bin
- uses: actions/upload-artifact@v2
with:
path: Tracy.7z
build-without-vcpkg-integration:
runs-on: windows-2022
env:
VCPKG_ROOT: ''
steps:
- uses: actions/checkout@v2
- uses: microsoft/setup-msbuild@v1.0.2
- name: Install vcpkg dependencies
run: vcpkg/install_vcpkg_dependencies.bat
- name: Profiler GUI Release
run: msbuild .\profiler\build\win32\Tracy.vcxproj /property:Configuration=Release /property:Platform=x64
- name: Capture utility Release
run: msbuild .\capture\build\win32\capture.vcxproj /property:Configuration=Release /property:Platform=x64

20
.gitignore vendored
View File

@@ -5,12 +5,8 @@
x64
Release
Debug
_build
_compiler
tools/*
*.d
*.o
*.so
*.swp
imgui.ini
test/tracy_test
@@ -22,21 +18,5 @@ manual/t*.out
manual/t*.pdf
manual/t*.synctex.gz
manual/t*.toc
manual/t*.bbl
manual/t*.blg
profiler/build/win32/packages
profiler/build/win32/Tracy.aps
# include the vcpkg install script but not the files it produces
vcpkg/*
!vcpkg/install_vcpkg_dependencies.bat
/vcpkg_installed
.deps/
.dirstamp
.vscode/
/_*/**
/**/__pycache__/**
extra/vswhere.exe
extra/tracy-build
/.cache
compile_commands.json

View File

@@ -1,17 +0,0 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"/usr/include/freetype2"
],
"defines": [],
"compilerPath": "/usr/bin/clang++",
"cStandard": "c11",
"cppStandard": "c++17",
"intelliSenseMode": "clang-x64"
}
],
"version": 4
}

58
.vscode/launch.json vendored
View File

@@ -1,58 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch Profiler GUI",
"type": "lldb",
"request": "launch",
"program": "${workspaceFolder}/profiler/build/unix/Tracy-debug",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"preLaunchTask": "Build Profiler GUI",
"console": "internalConsole",
"internalConsoleOptions": "neverOpen"
},
{
"name": "Launch capture tool",
"type": "lldb",
"request": "launch",
"program": "${workspaceFolder}/capture/build/unix/capture-debug",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"preLaunchTask": "Build capture tool"
},
{
"name": "Launch update tool",
"type": "lldb",
"request": "launch",
"program": "${workspaceFolder}/update/build/unix/update-debug",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"preLaunchTask": "Build update tool"
},
{
"name": "Launch test application",
"type": "lldb",
"request": "launch",
"program": "${workspaceFolder}/test/tracy_test",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}/test",
"environment": [],
"externalConsole": false,
"preLaunchTask": "Build test application"
},
]
}

88
.vscode/tasks.json vendored
View File

@@ -1,88 +0,0 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "Build Profiler GUI",
"type": "shell",
"command": "CC=clang CXX=clang++ make debug -C profiler/build/unix -j `nproc`",
"group": {
"kind": "build",
"isDefault": true
},
"presentation": {
"close": true
}
},
{
"label": "Build capture tool",
"type": "shell",
"command": "CC=clang CXX=clang++ make debug -C capture/build/unix -j `nproc`",
"group": {
"kind": "build",
"isDefault": true
},
"presentation": {
"close": true
}
},
{
"label": "Build csvexport tool",
"type": "shell",
"command": "CC=clang CXX=clang++ make debug -C csvexport/build/unix -j `nproc`",
"group": {
"kind": "build",
"isDefault": true
},
"presentation": {
"close": true
}
},
{
"label": "Build import-chrome tool",
"type": "shell",
"command": "CC=clang CXX=clang++ make debug -C import-chrome/build/unix -j `nproc`",
"group": {
"kind": "build",
"isDefault": true
}
},
{
"label": "Build update tool",
"type": "shell",
"command": "CC=clang CXX=clang++ make debug -C update/build/unix -j `nproc`",
"group": {
"kind": "build",
"isDefault": true
},
"presentation": {
"close": true
}
},
{
"label": "Build client library",
"type": "shell",
"command": "CC=clang CXX=clang++ make debug -C library/unix -j `nproc`",
"group": {
"kind": "build",
"isDefault": true
},
"presentation": {
"close": true
}
},
{
"label": "Build test application",
"type": "shell",
"command": "CC=clang CXX=clang++ make OPTFLAGS=-DTRACY_VERBOSE\\ -fno-omit-frame-pointer\\ -march=native\\ -g -C test -j `nproc`",
"group": {
"kind": "build",
"isDefault": true
},
"presentation": {
"close": true
}
}
]
}

14
AUTHORS
View File

@@ -1,4 +1,4 @@
Bartosz Taudul <wolf@nereid.pl>
Bartosz Taudul <wolf.pld@gmail.com>
Kamil Klimek <kamil.klimek@sharkbits.com> (initial find zone implementation)
Bartosz Szreder <zgredder@gmail.com> (view/worker split)
Arvid Gerstmann <dev@arvid-g.de> (compatibility fixes)
@@ -6,15 +6,3 @@ Rokas Kupstys <rokups@zoho.com> (compatibility fixes, initia
Till Rathmann <till.rathmann@gmx.de> (DLL support)
Sherief Farouk <sherief.personal@gmail.com> (compatibility fixes)
Dedmen Miller <dedmen@dedmen.de> (find zone bug fixes, improvements)
Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport)
Thales Sabino <thales@codeplay.com> (OpenCL support)
Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support)
Simonas Kazlauskas <git@kazlauskas.me> (OSX CI, external bindings)
Jakub Žádník <kubouch@gmail.com> (csvexport utility)
Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes)
Benoit Jacob <benoitjacob@google.com> (Android improvements)
David Farrel <dafarrel@adobe.com> (Direct3D 11 support)
Terence Rokop <rokopt@sharpears.net> (Non-reentrant zones)
Lukas Berbuer <lukas.berbuer@gmail.com> (CMake integration)
Xavier Bouchoux <xavierb@gmail.com> (sample data in find zone)
Balazs Kovacsics <kovab93@gmail.com> (Universal Windows Platform)

View File

@@ -1,132 +0,0 @@
cmake_minimum_required(VERSION 3.10)
project(Tracy LANGUAGES CXX)
find_package(Threads REQUIRED)
add_library(TracyClient TracyClient.cpp)
target_compile_features(TracyClient PUBLIC cxx_std_11)
target_include_directories(TracyClient SYSTEM PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>)
target_link_libraries(
TracyClient
PUBLIC
Threads::Threads
${CMAKE_DL_LIBS}
)
# Public dependency on some libraries required when using Mingw
if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} MATCHES "GNU")
target_link_libraries(TracyClient PUBLIC ws2_32 dbghelp)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
find_library(EXECINFO_LIBRARY NAMES execinfo REQUIRED)
target_link_libraries(TracyClient PUBLIC ${EXECINFO_LIBRARY})
endif()
add_library(Tracy::TracyClient ALIAS TracyClient)
macro(set_option option help value)
option(${option} ${help} ${value})
if(${option})
message(STATUS "${option}: ON")
target_compile_definitions(TracyClient PUBLIC ${option})
else()
message(STATUS "${option}: OFF")
endif()
endmacro()
set_option(TRACY_ENABLE "Enable profiling" ON)
set_option(TRACY_ON_DEMAND "On-demand profiling" OFF)
set_option(TRACY_CALLSTACK "Enfore callstack collection for tracy regions" OFF)
set_option(TRACY_NO_CALLSTACK "Disable all callstack related functionality" OFF)
set_option(TRACY_NO_CALLSTACK_INLINES "Disables the inline functions in callstacks" OFF)
set_option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" OFF)
set_option(TRACY_NO_BROADCAST "Disable client discovery by broadcast to local network" OFF)
set_option(TRACY_ONLY_IPV4 "Tracy will only accept connections on IPv4 addresses (disable IPv6)" OFF)
set_option(TRACY_NO_CODE_TRANSFER "Disable collection of source code" OFF)
set_option(TRACY_NO_CONTEXT_SWITCH "Disable capture of context switches" OFF)
set_option(TRACY_NO_EXIT "Client executable does not exit until all profile data is sent to server" OFF)
set_option(TRACY_NO_SAMPLING "Disable call stack sampling" OFF)
set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF)
set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF)
set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
set_option(TRACY_FIBERS "Enable fibers support" OFF)
set_option(TRACY_NO_CRASH_HANDLER "Disable crash handling" OFF)
set_option(TRACY_TIMER_FALLBACK "Use lower resolution timers" OFF)
if(BUILD_SHARED_LIBS)
target_compile_definitions(TracyClient PRIVATE TRACY_EXPORTS)
target_compile_definitions(TracyClient PUBLIC TRACY_IMPORTS)
endif()
include(CMakePackageConfigHelpers)
include(GNUInstallDirs)
set(includes
${CMAKE_CURRENT_LIST_DIR}/TracyC.h
${CMAKE_CURRENT_LIST_DIR}/Tracy.hpp
${CMAKE_CURRENT_LIST_DIR}/TracyD3D11.hpp
${CMAKE_CURRENT_LIST_DIR}/TracyD3D12.hpp
${CMAKE_CURRENT_LIST_DIR}/TracyLua.hpp
${CMAKE_CURRENT_LIST_DIR}/TracyOpenCL.hpp
${CMAKE_CURRENT_LIST_DIR}/TracyOpenGL.hpp
${CMAKE_CURRENT_LIST_DIR}/TracyVulkan.hpp)
set(client_includes
${CMAKE_CURRENT_LIST_DIR}/client/tracy_concurrentqueue.h
${CMAKE_CURRENT_LIST_DIR}/client/tracy_rpmalloc.hpp
${CMAKE_CURRENT_LIST_DIR}/client/tracy_SPSCQueue.h
${CMAKE_CURRENT_LIST_DIR}/client/TracyArmCpuTable.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyCallstack.h
${CMAKE_CURRENT_LIST_DIR}/client/TracyCallstack.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyDebug.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyDxt1.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyFastVector.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyLock.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyProfiler.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyRingBuffer.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyScoped.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyStringHelpers.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracySysTime.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracySysTrace.hpp
${CMAKE_CURRENT_LIST_DIR}/client/TracyThread.hpp)
set(common_includes
${CMAKE_CURRENT_LIST_DIR}/common/tracy_lz4.hpp
${CMAKE_CURRENT_LIST_DIR}/common/tracy_lz4hc.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyAlign.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyAlign.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyAlloc.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyApi.h
${CMAKE_CURRENT_LIST_DIR}/common/TracyColor.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyForceInline.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyMutex.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyProtocol.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyQueue.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracySocket.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyStackFrames.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracySystem.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyUwp.hpp
${CMAKE_CURRENT_LIST_DIR}/common/TracyYield.hpp)
install(TARGETS TracyClient
EXPORT TracyConfig
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(FILES ${includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(FILES ${client_includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/client)
install(FILES ${common_includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common)
install(EXPORT TracyConfig
NAMESPACE Tracy::
FILE TracyConfig.cmake
DESTINATION share/Tracy)

77
FAQ.md Normal file
View File

@@ -0,0 +1,77 @@
# A quick tracy FAQ
### I already use VTune/perf/Very Sleepy/callgrind/MSVC profiler.
These are statistical profilers, which can be used to find hot spots in the code. This is very useful, but it won't show you the underlying reason for semi-random frame stutter that may occur every couple of seconds.
### You can use Telemetry for that.
Telemetry license costs about 8000 $ per year. Tracy is open source software. Telemetry doesn't have Lua bindings.
### You can use the free Brofiler. Crytek does use it, so it has to be good.
After a cursory look at the Brofiler code I can tell that the timer resolution there is at 300 ns. Tracy can achieve 5 ns timer resolution. Brofiler event logging infrastructure seems to be over-engineered. Brofiler can't track lock contention, nor does it have Lua bindings.
### So tracy is supposedly faster?
My measurements show that logging a single zone with tracy takes only 15 ns. In theory, if the program was doing nothing else, tracy should be able to log 66 million zones per second.
### Bullshit, RAD is advertising that they are able only to log about a million zones, over the network nevertheless: "Capture over a million timing zones per second in real-time!"
Tracy can perform network transfer of 15 million zones per second. Should the client and server be on separate machines, this number will be even higher, but you will need more than a gigabit link to achieve the maximum throughput. [Click here for a video of a max-throughput capture.](https://www.youtube.com/watch?v=DSMIHShKGAc)
### Can I connect to my application at any time and start profiling at this moment?
By default no, all events are registered from the beginning of program execution and are waiting in a queue. There's a separate on-demand mode, enabled by using a `TRACY_ON_DEMAND` macro.
### Am I seeing correctly that the profiler allocates one gigabyte of memory per second?
Only in extreme cases. Normal usage has much lower memory pressure.
### Why do you do magic with the static initialization order? Everyone says that's a bad practice.
It allows tracking construction of static objects and memory allocations performed before main() is entered.
### There's no support for consoles.
Welp. But there's mobile support.
### I do need console support.
The code is open. Write your own, then send a patch.
### I don't believe you can capture a zone in 15 ns. Show me the code!
Following is the annotated assembly code (generated from C++ sources) that's responsible for logging start of the zone:
```
call qword ptr [__imp_GetCurrentThreadId]
mov r14d,eax
mov qword ptr [rsp+0F0h],r14 // save thread id for later use
mov r12d,10h
mov rax,qword ptr gs:[58h] // TLS
mov r15,qword ptr [rax] // queue address
mov rdi,qword ptr [r12+r15] // data address
mov rbp,qword ptr [rdi+20h] // buffer counter
mov rbx,rbp
and ebx,7Fh // 128 item buffer
jne Application::InnerLoop+66h --+
mov rdx,rbp |
mov rcx,rdi |
call enqueue_begin_alloc | // reclaim/alloc next buffer
shl rbx,5 <---------------------+ // buffer items are 32 bytes
add rbx,qword ptr [rdi+40h]
mov byte ptr [rbx],4 // queue item type
rdtscp
mov dword ptr [rbx+19h],ecx // cpu id
shl rdx,20h
or rax,rdx // 64 bit timestamp
mov qword ptr [rbx+1],rax
mov qword ptr [rbx+9],r14 // thread id
lea rax,[__tracy_source_location] // static struct address
mov qword ptr [rbx+11h],rax
lea rax,[rbp+1] // increment buffer counter
mov qword ptr [rdi+20h],rax
```
There's also a second code block, for the end of the zone. It's similar, but a bit smaller, as it can use some of the variables that were retrieved above.

View File

@@ -1,7 +1,7 @@
Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
Tracy Profiler (https://bitbucket.org/wolfpld/tracy) is licensed under the
3-clause BSD license.
Copyright (c) 2017-2022, Bartosz Taudul <wolf@nereid.pl>
Copyright (c) 2017-2019, Bartosz Taudul <wolf.pld@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without

517
NEWS
View File

@@ -2,516 +2,13 @@ Note: There is no guarantee that version mismatched client and server will
be able to talk with each other. Network protocol breakages won't be listed
here.
v0.8.2 (2022-06-28)
-------------------
Note: Release numbers are nothing more than numbers. There are some
"missing" versions due to trace file changes during development. This is not
a mistake.
- Added support for debuginfod debug information services. Note that
since this depends on proper system configuration, vendors providing
the debug information, and network retrieval, it is disabled by
default. To enable, compile the profiled application with the
TRACY_DEBUGINFOD define and link with libdebuginfod.
- When Tracy server-side utilities are build with MSVC, the required
libraries will be now automatically retrieved and built with vcpkg.
- Added microarchitecture data for: Bonnell, Airmont, Goldmont, Goldmont
Plus, Tremont.
- Recognize additional CPUIDs of Zen 3, Alder Lake, Ice Lake
microarchitectures.
- Assembly line width will be now extended, if needed. Previously the line
width was calculated for the initial layout and changing amount of
displayed data (especially listing the read/written registers) didn't
affect this, which may have made some lines partially unreadable.
- Added ability to filter call stacks in memory tab by inactive allocations.
Filtering by inactive allocations helps to pinpoint wasteful allocations
in the program.
- Plot graph will no longer display min/max values interpolated for
animation, but rather true values.
- The CPU topology tree structure was replaced by a CPU schematic showing
the same thing in a more concise way.
v0.8.1 (2022-04-21)
-------------------
- Support for pre-0.7 traces has been dropped.
- Update utility can now scan for source files missing in the trace cache,
if the '-c' parameter is given. Found files will be added to the cache.
- Added high-priority queue for fast queries to bypass slow symbol queries.
- Fixed Android documentation to show how to enable context switch tracing.
- Workaround MSVC 2015 stupidity which prevented compilation as C++11.
- Added support for showing branch cost data for CPUs that don't report
branch retirement events (but do report branch misses).
- The right-click context menu available for jump arrows in the symbol view
window will now additionally display jump context, i.e. jump sources and
jump target source code fragments.
- Added freedesktop.org compliant desktop entry and MIME type definition.
- The call stack column in list of messages will now be only displayed when
at least one message on the list has call stack data.
- File dialogs on Unix will be now native to the desktop environment you are
using. Note that this relies on xdg-desktop-portal and dbus.
v0.8.0 (2022-03-28)
-------------------
- Support for Cygwin has been dropped. It was not working for a very long
time and nobody had complained about it.
- Mingw is deprecated due to lack of interest.
- Added TRACY_NO_CALLSTACK_INLINES macro to disable inline functions
resolution in call stacks on Windows.
- Improved function matching algorithm in compare traces view.
- Added CMake integration.
- Reworked rpmalloc initialization.
- Fixed display of messages with newlines on messages list.
- Excluded some uninteresting wrapper functions from call stacks (for
example SIMD pass-through intrinsics to the compiler built-ins).
- Adjusted coloring of instruction hotness in symbol view.
- Properly handle rare cases when sampling on Linux is momentary not able to
resolve time stamps.
- Added Rocket Lake microarchitectural data.
- Updated CPU identifier lists.
- Implemented GPU timer overflow handling heuristics.
- Assembly instructions are now assigned to inline symbols.
- You can not only see the assembly source file and line, but also the
originating function.
- If symbol view is restricted to a single inline function, all assembly
instructions not in this context will be dimmed out.
- Likewise, the navigation in assembly code will be limited just to the
inline context, if a single function is selected.
- Kernel call stacks will be now properly captured and displayed in the
profiler. Kernel functions are marked with the red color.
- The CPU hardware performance counters can be now sampled on Linux.
- Three inferred statistics are displayed for lines in both source and
assembly code in the symbol view window:
- Instructions executed per cycle.
- Branch miss rate.
- Cache miss rate.
- Instruction cost estimation method is no longer tied to software call
stack sampling.
- The image name filter entry field is now providing a list of available
images.
- Reentrant function calls may be now excluded from calculations in the
statistics view.
- Crash handler is now properly removed during profiler destruction.
- Repeatedly right-clicking on the same source line in the symbol view
window will now cycle through assembly blocks associated with this source
line.
- Vulkan headers must be now explicitly included before including
TracyVulkan.hpp.
- The capture utility may now limit capture time to a specified number of
seconds.
- Fixed message thread assignment in the import-chrome utility.
- Sampling data can be now also found in the find zone menu.
- Instrumentation failures may now display their context, e.g. the zone text
that was to be set.
- A warning is now displayed when sampling data is out-of-order.
- Average value for plots can be now viewed.
- Moved symbol resolution to a separate thread. Profiling will no longer be
stuck when there is a large number of symbols to resolve. This not only
improves user experience, but also prevents buildup of data (and memory
consumption) on the client side.
- Android device name will be now reported.
- Added support for capturing fibers.
- Fibers require additional processing, which has to be enabled by adding
the TRACY_FIBERS define on the client side.
- Client code requires additional instrumentation using the new macros
TracyFiberEnter and TracyFiberLeave (or the corresponding C API
variants).
- Fibers are represented in traces as separate threads, and are
distinguished by green color. Faux context switch regions are used to
indicate when a fiber is being run by the worker thread.
- Continuous frame marks no longer need to be issued from a single thread.
- Context switch call stacks are now captured on Windows and Linux.
- Hovering the context switch wait region will now display wait stack,
which may provide additional insight into why the switch happened.
- Wait stacks inspection can be performed in a new view.
- Stacks can be limited to certain threads and to a selected time range.
- Stacks are presented either as a sorted list, or as a bottom-up and
top-down trees.
- Entry call stacks can be now also viewed as a bottom-up and top-down
trees.
- Updated project build files to MSVC 2022.
- Call stack tooltips now also show the executable image name.
- Playback frames can be now changed by interacting with the frame image
slider using the mouse wheel.
- Signal used to handle crashes on Linux can be now redefined.
- Various DPI scaling improvements.
- User interface can be now scaled in run time.
- Symbol code retrieval now also supports kernel on Windows.
- Added low-level C API interface for GPU zones.
- Symbol child calls can be now listed.
- Replaced "restrict time" in memory window with a proper time range limit.
- Added Alder Lake microarchitectural data.
- Added GPU zone statistics.
- Universal Windows Platform support.
- All call stack related functionality can be now disabled with the
TRACY_NO_CALLSTACK macro.
- Added ability to add full-view annotations from the annotations list
window.
v0.7.8 (2021-05-19)
-------------------
- Updated Zen 3 and added Tiger Lake microarchitectural data.
- Manually disconnecting from the server will no longer display erroneous
warning message.
- Added ability to display sample time spent in child function calls.
- Fixed issue which may have prevented sampling on ARM64.
- Added TRACY_NO_FRAME_IMAGE macro to disable frame image compression
thread.
- Ctrl and shift keys will now modify mouse wheel zoom speed.
- Improved user experience in the symbol view window.
- Added support for Direct3D 11 instrumentation.
- Vulkan contexts can be now calibrated on Linux.
- Support loading zstd-compressed chrome traces.
- Chrome traces with multiple PID entries (and possibly conflicting TIDs)
can be now imported.
- Added support for custom source location tag ("loc") in chrome traces.
- Sampling frequency can be now controlled using TRACY_SAMPLING_HZ macro.
- Trace compression can be now selected when saving a trace.
- If a trace cannot be saved, a failure dialog will be displayed.
- Run-time memory usage of frame images can be reduced by calculating
a compression dictionary. This can be only performed when a trace is saved
or through the update utility.
v0.7.7 (2021-04-01)
-------------------
- Linux crash handler will now also catch SIGABRT.
- Fixed invalid name assignment to source files discovered client-side.
- Added ability to check if a zone is active (which may be used to avoid
preparing zone text, etc., as it wouldn't be used anyway).
- Improved sorting behavior of internal vectors.
- Some data will now be always properly displayed during live capture.
This was not particularly visible before, as it mainly concerns edge
cases.
- Sorting is performed only as needed.
- In case of plots the performance during live capture may be decreased,
as these were sorted with at least 0.25 second intervals before. Now
the sorting is performed every frame.
- Some other data, which previously was not sorted, is sorted now.
- In headless capture mode sorting will be only performed when the trace
is saved to disk.
- Fixed some typos in macros.
- Fixed handling of non-ANSI file names on Windows. You can now name your
traces 'ęśąćż.tracy' and it should work as intended. This is supported on
Windows 10 release 1903 and newer.
- Fixed sending GPU context name in on-demand mode.
- Fixed color channel order in ZoneColor() macro.
- Handle failure state when a memory pointer allocation is reported twice,
without an intermediate free.
- Renamed "call stack parents" to "entry call stacks".
- Display number of entry call stacks in assembly line sample count tooltip.
- Added tooltips with preview of source code in various places in the UI.
v0.7.6 (2021-02-06)
-------------------
- Various fixes in build scripts.
- Fixed a faulty rpmalloc initialization path when the first thing the
thread did was sending a message with call stack.
- Added fallback timer define for various virtualized environments, which
may not be able to access the hardware timer registers. This will result
in usage of timer provided by the standard library, with reduced
resolution.
- Further OpenCL improvements.
- Updated libbacktrace.
- Adds Mach-O 64-bit FAT support.
- Fixes memory corruption when processing Mach-O data.
- Fixes missing matching entries during binary search.
- Adds support for MiniDebugInfo.
- Adds fallback to ELF symbol table if no debug info is available.
- Various other fixes.
- Store build time of profiled program in captures.
- GPU contexts can be now named.
- Implemented client -> server source code transfer.
v0.7.5 (2021-01-23)
-------------------
- More robust handling of system tracing on Android.
- Added warning dialog when the connection is lost before all needed data
can be retrieved.
- Fixed handling of NaN plot entries (by skipping them).
- Dynamic zone colors are now supported through the ZoneColor() macro.
- Fixed Arm machine code printout to match the one printed by objdump.
- Fixed client memory corruption when using colored messages.
- Switched to the next-gen ImGui table UI.
- Table columns can have their order rearranged, can be hidden, can be
sorted both in ascending and descending order (where appropriate).
- Table columns state is now preserved between runs.
- Various fixes related to restricting listening to localhost.
- Improved compatibility of ETW tracing with non-MSVC compilers.
- Fixed Vulkan call stack transfer.
- Added support for transient GPU zones (OpenGL, Vulkan, Direct3D 12).
- OpenCL fixes for assert-less builds and non-active zones.
- Added support for thread names and title bar description in traces
imported from chrome tracing format.
v0.7.4 (2020-11-15)
-------------------
- Added support for user-provided locks to keep dbghelp calls thread-safe.
- Call stacks can be now copied to clipboard.
- Allow more control over which automated captures are performed.
- Added textual descriptions for some assembly instructions.
- Profiler memory usage is now also displayed as a percentage of available
physical memory.
- Microarchitecture mismatch is now clearly displayed in the source view
window.
- Added Zen 3 and Cascade Lake microarchitectural data.
- Ghost zones are now supporting all zone coloring modes and namespace
shortening.
- Extend C API to support memory pools.
- Frame rate targets can be now visually represented on the timeline view.
v0.7.3 (2020-10-06)
-------------------
- Properly support DPI scaling on Linux (requires GLFW 3.3).
- Added early checks for output file validity in the capture utility.
- Improvements to presence broadcast handling.
- Custom zone colors can be optionally ignored.
- Added support for tracking multiple memory pools.
- Memory free failure dialog can now show call stack pointing to the failure
location.
- Added support for Wayland on Linux.
- If during the first 5 seconds of the trace there are no frames being
reported, the profiler will switch to following last 5 seconds of the
trace, instead of displaying three last frames.
v0.7.2 (2020-09-14)
-------------------
- Note: the bitbucket repository is obsolete and will soon stop receiving
updates. Migrate to https://github.com/wolfpld/tracy, if you haven't
already.
- The "waiting for connection" dialog no longer has "cancel" button. To
abort connection attempt just use the "close window" button.
- Added update notification.
- The most recent traced events can be now viewed regardless of timeline
zoom level.
- Fixed going-to-line in source view (again).
- Crash handling on client is now not performed, if there is no active
connection.
- Added ability to listen only on IPv4 interfaces.
v0.7.1 (2020-08-24)
-------------------
- Dropped support for pre-v0.6 traces.
- Fixed regression on non-AVX2 CPUs.
- Fixed incorrect calculation of some ghost zones.
- Added list of cached source files.
- Added import of plot data.
- Secure versions of alloc/free macros.
- Automated tracing of vertical synchronization on Windows.
- Fixed attachment of postponed frame images.
- Source location data can be now copied to clipboard from zone info window.
- Zones in find zones menu can be now grouped by zone name.
- Vulkan and D3D12 GPU contexts can be now calibrated.
- Added CSV export utility.
- "Go to frame" popup no longer has a dedicated button. To show it, click on
the frame counter.
- Added macro for checking if profiler is connected.
- Implemented optional data removal from traces in the update utility.
- Allow manual management of profiler lifetime.
- Adjusted priority of ETW threads to time critical.
- Annotations can be now freely adjusted on the timeline.
- Limiting time range for find zone functionality has been significantly
improved.
- Added time range limits for statistics and symbol view.
- Implemented call stack sampling on Linux (including Android).
- Exact time from start of profiling session can be now viewed by hovering
the mouse over the time scale.
- Code transfer can be now compiled-out.
- Added support for zone markup in unloadable modules.
- Added image name filter to sampling statistics results window.
v0.7 (2020-06-11)
v0.5 (xxxx-xx-xx)
-----------------
This is the last release which will be able to load pre-v0.6 traces. Use the
update utility to convert your old traces now!
- chrome:tracing importer now imports zone metadata from "args" key.
- Added display of statistical mode to find zone menu.
- Automatic stack sampling is now available on windows.
- Properly handle tracing on long-running systems.
- Message list entries can now show associated frame image.
- Call stack window will now display module names.
- Symbol location in call stack window may now also display symbol address.
- Statistics menu can now be used to display call stack sampling data or
list available symbols.
- All call paths leading to the sampled instruction in a call stack can be
now displayed.
- Frame image compression ratio (lossless in-memory compression, not taking
into account DXT compression) is displayed in playback window.
- Allow reconnection straight from the discard data dialog.
- Added ability to set custom names for locks.
- Improved handling of network ports.
- Added time percentage display to instrumentation statistics.
- Display of ghost zones (generated from automated call stack sampling).
- Notify when empty labels display is enabled.
- Small fragments of executable code will be now sent from client to server.
- Added notification about query backlog.
- Fixed performance problem with query backlog.
- Display number of in-flight queries, in addition to query backlog.
- Improved failure reports.
- The capture utility will connect to localhost by default.
- Added optional support for QPC timer on windows.
- Complete rewrite of source file viewer. It is now 100% reliable when going
to a source location.
- Symbol source view was added.
- Extension of source file viewer.
- Can display source file, assembly view, or both at the same time.
- May include display of statistical profiling data.
- Ability to switch between source files which were used to build the
symbol.
- Ability to switch between inlined functions which are incorporated into
the symbol.
- Graphical representation of control flow in program.
- Display of micro-architectural data for each assembly instruction.
- Tracking register dependencies between assembly instructions.
- Disassembly may be saved to a file, in order to be processed by external
tools.
- If the default listening port is occupied, profiler will now try listening
on other ports.
- Added possibility to perform source file names substitution.
- Profiler windows can be now docked.
- CPU usage tooltip now displays a list of running threads.
- Added possibility to filter discovered clients list.
- Source files are now cached during capture.
- Profiler will now display a popup when application crashes.
- Added ability to send simple integral values as extra payload for zones.
- Per-frame zone times on the frames plot can now display self time.
- Ability to bind only on localhost interface.
- OpenCL profiling.
- Direct3D 12 profiling.
v0.6.3 (2020-02-13)
-------------------
- Fixed performance issues with loading saved traces on Ryzen CPUs.
- Profiler window contents are now properly updated during window resize.
- Improved tid to pid mapping on windows.
- Zero length and unfinished zones are no longer taken into account for
statistics.
- Build files for shared library are now available (experimental).
- GPU zones now also have "active" parameter.
- Further reduction of memory usage and on-disk trace size.
- Replaced ska::flat_hash_map with robin-hood-hashing.
- Speed-up rendering of long lists of items.
- Exact event time is displayed in some places in the UI.
- Memory allocation lists can now be sorted.
- Added display of trace file compression ratio.
- Optional Zstd compression of trace files.
- Frame images are now internally compressed using Zstd (instead of LZ4).
- Fix display of continuous frame set tooltips.
v0.6.2 (2019-12-30)
-------------------
- Improved call stack decoding on OSX.
- Collection of CPU topology data.
- C API now supports allocated source locations.
- Added chrome:tracing importer.
- Allow merging of ZoneText() strings.
- Time distribution can now show both exclusive and inclusive times.
- Display proper value of selection time in find zone menu.
- Implemented limiting find zone search to a specified time range.
- Highlight hovered zone from find zone menu zone list on the histogram.
- Allow copying user data directory location to the clipboard.
v0.6.1 (2019-11-28)
-------------------
- Dropped support for pre-v0.5 traces.
- Improve BSD support.
- GPU zone CPU thread highlight will now highlight whole thread, not only
the thread name.
- Added CPU thread highlight for CPU data items.
- Client parameters may be now set from the server.
- Minor UI fixes.
v0.6 (2019-11-17)
-----------------
This is the last release which will be able to load pre-v0.5 traces. Use the
update utility to convert your old traces now!
- Dropped support for pre-v0.4 traces.
- Major memory usage decrease.
- Significant network bandwidth decrease.
- Implemented context switch capture on selected platforms.
- Zone timings in various UI places can now take into account only the
time when the thread was executing.
- Zone information window can now display regions in which thread was
suspended by the operating system.
- CPUs on which the zone was running are enumerated.
- Thread activity regions can be graphed on the timeline.
- API breakage: SetThreadName() now only works on current thread.
- Fixed thread name retrieval after thread is destroyed.
- Added number of CPU cores to host info.
- Limited number of possible source locations to 64K.
- Limited supported capture length to 1.6 days.
- CPU cores are now displayed on the timeline.
- Thread execution workload is displayed, including threads from external
programs.
- Thread migrations across CPU cores can be graphed.
- System-wide workload distribution is now plotted on the timeline.
- Added "CPU data" window showing programs competing for CPU during the
capture.
- Switched to using native thread identifiers (relatively small numbers), as
opposed to pthreads identifiers, which in reality were pointers.
- Improved thread name discovery if context switch capture is enabled.
- Per-trace state is now preserved between profiling sessions:
- Timeline view position.
- Item categories draw/hide settings.
- Timeline zones will be highlighted using a different color, when a
matching time range is selected on histogram.
- Per-frame zone times are now displayed on the frames plot when a zone is
selected in the find zone menu.
- Zone color is now displayed in zone information window.
- Zone colors can now be determined basing on depth and thread or source
location.
- Thread colors are displayed across the profiler application.
- Frame times can be now compared.
- Expose more lock handling functionality.
- Network port can be now specified by the user.
- Proper handling of multithreaded Vulkan code.
- Added extreme compression level in update utility.
- Added time distribution data in the zone information window.
- Trace file name is now displayed in trace information window.
- Annotations can be now added to the timeline.
- Server now performs network data retrieval and decompression on a dedicated
thread.
- Added examples of Tracy integration.
- Allow grouping of zones in the find zone menu by zone parent or with no
grouping.
- Zone list in the statistics window can be now filtered.
- Implemented configuration of plots.
- Messages can now collect call stacks.
v0.5 (2019-08-10)
-----------------
This is the last release which will be able to load pre-v0.4 traces. Use the
update utility to convert your old traces now!
- Major decrease of trace dump file size.
- Major optimizations across the board.
- Vcpkg is now used for library management on Windows.
@@ -593,7 +90,6 @@ update utility to convert your old traces now!
- GPU drift value can be now automatically measured.
- Connection window is now a popup hidden under a dedicated button.
v0.4.1 (2018-12-30)
-------------------
@@ -621,7 +117,6 @@ v0.4.1 (2018-12-30)
- Pressing enter key after entering client address in the welcome dialog
will now automatically begin connection process.
v0.4 (2018-10-09)
-----------------
@@ -753,8 +248,8 @@ v0.4 (2018-10-09)
- The capture utility will now display time span of the ongoing capture.
v0.3 (2018-07-03)
-----------------
v0.3.3 (2018-07-03)
-------------------
- Breaking change: the format of trace files has changed.
- Previous tracy version will crash when trying to open new traces.

View File

@@ -1,25 +1,73 @@
# Tracy Profiler
[![Sponsor](.github/sponsor.png)](https://github.com/sponsors/wolfpld/)
[![Build status](https://ci.appveyor.com/api/projects/status/968a88arq06gm3el/branch/master?svg=true)](https://ci.appveyor.com/project/wolfpld/tracy/branch/master)
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
Tracy supports profiling CPU (Direct support is provided for C, C++, and Lua integration. At the same time, third-party bindings to many other languages exist on the internet, such as Rust, Zig, OCaml, Odin, etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, OpenCL.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
- [Changelog](NEWS)
Tracy is a real time, nanosecond resolution frame profiler that can be used for remote or embedded telemetry of your application. It can profile CPU (C, C++11, Lua), GPU (OpenGL, Vulkan) and memory. It also can display locks held by threads and their interactions with each other.
![](doc/profiler.png)
![](doc/profiler2.png)
The following compilers are supported:
![](doc/profiler3.png)
- MSVC
- gcc
- clang
The following platforms are confirmed to be working (this is not a complete list):
- Windows (x86, x64)
- Linux (x86, x64, ARM, ARM64)
- Android (ARM, x86)
- FreeBSD (x64)
- Cygwin (x64)
- WSL (x64)
- OSX (x64)
[Introduction to Tracy Profiler v0.2](https://www.youtube.com/watch?v=fB5B46lbapc)
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
[New features in Tracy Profiler v0.7](https://www.youtube.com/watch?v=_hU7vw00MZ4)
[New features in Tracy Profiler v0.8](https://www.youtube.com/watch?v=30wpRpHTTag)
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
[A quick FAQ.](FAQ.md)
[List of changes.](NEWS)
### High-level overview
![](doc/design.svg)
Tracy is split into client and server side. The client side collects events using a high-efficiency queue and awaits for an incoming connection. The server part connects to client and receives collected data from the client, which is then reconstructed into a viewable timeline. The transfer is performed using a TCP connection.
### Performance impact
To check how much slowdown is introduced by using Tracy, I have profiled [etcpak](https://bitbucket.org/wolfpld/etcpak), which is the fastest ETC texture compression utility there is. I used an 8192×8192 test image as input data and instrumented everything down to the 4×4 pixel block compression function (that's 4 million blocks to compress). It should be noted that Tracy needs to calibrate its internal timers at each run. This introduces a delay of 115 ms (on my machine), which is negligible when doing lengthy profiling runs, but it skews the results of etcpak timing. The following times have this delay subtracted, to give focus on zone collection impact, which is the thing that really matters here.
| Scenario | Zones | Clean run | Profiling run | Difference |
|-------------------------------------------------------|---------|-----------|---------------|------------|
| Compression of an image to ETC1 format | 4194568 | 0.94 s | 1.003 s | +0.063 s |
| Compression of an image to ETC2 format, with mip-maps | 5592822 | 1.034 s | 1.119 s | +0.085 s |
In both scenarios the per-zone time cost is at ~15 ns. This is in line with the measured 8 ns single event collection time (each zone has to report start and end event).
## Usage instructions
The user manual for Tracy is available [at the following address](https://bitbucket.org/wolfpld/tracy/downloads/tracy.pdf). It provides information about the integration process, required code markup and so on.
## Features
#### Histogram of function execution times
![](doc/histogram.png)
#### Comparison of two profiling runs
![](doc/compare.png)
#### Marking locks
![](doc/locks.png)
#### Plotting data
![](doc/plot.png)
#### Message log
![](doc/messages.png)

7
TODO
View File

@@ -1,7 +0,0 @@
"Would be nice to have" list for 1.0 release:
=============================================
* Pack queue items tightly in the queues.
* Use level-of-detail system for plots.
* Use per-thread lock data structures.
* Use DTrace for BSD/OSX context switch capture.

178
Tracy.hpp
View File

@@ -11,24 +11,13 @@
#define ZoneNamedC(x,y,z)
#define ZoneNamedNC(x,y,z,w)
#define ZoneTransient(x,y)
#define ZoneTransientN(x,y,z)
#define ZoneScoped
#define ZoneScopedN(x)
#define ZoneScopedC(x)
#define ZoneScopedNC(x,y)
#define ZoneText(x,y)
#define ZoneTextV(x,y,z)
#define ZoneName(x,y)
#define ZoneNameV(x,y,z)
#define ZoneColor(x)
#define ZoneColorV(x,y)
#define ZoneValue(x)
#define ZoneValueV(x,y)
#define ZoneIsActive false
#define ZoneIsActiveV(x) false
#define FrameMark
#define FrameMarkNamed(x)
@@ -44,10 +33,8 @@
#define LockableBase( type ) type
#define SharedLockableBase( type ) type
#define LockMark(x) (void)x;
#define LockableName(x,y,z);
#define TracyPlot(x,y)
#define TracyPlotConfig(x,y)
#define TracyMessage(x,y)
#define TracyMessageL(x)
@@ -57,22 +44,12 @@
#define TracyAlloc(x,y)
#define TracyFree(x)
#define TracySecureAlloc(x,y)
#define TracySecureFree(x)
#define TracyAllocN(x,y,z)
#define TracyFreeN(x,y)
#define TracySecureAllocN(x,y,z)
#define TracySecureFreeN(x,y)
#define ZoneNamedS(x,y,z)
#define ZoneNamedNS(x,y,z,w)
#define ZoneNamedCS(x,y,z,w)
#define ZoneNamedNCS(x,y,z,w,a)
#define ZoneTransientS(x,y,z)
#define ZoneTransientNS(x,y,z,w)
#define ZoneScopedS(x)
#define ZoneScopedNS(x,y)
#define ZoneScopedCS(x,y)
@@ -80,50 +57,27 @@
#define TracyAllocS(x,y,z)
#define TracyFreeS(x,y)
#define TracySecureAllocS(x,y,z)
#define TracySecureFreeS(x,y)
#define TracyAllocNS(x,y,z,w)
#define TracyFreeNS(x,y,z)
#define TracySecureAllocNS(x,y,z,w)
#define TracySecureFreeNS(x,y,z)
#define TracyMessageS(x,y,z)
#define TracyMessageLS(x,y)
#define TracyMessageCS(x,y,z,w)
#define TracyMessageLCS(x,y,z)
#define TracyParameterRegister(x)
#define TracyParameterSetup(x,y,z,w)
#define TracyIsConnected false
#define TracyFiberEnter(x)
#define TracyFiberLeave
#define TracyMeshTri(v0,v1,v2,v3,v4,v5)
#define TracyMeshEnd
#else
#include <string.h>
#include "client/TracyLock.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyScoped.hpp"
#include "client/TracyMesh.hpp"
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, TRACY_CALLSTACK, active );
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
#else
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, active );
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
#endif
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
@@ -132,15 +86,7 @@
#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true )
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size );
#define ZoneTextV( varname, txt, size ) varname.Text( txt, size );
#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size );
#define ZoneNameV( varname, txt, size ) varname.Name( txt, size );
#define ZoneColor( color ) ___tracy_scoped_zone.Color( color );
#define ZoneColorV( varname, color ) varname.Color( color );
#define ZoneValue( value ) ___tracy_scoped_zone.Value( value );
#define ZoneValueV( varname, value ) varname.Value( value );
#define ZoneIsActive ___tracy_scoped_zone.IsActive()
#define ZoneIsActiveV( varname ) varname.IsActive()
#define FrameMark tracy::Profiler::SendFrameMark( nullptr );
#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name );
@@ -149,89 +95,49 @@
#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip );
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define LockableBase( type ) tracy::Lockable<type>
#define SharedLockableBase( type ) tracy::SharedLockable<type>
#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
#define LockableName( varname, txt, size ) varname.CustomName( txt, size );
#define LockMark( varname ) static const tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val );
#define TracyPlotConfig( name, type ) tracy::Profiler::ConfigurePlot( name, type );
#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size );
#define TracyMessageL( txt ) tracy::Profiler::Message( txt );
#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color );
#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color );
#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK );
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK );
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK );
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK );
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false );
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false );
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true );
# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true );
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name );
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name );
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name );
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name );
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK );
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK );
#else
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 );
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 );
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 );
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 );
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false );
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false );
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true );
# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true );
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name );
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name );
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name );
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name );
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size );
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr );
#endif
#ifdef TRACY_HAS_CALLSTACK
# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, depth, active );
# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
# define ZoneNamedS( varname, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedCS( varname, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color depth, true )
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false );
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false );
# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true );
# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true );
# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name );
# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name );
# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name );
# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name );
# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth );
# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth );
# define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth );
# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth );
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth );
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth );
#else
# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active )
# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active )
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active )
# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active )
# define ZoneScopedS( depth ) ZoneScoped
# define ZoneScopedNS( name, depth ) ZoneScopedN( name )
# define ZoneScopedCS( color, depth ) ZoneScopedC( color )
@@ -239,28 +145,10 @@
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
# define TracyFreeS( ptr, depth ) TracyFree( ptr )
# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size )
# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr )
# define TracyAllocNS( ptr, size, depth, name ) TracyAlloc( ptr, size, name )
# define TracyFreeNS( ptr, depth, name ) TracyFree( ptr, name )
# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAlloc( ptr, size, name )
# define TracySecureFreeNS( ptr, depth, name ) TracySecureFree( ptr, name )
# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size )
# define TracyMessageLS( txt, depth ) TracyMessageL( txt )
# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color )
# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color )
#endif
#define TracyParameterRegister( cb ) tracy::Profiler::ParameterRegister( cb );
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val );
#define TracyIsConnected tracy::GetProfiler().IsConnected()
#ifdef TRACY_FIBERS
# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber );
# define TracyFiberLeave tracy::Profiler::LeaveFiber();
#endif
#define TracyMeshTri( x0, y0, x1, y1, x2, y2 ) tracy::mesh::MeshTri( x0, y0, x1, y1, x2, y2 )
#define TracyMeshEnd tracy::mesh::MeshEnd();
#endif

254
TracyC.h
View File

@@ -5,17 +5,11 @@
#include <stdint.h>
#include "client/TracyCallstack.h"
#include "common/TracyApi.h"
#ifdef __cplusplus
extern "C" {
#endif
TRACY_API void ___tracy_set_thread_name( const char* name );
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
#ifndef TRACY_ENABLE
typedef const void* TracyCZoneCtx;
@@ -27,18 +21,9 @@ typedef const void* TracyCZoneCtx;
#define TracyCZoneEnd(c)
#define TracyCZoneText(c,x,y)
#define TracyCZoneName(c,x,y)
#define TracyCZoneColor(c,x)
#define TracyCZoneValue(c,x)
#define TracyCAlloc(x,y)
#define TracyCFree(x)
#define TracyCSecureAlloc(x,y)
#define TracyCSecureFree(x)
#define TracyCAllocN(x,y,z)
#define TracyCFreeN(x,y)
#define TracyCSecureAllocN(x,y,z)
#define TracyCSecureFreeN(x,y)
#define TracyCFrameMark
#define TracyCFrameMarkNamed(x)
@@ -53,33 +38,6 @@ typedef const void* TracyCZoneCtx;
#define TracyCMessageLC(x,y)
#define TracyCAppInfo(x,y)
#define TracyCZoneS(x,y,z)
#define TracyCZoneNS(x,y,z,w)
#define TracyCZoneCS(x,y,z,w)
#define TracyCZoneNCS(x,y,z,w,a)
#define TracyCAllocS(x,y,z)
#define TracyCFreeS(x,y)
#define TracyCSecureAllocS(x,y,z)
#define TracyCSecureFreeS(x,y)
#define TracyCAllocNS(x,y,z,w)
#define TracyCFreeNS(x,y,z)
#define TracyCSecureAllocNS(x,y,z,w)
#define TracyCSecureFreeNS(x,y,z)
#define TracyCMessageS(x,y,z)
#define TracyCMessageLS(x,y)
#define TracyCMessageCS(x,y,z,w)
#define TracyCMessageLCS(x,y,z)
#define TracyCIsConnected 0
#ifdef TRACY_FIBERS
# define TracyCFiberEnter(fiber)
# define TracyCFiberLeave
#endif
#else
#ifndef TracyConcat
@@ -104,146 +62,52 @@ struct ___tracy_c_zone_context
int active;
};
struct ___tracy_gpu_time_data
{
int64_t gpuTime;
uint16_t queryId;
uint8_t context;
};
struct ___tracy_gpu_zone_begin_data {
uint64_t srcloc;
uint16_t queryId;
uint8_t context;
};
struct ___tracy_gpu_zone_end_data {
uint16_t queryId;
uint8_t context;
};
struct ___tracy_gpu_new_context_data {
int64_t gpuTime;
float period;
uint8_t context;
uint8_t flags;
uint8_t type;
};
struct ___tracy_gpu_context_name_data {
uint8_t context;
const char* name;
uint16_t len;
};
// Some containers don't support storing const types.
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
#ifdef TRACY_MANUAL_LIFETIME
TRACY_API void ___tracy_startup_profiler(void);
TRACY_API void ___tracy_shutdown_profiler(void);
#endif
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz );
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active );
TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color );
TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value );
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data );
TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data );
TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data );
TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data );
TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data );
TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data );
TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data );
TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data );
TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data );
TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data );
TRACY_API int ___tracy_connected(void);
TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
void ___tracy_emit_zone_end( TracyCZoneCtx ctx );
void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size );
void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
#else
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,__LINE__), active );
#endif
#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx );
#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size );
#define TracyCZoneName( ctx, txt, size ) ___tracy_emit_zone_name( ctx, txt, size );
#define TracyCZoneColor( ctx, color ) ___tracy_emit_zone_color( ctx, color );
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure );
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure );
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure );
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure );
TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name );
TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name );
TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name );
TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name );
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack );
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack );
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack );
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack );
void ___tracy_emit_memory_alloc( const void* ptr, size_t size );
void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth );
void ___tracy_emit_memory_free( const void* ptr );
void ___tracy_emit_memory_free_callstack( const void* ptr, int depth );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name )
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name )
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name )
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name )
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK )
# define TracyCFree( ptr ) ___tracy_emit_memory_alloc_free_callstack( ptr, TRACY_CALLSTACK )
#else
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 );
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 );
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 );
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 );
# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name );
# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name );
# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name );
# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name );
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 );
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 );
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 );
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 );
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size );
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr );
#endif
TRACY_API void ___tracy_emit_frame_mark( const char* name );
TRACY_API void ___tracy_emit_frame_mark_start( const char* name );
TRACY_API void ___tracy_emit_frame_mark_end( const char* name );
TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip );
void ___tracy_emit_frame_mark( const char* name );
void ___tracy_emit_frame_mark_start( const char* name );
void ___tracy_emit_frame_mark_end( const char* name );
void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip );
#define TracyCFrameMark ___tracy_emit_frame_mark( 0 );
#define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name );
@@ -252,63 +116,29 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_
#define TracyCFrameImage( image, width, height, offset, flip ) ___tracy_emit_frame_image( image, width, height, offset, flip );
TRACY_API void ___tracy_emit_plot( const char* name, double val );
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
void ___tracy_emit_plot( const char* name, double val );
void ___tracy_emit_message( const char* txt, size_t size );
void ___tracy_emit_messageL( const char* txt );
void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color );
void ___tracy_emit_messageLC( const char* txt, uint32_t color );
void ___tracy_emit_message_appinfo( const char* txt, size_t size );
#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val );
#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size );
#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size );
#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt );
#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color );
#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color );
#define TracyCAppInfo( txt, color ) ___tracy_emit_message_appinfo( txt, color );
#ifdef TRACY_HAS_CALLSTACK
# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __func__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
# define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name )
# define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name )
# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name )
# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name )
# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth );
# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth );
#else
# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active )
# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active )
# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active )
# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active )
# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size )
# define TracyCFreeS( ptr, depth ) TracyCFree( ptr )
# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size )
# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr )
# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name )
# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name )
# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name )
# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name )
# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size )
# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt )
# define TracyCMessageCS( txt, size, color, depth ) TracyCMessageC( txt, size, color )
# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color )
#endif
#define TracyCIsConnected ___tracy_connected()
TRACY_API void ___tracy_fiber_enter( const char* fiber );
TRACY_API void ___tracy_fiber_leave( void );
#ifdef TRACY_FIBERS
# define TracyCFiberEnter( fiber ) ___tracy_fiber_enter( fiber );
# define TracyCFiberLeave ___tracy_fiber_leave();
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth )
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_alloc_free_callstack( ptr, depth )
#endif
#endif

View File

@@ -15,42 +15,28 @@
#ifdef TRACY_ENABLE
#ifdef _MSC_VER
# pragma warning(push, 0)
#endif
#include "common/tracy_lz4.cpp"
#include "client/TracyProfiler.cpp"
#include "client/TracyCallstack.cpp"
#include "client/TracySysTime.cpp"
#include "client/TracySysTrace.cpp"
#include "common/TracySocket.cpp"
#include "client/tracy_rpmalloc.cpp"
#include "client/TracyDxt1.cpp"
#include "client/TracyAlloc.cpp"
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3
# include "libbacktrace/alloc.cpp"
# include "libbacktrace/dwarf.cpp"
# include "libbacktrace/elf.cpp"
# include "libbacktrace/fileline.cpp"
# include "libbacktrace/mmapio.cpp"
# include "libbacktrace/posix.cpp"
# include "libbacktrace/sort.cpp"
# include "libbacktrace/state.cpp"
# if TRACY_HAS_CALLSTACK == 4
# include "libbacktrace/macho.cpp"
# else
# include "libbacktrace/elf.cpp"
# endif
# include "common/TracyStackFrames.cpp"
#endif
#ifdef _MSC_VER
# pragma comment(lib, "ws2_32.lib")
# pragma comment(lib, "dbghelp.lib")
# pragma comment(lib, "advapi32.lib")
# pragma comment(lib, "user32.lib")
# pragma warning(pop)
#endif
#endif

19
TracyClientDLL.cpp Normal file
View File

@@ -0,0 +1,19 @@
//
// Tracy profiler
// ----------------
//
// On multi-DLL projects compile and
// link with this source file (and none
// other) in the executable and in
// DLLs / shared objects that link to
// the main DLL.
//
// Define TRACY_ENABLE to enable profiler.
#ifdef TRACY_ENABLE
# ifndef TRACY_IMPORTS
# define TRACY_IMPORTS 1
# endif
# include "common/TracySystem.cpp"
#endif

View File

@@ -1,442 +0,0 @@
#ifndef __TRACYD3D11_HPP__
#define __TRACYD3D11_HPP__
#ifndef TRACY_ENABLE
#define TracyD3D11Context(device,queue) nullptr
#define TracyD3D11Destroy(ctx)
#define TracyD3D11ContextName(ctx, name, size)
#define TracyD3D11NewFrame(ctx)
#define TracyD3D11Zone(ctx, name)
#define TracyD3D11ZoneC(ctx, name, color)
#define TracyD3D11NamedZone(ctx, varname, name, active)
#define TracyD3D11NamedZoneC(ctx, varname, name, color, active)
#define TracyD3D12ZoneTransient(ctx, varname, name, active)
#define TracyD3D11ZoneS(ctx, name, depth)
#define TracyD3D11ZoneCS(ctx, name, color, depth)
#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active)
#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active)
#define TracyD3D12ZoneTransientS(ctx, varname, name, depth, active)
#define TracyD3D11Collect(ctx)
namespace tracy
{
class D3D11ZoneScope {};
}
using TracyD3D11Ctx = void*;
#else
#include <atomic>
#include <assert.h>
#include <stdlib.h>
#include "Tracy.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyCallstack.hpp"
#include "common/TracyAlign.hpp"
#include "common/TracyAlloc.hpp"
namespace tracy
{
class D3D11Ctx
{
friend class D3D11ZoneScope;
enum { QueryCount = 64 * 1024 };
public:
D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
: m_device( device )
, m_devicectx( devicectx )
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
{
assert( m_context != 255 );
for (int i = 0; i < QueryCount; i++)
{
HRESULT hr = S_OK;
D3D11_QUERY_DESC desc;
desc.MiscFlags = 0;
desc.Query = D3D11_QUERY_TIMESTAMP;
hr |= device->CreateQuery(&desc, &m_queries[i]);
desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
hr |= device->CreateQuery(&desc, &m_disjoints[i]);
m_disjointMap[i] = nullptr;
assert(SUCCEEDED(hr));
}
// Force query the initial GPU timestamp (pipeline stall)
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
UINT64 timestamp;
for (int attempts = 0; attempts < 50; attempts++)
{
devicectx->Begin(m_disjoints[0]);
devicectx->End(m_queries[0]);
devicectx->End(m_disjoints[0]);
devicectx->Flush();
while (devicectx->GetData(m_disjoints[0], &disjoint, sizeof(disjoint), 0) == S_FALSE)
/* Nothing */;
if (disjoint.Disjoint)
continue;
while (devicectx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) == S_FALSE)
/* Nothing */;
break;
}
int64_t tgpu = timestamp * (1000000000ull / disjoint.Frequency);
int64_t tcpu = Profiler::GetTime();
uint8_t flags = 0;
const float period = 1.f;
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
~D3D11Ctx()
{
for (int i = 0; i < QueryCount; i++)
{
m_queries[i]->Release();
m_disjoints[i]->Release();
m_disjointMap[i] = nullptr;
}
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_context );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect()
{
ZoneScopedC( Color::Red4 );
if( m_tail == m_head ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() )
{
m_head = m_tail = 0;
return;
}
#endif
auto start = m_tail;
auto end = m_head + QueryCount;
auto cnt = (end - start) % QueryCount;
while (cnt > 1)
{
auto mid = start + cnt / 2;
bool available =
m_devicectx->GetData(m_disjointMap[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK &&
m_devicectx->GetData(m_queries[mid % QueryCount], nullptr, 0, D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK;
if (available)
{
start = mid;
}
else
{
end = mid;
}
cnt = (end - start) % QueryCount;
}
start %= QueryCount;
while (m_tail != start)
{
D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
UINT64 time;
m_devicectx->GetData(m_disjointMap[m_tail], &disjoint, sizeof(disjoint), 0);
m_devicectx->GetData(m_queries[m_tail], &time, sizeof(time), 0);
time *= (1000000000ull / disjoint.Frequency);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, (int64_t)time);
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
MemWrite(&item->gpuTime.context, m_context);
Profiler::QueueSerialFinish();
m_tail = (m_tail + 1) % QueryCount;
}
}
private:
tracy_force_inline unsigned int NextQueryId()
{
const auto id = m_head;
m_head = ( m_head + 1 ) % QueryCount;
assert( m_head != m_tail );
return id;
}
tracy_force_inline ID3D11Query* TranslateQueryId( unsigned int id )
{
return m_queries[id];
}
tracy_force_inline ID3D11Query* MapDisjointQueryId( unsigned int id, unsigned int disjointId )
{
m_disjointMap[id] = m_disjoints[disjointId];
return m_disjoints[disjointId];
}
tracy_force_inline uint8_t GetId() const
{
return m_context;
}
ID3D11Device* m_device;
ID3D11DeviceContext* m_devicectx;
ID3D11Query* m_queries[QueryCount];
ID3D11Query* m_disjoints[QueryCount];
ID3D11Query* m_disjointMap[QueryCount]; // Multiple time queries can have one disjoint query
uint8_t m_context;
unsigned int m_head;
unsigned int m_tail;
};
class D3D11ZoneScope
{
public:
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
m_disjointId = queryId;
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
m_disjointId = queryId;
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
GetProfiler().SendCallstack( depth );
}
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active&& GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if( !m_active ) return;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
m_disjointId = queryId;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active&& GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if( !m_active ) return;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
ctx->m_devicectx->Begin(ctx->MapDisjointQueryId(queryId, queryId));
ctx->m_devicectx->End(ctx->TranslateQueryId(queryId));
m_disjointId = queryId;
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(queryId));
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline ~D3D11ZoneScope()
{
if( !m_active ) return;
const auto queryId = m_ctx->NextQueryId();
m_ctx->m_devicectx->End(m_ctx->TranslateQueryId(queryId));
m_ctx->m_devicectx->End(m_ctx->MapDisjointQueryId(queryId, m_disjointId));
auto* item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
Profiler::QueueSerialFinish();
}
private:
const bool m_active;
D3D11Ctx* m_ctx;
unsigned int m_disjointId;
};
static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx )
{
auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) );
new(ctx) D3D11Ctx( device, devicectx );
return ctx;
}
static inline void DestroyD3D11Context( D3D11Ctx* ctx )
{
ctx->~D3D11Ctx();
tracy_free( ctx );
}
}
using TracyD3D11Ctx = tracy::D3D11Ctx*;
#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx );
#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx);
#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size);
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
#else
# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, ___tracy_gpu_zone, name, true )
# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, ___tracy_gpu_zone, name, color, true )
# define TracyD3D11NamedZone( ctx, varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), active };
#endif
#ifdef TRACY_HAS_CALLSTACK
# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, ___tracy_gpu_zone, name, depth, true )
# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, ___tracy_gpu_zone, name, color, depth, true )
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D11ZoneScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), depth, active };
#else
# define TracyD3D11ZoneS( ctx, name, depth, active ) TracyD3D11Zone( ctx, name )
# define TracyD3D11ZoneCS( ctx, name, color, depth, active ) TracyD3D11ZoneC( name, color )
# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active )
# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active )
# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, name, active)
#endif
#define TracyD3D11Collect( ctx ) ctx->Collect();
#endif
#endif

View File

@@ -1,506 +0,0 @@
#ifndef __TRACYD3D12_HPP__
#define __TRACYD3D12_HPP__
#ifndef TRACY_ENABLE
#define TracyD3D12Context(device, queue) nullptr
#define TracyD3D12Destroy(ctx)
#define TracyD3D12ContextName(ctx, name, size)
#define TracyD3D12NewFrame(ctx)
#define TracyD3D12Zone(ctx, cmdList, name)
#define TracyD3D12ZoneC(ctx, cmdList, name, color)
#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
#define TracyD3D12ZoneS(ctx, cmdList, name, depth)
#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth)
#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active)
#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active)
#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active)
#define TracyD3D12Collect(ctx)
namespace tracy
{
class D3D12ZoneScope {};
}
using TracyD3D12Ctx = void*;
#else
#include "Tracy.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyCallstack.hpp"
#include <cstdlib>
#include <cassert>
#include <d3d12.h>
#include <dxgi.h>
#include <wrl/client.h>
#include <queue>
namespace tracy
{
struct D3D12QueryPayload
{
uint32_t m_queryIdStart = 0;
uint32_t m_queryCount = 0;
};
// Command queue context.
class D3D12QueueCtx
{
friend class D3D12ZoneScope;
static constexpr uint32_t MaxQueries = 64 * 1024; // Queries are begin and end markers, so we can store half as many total time durations. Must be even!
bool m_initialized = false;
ID3D12Device* m_device = nullptr;
ID3D12CommandQueue* m_queue = nullptr;
uint8_t m_context;
Microsoft::WRL::ComPtr<ID3D12QueryHeap> m_queryHeap;
Microsoft::WRL::ComPtr<ID3D12Resource> m_readbackBuffer;
// In-progress payload.
uint32_t m_queryLimit = MaxQueries;
uint32_t m_queryCounter = 0;
uint32_t m_previousQueryCounter = 0;
uint32_t m_activePayload = 0;
Microsoft::WRL::ComPtr<ID3D12Fence> m_payloadFence;
std::queue<D3D12QueryPayload> m_payloadQueue;
int64_t m_prevCalibration = 0;
int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() };
public:
D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue)
: m_device(device)
, m_queue(queue)
, m_context(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
{
// Verify we support timestamp queries on this queue.
if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY)
{
D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{};
bool Success = SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)));
assert(Success && featureData.CopyQueueTimestampQueriesSupported && "Platform does not support profiling of copy queues.");
}
uint64_t timestampFrequency;
if (FAILED(queue->GetTimestampFrequency(&timestampFrequency)))
{
assert(false && "Failed to get timestamp frequency.");
}
uint64_t cpuTimestamp;
uint64_t gpuTimestamp;
if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
{
assert(false && "Failed to get queue clock calibration.");
}
// Save the device cpu timestamp, not the profiler's timestamp.
m_prevCalibration = cpuTimestamp * m_qpcToNs;
cpuTimestamp = Profiler::GetTime();
D3D12_QUERY_HEAP_DESC heapDesc{};
heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
heapDesc.Count = m_queryLimit;
heapDesc.NodeMask = 0; // #TODO: Support multiple adapters.
while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap))))
{
m_queryLimit /= 2;
heapDesc.Count = m_queryLimit;
}
// Create a readback buffer, which will be used as a destination for the query data.
D3D12_RESOURCE_DESC readbackBufferDesc{};
readbackBufferDesc.Alignment = 0;
readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t);
readbackBufferDesc.Height = 1;
readbackBufferDesc.DepthOrArraySize = 1;
readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN;
readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major.
readbackBufferDesc.MipLevels = 1;
readbackBufferDesc.SampleDesc.Count = 1;
readbackBufferDesc.SampleDesc.Quality = 0;
readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
D3D12_HEAP_PROPERTIES readbackHeapProps{};
readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK;
readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
readbackHeapProps.CreationNodeMask = 0;
readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters.
if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer))))
{
assert(false && "Failed to create query readback buffer.");
}
if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence))))
{
assert(false && "Failed to create payload fence.");
}
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp);
MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp);
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast<float>(timestampFrequency));
MemWrite(&item->gpuNewContext.context, m_context);
MemWrite(&item->gpuNewContext.flags, GpuContextCalibration);
MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12);
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem(*item);
#endif
Profiler::QueueSerialFinish();
m_initialized = true;
}
void NewFrame()
{
m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, m_queryCounter });
m_previousQueryCounter += m_queryCounter;
m_queryCounter = 0;
if (m_previousQueryCounter >= m_queryLimit)
{
m_previousQueryCounter -= m_queryLimit;
}
m_queue->Signal(m_payloadFence.Get(), ++m_activePayload);
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_context );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect()
{
ZoneScopedC(Color::Red4);
#ifdef TRACY_ON_DEMAND
if (!GetProfiler().IsConnected())
{
m_queryCounter = 0;
return;
}
#endif
// Find out what payloads are available.
const auto newestReadyPayload = m_payloadFence->GetCompletedValue();
const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload);
if (!payloadCount)
{
return; // No payloads are available yet, exit out.
}
D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) };
// Map the readback buffer so we can fetch the query data from the GPU.
void* readbackBufferMapping = nullptr;
if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping)))
{
assert(false && "Failed to map readback buffer.");
}
auto* timestampData = static_cast<uint64_t*>(readbackBufferMapping);
for (uint32_t i = 0; i < payloadCount; ++i)
{
const auto& payload = m_payloadQueue.front();
for (uint32_t j = 0; j < payload.m_queryCount; ++j)
{
const auto counter = (payload.m_queryIdStart + j) % m_queryLimit;
const auto timestamp = timestampData[counter];
const auto queryId = counter;
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, timestamp);
MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(queryId));
MemWrite(&item->gpuTime.context, m_context);
Profiler::QueueSerialFinish();
}
m_payloadQueue.pop();
}
m_readbackBuffer->Unmap(0, nullptr);
// Recalibrate to account for drift.
uint64_t cpuTimestamp;
uint64_t gpuTimestamp;
if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp)))
{
assert(false && "Failed to get queue clock calibration.");
}
cpuTimestamp *= m_qpcToNs;
const auto cpuDelta = cpuTimestamp - m_prevCalibration;
if (cpuDelta > 0)
{
m_prevCalibration = cpuTimestamp;
cpuTimestamp = Profiler::GetTime();
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuCalibration);
MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp);
MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp);
MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta);
MemWrite(&item->gpuCalibration.context, m_context);
Profiler::QueueSerialFinish();
}
}
private:
tracy_force_inline uint32_t NextQueryId()
{
assert(m_queryCounter < m_queryLimit && "Submitted too many GPU queries! Consider increasing MaxQueries.");
const uint32_t id = (m_previousQueryCounter + m_queryCounter) % m_queryLimit;
m_queryCounter += 2; // Allocate space for a begin and end query.
return id;
}
tracy_force_inline uint8_t GetId() const
{
return m_context;
}
};
class D3D12ZoneScope
{
const bool m_active;
D3D12QueueCtx* m_ctx = nullptr;
ID3D12GraphicsCommandList* m_cmdList = nullptr;
uint32_t m_queryId = 0; // Used for tracking in nested zones.
public:
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active && GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if (!m_active) return;
m_ctx = ctx;
m_cmdList = cmdList;
m_queryId = ctx->NextQueryId();
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active&& GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if (!m_active) return;
m_ctx = ctx;
m_cmdList = cmdList;
m_queryId = ctx->NextQueryId();
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast<uint64_t>(srcLocation));
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active&& GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if (!m_active) return;
m_ctx = ctx;
m_cmdList = cmdList;
m_queryId = ctx->NextQueryId();
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active)
#ifdef TRACY_ON_DEMAND
: m_active(active&& GetProfiler().IsConnected())
#else
: m_active(active)
#endif
{
if (!m_active) return;
m_ctx = ctx;
m_cmdList = cmdList;
m_queryId = ctx->NextQueryId();
cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId);
const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz);
auto* item = Profiler::QueueSerialCallstack(Callstack(depth));
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, static_cast<uint16_t>(m_queryId));
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline ~D3D12ZoneScope()
{
if (!m_active) return;
const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot.
m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, queryId);
auto* item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
MemWrite(&item->gpuZoneEnd.queryId, static_cast<uint16_t>(queryId));
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
Profiler::QueueSerialFinish();
m_cmdList->ResolveQueryData(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer.Get(), m_queryId * sizeof(uint64_t));
}
};
static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue)
{
auto* ctx = static_cast<D3D12QueueCtx*>(tracy_malloc(sizeof(D3D12QueueCtx)));
new (ctx) D3D12QueueCtx{ device, queue };
return ctx;
}
static inline void DestroyD3D12Context(D3D12QueueCtx* ctx)
{
ctx->~D3D12QueueCtx();
tracy_free(ctx);
}
}
using TracyD3D12Ctx = tracy::D3D12QueueCtx*;
#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue);
#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx);
#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size);
#define TracyD3D12NewFrame(ctx) ctx->NewFrame();
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, TRACY_CALLSTACK, true)
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, TRACY_CALLSTACK, true)
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), TRACY_CALLSTACK, active };
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), TRACY_CALLSTACK, active };
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active)
#else
# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, ___tracy_gpu_zone, cmdList, name, true)
# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, ___tracy_gpu_zone, cmdList, name, color, true)
# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), active };
# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), cmdList, active };
#endif
#ifdef TRACY_HAS_CALLSTACK
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, depth, true)
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, depth, true)
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), depth, active };
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, __LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, __LINE__), depth, active };
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, __LINE__, __FILE__, strlen(__FILE__), __FUNCTION__, strlen(__FUNCTION__), name, strlen(name), cmdList, depth, active };
#else
# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name)
# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12Zone(ctx, cmdList, name, color)
# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active)
# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active)
# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active)
#endif
#define TracyD3D12Collect(ctx) ctx->Collect();
#endif
#endif

View File

@@ -125,7 +125,6 @@ static inline void LuaRemove( char* script )
#else
#include <assert.h>
#include <limits>
#include "common/TracyColor.hpp"
#include "common/TracyAlign.hpp"
@@ -151,9 +150,9 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
const char* func[64];
uint32_t fsz[64];
uint32_t ssz[64];
uint32_t spaceNeeded = 4; // cnt
uint8_t cnt;
uint16_t spaceNeeded = sizeof( cnt );
uint32_t cnt;
for( cnt=0; cnt<depth; cnt++ )
{
if( lua_getstack( L, cnt+1, dbg+cnt ) == 0 ) break;
@@ -163,29 +162,31 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
ssz[cnt] = uint32_t( strlen( dbg[cnt].source ) );
spaceNeeded += fsz[cnt] + ssz[cnt];
}
spaceNeeded += cnt * ( 4 + 2 + 2 ); // source line, function string length, source string length
spaceNeeded += cnt * ( 4 + 4 + 4 ); // source line, function string length, source string length
auto ptr = (char*)tracy_malloc( spaceNeeded + 2 );
auto ptr = (char*)tracy_malloc( spaceNeeded + 4 );
auto dst = ptr;
memcpy( dst, &spaceNeeded, 2 ); dst += 2;
memcpy( dst, &cnt, 1 ); dst++;
for( uint8_t i=0; i<cnt; i++ )
memcpy( dst, &spaceNeeded, 4 ); dst += 4;
memcpy( dst, &cnt, 4 ); dst += 4;
for( uint32_t i=0; i<cnt; i++ )
{
const uint32_t line = dbg[i].currentline;
memcpy( dst, &line, 4 ); dst += 4;
assert( fsz[i] <= std::numeric_limits<uint16_t>::max() );
memcpy( dst, fsz+i, 2 ); dst += 2;
memcpy( dst, fsz+i, 4 ); dst += 4;
memcpy( dst, func[i], fsz[i] ); dst += fsz[i];
assert( ssz[i] <= std::numeric_limits<uint16_t>::max() );
memcpy( dst, ssz+i, 2 ); dst += 2;
memcpy( dst, ssz+i, 4 ); dst += 4;
memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i];
}
assert( dst - ptr == spaceNeeded + 2 );
assert( dst - ptr == spaceNeeded + 4 );
TracyQueuePrepare( QueueType::CallstackAlloc );
MemWrite( &item->callstackAllocFat.ptr, (uint64_t)ptr );
MemWrite( &item->callstackAllocFat.nativePtr, (uint64_t)Callstack( depth ) );
TracyQueueCommit( callstackAllocFatThread );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::CallstackAlloc );
MemWrite( &item->callstackAlloc.ptr, (uint64_t)ptr );
MemWrite( &item->callstackAlloc.nativePtr, (uint64_t)Callstack( depth ) );
tail.store( magic + 1, std::memory_order_release );
}
static inline int LuaZoneBeginS( lua_State* L )
@@ -197,6 +198,48 @@ static inline int LuaZoneBeginS( lua_State* L )
if( !GetLuaZoneState().active ) return 0;
#endif
const uint32_t color = Color::DeepSkyBlue3;
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
const uint32_t line = dbg.currentline;
const auto func = dbg.name ? dbg.name : dbg.short_src;
const auto fsz = strlen( func );
const auto ssz = strlen( dbg.source );
// Data layout:
// 4b payload size
// 4b color
// 4b source line
// fsz function name
// 1b null terminator
// ssz source file name
// 1b null terminator
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memcpy( ptr + 4, &color, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, func, fsz+1 );
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
#ifdef TRACY_CALLSTACK
const uint32_t depth = TRACY_CALLSTACK;
#else
@@ -204,16 +247,6 @@ static inline int LuaZoneBeginS( lua_State* L )
#endif
SendLuaCallstack( L, depth );
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
return 0;
}
@@ -226,6 +259,52 @@ static inline int LuaZoneBeginNS( lua_State* L )
if( !GetLuaZoneState().active ) return 0;
#endif
const uint32_t color = Color::DeepSkyBlue3;
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
const uint32_t line = dbg.currentline;
const auto func = dbg.name ? dbg.name : dbg.short_src;
size_t nsz;
const auto name = lua_tolstring( L, 1, &nsz );
const auto fsz = strlen( func );
const auto ssz = strlen( dbg.source );
// Data layout:
// 4b payload size
// 4b color
// 4b source line
// fsz function name
// 1b null terminator
// ssz source file name
// 1b null terminator
// nsz zone name
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nsz );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memcpy( ptr + 4, &color, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, func, fsz+1 );
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nsz );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
#ifdef TRACY_CALLSTACK
const uint32_t depth = TRACY_CALLSTACK;
#else
@@ -233,18 +312,6 @@ static inline int LuaZoneBeginNS( lua_State* L )
#endif
SendLuaCallstack( L, depth );
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
size_t nsz;
const auto name = lua_tolstring( L, 1, &nsz );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
return 0;
}
#endif
@@ -261,15 +328,47 @@ static inline int LuaZoneBegin( lua_State* L )
if( !GetLuaZoneState().active ) return 0;
#endif
const uint32_t color = Color::DeepSkyBlue3;
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
const uint32_t line = dbg.currentline;
const auto func = dbg.name ? dbg.name : dbg.short_src;
const auto fsz = strlen( func );
const auto ssz = strlen( dbg.source );
// Data layout:
// 4b payload size
// 4b color
// 4b source line
// fsz function name
// 1b null terminator
// ssz source file name
// 1b null terminator
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memcpy( ptr + 4, &color, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, func, fsz+1 );
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
#endif
}
@@ -286,17 +385,51 @@ static inline int LuaZoneBeginN( lua_State* L )
if( !GetLuaZoneState().active ) return 0;
#endif
const uint32_t color = Color::DeepSkyBlue3;
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
const uint32_t line = dbg.currentline;
const auto func = dbg.name ? dbg.name : dbg.short_src;
size_t nsz;
const auto name = lua_tolstring( L, 1, &nsz );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
const auto fsz = strlen( func );
const auto ssz = strlen( dbg.source );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
// Data layout:
// 4b payload size
// 4b color
// 4b source line
// fsz function name
// 1b null terminator
// ssz source file name
// 1b null terminator
// nsz zone name
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nsz );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memcpy( ptr + 4, &color, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, func, fsz+1 );
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nsz );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
#endif
}
@@ -314,9 +447,19 @@ static inline int LuaZoneEnd( lua_State* L )
}
#endif
TracyQueuePrepare( QueueType::ZoneEnd );
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
TracyQueueCommit( zoneEndThread );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneEnd.time, Profiler::GetTime( item->zoneEnd.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneEnd.cpu, cpu );
#endif
tail.store( magic + 1, std::memory_order_release );
return 0;
}
@@ -333,15 +476,17 @@ static inline int LuaZoneText( lua_State* L )
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
TracyQueuePrepare( QueueType::ZoneText );
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyQueueCommit( zoneTextFatThread );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneText );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
}
@@ -358,15 +503,17 @@ static inline int LuaZoneName( lua_State* L )
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
TracyQueuePrepare( QueueType::ZoneName );
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyQueueCommit( zoneTextFatThread );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneName );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
}
@@ -378,16 +525,18 @@ static inline int LuaMessage( lua_State* L )
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
TracyQueuePrepare( QueueType::Message );
MemWrite( &item->messageFat.time, Profiler::GetTime() );
MemWrite( &item->messageFat.text, (uint64_t)ptr );
MemWrite( &item->messageFat.size, (uint16_t)size );
TracyQueueCommit( messageFatThread );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::Message );
MemWrite( &item->message.time, Profiler::GetTime() );
MemWrite( &item->message.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
}

View File

@@ -1,414 +0,0 @@
#ifndef __TRACYOPENCL_HPP__
#define __TRACYOPENCL_HPP__
#if !defined TRACY_ENABLE
#define TracyCLContext(c, x) nullptr
#define TracyCLDestroy(c)
#define TracyCLContextName(c, x, y)
#define TracyCLNamedZone(c, x, y, z)
#define TracyCLNamedZoneC(c, x, y, z, w)
#define TracyCLZone(c, x)
#define TracyCLZoneC(c, x, y)
#define TracyCLZoneTransient(c,x,y,z)
#define TracyCLNamedZoneS(c, x, y, z, w)
#define TracyCLNamedZoneCS(c, x, y, z, w, v)
#define TracyCLZoneS(c, x, y)
#define TracyCLZoneCS(c, x, y, z)
#define TracyCLZoneTransientS(c,x,y,z,w)
#define TracyCLNamedZoneSetEvent(x, e)
#define TracyCLZoneSetEvent(e)
#define TracyCLCollect(c)
namespace tracy
{
class OpenCLCtxScope {};
}
using TracyCLCtx = void*;
#else
#include <CL/cl.h>
#include <atomic>
#include <cassert>
#include <sstream>
#include "Tracy.hpp"
#include "client/TracyCallstack.hpp"
#include "client/TracyProfiler.hpp"
#include "common/TracyAlloc.hpp"
#define TRACY_CL_TO_STRING_INDIRECT(T) #T
#define TRACY_CL_TO_STRING(T) TRACY_CL_TO_STRING_INDIRECT(T)
#define TRACY_CL_ASSERT(p) if(!(p)) { \
TracyMessageL( "TRACY_CL_ASSERT failed on " __FILE__ ":" TRACY_CL_TO_STRING(__LINE__) ); \
assert(false && "TRACY_CL_ASSERT failed"); \
}
#define TRACY_CL_CHECK_ERROR(err) if(err != CL_SUCCESS) { \
std::ostringstream oss; \
oss << "TRACY_CL_CHECK_ERROR failed on " << __FILE__ << ":" << __LINE__ \
<< ": error code " << err; \
auto msg = oss.str(); \
TracyMessage(msg.data(), msg.size()); \
assert(false && "TRACY_CL_CHECK_ERROR failed"); \
}
namespace tracy {
enum class EventPhase : uint8_t
{
Begin,
End
};
struct EventInfo
{
cl_event event;
EventPhase phase;
};
class OpenCLCtx
{
public:
enum { QueryCount = 64 * 1024 };
OpenCLCtx(cl_context context, cl_device_id device)
: m_contextId(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed))
, m_head(0)
, m_tail(0)
{
int64_t tcpu, tgpu;
TRACY_CL_ASSERT(m_contextId != 255);
cl_int err = CL_SUCCESS;
cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
TRACY_CL_CHECK_ERROR(err)
uint32_t dummyValue = 42;
cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err);
TRACY_CL_CHECK_ERROR(err)
cl_event writeBufferEvent;
TRACY_CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent));
TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &writeBufferEvent));
tcpu = Profiler::GetTime();
cl_int eventStatus;
TRACY_CL_CHECK_ERROR(clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr));
TRACY_CL_ASSERT(eventStatus == CL_COMPLETE);
TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr));
TRACY_CL_CHECK_ERROR(clReleaseEvent(writeBufferEvent));
TRACY_CL_CHECK_ERROR(clReleaseMemObject(dummyBuffer));
TRACY_CL_CHECK_ERROR(clReleaseCommandQueue(queue));
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuNewContext);
MemWrite(&item->gpuNewContext.cpuTime, tcpu);
MemWrite(&item->gpuNewContext.gpuTime, tgpu);
memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread));
MemWrite(&item->gpuNewContext.period, 1.0f);
MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL);
MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId);
MemWrite(&item->gpuNewContext.flags, (uint8_t)0);
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem(*item);
#endif
Profiler::QueueSerialFinish();
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, (uint8_t)m_contextId );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect()
{
ZoneScopedC(Color::Red4);
if (m_tail == m_head) return;
#ifdef TRACY_ON_DEMAND
if (!GetProfiler().IsConnected())
{
m_head = m_tail = 0;
}
#endif
for (; m_tail != m_head; m_tail = (m_tail + 1) % QueryCount)
{
EventInfo eventInfo = GetQuery(m_tail);
cl_int eventStatus;
cl_int err = clGetEventInfo(eventInfo.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr);
if (err != CL_SUCCESS)
{
std::ostringstream oss;
oss << "clGetEventInfo falied with error code " << err << ", on event " << eventInfo.event << ", skipping...";
auto msg = oss.str();
TracyMessage(msg.data(), msg.size());
if (eventInfo.event == nullptr) {
TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check your code!");
}
assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent");
continue;
}
if (eventStatus != CL_COMPLETE) return;
cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin)
? CL_PROFILING_COMMAND_START
: CL_PROFILING_COMMAND_END;
cl_ulong eventTimeStamp = 0;
err = clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr);
if (err == CL_PROFILING_INFO_NOT_AVAILABLE)
{
TracyMessageL("command queue is not created with CL_QUEUE_PROFILING_ENABLE flag, check your code!");
assert(false && "command queue is not created with CL_QUEUE_PROFILING_ENABLE flag");
}
else
TRACY_CL_CHECK_ERROR(err);
TRACY_CL_ASSERT(eventTimeStamp != 0);
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuTime);
MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp);
MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail);
MemWrite(&item->gpuTime.context, m_contextId);
Profiler::QueueSerialFinish();
if (eventInfo.phase == EventPhase::End)
{
// Done with the event, so release it
TRACY_CL_CHECK_ERROR(clReleaseEvent(eventInfo.event));
}
}
}
tracy_force_inline uint8_t GetId() const
{
return m_contextId;
}
tracy_force_inline unsigned int NextQueryId(EventInfo eventInfo)
{
const auto id = m_head;
m_head = (m_head + 1) % QueryCount;
TRACY_CL_ASSERT(m_head != m_tail);
m_query[id] = eventInfo;
return id;
}
tracy_force_inline EventInfo& GetQuery(unsigned int id)
{
TRACY_CL_ASSERT(id < QueryCount);
return m_query[id];
}
private:
unsigned int m_contextId;
EventInfo m_query[QueryCount];
unsigned int m_head; // index at which a new event should be inserted
unsigned int m_tail; // oldest event
};
class OpenCLCtxScope {
public:
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active&& GetProfiler().IsConnected())
#else
: m_active(is_active)
#endif
, m_ctx(ctx)
, m_event(nullptr)
{
if (!m_active) return;
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active&& GetProfiler().IsConnected())
#else
: m_active(is_active)
#endif
, m_ctx(ctx)
, m_event(nullptr)
{
if (!m_active) return;
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
GetProfiler().SendCallstack(depth);
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active && GetProfiler().IsConnected())
#else
: m_active(is_active)
#endif
, m_ctx(ctx)
, m_event(nullptr)
{
if (!m_active) return;
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, srcloc);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active)
#ifdef TRACY_ON_DEMAND
: m_active(is_active && GetProfiler().IsConnected())
#else
: m_active(is_active)
#endif
, m_ctx(ctx)
, m_event(nullptr)
{
if (!m_active) return;
m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin });
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial);
MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneBegin.srcloc, srcloc);
MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle());
MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId);
MemWrite(&item->gpuZoneBegin.context, ctx->GetId());
Profiler::QueueSerialFinish();
}
tracy_force_inline void SetEvent(cl_event event)
{
if (!m_active) return;
m_event = event;
TRACY_CL_CHECK_ERROR(clRetainEvent(m_event));
m_ctx->GetQuery(m_beginQueryId).event = m_event;
}
tracy_force_inline ~OpenCLCtxScope()
{
if (!m_active) return;
const auto queryId = m_ctx->NextQueryId(EventInfo{ m_event, EventPhase::End });
auto item = Profiler::QueueSerial();
MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial);
MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime());
MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle());
MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)queryId);
MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId());
Profiler::QueueSerialFinish();
}
const bool m_active;
OpenCLCtx* m_ctx;
cl_event m_event;
unsigned int m_beginQueryId;
};
static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device)
{
auto ctx = (OpenCLCtx*)tracy_malloc(sizeof(OpenCLCtx));
new (ctx) OpenCLCtx(context, device);
return ctx;
}
static inline void DestroyCLContext(OpenCLCtx* ctx)
{
ctx->~OpenCLCtx();
tracy_free(ctx);
}
} // namespace tracy
using TracyCLCtx = tracy::OpenCLCtx*;
#define TracyCLContext(context, device) tracy::CreateCLContext(context, device);
#define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx);
#define TracyCLContextName(context, name, size) ctx->Name(name, size);
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyCLZone(ctx, name) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, TRACY_CALLSTACK, true)
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, TRACY_CALLSTACK, true)
# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
#else
# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active);
# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), active);
# define TracyCLZone(ctx, name) TracyCLNamedZone(ctx, __tracy_gpu_zone, name, true)
# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneC(ctx, __tracy_gpu_zone, name, color, true )
# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
#endif
#ifdef TRACY_HAS_CALLSTACK
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active);
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__){ name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active);
# define TracyCLZoneS(ctx, name, depth) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, depth, true)
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, depth, true)
# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) tracy::OpenCLCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
#else
# define TracyCLNamedZoneS(ctx, varname, name, depth, active) TracyCLNamedZone(ctx, varname, name, active)
# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) TracyCLNamedZoneC(ctx, varname, name, color, active)
# define TracyCLZoneS(ctx, name, depth) TracyCLZone(ctx, name)
# define TracyCLZoneCS(ctx, name, color, depth) TracyCLZoneC(ctx, name, color)
# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) TracyCLZoneTransient( ctx, varname, name, active )
#endif
#define TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event)
#define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event)
#define TracyCLCollect(ctx) ctx->Collect()
#endif
#endif

View File

@@ -1,22 +1,21 @@
#ifndef __TRACYOPENGL_HPP__
#define __TRACYOPENGL_HPP__
// Include this file after you include OpenGL 3.2 headers.
#if !defined TRACY_ENABLE || defined __APPLE__
#define TracyGpuContext
#define TracyGpuContextName(x,y)
#define TracyGpuNamedZone(x,y,z)
#define TracyGpuNamedZoneC(x,y,z,w)
#define TracyGpuNamedZone(x,y)
#define TracyGpuNamedZoneC(x,y,z)
#define TracyGpuZone(x)
#define TracyGpuZoneC(x,y)
#define TracyGpuZoneTransient(x,y,z)
#define TracyGpuCollect
#define TracyGpuNamedZoneS(x,y,z,w)
#define TracyGpuNamedZoneCS(x,y,z,w,a)
#define TracyGpuNamedZoneS(x,y,z)
#define TracyGpuNamedZoneCS(x,y,z,w)
#define TracyGpuZoneS(x,y)
#define TracyGpuZoneCS(x,y,z)
#define TracyGpuZoneTransientS(x,y,z,w)
namespace tracy
{
@@ -24,8 +23,8 @@ struct SourceLocationData;
class GpuCtxScope
{
public:
GpuCtxScope( const SourceLocationData*, bool ) {}
GpuCtxScope( const SourceLocationData*, int, bool ) {}
GpuCtxScope( const SourceLocationData* ) {}
GpuCtxScope( const SourceLocationData*, int depth ) {}
};
}
@@ -50,34 +49,29 @@ public:
#endif
#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
#define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
# define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK );
# define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK );
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK )
#else
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
# define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
# define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color )
#endif
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
#ifdef TRACY_HAS_CALLSTACK
# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
# define TracyGpuNamedZoneS( varname, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
# define TracyGpuNamedZoneCS( varname, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth )
#else
# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active )
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active )
# define TracyGpuNamedZoneS( varname, name, depth ) TracyGpuNamedZone( varname, name )
# define TracyGpuNamedZoneCS( varname, name, color, depth ) TracyGpuNamedZoneC( varname, name, color )
# define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active )
#endif
namespace tracy
@@ -107,36 +101,24 @@ public:
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
const float period = 1.f;
Magic magic;
const auto thread = GetThreadHandle();
TracyLfqPrepare( QueueType::GpuNewContext );
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
MemWrite( &item->gpuNewContext.thread, thread );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
TracyLfqPrepare( QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_context );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
tail.store( magic + 1, std::memory_order_release );
}
void Collect()
@@ -153,6 +135,10 @@ public:
}
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
while( m_tail != m_head )
{
GLint available;
@@ -162,11 +148,12 @@ public:
uint64_t time;
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
TracyLfqPrepare( QueueType::GpuTime );
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
MemWrite( &item->gpuTime.context, m_context );
TracyLfqCommit;
tail.store( magic + 1, std::memory_order_release );
m_tail = ( m_tail + 1 ) % QueryCount;
}
@@ -201,121 +188,80 @@ private:
class GpuCtxScope
{
public:
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active )
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
: m_active( GetProfiler().IsConnected() )
#endif
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
TracyLfqPrepare( QueueType::GpuZoneBegin );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBegin );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active )
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
: m_active( GetProfiler().IsConnected() )
#endif
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
#ifdef TRACY_FIBERS
TracyLfqPrepare( QueueType::GpuZoneBegin );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
#else
Magic magic;
const auto thread = GetThreadHandle();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, thread );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
tail.store( magic + 1, std::memory_order_release );
GetProfiler().SendCallstack( depth );
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
#endif
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
#ifdef TRACY_FIBERS
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
#else
GetProfiler().SendCallstack( depth );
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
#endif
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline ~GpuCtxScope()
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
TracyLfqPrepare( QueueType::GpuZoneEnd );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() );
TracyLfqCommit;
tail.store( magic + 1, std::memory_order_release );
}
private:
#ifdef TRACY_ON_DEMAND
const bool m_active;
#endif
};
}

View File

@@ -4,21 +4,17 @@
#if !defined TRACY_ENABLE
#define TracyVkContext(x,y,z,w) nullptr
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
#define TracyVkDestroy(x)
#define TracyVkContextName(c,x,y)
#define TracyVkNamedZone(c,x,y,z,w)
#define TracyVkNamedZoneC(c,x,y,z,w,a)
#define TracyVkNamedZone(c,x,y,z)
#define TracyVkNamedZoneC(c,x,y,z,w)
#define TracyVkZone(c,x,y)
#define TracyVkZoneC(c,x,y,z)
#define TracyVkZoneTransient(c,x,y,z,w)
#define TracyVkCollect(c,x)
#define TracyVkNamedZoneS(c,x,y,z,w,a)
#define TracyVkNamedZoneCS(c,x,y,z,w,v,a)
#define TracyVkNamedZoneS(c,x,y,z,w)
#define TracyVkNamedZoneCS(c,x,y,z,w,v)
#define TracyVkZoneS(c,x,y,z)
#define TracyVkZoneCS(c,x,y,z,w)
#define TracyVkZoneTransientS(c,x,y,z,w,a)
namespace tracy
{
@@ -29,12 +25,9 @@ using TracyVkCtx = void*;
#else
#if !defined VK_NULL_HANDLE
# error "You must include Vulkan headers before including TracyVulkan.hpp"
#endif
#include <assert.h>
#include <stdlib.h>
#include <vulkan/vulkan.h>
#include "Tracy.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyCallstack.hpp"
@@ -49,41 +42,16 @@ class VkCtx
enum { QueryCount = 64 * 1024 };
public:
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
: m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
, m_oldCnt( 0 )
, m_queryCount( QueryCount )
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
{
assert( m_context != 255 );
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT )
{
uint32_t num;
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr );
if( num > 4 ) num = 4;
VkTimeDomainEXT data[4];
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data );
VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1;
#if defined _WIN32
supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
#endif
for( uint32_t i=0; i<num; i++ )
{
if( data[i] == supportedDomain )
{
m_timeDomain = data[i];
break;
}
}
}
VkPhysicalDeviceProperties prop;
vkGetPhysicalDeviceProperties( physdev, &prop );
const float period = prop.limits.timestampPeriod;
@@ -113,73 +81,39 @@ public:
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
int64_t tcpu, tgpu;
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
{
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
tcpu = Profiler::GetTime();
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
int64_t tcpu = Profiler::GetTime();
int64_t tgpu;
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
}
else
{
enum { NumProbes = 32 };
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation[NumProbes];
for( int i=0; i<NumProbes; i++ )
{
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
}
uint64_t minDeviation = deviation[0];
for( int i=1; i<NumProbes; i++ )
{
if( minDeviation > deviation[i] )
{
minDeviation = deviation[i];
}
}
m_deviation = minDeviation * 3 / 2;
#if defined _WIN32
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
#endif
Calibrate( device, m_prevCalibration, tgpu );
tcpu = Profiler::GetTime();
}
uint8_t flags = 0;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
auto item = Profiler::QueueSerial();
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
}
@@ -190,22 +124,6 @@ public:
vkDestroyQueryPool( m_device, m_query, nullptr );
}
void Name( const char* name, uint16_t len )
{
auto ptr = (char*)tracy_malloc( len );
memcpy( ptr, name, len );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuContextName );
MemWrite( &item->gpuContextNameFat.context, m_context );
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
MemWrite( &item->gpuContextNameFat.size, len );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
void Collect( VkCommandBuffer cmdbuf )
{
ZoneScopedC( Color::Red4 );
@@ -216,9 +134,7 @@ public:
if( !GetProfiler().IsConnected() )
{
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
m_head = m_tail = m_oldCnt = 0;
int64_t tgpu;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
m_head = m_tail = 0;
return;
}
#endif
@@ -240,33 +156,18 @@ public:
return;
}
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
for( unsigned int idx=0; idx<cnt; idx++ )
{
auto item = Profiler::QueueSerial();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, m_res[idx] );
MemWrite( &item->gpuTime.queryId, uint16_t( m_tail + idx ) );
MemWrite( &item->gpuTime.context, m_context );
Profiler::QueueSerialFinish();
}
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
{
int64_t tgpu, tcpu;
Calibrate( m_device, tcpu, tgpu );
const auto refCpu = Profiler::GetTime();
const auto delta = tcpu - m_prevCalibration;
if( delta > 0 )
{
m_prevCalibration = tcpu;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
MemWrite( &item->gpuCalibration.cpuDelta, delta );
MemWrite( &item->gpuCalibration.context, m_context );
Profiler::QueueSerialFinish();
}
tail.store( magic + 1, std::memory_order_release );
}
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
@@ -289,38 +190,8 @@ private:
return m_context;
}
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
{
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation;
do
{
m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation );
}
while( deviation > m_deviation );
#if defined _WIN32
tGpu = ts[0];
tCpu = ts[1] * m_qpcToNs;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
tGpu = ts[0];
tCpu = ts[1];
#else
assert( false );
#endif
}
VkDevice m_device;
VkQueryPool m_query;
VkTimeDomainEXT m_timeDomain;
uint64_t m_deviation;
int64_t m_qpcToNs;
int64_t m_prevCalibration;
uint8_t m_context;
unsigned int m_head;
@@ -329,138 +200,100 @@ private:
unsigned int m_queryCount;
int64_t* m_res;
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
};
class VkCtxScope
{
public:
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active )
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf )
: m_cmdbuf( cmdbuf )
, m_ctx( ctx )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
, m_active( GetProfiler().IsConnected() )
#endif
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
#endif
const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
Magic magic;
const auto thread = GetThreadHandle();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBegin );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.thread, thread );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active )
tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth )
: m_cmdbuf( cmdbuf )
, m_ctx( ctx )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
, m_active( GetProfiler().IsConnected() )
#endif
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
#endif
const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
Magic magic;
const auto thread = GetThreadHandle();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.thread, thread );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
tail.store( magic + 1, std::memory_order_release );
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
}
tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
m_cmdbuf = cmdbuf;
m_ctx = ctx;
const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
GetProfiler().SendCallstack( depth );
}
tracy_force_inline ~VkCtxScope()
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
const auto queryId = m_ctx->NextQueryId();
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId );
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_ctx->m_query, queryId );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() );
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
}
private:
const bool m_active;
VkCommandBuffer m_cmdbuf;
VkCtx* m_ctx;
#ifdef TRACY_ON_DEMAND
const bool m_active;
#endif
};
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
{
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
new(ctx) VkCtx( physdev, device, queue, cmdbuf );
return ctx;
}
@@ -474,37 +307,31 @@ static inline void DestroyVkContext( VkCtx* ctx )
using TracyVkCtx = tracy::VkCtx*;
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf );
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active )
# define TracyVkNamedZone( ctx, varname, cmdbuf, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK )
#else
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), cmdbuf, active );
# define TracyVkNamedZone( ctx, varname, cmdbuf, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color )
#endif
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
#ifdef TRACY_HAS_CALLSTACK
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), cmdbuf, depth, active );
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth );
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth );
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth )
#else
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active )
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active )
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth ) TracyVkNamedZone( ctx, varname, cmdbuf, name )
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color )
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color )
# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active )
#endif
#endif

View File

@@ -1,4 +1,4 @@
all: release
all: debug
debug:
@+make -f debug.mk all
@@ -9,8 +9,4 @@ release:
clean:
@+make -f build.mk clean
db: clean
@bear -- $(MAKE) -f debug.mk all
@mv -f compile_commands.json ../../../
.PHONY: all clean debug release db
.PHONY: all clean debug release

View File

@@ -1,12 +1,60 @@
CFLAGS +=
CXXFLAGS := $(CFLAGS) -std=gnu++17
DEFINES += -DTRACY_NO_STATISTICS
INCLUDES := $(shell pkg-config --cflags capstone)
LIBS += $(shell pkg-config --libs capstone) -lpthread
INCLUDES :=
LIBS := -lpthread
PROJECT := capture
IMAGE := $(PROJECT)-$(BUILD)
FILTER := ../../../getopt/getopt.c
include ../../../common/src-from-vcxproj.mk
FILTER :=
include ../../../common/unix.mk
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
SRC := $(filter-out $(FILTER),$(BASE))
SRC2 := $(filter-out $(FILTER),$(BASE2))
TBB := $(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?)
ifeq ($(TBB),0)
LIBS += -ltbb
endif
OBJDIRBASE := obj/$(BUILD)
OBJDIR := $(OBJDIRBASE)/o/o/o
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
all: $(IMAGE)
$(OBJDIR)/%.o: %.cpp
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.cpp
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(OBJDIR)/%.o: %.c
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.c
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(IMAGE): $(OBJ) $(OBJ2)
$(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -o $@
ifneq "$(MAKECMDGOALS)" "clean"
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) %(addprefix $(OBJDIR)/,$(SRC2:.c=.d))
endif
clean:
rm -rf $(OBJDIRBASE) $(IMAGE)*
.PHONY: clean all

View File

@@ -1,6 +1,11 @@
ARCH := $(shell uname -m)
CFLAGS := -g3 -Wall
DEFINES := -DDEBUG
BUILD := debug
include ../../../common/unix-debug.mk
ifeq ($(ARCH),x86_64)
CFLAGS += -msse4.1
endif
include build.mk

View File

@@ -1,9 +1,11 @@
CFLAGS := -O3
ifndef TRACY_NO_LTO
CFLAGS += -flto
endif
ARCH := $(shell uname -m)
CFLAGS := -O3 -s -fomit-frame-pointer
DEFINES := -DNDEBUG
BUILD := release
include ../../../common/unix-release.mk
ifeq ($(ARCH),x86_64)
CFLAGS += -msse4.1
endif
include build.mk

View File

@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30907.101
# Visual Studio 15
VisualStudioVersion = 15.0.27428.2002
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "capture", "capture.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}"
EndProject

View File

@@ -1,6 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
@@ -15,19 +23,31 @@
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
<RootNamespace>capture</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
<VcpkgTriplet>x64-windows-static</VcpkgTriplet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<PlatformToolset>v142</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
@@ -36,6 +56,12 @@
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
@@ -44,9 +70,14 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<PropertyGroup Label="Vcpkg">
<VcpkgEnableManifest>true</VcpkgEnableManifest>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
@@ -57,12 +88,24 @@
<PreprocessorDefinitions>TRACY_NO_STATISTICS;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<LanguageStandard>stdcpplatest</LanguageStandard>
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\debug\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@@ -77,60 +120,24 @@
<PreprocessorDefinitions>TRACY_NO_STATISTICS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<LanguageStandard>stdcpplatest</LanguageStandard>
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\TracySocket.cpp" />
<ClCompile Include="..\..\..\common\TracyStackFrames.cpp" />
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
<ClCompile Include="..\..\..\getopt\getopt.c" />
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
<ClCompile Include="..\..\..\server\TracyMmap.cpp" />
<ClCompile Include="..\..\..\server\TracyPrint.cpp" />
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp" />
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp" />
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp" />
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
<ClCompile Include="..\..\..\zstd\common\debug.c" />
<ClCompile Include="..\..\..\zstd\common\entropy_common.c" />
<ClCompile Include="..\..\..\zstd\common\error_private.c" />
<ClCompile Include="..\..\..\zstd\common\fse_decompress.c" />
<ClCompile Include="..\..\..\zstd\common\pool.c" />
<ClCompile Include="..\..\..\zstd\common\threading.c" />
<ClCompile Include="..\..\..\zstd\common\xxhash.c" />
<ClCompile Include="..\..\..\zstd\common\zstd_common.c" />
<ClCompile Include="..\..\..\zstd\compress\fse_compress.c" />
<ClCompile Include="..\..\..\zstd\compress\hist.c" />
<ClCompile Include="..\..\..\zstd\compress\huf_compress.c" />
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_compress.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_literals.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_sequences.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_superblock.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_double_fast.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_fast.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_lazy.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_ldm.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_opt.c" />
<ClCompile Include="..\..\..\zstd\decompress\huf_decompress.c" />
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
<ClCompile Include="..\..\src\capture.cpp" />
<ClCompile Include="..\..\src\getopt.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
@@ -140,65 +147,22 @@
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
<ClInclude Include="..\..\..\common\TracyStackFrames.hpp" />
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
<ClInclude Include="..\..\..\common\tracy_benaphore.h" />
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
<ClInclude Include="..\..\..\getopt\getopt.h" />
<ClInclude Include="..\..\..\common\tracy_sema.h" />
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
<ClInclude Include="..\..\..\server\TracyFileRead.hpp" />
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
<ClInclude Include="..\..\..\server\TracyMmap.hpp" />
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
<ClInclude Include="..\..\..\server\TracyPrint.hpp" />
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp" />
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp" />
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp" />
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
<ClInclude Include="..\..\..\zstd\common\bitstream.h" />
<ClInclude Include="..\..\..\zstd\common\compiler.h" />
<ClInclude Include="..\..\..\zstd\common\cpu.h" />
<ClInclude Include="..\..\..\zstd\common\debug.h" />
<ClInclude Include="..\..\..\zstd\common\error_private.h" />
<ClInclude Include="..\..\..\zstd\common\fse.h" />
<ClInclude Include="..\..\..\zstd\common\huf.h" />
<ClInclude Include="..\..\..\zstd\common\mem.h" />
<ClInclude Include="..\..\..\zstd\common\pool.h" />
<ClInclude Include="..\..\..\zstd\common\portability_macros.h" />
<ClInclude Include="..\..\..\zstd\common\threading.h" />
<ClInclude Include="..\..\..\zstd\common\xxhash.h" />
<ClInclude Include="..\..\..\zstd\common\zstd_deps.h" />
<ClInclude Include="..\..\..\zstd\common\zstd_internal.h" />
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h" />
<ClInclude Include="..\..\..\zstd\compress\clevels.h" />
<ClInclude Include="..\..\..\zstd\compress\hist.h" />
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_internal.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_literals.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_sequences.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_superblock.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_cwksp.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_double_fast.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_fast.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_lazy.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm_geartab.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_opt.h" />
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
<ClInclude Include="..\..\..\zstd\zdict.h" />
<ClInclude Include="..\..\..\zstd\zstd.h" />
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S" />
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp" />
<ClInclude Include="..\..\src\getopt.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

View File

@@ -10,24 +10,6 @@
<Filter Include="common">
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
</Filter>
<Filter Include="getopt">
<UniqueIdentifier>{ee9737d2-69c7-44da-b9c7-539d18f9d4b4}</UniqueIdentifier>
</Filter>
<Filter Include="zstd">
<UniqueIdentifier>{f201463b-5e69-46fe-bdfc-1b5eed86c7f7}</UniqueIdentifier>
</Filter>
<Filter Include="zstd\common">
<UniqueIdentifier>{7e93ae33-6543-4bca-b05b-50818dbf24cc}</UniqueIdentifier>
</Filter>
<Filter Include="zstd\compress">
<UniqueIdentifier>{3b0c32f5-9efb-4503-9394-5ab95909fb1c}</UniqueIdentifier>
</Filter>
<Filter Include="zstd\decompress">
<UniqueIdentifier>{c1f99170-d904-4af1-8010-0a3ded5736c8}</UniqueIdentifier>
</Filter>
<Filter Include="zstd\dictBuilder">
<UniqueIdentifier>{456e6786-ea57-42b8-ae38-829cd2d918bd}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
@@ -48,117 +30,15 @@
<ClCompile Include="..\..\src\capture.cpp">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\src\getopt.c">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyPrint.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyMmap.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\getopt\getopt.c">
<Filter>getopt</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\debug.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\entropy_common.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\error_private.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\fse_decompress.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\pool.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\threading.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\xxhash.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\zstd_common.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\fse_compress.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\hist.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\huf_compress.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_compress.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_literals.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_sequences.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_superblock.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_double_fast.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_fast.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_lazy.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_ldm.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_opt.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\decompress\huf_decompress.c">
<Filter>zstd\decompress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c">
<Filter>zstd\decompress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c">
<Filter>zstd\decompress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c">
<Filter>zstd\decompress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
<Filter>zstd\dictBuilder</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
<Filter>zstd\dictBuilder</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
<Filter>zstd\dictBuilder</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
<Filter>zstd\dictBuilder</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\TracyStackFrames.cpp">
<Filter>common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
@@ -185,6 +65,9 @@
<ClInclude Include="..\..\..\common\TracySystem.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
<Filter>server</Filter>
</ClInclude>
@@ -209,151 +92,23 @@
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\src\getopt.h">
<Filter>src</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\tracy_benaphore.h">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\tracy_sema.h">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyPrint.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyFileRead.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyMmap.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\getopt\getopt.h">
<Filter>getopt</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_errors.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\bitstream.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\compiler.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\cpu.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\debug.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\error_private.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\fse.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\huf.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\mem.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\pool.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\threading.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\xxhash.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\zstd_deps.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\zstd_internal.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\hist.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_internal.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_literals.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_sequences.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_superblock.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_cwksp.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_double_fast.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_fast.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_lazy.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm_geartab.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_opt.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h">
<Filter>zstd\decompress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h">
<Filter>zstd\decompress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h">
<Filter>zstd\decompress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
<Filter>zstd\dictBuilder</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
<Filter>zstd\dictBuilder</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zdict.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyStackFrames.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\portability_macros.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\clevels.h">
<Filter>zstd\compress</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S">
<Filter>zstd\decompress</Filter>
</None>
</ItemGroup>
</Project>

View File

@@ -1,96 +1,36 @@
#ifdef _WIN32
# include <windows.h>
# include <io.h>
#else
# include <unistd.h>
#endif
#include <atomic>
#include <chrono>
#include <inttypes.h>
#include <mutex>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include "../../common/TracyProtocol.hpp"
#include "../../common/TracyStackFrames.hpp"
#include "../../server/TracyFileWrite.hpp"
#include "../../server/TracyMemory.hpp"
#include "../../server/TracyPrint.hpp"
#include "../../server/TracyWorker.hpp"
#include "getopt.h"
#ifdef _WIN32
# include "../../getopt/getopt.h"
#endif
// This atomic is written by a signal handler (SigInt). Traditionally that would
// have had to be `volatile sig_atomic_t`, and annoyingly, `bool` was
// technically not allowed there, even though in practice it would work.
// The good thing with C++11 atomics is that we can use atomic<bool> instead
// here and be on the actually supported path.
static std::atomic<bool> s_disconnect { false };
#ifndef _MSC_VER
struct sigaction oldsigint;
bool disconnect = false;
void SigInt( int )
{
// Relaxed order is closest to a traditional `volatile` write.
// We don't need stronger ordering since this signal handler doesn't do
// anything else that would need to be ordered relatively to this.
s_disconnect.store(true, std::memory_order_relaxed);
disconnect = true;
}
static bool s_isStdoutATerminal = false;
void InitIsStdoutATerminal() {
#ifdef _WIN32
s_isStdoutATerminal = _isatty( fileno( stdout ) );
#else
s_isStdoutATerminal = isatty( fileno( stdout ) );
#endif
}
bool IsStdoutATerminal() { return s_isStdoutATerminal; }
#define ANSI_RESET "\033[0m"
#define ANSI_BOLD "\033[1m"
#define ANSI_BLACK "\033[30m"
#define ANSI_RED "\033[31m"
#define ANSI_GREEN "\033[32m"
#define ANSI_YELLOW "\033[33m"
#define ANSI_MAGENTA "\033[35m"
#define ANSI_CYAN "\033[36m"
#define ANSI_ERASE_LINE "\033[2K"
// Like printf, but if stdout is a terminal, prepends the output with
// the given `ansiEscape` and appends ANSI_RESET.
void AnsiPrintf( const char* ansiEscape, const char* format, ... ) {
if( IsStdoutATerminal() )
{
// Prepend ansiEscape and append ANSI_RESET.
char buf[256];
va_list args;
va_start( args, format );
vsnprintf( buf, sizeof buf, format, args );
va_end( args );
printf( "%s%s" ANSI_RESET, ansiEscape, buf );
}
else
{
// Just a normal printf.
va_list args;
va_start( args, format );
vfprintf( stdout, format, args );
va_end( args );
}
}
[[noreturn]] void Usage()
void Usage()
{
printf( "Usage: capture -o output.tracy [-a address] [-p port] [-f] [-s seconds]\n" );
printf( "Usage: capture -a address -o output.tracy\n" );
exit( 1 );
}
@@ -104,16 +44,11 @@ int main( int argc, char** argv )
}
#endif
InitIsStdoutATerminal();
bool overwrite = false;
const char* address = "127.0.0.1";
const char* address = nullptr;
const char* output = nullptr;
int port = 8086;
int seconds = -1;
int c;
while( ( c = getopt( argc, argv, "a:o:p:fs:" ) ) != -1 )
while( ( c = getopt( argc, argv, "a:o:" ) ) != -1 )
{
switch( c )
{
@@ -123,15 +58,6 @@ int main( int argc, char** argv )
case 'o':
output = optarg;
break;
case 'p':
port = atoi( optarg );
break;
case 'f':
overwrite = true;
break;
case 's':
seconds = atoi (optarg);
break;
default:
Usage();
break;
@@ -140,25 +66,9 @@ int main( int argc, char** argv )
if( !address || !output ) Usage();
struct stat st;
if( stat( output, &st ) == 0 && !overwrite )
{
printf( "Output file %s already exists! Use -f to force overwrite.\n", output );
return 4;
}
FILE* test = fopen( output, "wb" );
if( !test )
{
printf( "Cannot open output file %s for writing!\n", output );
return 5;
}
fclose( test );
unlink( output );
printf( "Connecting to %s:%i...", address, port );
printf( "Connecting to %s...", address );
fflush( stdout );
tracy::Worker worker( address, port );
tracy::Worker worker( address );
while( !worker.IsConnected() )
{
const auto handshake = worker.GetHandshakeStatus();
@@ -181,10 +91,8 @@ int main( int argc, char** argv )
while( !worker.HasData() ) std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );
#ifdef _WIN32
signal( SIGINT, SigInt );
#else
struct sigaction sigint, oldsigint;
#ifndef _MSC_VER
struct sigaction sigint;
memset( &sigint, 0, sizeof( sigint ) );
sigint.sa_handler = SigInt;
sigaction( SIGINT, &sigint, &oldsigint );
@@ -192,163 +100,52 @@ int main( int argc, char** argv )
auto& lock = worker.GetMbpsDataLock();
const auto t0 = std::chrono::high_resolution_clock::now();
while( worker.IsConnected() )
{
// Relaxed order is sufficient here because `s_disconnect` is only ever
// set by this thread or by the SigInt handler, and that handler does
// nothing else than storing `s_disconnect`.
if( s_disconnect.load( std::memory_order_relaxed ) )
#ifndef _MSC_VER
if( disconnect )
{
worker.Disconnect();
// Relaxed order is sufficient because only this thread ever reads
// this value.
s_disconnect.store(false, std::memory_order_relaxed );
break;
disconnect = false;
}
#endif
lock.lock();
const auto mbps = worker.GetMbpsData().back();
const auto compRatio = worker.GetCompRatio();
const auto netTotal = worker.GetDataTransferred();
lock.unlock();
// Output progress info only if destination is a TTY to avoid bloating
// log files (so this is not just about usage of ANSI color codes).
if( IsStdoutATerminal() )
if( mbps < 0.1f )
{
const char* unit = "Mbps";
float unitsPerMbps = 1.f;
if( mbps < 0.1f )
{
unit = "Kbps";
unitsPerMbps = 1000.f;
}
AnsiPrintf( ANSI_ERASE_LINE ANSI_CYAN ANSI_BOLD, "\r%7.2f %s", mbps * unitsPerMbps, unit );
printf( " /");
AnsiPrintf( ANSI_CYAN ANSI_BOLD, "%5.1f%%", compRatio * 100.f );
printf( " =");
AnsiPrintf( ANSI_YELLOW ANSI_BOLD, "%7.2f Mbps", mbps / compRatio );
printf( " | ");
AnsiPrintf( ANSI_YELLOW, "Tx: ");
AnsiPrintf( ANSI_GREEN, "%s", tracy::MemSizeToString( netTotal ) );
printf( " | ");
AnsiPrintf( ANSI_RED ANSI_BOLD, "%s", tracy::MemSizeToString( tracy::memUsage ) );
printf( " | ");
AnsiPrintf( ANSI_RED, "%s", tracy::TimeToString( worker.GetLastTime() ) );
fflush( stdout );
printf( "\33[2K\r\033[36;1m%7.2f Kbps", mbps * 1000.f );
}
else
{
printf( "\33[2K\r\033[36;1m%7.2f Mbps", mbps );
}
printf( " \033[0m /\033[36;1m%5.1f%% \033[0m=\033[33;1m%7.2f Mbps \033[0m| Mem: \033[31;1m%.2f MB\033[0m | \033[33mTime: %s\033[0m", compRatio * 100.f, mbps / compRatio, tracy::memUsage.load( std::memory_order_relaxed ) / ( 1024.f * 1024.f ), tracy::TimeToString( worker.GetLastTime() - worker.GetTimeBegin() ) );
fflush( stdout );
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
if( seconds != -1 )
{
const auto dur = std::chrono::high_resolution_clock::now() - t0;
if( std::chrono::duration_cast<std::chrono::seconds>(dur).count() >= seconds )
{
// Relaxed order is sufficient because only this thread ever reads
// this value.
s_disconnect.store(true, std::memory_order_relaxed );
}
}
}
const auto t1 = std::chrono::high_resolution_clock::now();
const auto& failure = worker.GetFailureType();
if( failure != tracy::Worker::Failure::None )
{
AnsiPrintf( ANSI_RED ANSI_BOLD, "\nInstrumentation failure: %s", tracy::Worker::GetFailureString( failure ) );
auto& fd = worker.GetFailureData();
if( !fd.message.empty() )
{
printf( "\nContext: %s", fd.message.c_str() );
}
if( fd.callstack != 0 )
{
AnsiPrintf( ANSI_BOLD, "\n%sFailure callstack:%s\n" );
auto& cs = worker.GetCallstack( fd.callstack );
int fidx = 0;
int bidx = 0;
for( auto& entry : cs )
{
auto frameData = worker.GetCallstackFrame( entry );
if( !frameData )
{
printf( "%3i. %p\n", fidx++, (void*)worker.GetCanonicalPointer( entry ) );
}
else
{
const auto fsz = frameData->size;
for( uint8_t f=0; f<fsz; f++ )
{
const auto& frame = frameData->data[f];
auto txt = worker.GetString( frame.name );
if( fidx == 0 && f != fsz-1 )
{
auto test = tracy::s_tracyStackFrames;
bool match = false;
do
{
if( strcmp( txt, *test ) == 0 )
{
match = true;
break;
}
}
while( *++test );
if( match ) continue;
}
bidx++;
if( f == fsz-1 )
{
printf( "%3i. ", fidx++ );
}
else
{
AnsiPrintf( ANSI_BLACK ANSI_BOLD, "inl. " );
}
AnsiPrintf( ANSI_CYAN, "%s ", txt );
txt = worker.GetString( frame.file );
if( frame.line == 0 )
{
AnsiPrintf( ANSI_YELLOW, "(%s)", txt );
}
else
{
AnsiPrintf( ANSI_YELLOW, "(%s:%" PRIu32 ")", txt, frame.line );
}
if( frameData->imageName.Active() )
{
AnsiPrintf( ANSI_MAGENTA, " %s\n", worker.GetString( frameData->imageName ) );
}
else
{
printf( "\n" );
}
}
}
}
}
printf( "\n\033[31;1mInstrumentation failure: %s\033[0m", tracy::Worker::GetFailureString( failure ) );
}
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() ), tracy::RealToString( worker.GetZoneCount() ),
tracy::TimeToString( std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count() ) );
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nSaving trace...", worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() - worker.GetTimeBegin() ), tracy::RealToString( worker.GetZoneCount(), true ) );
fflush( stdout );
auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output ) );
if( f )
{
worker.Write( *f, false );
AnsiPrintf( ANSI_GREEN ANSI_BOLD, " done!\n" );
f->Finish();
const auto stats = f->GetCompressionStatistics();
printf( "Trace size %s (%.2f%% ratio)\n", tracy::MemSizeToString( stats.second ), 100.f * stats.second / stats.first );
worker.Write( *f );
printf( " \033[32;1mdone!\033[0m\n" );
}
else
{
AnsiPrintf( ANSI_RED ANSI_BOLD, " failed!\n");
printf( " \033[31;1failed!\033[0m\n" );
}
return 0;

View File

@@ -1,42 +0,0 @@
#ifdef TRACY_ENABLE
#include <atomic>
#include "../common/TracyAlloc.hpp"
#include "../common/TracyForceInline.hpp"
#include "../common/TracyYield.hpp"
namespace tracy
{
extern thread_local bool RpThreadInitDone;
extern std::atomic<int> RpInitDone;
extern std::atomic<int> RpInitLock;
tracy_no_inline static void InitRpmallocPlumbing()
{
const auto done = RpInitDone.load( std::memory_order_acquire );
if( !done )
{
int expected = 0;
while( !RpInitLock.compare_exchange_weak( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; YieldThread(); }
const auto done = RpInitDone.load( std::memory_order_acquire );
if( !done )
{
rpmalloc_initialize();
RpInitDone.store( 1, std::memory_order_release );
}
RpInitLock.store( 0, std::memory_order_release );
}
rpmalloc_thread_initialize();
RpThreadInitDone = true;
}
TRACY_API void InitRpmalloc()
{
if( !RpThreadInitDone ) InitRpmallocPlumbing();
}
}
#endif

View File

@@ -1,8 +1,6 @@
namespace tracy
{
#if defined __linux__ && defined __ARM_ARCH
static const char* DecodeArmImplementer( uint32_t v )
{
static char buf[16];
@@ -14,7 +12,6 @@ static const char* DecodeArmImplementer( uint32_t v )
case 0x44: return "DEC";
case 0x46: return "Fujitsu";
case 0x48: return "HiSilicon";
case 0x49: return "Infineon";
case 0x4d: return "Motorola";
case 0x4e: return "Nvidia";
case 0x50: return "Applied Micro";
@@ -26,7 +23,6 @@ static const char* DecodeArmImplementer( uint32_t v )
case 0x66: return "Faraday";
case 0x68: return "HXT";
case 0x69: return "Intel";
case 0xc0: return "Ampere Computing";
default: break;
}
sprintf( buf, "0x%x", v );
@@ -38,7 +34,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
static char buf[16];
switch( impl )
{
case 0x41: // ARM
case 0x41:
switch( part )
{
case 0x810: return "810";
@@ -61,8 +57,8 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xc09: return " Cortex-A9";
case 0xc0c: return " Cortex-A12";
case 0xc0d: return " Rockchip RK3288";
case 0xc0e: return " Cortex-A17";
case 0xc0f: return " Cortex-A15";
case 0xc0e: return " Cortex-A17";
case 0xc14: return " Cortex-R4";
case 0xc15: return " Cortex-R5";
case 0xc17: return " Cortex-R7";
@@ -75,7 +71,6 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xc60: return " Cortex-M0+";
case 0xd00: return " AArch64 simulator";
case 0xd01: return " Cortex-A32";
case 0xd02: return " Cortex-A34";
case 0xd03: return " Cortex-A53";
case 0xd04: return " Cortex-A35";
case 0xd05: return " Cortex-A55";
@@ -92,21 +87,10 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xd13: return " Cortex-R52";
case 0xd20: return " Cortex-M23";
case 0xd21: return " Cortex-M33";
case 0xd22: return " Cortex-M55";
case 0xd40: return " Neoverse V1";
case 0xd41: return " Cortex-A78";
case 0xd42: return " Cortex-A78AE";
case 0xd43: return " Cortex-A65AE";
case 0xd44: return " Cortex-X1";
case 0xd47: return " Cortex-A710";
case 0xd48: return " Cortex-X2";
case 0xd49: return " Neoverse N2";
case 0xd4a: return " Neoverse E1";
case 0xd4b: return " Cortex-A78C";
case 0xd4c: return " Cortex-X1C";
default: break;
}
case 0x42: // Broadcom
case 0x42:
switch( part )
{
case 0xf: return " Brahma B15";
@@ -114,7 +98,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0x516: return " ThunderX2";
default: break;
}
case 0x43: // Cavium
case 0x43:
switch( part )
{
case 0xa0: return " ThunderX";
@@ -122,37 +106,29 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xa2: return " ThunderX 81XX";
case 0xa3: return " ThunderX 83XX";
case 0xaf: return " ThunderX2 99xx";
case 0xb0: return " OcteonTX2";
case 0xb1: return " OcteonTX2 T98";
case 0xb2: return " OcteonTX2 T96";
case 0xb3: return " OcteonTX2 F95";
case 0xb4: return " OcteonTX2 F95N";
case 0xb5: return " OcteonTX2 F95MM";
case 0xb6: return " OcteonTX2 F95O";
case 0xb8: return " ThunderX3 T110";
default: break;
}
case 0x44: // DEC
case 0x44:
switch( part )
{
case 0xa10: return " SA110";
case 0xa11: return " SA1100";
default: break;
}
case 0x46: // Fujitsu
case 0x46:
switch( part )
{
case 0x1: return " A64FX";
default: break;
}
case 0x48: // HiSilicon
case 0x48:
switch( part )
{
case 0xd01: return " TSV100";
case 0xd40: return " Kirin 980";
default: break;
}
case 0x4e: // Nvidia
case 0x4e:
switch( part )
{
case 0x0: return " Denver";
@@ -160,13 +136,13 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0x4: return " Carmel";
default: break;
}
case 0x50: // Applied Micro
case 0x50:
switch( part )
{
case 0x0: return " X-Gene";
default: break;
}
case 0x51: // Qualcomm
case 0x51:
switch( part )
{
case 0xf: return " Scorpion";
@@ -182,27 +158,18 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0x802: return " Kryo 385 Gold";
case 0x803: return " Kryo 385 Silver";
case 0x804: return " Kryo 485 Gold";
case 0x805: return " Kryo 4xx/5xx Silver";
case 0xc00: return " Falkor";
case 0xc01: return " Saphira";
default: break;
}
case 0x53: // Samsung
case 0x53:
switch( part )
{
case 0x1: return " Exynos M1/M2";
case 0x2: return " Exynos M3";
case 0x3: return " Exynos M4";
case 0x4: return " Exynos M5";
default: break;
}
case 0x54: // Texas Instruments
switch( part )
{
case 0x925: return " TI925";
default: break;
}
case 0x56: // Marvell
case 0x56:
switch( part )
{
case 0x131: return " Feroceon 88FR131";
@@ -210,7 +177,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0x584: return " PJ4B-MP / PJ4C";
default: break;
}
case 0x61: // Apple
case 0x61:
switch( part )
{
case 0x1: return " Cyclone";
@@ -220,41 +187,27 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0x5: return " Twister/Elba/Malta";
case 0x6: return " Hurricane";
case 0x7: return " Hurricane/Myst";
case 0x22: return " M1 Icestorm";
case 0x23: return " M1 Firestorm";
case 0x24: return " M1 Icestorm Pro";
case 0x25: return " M1 Firestorm Pro";
case 0x28: return " M1 Icestorm Max";
case 0x29: return " M1 Firestorm Max";
default: break;
}
case 0x66: // Faraday
case 0x66:
switch( part )
{
case 0x526: return " FA526";
case 0x626: return " FA626";
default: break;
}
case 0x68: // HXT
case 0x68:
switch( part )
{
case 0x0: return " Phecda";
default: break;
}
case 0xc0: // Ampere Computing
switch( part )
{
case 0xac3: return " Ampere1";
default: break;
}
default: break;
}
sprintf( buf, " 0x%x", part );
return buf;
}
#elif defined __APPLE__ && TARGET_OS_IPHONE == 1
static const char* DecodeIosDevice( const char* id )
{
static const char* DeviceTable[] = {
@@ -292,19 +245,6 @@ static const char* DecodeIosDevice( const char* id )
"iPhone11,4", "iPhone XS Max",
"iPhone11,6", "iPhone XS Max China",
"iPhone11,8", "iPhone XR",
"iPhone12,1", "iPhone 11",
"iPhone12,3", "iPhone 11 Pro",
"iPhone12,5", "iPhone 11 Pro Max",
"iPhone12,8", "iPhone SE 2nd Gen",
"iPhone13,1", "iPhone 12 Mini",
"iPhone13,2", "iPhone 12",
"iPhone13,3", "iPhone 12 Pro",
"iPhone13,4", "iPhone 12 Pro Max",
"iPhone14,2", "iPhone 13 Pro",
"iPhone14,3", "iPhone 13 Pro Max",
"iPhone14,4", "iPhone 13 Mini",
"iPhone14,5", "iPhone 13",
"iPhone14,6", "iPhone SE 3rd Gen",
"iPad1,1", "iPad (A1219/A1337)",
"iPad2,1", "iPad 2 (A1395)",
"iPad2,2", "iPad 2 (A1396)",
@@ -345,8 +285,6 @@ static const char* DecodeIosDevice( const char* id )
"iPad7,4", "iPad Pro 10.5\" (A1709)",
"iPad7,5", "iPad 6th gen (A1893)",
"iPad7,6", "iPad 6th gen (A1954)",
"iPad7,11", "iPad 7th gen 10.2\" (Wifi)",
"iPad7,12", "iPad 7th gen 10.2\" (Wifi+Cellular)",
"iPad8,1", "iPad Pro 11\" (A1980)",
"iPad8,2", "iPad Pro 11\" (A1980)",
"iPad8,3", "iPad Pro 11\" (A1934/A1979/A2013)",
@@ -355,28 +293,10 @@ static const char* DecodeIosDevice( const char* id )
"iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)",
"iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
"iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
"iPad8,9", "iPad Pro 11\" 2nd gen (Wifi)",
"iPad8,10", "iPad Pro 11\" 2nd gen (Wifi+Cellular)",
"iPad8,11", "iPad Pro 12.9\" 4th gen (Wifi)",
"iPad8,12", "iPad Pro 12.9\" 4th gen (Wifi+Cellular)",
"iPad11,1", "iPad Mini 5th gen (A2133)",
"iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)",
"iPad11,3", "iPad Air 3rd gen (A2152)",
"iPad11,4", "iPad Air 3rd gen (A2123/A2153/A2154)",
"iPad11,6", "iPad 8th gen (WiFi)",
"iPad11,7", "iPad 8th gen (WiFi+Cellular)",
"iPad13,1", "iPad Air 4th gen (WiFi)",
"iPad13,2", "iPad Air 4th gen (WiFi+Cellular)",
"iPad13,4", "iPad Pro 11\" 3rd gen",
"iPad13,5", "iPad Pro 11\" 3rd gen",
"iPad13,6", "iPad Pro 11\" 3rd gen",
"iPad13,7", "iPad Pro 11\" 3rd gen",
"iPad13,8", "iPad Pro 12.9\" 5th gen",
"iPad13,9", "iPad Pro 12.9\" 5th gen",
"iPad13,10", "iPad Pro 12.9\" 5th gen",
"iPad13,11", "iPad Pro 12.9\" 5th gen",
"iPad13,16", "iPad Air 5th Gen (WiFi)",
"iPad13,17", "iPad Air 5th Gen (WiFi+Cellular)",
"iPod1,1", "iPod Touch",
"iPod2,1", "iPod Touch 2nd gen",
"iPod3,1", "iPod Touch 3rd gen",
@@ -396,6 +316,4 @@ static const char* DecodeIosDevice( const char* id )
return id;
}
#endif
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,35 +1,22 @@
#ifndef __TRACYCALLSTACK_H__
#define __TRACYCALLSTACK_H__
#ifndef TRACY_NO_CALLSTACK
# if !defined _WIN32
# include <sys/param.h>
#if defined _WIN32 || defined __CYGWIN__
# define TRACY_HAS_CALLSTACK 1
#elif defined __ANDROID__
# if !defined __arm__ || __ANDROID_API__ >= 21
# define TRACY_HAS_CALLSTACK 2
# else
# define TRACY_HAS_CALLSTACK 5
# endif
# if defined _WIN32
# include "../common/TracyUwp.hpp"
# ifndef TRACY_UWP
# define TRACY_HAS_CALLSTACK 1
# endif
# elif defined __ANDROID__
# if !defined __arm__ || __ANDROID_API__ >= 21
# define TRACY_HAS_CALLSTACK 2
# else
# define TRACY_HAS_CALLSTACK 5
# endif
# elif defined __linux
# if defined _GNU_SOURCE && defined __GLIBC__
# define TRACY_HAS_CALLSTACK 3
# else
# define TRACY_HAS_CALLSTACK 2
# endif
# elif defined __APPLE__
# define TRACY_HAS_CALLSTACK 4
# elif defined BSD
# define TRACY_HAS_CALLSTACK 6
#elif defined __linux
# if defined _GNU_SOURCE && defined __GLIBC__
# define TRACY_HAS_CALLSTACK 3
# else
# define TRACY_HAS_CALLSTACK 2
# endif
#elif defined __APPLE__
# define TRACY_HAS_CALLSTACK 4
#endif
#endif

View File

@@ -1,10 +1,15 @@
#ifndef __TRACYCALLSTACK_HPP__
#define __TRACYCALLSTACK_HPP__
#include "../common/TracyApi.h"
#include "TracyCallstack.h"
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
#if TRACY_HAS_CALLSTACK == 1
extern "C"
{
typedef unsigned long (__stdcall *t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
extern t_RtlWalkFrameChain RtlWalkFrameChain;
}
#elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5
# include <unwind.h>
#elif TRACY_HAS_CALLSTACK >= 3
# include <execinfo.h>
@@ -13,12 +18,9 @@
#ifdef TRACY_HAS_CALLSTACK
#ifdef TRACY_DEBUGINFOD
# include <elfutils/debuginfod.h>
#endif
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "../common/TracyAlloc.hpp"
#include "../common/TracyForceInline.hpp"
@@ -26,57 +28,33 @@
namespace tracy
{
struct CallstackSymbolData
{
const char* file;
uint32_t line;
bool needFree;
uint64_t symAddr;
};
struct CallstackEntry
{
const char* name;
const char* file;
uint32_t line;
uint32_t symLen;
uint64_t symAddr;
};
struct CallstackEntryData
{
const CallstackEntry* data;
uint8_t size;
const char* imageName;
};
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr );
CallstackSymbolData DecodeCodeAddress( uint64_t ptr );
const char* DecodeCallstackPtrFast( uint64_t ptr );
CallstackEntryData DecodeCallstackPtr( uint64_t ptr );
void InitCallstack();
void EndCallstack();
const char* GetKernelModulePath( uint64_t addr );
#ifdef TRACY_DEBUGINFOD
const uint8_t* GetBuildIdForImage( const char* image, size_t& size );
debuginfod_client* GetDebuginfodClient();
#endif
#if TRACY_HAS_CALLSTACK == 1
extern "C"
{
typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain;
}
static tracy_force_inline void* Callstack( int depth )
{
assert( depth >= 1 && depth < 63 );
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
const auto num = ___tracy_RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 );
const auto num = RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 );
*trace = num;
return trace;
}
@@ -113,14 +91,14 @@ static tracy_force_inline void* Callstack( int depth )
return trace;
}
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
#elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4
static tracy_force_inline void* Callstack( int depth )
{
assert( depth >= 1 );
auto trace = (uintptr_t*)tracy_malloc( ( 1 + (size_t)depth ) * sizeof( uintptr_t ) );
const auto num = (size_t)backtrace( (void**)(trace+1), depth );
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
const auto num = backtrace( (void**)(trace+1), depth );
*trace = num;
return trace;

View File

@@ -1,11 +0,0 @@
#ifndef __TRACYPRINT_HPP__
#define __TRACYPRINT_HPP__
#ifdef TRACY_VERBOSE
# include <stdio.h>
# define TracyDebug(...) fprintf( stderr, __VA_ARGS__ );
#else
# define TracyDebug(...)
#endif
#endif

View File

@@ -18,8 +18,10 @@
# include <intrin.h>
# else
# include <x86intrin.h>
# ifndef _mm256_cvtsi256_si32
# define _mm256_cvtsi256_si32( v ) ( _mm_cvtsi128_si32( _mm256_castsi256_si128( v ) ) )
# ifdef __CYGWIN__
# ifndef _mm256_cvtsi256_si32
# define _mm256_cvtsi256_si32( v ) ( _mm_cvtsi128_si32( _mm256_castsi256_si128( v ) ) )
# endif
# endif
# endif
#endif
@@ -90,8 +92,56 @@ static const uint16_t DivTable[255*3+1] = {
0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
};
#if defined __ARM_NEON && defined __aarch64__
static const uint16_t DivTableAVX[255*3+1] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x38e3, 0x35e5, 0x3333, 0x30c3, 0x2e8b, 0x2c85, 0x2aaa, 0x28f5, 0x2762, 0x25ed, 0x2492, 0x234f, 0x2222, 0x2108, 0x2000,
0x1f07, 0x1e1e, 0x1d41, 0x1c71, 0x1bac, 0x1af2, 0x1a41, 0x1999, 0x18f9, 0x1861, 0x17d0, 0x1745, 0x16c1, 0x1642, 0x15c9, 0x1555,
0x14e5, 0x147a, 0x1414, 0x13b1, 0x1352, 0x12f6, 0x129e, 0x1249, 0x11f7, 0x11a7, 0x115b, 0x1111, 0x10c9, 0x1084, 0x1041, 0x1000,
0x0fc0, 0x0f83, 0x0f48, 0x0f0f, 0x0ed7, 0x0ea0, 0x0e6c, 0x0e38, 0x0e07, 0x0dd6, 0x0da7, 0x0d79, 0x0d4c, 0x0d20, 0x0cf6, 0x0ccc,
0x0ca4, 0x0c7c, 0x0c56, 0x0c30, 0x0c0c, 0x0be8, 0x0bc5, 0x0ba2, 0x0b81, 0x0b60, 0x0b40, 0x0b21, 0x0b02, 0x0ae4, 0x0ac7, 0x0aaa,
0x0a8e, 0x0a72, 0x0a57, 0x0a3d, 0x0a23, 0x0a0a, 0x09f1, 0x09d8, 0x09c0, 0x09a9, 0x0991, 0x097b, 0x0964, 0x094f, 0x0939, 0x0924,
0x090f, 0x08fb, 0x08e7, 0x08d3, 0x08c0, 0x08ad, 0x089a, 0x0888, 0x0876, 0x0864, 0x0853, 0x0842, 0x0831, 0x0820, 0x0810, 0x0800,
0x07f0, 0x07e0, 0x07d1, 0x07c1, 0x07b3, 0x07a4, 0x0795, 0x0787, 0x0779, 0x076b, 0x075d, 0x0750, 0x0743, 0x0736, 0x0729, 0x071c,
0x070f, 0x0703, 0x06f7, 0x06eb, 0x06df, 0x06d3, 0x06c8, 0x06bc, 0x06b1, 0x06a6, 0x069b, 0x0690, 0x0685, 0x067b, 0x0670, 0x0666,
0x065c, 0x0652, 0x0648, 0x063e, 0x0634, 0x062b, 0x0621, 0x0618, 0x060f, 0x0606, 0x05fd, 0x05f4, 0x05eb, 0x05e2, 0x05d9, 0x05d1,
0x05c9, 0x05c0, 0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0581, 0x0579, 0x0572, 0x056b, 0x0563, 0x055c, 0x0555,
0x054e, 0x0547, 0x0540, 0x0539, 0x0532, 0x052b, 0x0525, 0x051e, 0x0518, 0x0511, 0x050b, 0x0505, 0x04fe, 0x04f8, 0x04f2, 0x04ec,
0x04e6, 0x04e0, 0x04da, 0x04d4, 0x04ce, 0x04c8, 0x04c3, 0x04bd, 0x04b8, 0x04b2, 0x04ad, 0x04a7, 0x04a2, 0x049c, 0x0497, 0x0492,
0x048d, 0x0487, 0x0482, 0x047d, 0x0478, 0x0473, 0x046e, 0x0469, 0x0465, 0x0460, 0x045b, 0x0456, 0x0452, 0x044d, 0x0448, 0x0444,
0x043f, 0x043b, 0x0436, 0x0432, 0x042d, 0x0429, 0x0425, 0x0421, 0x041c, 0x0418, 0x0414, 0x0410, 0x040c, 0x0408, 0x0404, 0x0400,
0x03fc, 0x03f8, 0x03f4, 0x03f0, 0x03ec, 0x03e8, 0x03e4, 0x03e0, 0x03dd, 0x03d9, 0x03d5, 0x03d2, 0x03ce, 0x03ca, 0x03c7, 0x03c3,
0x03c0, 0x03bc, 0x03b9, 0x03b5, 0x03b2, 0x03ae, 0x03ab, 0x03a8, 0x03a4, 0x03a1, 0x039e, 0x039b, 0x0397, 0x0394, 0x0391, 0x038e,
0x038b, 0x0387, 0x0384, 0x0381, 0x037e, 0x037b, 0x0378, 0x0375, 0x0372, 0x036f, 0x036c, 0x0369, 0x0366, 0x0364, 0x0361, 0x035e,
0x035b, 0x0358, 0x0355, 0x0353, 0x0350, 0x034d, 0x034a, 0x0348, 0x0345, 0x0342, 0x0340, 0x033d, 0x033a, 0x0338, 0x0335, 0x0333,
0x0330, 0x032e, 0x032b, 0x0329, 0x0326, 0x0324, 0x0321, 0x031f, 0x031c, 0x031a, 0x0317, 0x0315, 0x0313, 0x0310, 0x030e, 0x030c,
0x0309, 0x0307, 0x0305, 0x0303, 0x0300, 0x02fe, 0x02fc, 0x02fa, 0x02f7, 0x02f5, 0x02f3, 0x02f1, 0x02ef, 0x02ec, 0x02ea, 0x02e8,
0x02e6, 0x02e4, 0x02e2, 0x02e0, 0x02de, 0x02dc, 0x02da, 0x02d8, 0x02d6, 0x02d4, 0x02d2, 0x02d0, 0x02ce, 0x02cc, 0x02ca, 0x02c8,
0x02c6, 0x02c4, 0x02c2, 0x02c0, 0x02be, 0x02bc, 0x02bb, 0x02b9, 0x02b7, 0x02b5, 0x02b3, 0x02b1, 0x02b0, 0x02ae, 0x02ac, 0x02aa,
0x02a8, 0x02a7, 0x02a5, 0x02a3, 0x02a1, 0x02a0, 0x029e, 0x029c, 0x029b, 0x0299, 0x0297, 0x0295, 0x0294, 0x0292, 0x0291, 0x028f,
0x028d, 0x028c, 0x028a, 0x0288, 0x0287, 0x0285, 0x0284, 0x0282, 0x0280, 0x027f, 0x027d, 0x027c, 0x027a, 0x0279, 0x0277, 0x0276,
0x0274, 0x0273, 0x0271, 0x0270, 0x026e, 0x026d, 0x026b, 0x026a, 0x0268, 0x0267, 0x0265, 0x0264, 0x0263, 0x0261, 0x0260, 0x025e,
0x025d, 0x025c, 0x025a, 0x0259, 0x0257, 0x0256, 0x0255, 0x0253, 0x0252, 0x0251, 0x024f, 0x024e, 0x024d, 0x024b, 0x024a, 0x0249,
0x0247, 0x0246, 0x0245, 0x0243, 0x0242, 0x0241, 0x0240, 0x023e, 0x023d, 0x023c, 0x023b, 0x0239, 0x0238, 0x0237, 0x0236, 0x0234,
0x0233, 0x0232, 0x0231, 0x0230, 0x022e, 0x022d, 0x022c, 0x022b, 0x022a, 0x0229, 0x0227, 0x0226, 0x0225, 0x0224, 0x0223, 0x0222,
0x0220, 0x021f, 0x021e, 0x021d, 0x021c, 0x021b, 0x021a, 0x0219, 0x0218, 0x0216, 0x0215, 0x0214, 0x0213, 0x0212, 0x0211, 0x0210,
0x020f, 0x020e, 0x020d, 0x020c, 0x020b, 0x020a, 0x0209, 0x0208, 0x0207, 0x0206, 0x0205, 0x0204, 0x0203, 0x0202, 0x0201, 0x0200,
0x01ff, 0x01fe, 0x01fd, 0x01fc, 0x01fb, 0x01fa, 0x01f9, 0x01f8, 0x01f7, 0x01f6, 0x01f5, 0x01f4, 0x01f3, 0x01f2, 0x01f1, 0x01f0,
0x01ef, 0x01ee, 0x01ed, 0x01ec, 0x01eb, 0x01ea, 0x01e9, 0x01e9, 0x01e8, 0x01e7, 0x01e6, 0x01e5, 0x01e4, 0x01e3, 0x01e2, 0x01e1,
0x01e0, 0x01e0, 0x01df, 0x01de, 0x01dd, 0x01dc, 0x01db, 0x01da, 0x01da, 0x01d9, 0x01d8, 0x01d7, 0x01d6, 0x01d5, 0x01d4, 0x01d4,
0x01d3, 0x01d2, 0x01d1, 0x01d0, 0x01cf, 0x01cf, 0x01ce, 0x01cd, 0x01cc, 0x01cb, 0x01cb, 0x01ca, 0x01c9, 0x01c8, 0x01c7, 0x01c7,
0x01c6, 0x01c5, 0x01c4, 0x01c3, 0x01c3, 0x01c2, 0x01c1, 0x01c0, 0x01c0, 0x01bf, 0x01be, 0x01bd, 0x01bd, 0x01bc, 0x01bb, 0x01ba,
0x01ba, 0x01b9, 0x01b8, 0x01b7, 0x01b7, 0x01b6, 0x01b5, 0x01b4, 0x01b4, 0x01b3, 0x01b2, 0x01b2, 0x01b1, 0x01b0, 0x01af, 0x01af,
0x01ae, 0x01ad, 0x01ad, 0x01ac, 0x01ab, 0x01aa, 0x01aa, 0x01a9, 0x01a8, 0x01a8, 0x01a7, 0x01a6, 0x01a6, 0x01a5, 0x01a4, 0x01a4,
0x01a3, 0x01a2, 0x01a2, 0x01a1, 0x01a0, 0x01a0, 0x019f, 0x019e, 0x019e, 0x019d, 0x019c, 0x019c, 0x019b, 0x019a, 0x019a, 0x0199,
0x0198, 0x0198, 0x0197, 0x0197, 0x0196, 0x0195, 0x0195, 0x0194, 0x0193, 0x0193, 0x0192, 0x0192, 0x0191, 0x0190, 0x0190, 0x018f,
0x018f, 0x018e, 0x018d, 0x018d, 0x018c, 0x018b, 0x018b, 0x018a, 0x018a, 0x0189, 0x0189, 0x0188, 0x0187, 0x0187, 0x0186, 0x0186,
0x0185, 0x0184, 0x0184, 0x0183, 0x0183, 0x0182, 0x0182, 0x0181, 0x0180, 0x0180, 0x017f, 0x017f, 0x017e, 0x017e, 0x017d, 0x017d,
0x017c, 0x017b, 0x017b, 0x017a, 0x017a, 0x0179, 0x0179, 0x0178, 0x0178, 0x0177, 0x0177, 0x0176, 0x0175, 0x0175, 0x0174, 0x0174,
0x0173, 0x0173, 0x0172, 0x0172, 0x0171, 0x0171, 0x0170, 0x0170, 0x016f, 0x016f, 0x016e, 0x016e, 0x016d, 0x016d, 0x016c, 0x016c,
0x016b, 0x016b, 0x016a, 0x016a, 0x0169, 0x0169, 0x0168, 0x0168, 0x0167, 0x0167, 0x0166, 0x0166, 0x0165, 0x0165, 0x0164, 0x0164,
0x0163, 0x0163, 0x0162, 0x0162, 0x0161, 0x0161, 0x0160, 0x0160, 0x015f, 0x015f, 0x015e, 0x015e, 0x015d, 0x015d, 0x015d, 0x015c,
0x015c, 0x015b, 0x015b, 0x015a, 0x015a, 0x0159, 0x0159, 0x0158, 0x0158, 0x0158, 0x0157, 0x0157, 0x0156, 0x0156
};
static const uint16_t DivTableNEON[255*3+1] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x1c71, 0x1af2, 0x1999, 0x1861, 0x1745, 0x1642, 0x1555, 0x147a, 0x13b1, 0x12f6, 0x1249, 0x11a7, 0x1111, 0x1084, 0x1000,
@@ -142,7 +192,6 @@ static const uint16_t DivTableNEON[255*3+1] = {
0x00b1, 0x00b1, 0x00b1, 0x00b1, 0x00b0, 0x00b0, 0x00b0, 0x00b0, 0x00af, 0x00af, 0x00af, 0x00af, 0x00ae, 0x00ae, 0x00ae, 0x00ae,
0x00ae, 0x00ad, 0x00ad, 0x00ad, 0x00ad, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ac, 0x00ab, 0x00ab, 0x00ab, 0x00ab,
};
#endif
static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
@@ -175,12 +224,6 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
}
__m128i amask = _mm_set1_epi32( 0xFFFFFF );
px0 = _mm_and_si128( px0, amask );
px1 = _mm_and_si128( px1, amask );
px2 = _mm_and_si128( px2, amask );
px3 = _mm_and_si128( px3, amask );
__m128i min0 = _mm_min_epu8( px0, px1 );
__m128i min1 = _mm_min_epu8( px2, px3 );
__m128i min2 = _mm_min_epu8( min0, min1 );
@@ -329,11 +372,10 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
}
uint32x4_t mask = vdupq_n_u32( 0xFFFFFF );
uint8x16_t l0 = vreinterpretq_u8_u32( vandq_u32( mask, px0 ) );
uint8x16_t l1 = vreinterpretq_u8_u32( vandq_u32( mask, px1 ) );
uint8x16_t l2 = vreinterpretq_u8_u32( vandq_u32( mask, px2 ) );
uint8x16_t l3 = vreinterpretq_u8_u32( vandq_u32( mask, px3 ) );
uint8x16_t l0 = vreinterpretq_u8_u32( px0 );
uint8x16_t l1 = vreinterpretq_u8_u32( px1 );
uint8x16_t l2 = vreinterpretq_u8_u32( px2 );
uint8x16_t l3 = vreinterpretq_u8_u32( px3 );
uint8x16_t min0 = vminq_u8( l0, l1 );
uint8x16_t min1 = vminq_u8( l2, l3 );
@@ -412,20 +454,19 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
return uint64_t( ( uint64_t( to565( vmin ) ) << 16 ) | to565( vmax ) | ( uint64_t( vp ) << 32 ) );
# endif
#else
uint32_t ref;
memcpy( &ref, src, 4 );
uint32_t refMask = ref & 0xF8FCF8;
const auto ref = to565( src[0], src[1], src[2] );
auto stmp = src + 4;
for( int i=1; i<16; i++ )
{
uint32_t px;
memcpy( &px, stmp, 4 );
if( ( px & 0xF8FCF8 ) != refMask ) break;
if( to565( stmp[0], stmp[1], stmp[2] ) != ref )
{
break;
}
stmp += 4;
}
if( stmp == src + 64 )
{
return uint64_t( to565( ref ) ) << 16;
return uint64_t( ref ) << 16;
}
uint8_t min[3] = { src[0], src[1], src[2] };
@@ -471,42 +512,6 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
__m256i px2 = _mm256_loadu_si256(((__m256i*)src) + 2);
__m256i px3 = _mm256_loadu_si256(((__m256i*)src) + 3);
__m256i smask = _mm256_set1_epi32( 0xF8FCF8 );
__m256i sd0 = _mm256_and_si256( px0, smask );
__m256i sd1 = _mm256_and_si256( px1, smask );
__m256i sd2 = _mm256_and_si256( px2, smask );
__m256i sd3 = _mm256_and_si256( px3, smask );
__m256i sc = _mm256_shuffle_epi32(sd0, _MM_SHUFFLE(0, 0, 0, 0));
__m256i sc0 = _mm256_cmpeq_epi8( sd0, sc );
__m256i sc1 = _mm256_cmpeq_epi8( sd1, sc );
__m256i sc2 = _mm256_cmpeq_epi8( sd2, sc );
__m256i sc3 = _mm256_cmpeq_epi8( sd3, sc );
__m256i sm0 = _mm256_and_si256( sc0, sc1 );
__m256i sm1 = _mm256_and_si256( sc2, sc3 );
__m256i sm = _mm256_and_si256( sm0, sm1 );
const int64_t solid0 = 1 - _mm_testc_si128( _mm256_castsi256_si128( sm ), _mm_set1_epi32( -1 ) );
const int64_t solid1 = 1 - _mm_testc_si128( _mm256_extracti128_si256( sm, 1 ), _mm_set1_epi32( -1 ) );
if( solid0 + solid1 == 0 )
{
const auto c0 = uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
const auto c1 = uint64_t( to565( src[16], src[17], src[18] ) ) << 16;
memcpy( dst, &c0, 8 );
memcpy( dst+8, &c1, 8 );
dst += 16;
return;
}
__m256i amask = _mm256_set1_epi32( 0xFFFFFF );
px0 = _mm256_and_si256( px0, amask );
px1 = _mm256_and_si256( px1, amask );
px2 = _mm256_and_si256( px2, amask );
px3 = _mm256_and_si256( px3, amask );
__m256i min0 = _mm256_min_epu8( px0, px1 );
__m256i min1 = _mm256_min_epu8( px2, px3 );
__m256i min2 = _mm256_min_epu8( min0, min1 );
@@ -528,8 +533,8 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
__m256i range1 = _mm256_subs_epu8( rmax, rmin );
__m256i range2 = _mm256_sad_epu8( rmax, rmin );
uint16_t vrange0 = DivTable[_mm256_cvtsi256_si32( range2 ) >> 1];
uint16_t vrange1 = DivTable[_mm256_extract_epi16( range2, 8 ) >> 1];
uint16_t vrange0 = DivTableAVX[_mm256_cvtsi256_si32( range2 ) >> 1];
uint16_t vrange1 = DivTableAVX[_mm256_extract_epi16( range2, 8 ) >> 1];
__m256i range00 = _mm256_set1_epi16( vrange0 );
__m256i range = _mm256_inserti128_si256( range00, _mm_set1_epi16( vrange1 ), 1 );
@@ -573,11 +578,7 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
__m256i d0 = _mm256_unpacklo_epi32( mm5, p );
__m256i d1 = _mm256_permute4x64_epi64( d0, _MM_SHUFFLE( 3, 2, 2, 0 ) );
__m128i d2 = _mm256_castsi256_si128( d1 );
__m128i mask = _mm_set_epi64x( 0xFFFF0000 | -solid1, 0xFFFF0000 | -solid0 );
__m128i d3 = _mm_and_si128( d2, mask );
_mm_storeu_si128( (__m128i*)dst, d3 );
_mm_storeu_si128( (__m128i*)dst, _mm256_castsi256_si128( d1 ) );
dst += 16;
}
#endif

View File

@@ -1,7 +1,6 @@
#ifndef __TRACYFASTVECTOR_HPP__
#define __TRACYFASTVECTOR_HPP__
#include <assert.h>
#include <stddef.h>
#include "../common/TracyAlloc.hpp"
@@ -22,7 +21,6 @@ public:
, m_write( m_ptr )
, m_end( m_ptr + capacity )
{
assert( capacity != 0 );
}
FastVector( const FastVector& ) = delete;
@@ -98,11 +96,11 @@ public:
private:
tracy_no_inline void AllocMore()
{
const auto cap = size_t( m_end - m_ptr ) * 2;
const auto size = size_t( m_write - m_ptr );
const auto cap = ( m_end - m_ptr ) * 2;
const auto size = m_write - m_ptr;
T* ptr = (T*)tracy_malloc( sizeof( T ) * cap );
memcpy( ptr, m_ptr, size * sizeof( T ) );
tracy_free_fast( m_ptr );
tracy_free( m_ptr );
m_ptr = ptr;
m_write = m_ptr + size;
m_end = m_ptr + cap;

View File

@@ -11,225 +11,11 @@
namespace tracy
{
class LockableCtx
{
public:
tracy_force_inline LockableCtx( const SourceLocationData* srcloc )
: m_id( GetLockCounter().fetch_add( 1, std::memory_order_relaxed ) )
#ifdef TRACY_ON_DEMAND
, m_lockCount( 0 )
, m_active( false )
#endif
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
MemWrite( &item->lockAnnounce.type, LockType::Lockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
LockableCtx( const LockableCtx& ) = delete;
LockableCtx& operator=( const LockableCtx& ) = delete;
tracy_force_inline ~LockableCtx()
{
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
tracy_force_inline bool BeforeLock()
{
#ifdef TRACY_ON_DEMAND
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = GetProfiler().IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return false;
#endif
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockWait );
MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
return true;
}
tracy_force_inline void AfterLock()
{
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
}
tracy_force_inline void AfterUnlock()
{
#ifdef TRACY_ON_DEMAND
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
if( !m_active.load( std::memory_order_relaxed ) ) return;
if( !GetProfiler().IsConnected() )
{
m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockRelease );
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
MemWrite( &item->lockRelease.id, m_id );
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
}
tracy_force_inline void AfterTryLock( bool acquired )
{
#ifdef TRACY_ON_DEMAND
if( !acquired ) return;
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = GetProfiler().IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return;
#endif
if( acquired )
{
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
}
}
tracy_force_inline void Mark( const SourceLocationData* srcloc )
{
#ifdef TRACY_ON_DEMAND
const auto active = m_active.load( std::memory_order_relaxed );
if( !active ) return;
const auto connected = GetProfiler().IsConnected();
if( !connected )
{
if( active ) m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockMark );
MemWrite( &item->lockMark.thread, GetThreadHandle() );
MemWrite( &item->lockMark.id, m_id );
MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc );
Profiler::QueueSerialFinish();
}
tracy_force_inline void CustomName( const char* name, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, name, size );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockName );
MemWrite( &item->lockNameFat.id, m_id );
MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
MemWrite( &item->lockNameFat.size, (uint16_t)size );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
private:
uint32_t m_id;
#ifdef TRACY_ON_DEMAND
std::atomic<uint32_t> m_lockCount;
std::atomic<bool> m_active;
#endif
};
template<class T>
class Lockable
{
public:
tracy_force_inline Lockable( const SourceLocationData* srcloc )
: m_ctx( srcloc )
{
}
Lockable( const Lockable& ) = delete;
Lockable& operator=( const Lockable& ) = delete;
tracy_force_inline void lock()
{
const auto runAfter = m_ctx.BeforeLock();
m_lockable.lock();
if( runAfter ) m_ctx.AfterLock();
}
tracy_force_inline void unlock()
{
m_lockable.unlock();
m_ctx.AfterUnlock();
}
tracy_force_inline bool try_lock()
{
const auto acquired = m_lockable.try_lock();
m_ctx.AfterTryLock( acquired );
return acquired;
}
tracy_force_inline void Mark( const SourceLocationData* srcloc )
{
m_ctx.Mark( srcloc );
}
tracy_force_inline void CustomName( const char* name, size_t size )
{
m_ctx.CustomName( name, size );
}
private:
T m_lockable;
LockableCtx m_ctx;
};
class SharedLockableCtx
{
public:
tracy_force_inline SharedLockableCtx( const SourceLocationData* srcloc )
: m_id( GetLockCounter().fetch_add( 1, std::memory_order_relaxed ) )
#ifdef TRACY_ON_DEMAND
, m_lockCount( 0 )
@@ -238,34 +24,45 @@ public:
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
auto item = Profiler::QueueSerial();
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
MemWrite( &item->lockAnnounce.type, LockType::SharedLockable );
MemWrite( &item->lockAnnounce.type, LockType::Lockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
}
SharedLockableCtx( const SharedLockableCtx& ) = delete;
SharedLockableCtx& operator=( const SharedLockableCtx& ) = delete;
Lockable( const Lockable& ) = delete;
Lockable& operator=( const Lockable& ) = delete;
tracy_force_inline ~SharedLockableCtx()
~Lockable()
{
auto item = Profiler::QueueSerial();
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
MemWrite( &item->lockTerminate.type, LockType::Lockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline bool BeforeLock()
tracy_force_inline void lock()
{
#ifdef TRACY_ON_DEMAND
bool queue = false;
@@ -277,112 +74,42 @@ public:
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return false;
#endif
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockWait );
MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
return true;
}
tracy_force_inline void AfterLock()
{
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
}
tracy_force_inline void AfterUnlock()
{
#ifdef TRACY_ON_DEMAND
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
if( !m_active.load( std::memory_order_relaxed ) ) return;
if( !GetProfiler().IsConnected() )
if( !queue )
{
m_active.store( false, std::memory_order_relaxed );
m_lockable.lock();
return;
}
#endif
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockRelease );
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
MemWrite( &item->lockRelease.id, m_id );
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
}
tracy_force_inline void AfterTryLock( bool acquired )
{
#ifdef TRACY_ON_DEMAND
if( !acquired ) return;
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = GetProfiler().IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockWait );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::Lockable );
tail.store( magic + 1, std::memory_order_release );
}
if( !queue ) return;
#endif
if( acquired )
m_lockable.lock();
{
auto item = Profiler::QueueSerial();
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
}
}
tracy_force_inline bool BeforeLockShared()
tracy_force_inline void unlock()
{
#ifdef TRACY_ON_DEMAND
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = GetProfiler().IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return false;
#endif
m_lockable.unlock();
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockSharedWait );
MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
return true;
}
tracy_force_inline void AfterLockShared()
{
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockSharedObtain );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
}
tracy_force_inline void AfterUnlockShared()
{
#ifdef TRACY_ON_DEMAND
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
if( !m_active.load( std::memory_order_relaxed ) ) return;
@@ -393,18 +120,22 @@ public:
}
#endif
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockSharedRelease );
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockRelease );
MemWrite( &item->lockRelease.id, m_id );
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline void AfterTryLockShared( bool acquired )
tracy_force_inline bool try_lock()
{
const auto ret = m_lockable.try_lock();
#ifdef TRACY_ON_DEMAND
if( !acquired ) return;
if( !ret ) return ret;
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
@@ -415,18 +146,22 @@ public:
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return;
if( !queue ) return ret;
#endif
if( acquired )
if( ret )
{
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockSharedObtain );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
}
return ret;
}
tracy_force_inline void Mark( const SourceLocationData* srcloc )
@@ -442,31 +177,18 @@ public:
}
#endif
auto item = Profiler::QueueSerial();
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockMark );
MemWrite( &item->lockMark.thread, GetThreadHandle() );
MemWrite( &item->lockMark.id, m_id );
MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc );
Profiler::QueueSerialFinish();
}
tracy_force_inline void CustomName( const char* name, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, name, size );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockName );
MemWrite( &item->lockNameFat.id, m_id );
MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
MemWrite( &item->lockNameFat.size, (uint16_t)size );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
tail.store( magic + 1, std::memory_order_release );
}
private:
T m_lockable;
uint32_t m_id;
#ifdef TRACY_ON_DEMAND
@@ -475,74 +197,296 @@ private:
#endif
};
template<class T>
class SharedLockable
{
public:
tracy_force_inline SharedLockable( const SourceLocationData* srcloc )
: m_ctx( srcloc )
: m_id( GetLockCounter().fetch_add( 1, std::memory_order_relaxed ) )
#ifdef TRACY_ON_DEMAND
, m_lockCount( 0 )
, m_active( false )
#endif
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
MemWrite( &item->lockAnnounce.type, LockType::SharedLockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
}
SharedLockable( const SharedLockable& ) = delete;
SharedLockable& operator=( const SharedLockable& ) = delete;
~SharedLockable()
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
MemWrite( &item->lockTerminate.type, LockType::SharedLockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline void lock()
{
const auto runAfter = m_ctx.BeforeLock();
#ifdef TRACY_ON_DEMAND
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = GetProfiler().IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue )
{
m_lockable.lock();
return;
}
#endif
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockWait );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::SharedLockable );
tail.store( magic + 1, std::memory_order_release );
}
m_lockable.lock();
if( runAfter ) m_ctx.AfterLock();
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
}
tracy_force_inline void unlock()
{
m_lockable.unlock();
m_ctx.AfterUnlock();
#ifdef TRACY_ON_DEMAND
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
if( !m_active.load( std::memory_order_relaxed ) ) return;
if( !GetProfiler().IsConnected() )
{
m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockRelease );
MemWrite( &item->lockRelease.id, m_id );
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline bool try_lock()
{
const auto acquired = m_lockable.try_lock();
m_ctx.AfterTryLock( acquired );
return acquired;
const auto ret = m_lockable.try_lock();
#ifdef TRACY_ON_DEMAND
if( !ret ) return ret;
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = GetProfiler().IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return ret;
#endif
if( ret )
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
return ret;
}
tracy_force_inline void lock_shared()
{
const auto runAfter = m_ctx.BeforeLockShared();
#ifdef TRACY_ON_DEMAND
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = GetProfiler().IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue )
{
m_lockable.lock_shared();
return;
}
#endif
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockSharedWait );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::SharedLockable );
tail.store( magic + 1, std::memory_order_release );
}
m_lockable.lock_shared();
if( runAfter ) m_ctx.AfterLockShared();
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockSharedObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
}
tracy_force_inline void unlock_shared()
{
m_lockable.unlock_shared();
m_ctx.AfterUnlockShared();
#ifdef TRACY_ON_DEMAND
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
if( !m_active.load( std::memory_order_relaxed ) ) return;
if( !GetProfiler().IsConnected() )
{
m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockSharedRelease );
MemWrite( &item->lockRelease.id, m_id );
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline bool try_lock_shared()
{
const auto acquired = m_lockable.try_lock_shared();
m_ctx.AfterTryLockShared( acquired );
return acquired;
const auto ret = m_lockable.try_lock_shared();
#ifdef TRACY_ON_DEMAND
if( !ret ) return ret;
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = GetProfiler().IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return ret;
#endif
if( ret )
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockSharedObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
return ret;
}
tracy_force_inline void Mark( const SourceLocationData* srcloc )
{
m_ctx.Mark( srcloc );
}
#ifdef TRACY_ON_DEMAND
const auto active = m_active.load( std::memory_order_relaxed );
if( !active ) return;
const auto connected = GetProfiler().IsConnected();
if( !connected )
{
if( active ) m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
tracy_force_inline void CustomName( const char* name, size_t size )
{
m_ctx.CustomName( name, size );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockMark );
MemWrite( &item->lockMark.id, m_id );
MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
}
private:
T m_lockable;
SharedLockableCtx m_ctx;
uint32_t m_id;
#ifdef TRACY_ON_DEMAND
std::atomic<uint32_t> m_lockCount;
std::atomic<bool> m_active;
#endif
};
}
};
#endif

40
client/TracyMesh.hpp Normal file
View File

@@ -0,0 +1,40 @@
#ifndef __TRACYMESH_HPP__
#define __TRACYMESH_HPP__
#include "TracyProfiler.hpp"
namespace tracy
{
namespace mesh
{
static void MeshTri( float x0, float y0, float x1, float y1, float x2, float y2 )
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::MeshTri );
MemWrite( &item->meshTri.x0, x0 );
MemWrite( &item->meshTri.y0, y0 );
MemWrite( &item->meshTri.x1, x1 );
MemWrite( &item->meshTri.y1, y1 );
MemWrite( &item->meshTri.x2, x2 );
MemWrite( &item->meshTri.y2, y2 );
tail.store( magic + 1, std::memory_order_release );
}
static void MeshEnd()
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::MeshEnd );
tail.store( magic + 1, std::memory_order_release );
}
}
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,141 +0,0 @@
#include <atomic>
#include <assert.h>
#include <errno.h>
#include <linux/perf_event.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "TracyDebug.hpp"
namespace tracy
{
class RingBuffer
{
public:
RingBuffer( unsigned int size, int fd, int id, int cpu = -1 )
: m_size( size )
, m_id( id )
, m_cpu( cpu )
, m_fd( fd )
{
const auto pageSize = uint32_t( getpagesize() );
assert( size >= pageSize );
assert( __builtin_popcount( size ) == 1 );
m_mapSize = size + pageSize;
auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
if( mapAddr == MAP_FAILED )
{
TracyDebug( "mmap failed: errno %i (%s)\n", errno, strerror( errno ) );
m_fd = 0;
m_metadata = nullptr;
close( fd );
return;
}
m_metadata = (perf_event_mmap_page*)mapAddr;
assert( m_metadata->data_offset == pageSize );
m_buffer = ((char*)mapAddr) + pageSize;
m_tail = m_metadata->data_tail;
}
~RingBuffer()
{
if( m_metadata ) munmap( m_metadata, m_mapSize );
if( m_fd ) close( m_fd );
}
RingBuffer( const RingBuffer& ) = delete;
RingBuffer& operator=( const RingBuffer& ) = delete;
RingBuffer( RingBuffer&& other )
{
memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) );
m_metadata = nullptr;
m_fd = 0;
}
RingBuffer& operator=( RingBuffer&& other )
{
memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) );
m_metadata = nullptr;
m_fd = 0;
return *this;
}
bool IsValid() const { return m_metadata != nullptr; }
int GetId() const { return m_id; }
int GetCpu() const { return m_cpu; }
void Enable()
{
ioctl( m_fd, PERF_EVENT_IOC_ENABLE, 0 );
}
void Read( void* dst, uint64_t offset, uint64_t cnt )
{
const auto size = m_size;
auto src = ( m_tail + offset ) % size;
if( src + cnt <= size )
{
memcpy( dst, m_buffer + src, cnt );
}
else
{
const auto s0 = size - src;
const auto buf = m_buffer;
memcpy( dst, buf + src, s0 );
memcpy( (char*)dst + s0, buf, cnt - s0 );
}
}
void Advance( uint64_t cnt )
{
m_tail += cnt;
StoreTail();
}
bool CheckTscCaps() const
{
return m_metadata->cap_user_time_zero;
}
int64_t ConvertTimeToTsc( int64_t timestamp ) const
{
if( !m_metadata->cap_user_time_zero ) return 0;
const auto time = timestamp - m_metadata->time_zero;
const auto quot = time / m_metadata->time_mult;
const auto rem = time % m_metadata->time_mult;
return ( quot << m_metadata->time_shift ) + ( rem << m_metadata->time_shift ) / m_metadata->time_mult;
}
uint64_t LoadHead() const
{
return std::atomic_load_explicit( (const volatile std::atomic<uint64_t>*)&m_metadata->data_head, std::memory_order_acquire );
}
uint64_t GetTail() const
{
return m_tail;
}
private:
void StoreTail()
{
std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, m_tail, std::memory_order_release );
}
unsigned int m_size;
uint64_t m_tail;
char* m_buffer;
int m_id;
int m_cpu;
perf_event_mmap_page* m_metadata;
size_t m_mapSize;
int m_fd;
};
}

View File

@@ -1,7 +1,6 @@
#ifndef __TRACYSCOPED_HPP__
#define __TRACYSCOPED_HPP__
#include <limits>
#include <stdint.h>
#include <string.h>
@@ -16,83 +15,56 @@ namespace tracy
class ScopedZone
{
public:
ScopedZone( const ScopedZone& ) = delete;
ScopedZone( ScopedZone&& ) = delete;
ScopedZone& operator=( const ScopedZone& ) = delete;
ScopedZone& operator=( ScopedZone&& ) = delete;
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
, m_connectionId( GetProfiler().ConnectionId() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
TracyQueuePrepare( QueueType::ZoneBegin );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyQueueCommit( zoneBeginThread );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
, m_connectionId( GetProfiler().ConnectionId() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginCallstack );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
GetProfiler().SendCallstack( depth );
TracyQueuePrepare( QueueType::ZoneBeginCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyQueueCommit( zoneBeginThread );
}
tail.store( magic + 1, std::memory_order_release );
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
GetProfiler().SendCallstack( depth );
TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyQueueCommit( zoneBeginThread );
}
tracy_force_inline ~ScopedZone()
@@ -101,67 +73,57 @@ public:
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
TracyQueuePrepare( QueueType::ZoneEnd );
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
TracyQueueCommit( zoneEndThread );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneEnd.time, Profiler::GetTime( item->zoneEnd.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneEnd.cpu, cpu );
#endif
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline void Text( const char* txt, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
auto ptr = (char*)tracy_malloc( size );
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
TracyQueuePrepare( QueueType::ZoneText );
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyQueueCommit( zoneTextFatThread );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneText );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline void Name( const char* txt, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
auto ptr = (char*)tracy_malloc( size );
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
TracyQueuePrepare( QueueType::ZoneName );
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyQueueCommit( zoneTextFatThread );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneName );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline void Color( uint32_t color )
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
TracyQueuePrepare( QueueType::ZoneColor );
MemWrite( &item->zoneColor.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->zoneColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->zoneColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
TracyQueueCommit( zoneColorThread );
}
tracy_force_inline void Value( uint64_t value )
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
TracyQueuePrepare( QueueType::ZoneValue );
MemWrite( &item->zoneValue.value, value );
TracyQueueCommit( zoneValueThread );
}
tracy_force_inline bool IsActive() const { return m_active; }
private:
const bool m_active;

View File

@@ -1,50 +0,0 @@
#ifndef __TRACYSTRINGHELPERS_HPP__
#define __TRACYSTRINGHELPERS_HPP__
#include <assert.h>
#include <string.h>
#include "../common/TracyAlloc.hpp"
namespace tracy
{
static inline char* CopyString( const char* src, size_t sz )
{
assert( strlen( src ) == sz );
auto dst = (char*)tracy_malloc( sz + 1 );
memcpy( dst, src, sz );
dst[sz] = '\0';
return dst;
}
static inline char* CopyString( const char* src )
{
const auto sz = strlen( src );
auto dst = (char*)tracy_malloc( sz + 1 );
memcpy( dst, src, sz );
dst[sz] = '\0';
return dst;
}
static inline char* CopyStringFast( const char* src, size_t sz )
{
assert( strlen( src ) == sz );
auto dst = (char*)tracy_malloc_fast( sz + 1 );
memcpy( dst, src, sz );
dst[sz] = '\0';
return dst;
}
static inline char* CopyStringFast( const char* src )
{
const auto sz = strlen( src );
auto dst = (char*)tracy_malloc_fast( sz + 1 );
memcpy( dst, src, sz );
dst[sz] = '\0';
return dst;
}
}
#endif

View File

@@ -2,23 +2,21 @@
#ifdef TRACY_HAS_SYSTIME
# if defined _WIN32
# if defined _WIN32 || defined __CYGWIN__
# include <windows.h>
# elif defined __linux__
# include <assert.h>
# include <stdio.h>
# include <inttypes.h>
# elif defined __APPLE__
# include <mach/mach_host.h>
# include <mach/host_info.h>
# elif defined BSD
# include <sys/types.h>
# include <sys/sysctl.h>
# endif
namespace tracy
{
# if defined _WIN32
# if defined _WIN32 || defined __CYGWIN__
static inline uint64_t ConvertTime( const FILETIME& t )
{
@@ -47,12 +45,9 @@ void SysTime::ReadTimes()
FILE* f = fopen( "/proc/stat", "r" );
if( f )
{
int read = fscanf( f, "cpu %" PRIu64 " %" PRIu64 " %" PRIu64" %" PRIu64, &user, &nice, &system, &idle );
fscanf( f, "cpu %" PRIu64 " %" PRIu64 " %" PRIu64" %" PRIu64, &user, &nice, &system, &idle );
fclose( f );
if (read == 4)
{
used = user + nice + system;
}
used = user + nice + system;
}
}
@@ -62,22 +57,11 @@ void SysTime::ReadTimes()
{
host_cpu_load_info_data_t info;
mach_msg_type_number_t cnt = HOST_CPU_LOAD_INFO_COUNT;
host_statistics( mach_host_self(), HOST_CPU_LOAD_INFO, reinterpret_cast<host_info_t>( &info ), &cnt );
host_statistics( mach_host_self(), HOST_CPU_LOAD_INFO, reinterpret_cast<host_info_t>( &info ), &cnt );
used = info.cpu_ticks[CPU_STATE_USER] + info.cpu_ticks[CPU_STATE_NICE] + info.cpu_ticks[CPU_STATE_SYSTEM];
idle = info.cpu_ticks[CPU_STATE_IDLE];
}
# elif defined BSD
void SysTime::ReadTimes()
{
u_long data[5];
size_t sz = sizeof( data );
sysctlbyname( "kern.cp_time", &data, &sz, nullptr, 0 );
used = data[0] + data[1] + data[2] + data[3];
idle = data[4];
}
#endif
SysTime::SysTime()
@@ -95,9 +79,9 @@ float SysTime::Get()
const auto diffIdle = idle - oldIdle;
const auto diffUsed = used - oldUsed;
#if defined _WIN32
#if defined _WIN32 || defined __CYGWIN__
return diffUsed == 0 ? -1 : ( diffUsed - diffIdle ) * 100.f / diffUsed;
#elif defined __linux__ || defined __APPLE__ || defined BSD
#elif defined __linux__ || defined __APPLE__
const auto total = diffUsed + diffIdle;
return total == 0 ? -1 : diffUsed * 100.f / total;
#endif

View File

@@ -1,13 +1,7 @@
#ifndef __TRACYSYSTIME_HPP__
#define __TRACYSYSTIME_HPP__
#if defined _WIN32 || defined __linux__ || defined __APPLE__
# define TRACY_HAS_SYSTIME
#else
# include <sys/param.h>
#endif
#ifdef BSD
#if defined _WIN32 || defined __CYGWIN__ || defined __linux__ || defined __APPLE__
# define TRACY_HAS_SYSTIME
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -1,28 +0,0 @@
#ifndef __TRACYSYSTRACE_HPP__
#define __TRACYSYSTRACE_HPP__
#if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __linux__ )
# include "../common/TracyUwp.hpp"
# ifndef TRACY_UWP
# define TRACY_HAS_SYSTEM_TRACING
# endif
#endif
#ifdef TRACY_HAS_SYSTEM_TRACING
#include <stdint.h>
namespace tracy
{
bool SysTraceStart( int64_t& samplingPeriod );
void SysTraceStop();
void SysTraceWorker( void* ptr );
void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name );
}
#endif
#endif

View File

@@ -1,36 +1,16 @@
#ifndef __TRACYTHREAD_HPP__
#define __TRACYTHREAD_HPP__
#if defined _WIN32
#ifdef _MSC_VER
# include <windows.h>
#else
# include <pthread.h>
#endif
#ifdef TRACY_MANUAL_LIFETIME
# include "tracy_rpmalloc.hpp"
#endif
namespace tracy
{
#ifdef TRACY_MANUAL_LIFETIME
extern thread_local bool RpThreadInitDone;
#endif
class ThreadExitHandler
{
public:
~ThreadExitHandler()
{
#ifdef TRACY_MANUAL_LIFETIME
rpmalloc_thread_finalize();
RpThreadInitDone = false;
#endif
}
};
#if defined _WIN32
#ifdef _MSC_VER
class Thread
{

View File

@@ -1,148 +0,0 @@
/*
Copyright (c) 2020 Erik Rigtorp <erik@rigtorp.se>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#pragma once
#include <atomic>
#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <type_traits> // std::enable_if, std::is_*_constructible
#include "../common/TracyAlloc.hpp"
#if defined (_MSC_VER)
#pragma warning(push)
#pragma warning(disable:4324)
#endif
namespace tracy {
template <typename T> class SPSCQueue {
public:
explicit SPSCQueue(const size_t capacity)
: capacity_(capacity) {
capacity_++; // Needs one slack element
slots_ = (T*)tracy_malloc(sizeof(T) * (capacity_ + 2 * kPadding));
static_assert(alignof(SPSCQueue<T>) == kCacheLineSize, "");
static_assert(sizeof(SPSCQueue<T>) >= 3 * kCacheLineSize, "");
assert(reinterpret_cast<char *>(&readIdx_) -
reinterpret_cast<char *>(&writeIdx_) >=
static_cast<std::ptrdiff_t>(kCacheLineSize));
}
~SPSCQueue() {
while (front()) {
pop();
}
tracy_free(slots_);
}
// non-copyable and non-movable
SPSCQueue(const SPSCQueue &) = delete;
SPSCQueue &operator=(const SPSCQueue &) = delete;
template <typename... Args>
void emplace(Args &&...args) noexcept(
std::is_nothrow_constructible<T, Args &&...>::value) {
static_assert(std::is_constructible<T, Args &&...>::value,
"T must be constructible with Args&&...");
auto const writeIdx = writeIdx_.load(std::memory_order_relaxed);
auto nextWriteIdx = writeIdx + 1;
if (nextWriteIdx == capacity_) {
nextWriteIdx = 0;
}
while (nextWriteIdx == readIdxCache_) {
readIdxCache_ = readIdx_.load(std::memory_order_acquire);
}
new (&slots_[writeIdx + kPadding]) T(std::forward<Args>(args)...);
writeIdx_.store(nextWriteIdx, std::memory_order_release);
}
T *front() noexcept {
auto const readIdx = readIdx_.load(std::memory_order_relaxed);
if (readIdx == writeIdxCache_) {
writeIdxCache_ = writeIdx_.load(std::memory_order_acquire);
if (writeIdxCache_ == readIdx) {
return nullptr;
}
}
return &slots_[readIdx + kPadding];
}
void pop() noexcept {
static_assert(std::is_nothrow_destructible<T>::value,
"T must be nothrow destructible");
auto const readIdx = readIdx_.load(std::memory_order_relaxed);
assert(writeIdx_.load(std::memory_order_acquire) != readIdx);
slots_[readIdx + kPadding].~T();
auto nextReadIdx = readIdx + 1;
if (nextReadIdx == capacity_) {
nextReadIdx = 0;
}
readIdx_.store(nextReadIdx, std::memory_order_release);
}
size_t size() const noexcept {
std::ptrdiff_t diff = writeIdx_.load(std::memory_order_acquire) -
readIdx_.load(std::memory_order_acquire);
if (diff < 0) {
diff += capacity_;
}
return static_cast<size_t>(diff);
}
bool empty() const noexcept {
return writeIdx_.load(std::memory_order_acquire) ==
readIdx_.load(std::memory_order_acquire);
}
size_t capacity() const noexcept { return capacity_ - 1; }
private:
static constexpr size_t kCacheLineSize = 64;
// Padding to avoid false sharing between slots_ and adjacent allocations
static constexpr size_t kPadding = (kCacheLineSize - 1) / sizeof(T) + 1;
private:
size_t capacity_;
T *slots_;
// Align to cache line size in order to avoid false sharing
// readIdxCache_ and writeIdxCache_ is used to reduce the amount of cache
// coherency traffic
alignas(kCacheLineSize) std::atomic<size_t> writeIdx_ = {0};
alignas(kCacheLineSize) size_t readIdxCache_ = 0;
alignas(kCacheLineSize) std::atomic<size_t> readIdx_ = {0};
alignas(kCacheLineSize) size_t writeIdxCache_ = 0;
// Padding to avoid adjacent allocations to share cache line with
// writeIdxCache_
char padding_[kCacheLineSize - sizeof(SPSCQueue<T>::writeIdxCache_)];
};
} // namespace rigtorp
#if defined (_MSC_VER)
#pragma warning(pop)
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,9 +1,9 @@
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson / Rampant Pixels
*
* This library provides a cross-platform lock free thread caching malloc implementation in C11.
* The latest source code is always available at
*
* https://github.com/mjansson/rpmalloc
* https://github.com/rampantpixels/rpmalloc
*
* This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
*
@@ -12,113 +12,53 @@
#pragma once
#include <stddef.h>
#include "../common/TracyApi.h"
namespace tracy
{
#if defined(__clang__) || defined(__GNUC__)
# define RPMALLOC_EXPORT __attribute__((visibility("default")))
# define RPMALLOC_ALLOCATOR
# define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
# if defined(__clang_major__) && (__clang_major__ < 4)
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
# else
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) __attribute__((alloc_size(count, size)))
# endif
# define RPMALLOC_ATTRIBUTE __attribute__((__malloc__))
# define RPMALLOC_RESTRICT
# define RPMALLOC_CDECL
#elif defined(_MSC_VER)
# define RPMALLOC_EXPORT
# define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
# define RPMALLOC_ATTRIB_MALLOC
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
# define RPMALLOC_ATTRIBUTE
# define RPMALLOC_RESTRICT __declspec(restrict)
# define RPMALLOC_CDECL __cdecl
#else
# define RPMALLOC_EXPORT
# define RPMALLOC_ALLOCATOR
# define RPMALLOC_ATTRIB_MALLOC
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
# define RPMALLOC_ATTRIBUTE
# define RPMALLOC_RESTRICT
# define RPMALLOC_CDECL
#endif
//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes
#ifndef RPMALLOC_CONFIGURABLE
#define RPMALLOC_CONFIGURABLE 0
#endif
//! Flag to rpaligned_realloc to not preserve content in reallocation
#define RPMALLOC_NO_PRESERVE 1
typedef struct rpmalloc_global_statistics_t {
//! Current amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
//! Current amount of virtual memory mapped (only if ENABLE_STATISTICS=1)
size_t mapped;
//! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
size_t mapped_peak;
//! Current amount of memory in global caches for small and medium sizes (<32KiB)
//! Current amount of memory in global caches for small and medium sizes (<64KiB)
size_t cached;
//! Current amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
size_t huge_alloc;
//! Peak amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
size_t huge_alloc_peak;
//! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
//! Total amount of memory mapped (only if ENABLE_STATISTICS=1)
size_t mapped_total;
//! Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1)
//! Total amount of memory unmapped (only if ENABLE_STATISTICS=1)
size_t unmapped_total;
} rpmalloc_global_statistics_t;
typedef struct rpmalloc_thread_statistics_t {
//! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
//! Current number of bytes available for allocation from active spans
size_t active;
//! Current number of bytes available in thread size class caches
size_t sizecache;
//! Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
//! Current number of bytes available in thread span caches
size_t spancache;
//! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
//! Current number of bytes in pending deferred deallocations
size_t deferred;
//! Total number of bytes transitioned from thread cache to global cache
size_t thread_to_global;
//! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
//! Total number of bytes transitioned from global cache to thread cache
size_t global_to_thread;
//! Per span count statistics (only if ENABLE_STATISTICS=1)
struct {
//! Currently used number of spans
size_t current;
//! High water mark of spans used
size_t peak;
//! Number of spans transitioned to global cache
size_t to_global;
//! Number of spans transitioned from global cache
size_t from_global;
//! Number of spans transitioned to thread cache
size_t to_cache;
//! Number of spans transitioned from thread cache
size_t from_cache;
//! Number of spans transitioned to reserved state
size_t to_reserved;
//! Number of spans transitioned from reserved state
size_t from_reserved;
//! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
size_t map_calls;
} span_use[32];
//! Per size class statistics (only if ENABLE_STATISTICS=1)
struct {
//! Current number of allocations
size_t alloc_current;
//! Peak number of allocations
size_t alloc_peak;
//! Total number of allocations
size_t alloc_total;
//! Total number of frees
size_t free_total;
//! Number of spans transitioned to cache
size_t spans_to_cache;
//! Number of spans transitioned from cache
size_t spans_from_cache;
//! Number of spans transitioned from reserved state
size_t spans_from_reserved;
//! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
size_t map_calls;
} size_use[128];
} rpmalloc_thread_statistics_t;
typedef struct rpmalloc_config_t {
@@ -129,133 +69,85 @@ typedef struct rpmalloc_config_t {
// actual start of the memory region due to this alignment. The alignment offset
// will be passed to the memory unmap function. The alignment offset MUST NOT be
// larger than 65535 (storable in an uint16_t), if it is you must use natural
// alignment to shift it into 16 bits. If you set a memory_map function, you
// must also set a memory_unmap function or else the default implementation will
// be used for both.
// alignment to shift it into 16 bits.
void* (*memory_map)(size_t size, size_t* offset);
//! Unmap the memory pages starting at address and spanning the given number of bytes.
// If release is set to non-zero, the unmap is for an entire span range as returned by
// a previous call to memory_map and that the entire range should be released. The
// release argument holds the size of the entire span range. If release is set to 0,
// the unmap is a partial decommit of a subset of the mapped memory range.
// If you set a memory_unmap function, you must also set a memory_map function or
// else the default implementation will be used for both.
void (*memory_unmap)(void* address, size_t size, size_t offset, size_t release);
//! Size of memory pages. The page size MUST be a power of two. All memory mapping
// If release is set to 1, the unmap is for an entire span range as returned by
// a previous call to memory_map and that the entire range should be released.
// If release is set to 0, the unmap is a partial decommit of a subset of the mapped
// memory range.
void (*memory_unmap)(void* address, size_t size, size_t offset, int release);
//! Size of memory pages. The page size MUST be a power of two in [512,16384] range
// (2^9 to 2^14) unless 0 - set to 0 to use system page size. All memory mapping
// requests to memory_map will be made with size set to a multiple of the page size.
// Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system page size is used.
size_t page_size;
//! Size of a span of memory blocks. MUST be a power of two, and in [4096,262144]
// range (unless 0 - set to 0 to use the default span size). Used if RPMALLOC_CONFIGURABLE
// is defined to 1.
//! Size of a span of memory pages. MUST be a multiple of page size, and in [4096,262144]
// range (unless 0 - set to 0 to use the default span size).
size_t span_size;
//! Number of spans to map at each request to map new virtual memory blocks. This can
// be used to minimize the system call overhead at the cost of virtual memory address
// space. The extra mapped pages will not be written until actually used, so physical
// committed memory should not be affected in the default implementation. Will be
// aligned to a multiple of spans that match memory page size in case of huge pages.
// committed memory should not be affected in the default implementation.
size_t span_map_count;
//! Enable use of large/huge pages. If this flag is set to non-zero and page size is
// zero, the allocator will try to enable huge pages and auto detect the configuration.
// If this is set to non-zero and page_size is also non-zero, the allocator will
// assume huge pages have been configured and enabled prior to initializing the
// allocator.
// For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
// For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
int enable_huge_pages;
//! Debug callback if memory guards are enabled. Called if a memory overwrite is detected
void (*memory_overwrite)(void* address);
} rpmalloc_config_t;
//! Initialize allocator with default configuration
TRACY_API int
extern int
rpmalloc_initialize(void);
//! Initialize allocator with given configuration
RPMALLOC_EXPORT int
extern int
rpmalloc_initialize_config(const rpmalloc_config_t* config);
//! Get allocator configuration
RPMALLOC_EXPORT const rpmalloc_config_t*
extern const rpmalloc_config_t*
rpmalloc_config(void);
//! Finalize allocator
TRACY_API void
extern void
rpmalloc_finalize(void);
//! Initialize allocator for calling thread
TRACY_API void
void
rpmalloc_thread_initialize(void);
//! Finalize allocator for calling thread
TRACY_API void
extern void
rpmalloc_thread_finalize(void);
//! Perform deferred deallocations pending for the calling thread heap
RPMALLOC_EXPORT void
extern void
rpmalloc_thread_collect(void);
//! Query if allocator is initialized for calling thread
RPMALLOC_EXPORT int
extern int
rpmalloc_is_thread_initialized(void);
//! Get per-thread statistics
RPMALLOC_EXPORT void
extern void
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats);
//! Get global statistics
RPMALLOC_EXPORT void
extern void
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats);
//! Dump all statistics in human readable format to file (should be a FILE*)
RPMALLOC_EXPORT void
rpmalloc_dump_statistics(void* file);
TRACY_API RPMALLOC_RESTRICT void*
rpmalloc(size_t size) RPMALLOC_ATTRIBUTE;
//! Allocate a memory block of at least the given size
TRACY_API RPMALLOC_ALLOCATOR void*
rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
//! Free the given memory block
TRACY_API void
rpfree(void* ptr);
//! Allocate a memory block of at least the given size and zero initialize it
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
extern RPMALLOC_RESTRICT void*
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIBUTE;
//! Reallocate the given block to at least the given size
TRACY_API RPMALLOC_ALLOCATOR void*
rprealloc(void* ptr, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
extern void*
rprealloc(void* ptr, size_t size);
//! Reallocate the given block to at least the given size and alignment,
// with optional control flags (see RPMALLOC_NO_PRESERVE).
// Alignment must be a power of two and a multiple of sizeof(void*),
// and should ideally be less than memory page size. A caveat of rpmalloc
// internals is that this must also be strictly less than the span size (default 64KiB)
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);
extern void*
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags);
//! Allocate a memory block of at least the given size and alignment.
// Alignment must be a power of two and a multiple of sizeof(void*),
// and should ideally be less than memory page size. A caveat of rpmalloc
// internals is that this must also be strictly less than the span size (default 64KiB)
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
extern RPMALLOC_RESTRICT void*
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
//! Allocate a memory block of at least the given size and alignment.
// Alignment must be a power of two and a multiple of sizeof(void*),
// and should ideally be less than memory page size. A caveat of rpmalloc
// internals is that this must also be strictly less than the span size (default 64KiB)
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void*
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
extern RPMALLOC_RESTRICT void*
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
//! Allocate a memory block of at least the given size and alignment.
// Alignment must be a power of two and a multiple of sizeof(void*),
// and should ideally be less than memory page size. A caveat of rpmalloc
// internals is that this must also be strictly less than the span size (default 64KiB)
RPMALLOC_EXPORT int
extern int
rpposix_memalign(void **memptr, size_t alignment, size_t size);
//! Query the usable size of the given memory block (from given pointer to the end of block)
RPMALLOC_EXPORT size_t
extern size_t
rpmalloc_usable_size(void* ptr);
}

View File

@@ -1,33 +1,17 @@
#ifndef __TRACYALLOC_HPP__
#define __TRACYALLOC_HPP__
#include <stdlib.h>
#include <cstdlib>
#ifdef TRACY_ENABLE
# include "TracyApi.h"
# include "TracyForceInline.hpp"
# include "../client/tracy_rpmalloc.hpp"
#endif
namespace tracy
{
#ifdef TRACY_ENABLE
TRACY_API void InitRpmalloc();
#endif
static inline void* tracy_malloc( size_t size )
{
#ifdef TRACY_ENABLE
InitRpmalloc();
return rpmalloc( size );
#else
return malloc( size );
#endif
}
static inline void* tracy_malloc_fast( size_t size )
{
#ifdef TRACY_ENABLE
return rpmalloc( size );
#else
@@ -38,32 +22,12 @@ static inline void* tracy_malloc_fast( size_t size )
static inline void tracy_free( void* ptr )
{
#ifdef TRACY_ENABLE
InitRpmalloc();
rpfree( ptr );
#else
free( ptr );
#endif
}
static inline void tracy_free_fast( void* ptr )
{
#ifdef TRACY_ENABLE
rpfree( ptr );
#else
free( ptr );
#endif
}
static inline void* tracy_realloc( void* ptr, size_t size )
{
#ifdef TRACY_ENABLE
InitRpmalloc();
return rprealloc( ptr, size );
#else
return realloc( ptr, size );
#endif
}
}
#endif

View File

@@ -1,16 +1,14 @@
#ifndef __TRACYAPI_H__
#define __TRACYAPI_H__
#if defined _WIN32
# if defined TRACY_EXPORTS
# define TRACY_API __declspec(dllexport)
# elif defined TRACY_IMPORTS
# define TRACY_API __declspec(dllimport)
# else
# define TRACY_API
# endif
#else
# define TRACY_API __attribute__((visibility("default")))
#endif
#endif // __TRACYAPI_H__
#ifndef __TRACYAPI_H__
#define __TRACYAPI_H__
#ifdef _WIN32
# if defined TRACY_IMPORTS
# define TRACY_API __declspec(dllimport)
# else
# define TRACY_API __declspec(dllexport)
# endif
#else
# define TRACY_API __attribute__((visibility("default")))
#endif
#endif // __TRACYAPI_H__

View File

@@ -10,6 +10,15 @@ namespace tracy
using TracyMutex = std::shared_mutex;
}
#elif defined __CYGWIN__
#include "tracy_benaphore.h"
namespace tracy
{
using TracyMutex = NonRecursiveBenaphore;
}
#else
#include <mutex>

View File

@@ -4,18 +4,18 @@
#include <limits>
#include <stdint.h>
#include "../common/tracy_lz4.hpp"
namespace tracy
{
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 57 };
enum : uint16_t { BroadcastVersion = 2 };
enum : uint32_t { ProtocolVersion = 15 };
enum : uint32_t { BroadcastVersion = 0 };
using lz4sz_t = uint32_t;
enum { TargetFrameSize = 256 * 1024 };
enum { LZ4Size = Lz4CompressBound( TargetFrameSize ) };
enum { LZ4Size = LZ4_COMPRESSBOUND( TargetFrameSize ) };
static_assert( LZ4Size <= std::numeric_limits<lz4sz_t>::max(), "LZ4Size greater than lz4sz_t" );
static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" );
@@ -36,7 +36,6 @@ enum { WelcomeMessageHostInfoSize = 1024 };
#pragma pack( 1 )
// Must increase left query space after handling!
enum ServerQuery : uint8_t
{
ServerQueryTerminate,
@@ -44,53 +43,20 @@ enum ServerQuery : uint8_t
ServerQueryThreadString,
ServerQuerySourceLocation,
ServerQueryPlotName,
ServerQueryFrameName,
ServerQueryParameter,
ServerQueryFiberName,
// Items above are high priority. Split order must be preserved. See IsQueryPrio().
ServerQueryDisconnect,
ServerQueryCallstackFrame,
ServerQueryExternalName,
ServerQuerySymbol,
ServerQuerySymbolCode,
ServerQueryCodeLocation,
ServerQuerySourceCode,
ServerQueryDataTransfer,
ServerQueryDataTransferPart
ServerQueryFrameName,
ServerQueryDisconnect
};
struct ServerQueryPacket
{
ServerQuery type;
uint64_t ptr;
uint32_t extra;
};
enum { ServerQueryPacketSize = sizeof( ServerQueryPacket ) };
enum CpuArchitecture : uint8_t
{
CpuArchUnknown,
CpuArchX86,
CpuArchX64,
CpuArchArm32,
CpuArchArm64
};
struct WelcomeFlag
{
enum _t : uint8_t
{
OnDemand = 1 << 0,
IsApple = 1 << 1,
CodeTransfer = 1 << 2,
CombineSamples = 1 << 3,
IdentifySamples = 1 << 4,
};
};
struct WelcomeMessage
{
double timerMul;
@@ -99,13 +65,8 @@ struct WelcomeMessage
uint64_t delay;
uint64_t resolution;
uint64_t epoch;
uint64_t exectime;
uint64_t pid;
int64_t samplingPeriod;
uint8_t flags;
uint8_t cpuArch;
char cpuManufacturer[12];
uint32_t cpuId;
uint8_t onDemand;
uint8_t isApple;
char programName[WelcomeMessageProgramNameSize];
char hostInfo[WelcomeMessageHostInfoSize];
};
@@ -124,10 +85,9 @@ enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) };
struct BroadcastMessage
{
uint16_t broadcastVersion;
uint16_t listenPort;
uint32_t broadcastVersion;
uint32_t protocolVersion;
int32_t activeTime; // in seconds
uint32_t activeTime; // in seconds
char programName[WelcomeMessageProgramNameSize];
};

View File

@@ -1,7 +1,6 @@
#ifndef __TRACYQUEUE_HPP__
#define __TRACYQUEUE_HPP__
#include <stddef.h>
#include <stdint.h>
namespace tracy
@@ -13,109 +12,61 @@ enum class QueueType : uint8_t
ZoneName,
Message,
MessageColor,
MessageCallstack,
MessageColorCallstack,
MessageAppInfo,
ZoneBeginAllocSrcLoc,
ZoneBeginAllocSrcLocCallstack,
CallstackSerial,
CallstackMemory,
Callstack,
CallstackAlloc,
CallstackSample,
CallstackSampleContextSwitch,
FrameImage,
ZoneBegin,
ZoneBeginCallstack,
ZoneEnd,
LockWait,
LockObtain,
LockRelease,
LockSharedWait,
LockSharedObtain,
LockSharedRelease,
LockName,
MemAlloc,
MemAllocNamed,
MemFree,
MemFreeNamed,
MemAllocCallstack,
MemAllocCallstackNamed,
MemFreeCallstack,
MemFreeCallstackNamed,
GpuZoneBegin,
GpuZoneBeginCallstack,
GpuZoneBeginAllocSrcLoc,
GpuZoneBeginAllocSrcLocCallstack,
GpuZoneEnd,
GpuZoneBeginSerial,
GpuZoneBeginCallstackSerial,
GpuZoneBeginAllocSrcLocSerial,
GpuZoneBeginAllocSrcLocCallstackSerial,
GpuZoneEndSerial,
PlotData,
ContextSwitch,
ThreadWakeup,
GpuTime,
GpuContextName,
CallstackFrameSize,
SymbolInformation,
CodeInformation,
ExternalNameMetadata,
SymbolCodeMetadata,
FiberEnter,
FiberLeave,
Terminate,
KeepAlive,
ThreadContext,
GpuCalibration,
Crash,
CrashReport,
ZoneBegin,
ZoneBeginCallstack,
ZoneEnd,
ZoneValidation,
ZoneColor,
ZoneValue,
FrameMarkMsg,
FrameMarkMsgStart,
FrameMarkMsgEnd,
SourceLocation,
LockAnnounce,
LockTerminate,
LockWait,
LockObtain,
LockRelease,
LockSharedWait,
LockSharedObtain,
LockSharedRelease,
LockMark,
PlotData,
MessageLiteral,
MessageLiteralColor,
MessageLiteralCallstack,
MessageLiteralColorCallstack,
GpuNewContext,
GpuZoneBegin,
GpuZoneBeginCallstack,
GpuZoneEnd,
GpuTime,
MemAlloc,
MemFree,
MemAllocCallstack,
MemFreeCallstack,
CallstackFrameSize,
CallstackFrame,
SysTimeReport,
TidToPid,
HwSampleCpuCycle,
HwSampleInstructionRetired,
HwSampleCacheReference,
HwSampleCacheMiss,
HwSampleBranchRetired,
HwSampleBranchMiss,
PlotConfig,
ParamSetup,
AckServerQueryNoop,
AckSourceCodeNotAvailable,
AckSymbolCodeNotAvailable,
CpuTopology,
SingleStringData,
SecondStringData,
MemNamePayload,
MeshTri,
MeshEnd,
StringData,
ThreadName,
CustomStringData,
PlotName,
SourceLocationPayload,
CallstackPayload,
CallstackAllocPayload,
FrameName,
FrameImageData,
ExternalName,
ExternalThreadName,
SymbolCode,
SourceCode,
FiberName,
NUM_TYPES
};
@@ -123,32 +74,20 @@ enum class QueueType : uint8_t
struct QueueThreadContext
{
uint32_t thread;
uint64_t thread;
};
struct QueueZoneBeginLean
struct QueueZoneBegin
{
int64_t time;
};
struct QueueZoneBegin : public QueueZoneBeginLean
{
uint64_t srcloc; // ptr
};
struct QueueZoneBeginThread : public QueueZoneBegin
{
uint32_t thread;
uint32_t cpu;
};
struct QueueZoneEnd
{
int64_t time;
};
struct QueueZoneEndThread : public QueueZoneEnd
{
uint32_t thread;
uint32_t cpu;
};
struct QueueZoneValidation
@@ -156,33 +95,6 @@ struct QueueZoneValidation
uint32_t id;
};
struct QueueZoneValidationThread : public QueueZoneValidation
{
uint32_t thread;
};
struct QueueZoneColor
{
uint8_t r;
uint8_t g;
uint8_t b;
};
struct QueueZoneColorThread : public QueueZoneColor
{
uint32_t thread;
};
struct QueueZoneValue
{
uint64_t value;
};
struct QueueZoneValueThread : public QueueZoneValue
{
uint32_t thread;
};
struct QueueStringTransfer
{
uint64_t ptr;
@@ -196,17 +108,13 @@ struct QueueFrameMark
struct QueueFrameImage
{
uint32_t frame;
uint64_t image; // ptr
uint64_t frame;
uint16_t w;
uint16_t h;
uint8_t flip;
};
struct QueueFrameImageFat : public QueueFrameImage
{
uint64_t image; // ptr
};
struct QueueSourceLocation
{
uint64_t name;
@@ -218,15 +126,9 @@ struct QueueSourceLocation
uint8_t b;
};
struct QueueZoneTextFat
struct QueueZoneText
{
uint64_t text; // ptr
uint16_t size;
};
struct QueueZoneTextFatThread : public QueueZoneTextFat
{
uint32_t thread;
};
enum class LockType : uint8_t
@@ -243,64 +145,38 @@ struct QueueLockAnnounce
LockType type;
};
struct QueueFiberEnter
{
int64_t time;
uint64_t fiber; // ptr
uint32_t thread;
};
struct QueueFiberLeave
{
int64_t time;
uint32_t thread;
};
struct QueueLockTerminate
{
uint32_t id;
int64_t time;
LockType type;
};
struct QueueLockWait
{
uint32_t thread;
uint32_t id;
int64_t time;
LockType type;
};
struct QueueLockObtain
{
uint32_t thread;
uint32_t id;
int64_t time;
};
struct QueueLockRelease
{
uint32_t thread;
uint32_t id;
int64_t time;
};
struct QueueLockMark
{
uint32_t thread;
uint32_t id;
uint64_t srcloc; // ptr
};
struct QueueLockName
{
uint32_t id;
};
struct QueueLockNameFat : public QueueLockName
{
uint64_t name; // ptr
uint16_t size;
};
enum class PlotDataType : uint8_t
{
Float,
@@ -324,6 +200,7 @@ struct QueuePlotData
struct QueueMessage
{
int64_t time;
uint64_t text; // ptr
};
struct QueueMessageColor : public QueueMessage
@@ -333,92 +210,28 @@ struct QueueMessageColor : public QueueMessage
uint8_t b;
};
struct QueueMessageLiteral : public QueueMessage
{
uint64_t text; // ptr
};
struct QueueMessageLiteralThread : public QueueMessageLiteral
{
uint32_t thread;
};
struct QueueMessageColorLiteral : public QueueMessageColor
{
uint64_t text; // ptr
};
struct QueueMessageColorLiteralThread : public QueueMessageColorLiteral
{
uint32_t thread;
};
struct QueueMessageFat : public QueueMessage
{
uint64_t text; // ptr
uint16_t size;
};
struct QueueMessageFatThread : public QueueMessageFat
{
uint32_t thread;
};
struct QueueMessageColorFat : public QueueMessageColor
{
uint64_t text; // ptr
uint16_t size;
};
struct QueueMessageColorFatThread : public QueueMessageColorFat
{
uint32_t thread;
};
// Don't change order, only add new entries at the end, this is also used on trace dumps!
enum class GpuContextType : uint8_t
{
Invalid,
OpenGl,
Vulkan,
OpenCL,
Direct3D12,
Direct3D11
};
enum GpuContextFlags : uint8_t
{
GpuContextCalibration = 1 << 0
};
struct QueueGpuNewContext
{
int64_t cpuTime;
int64_t gpuTime;
uint32_t thread;
uint64_t thread;
float period;
uint8_t context;
GpuContextFlags flags;
GpuContextType type;
uint8_t accuracyBits;
};
struct QueueGpuZoneBeginLean
struct QueueGpuZoneBegin
{
int64_t cpuTime;
uint32_t thread;
uint64_t srcloc;
uint64_t thread;
uint16_t queryId;
uint8_t context;
};
struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean
{
uint64_t srcloc;
};
struct QueueGpuZoneEnd
{
int64_t cpuTime;
uint32_t thread;
uint16_t queryId;
uint8_t context;
};
@@ -430,34 +243,10 @@ struct QueueGpuTime
uint8_t context;
};
struct QueueGpuCalibration
{
int64_t gpuTime;
int64_t cpuTime;
int64_t cpuDelta;
uint8_t context;
};
struct QueueGpuContextName
{
uint8_t context;
};
struct QueueGpuContextNameFat : public QueueGpuContextName
{
uint64_t ptr;
uint16_t size;
};
struct QueueMemNamePayload
{
uint64_t name;
};
struct QueueMemAlloc
{
int64_t time;
uint32_t thread;
uint64_t thread;
uint64_t ptr;
char size[6];
};
@@ -465,84 +254,37 @@ struct QueueMemAlloc
struct QueueMemFree
{
int64_t time;
uint32_t thread;
uint64_t thread;
uint64_t ptr;
};
struct QueueCallstackFat
struct QueueCallstackMemory
{
uint64_t ptr;
};
struct QueueCallstackFatThread : public QueueCallstackFat
struct QueueCallstack
{
uint32_t thread;
uint64_t ptr;
};
struct QueueCallstackAllocFat
struct QueueCallstackAlloc
{
uint64_t ptr;
uint64_t nativePtr;
};
struct QueueCallstackAllocFatThread : public QueueCallstackAllocFat
{
uint32_t thread;
};
struct QueueCallstackSample
{
int64_t time;
uint32_t thread;
};
struct QueueCallstackSampleFat : public QueueCallstackSample
{
uint64_t ptr;
};
struct QueueCallstackFrameSize
{
uint64_t ptr;
uint8_t size;
};
struct QueueCallstackFrameSizeFat : public QueueCallstackFrameSize
{
uint64_t data;
uint64_t imageName;
};
struct QueueCallstackFrame
{
uint64_t name;
uint64_t file;
uint32_t line;
uint64_t symAddr;
uint32_t symLen;
};
struct QueueSymbolInformation
{
uint32_t line;
uint64_t symAddr;
};
struct QueueSymbolInformationFat : public QueueSymbolInformation
{
uint64_t fileString;
uint8_t needFree;
};
struct QueueCodeInformation
{
uint64_t symAddr;
uint32_t line;
uint64_t ptrOffset;
};
struct QueueCodeInformationFat : public QueueCodeInformation
{
uint64_t fileString;
uint8_t needFree;
};
struct QueueCrashReport
@@ -551,85 +293,17 @@ struct QueueCrashReport
uint64_t text; // ptr
};
struct QueueCrashReportThread
{
uint32_t thread;
};
struct QueueSysTime
{
int64_t time;
float sysTime;
};
struct QueueContextSwitch
struct QueueMeshTri
{
int64_t time;
uint32_t oldThread;
uint32_t newThread;
uint8_t cpu;
uint8_t reason;
uint8_t state;
};
struct QueueThreadWakeup
{
int64_t time;
uint32_t thread;
};
struct QueueTidToPid
{
uint64_t tid;
uint64_t pid;
};
struct QueueHwSample
{
uint64_t ip;
int64_t time;
};
enum class PlotFormatType : uint8_t
{
Number,
Memory,
Percentage
};
struct QueuePlotConfig
{
uint64_t name; // ptr
uint8_t type;
};
struct QueueParamSetup
{
uint32_t idx;
uint64_t name; // ptr
uint8_t isBool;
int32_t val;
};
struct QueueCpuTopology
{
uint32_t package;
uint32_t core;
uint32_t thread;
};
struct QueueExternalNameMetadata
{
uint64_t thread;
uint64_t name;
uint64_t threadName;
};
struct QueueSymbolCodeMetadata
{
uint64_t symbol;
uint64_t ptr;
uint32_t size;
float x0, y0;
float x1, y1;
float x2, y2;
};
struct QueueHeader
@@ -648,80 +322,36 @@ struct QueueItem
{
QueueThreadContext threadCtx;
QueueZoneBegin zoneBegin;
QueueZoneBeginLean zoneBeginLean;
QueueZoneBeginThread zoneBeginThread;
QueueZoneEnd zoneEnd;
QueueZoneEndThread zoneEndThread;
QueueZoneValidation zoneValidation;
QueueZoneValidationThread zoneValidationThread;
QueueZoneColor zoneColor;
QueueZoneColorThread zoneColorThread;
QueueZoneValue zoneValue;
QueueZoneValueThread zoneValueThread;
QueueStringTransfer stringTransfer;
QueueFrameMark frameMark;
QueueFrameImage frameImage;
QueueFrameImageFat frameImageFat;
QueueSourceLocation srcloc;
QueueZoneTextFat zoneTextFat;
QueueZoneTextFatThread zoneTextFatThread;
QueueZoneText zoneText;
QueueLockAnnounce lockAnnounce;
QueueLockTerminate lockTerminate;
QueueLockWait lockWait;
QueueLockObtain lockObtain;
QueueLockRelease lockRelease;
QueueLockMark lockMark;
QueueLockName lockName;
QueueLockNameFat lockNameFat;
QueuePlotData plotData;
QueueMessage message;
QueueMessageColor messageColor;
QueueMessageLiteral messageLiteral;
QueueMessageLiteralThread messageLiteralThread;
QueueMessageColorLiteral messageColorLiteral;
QueueMessageColorLiteralThread messageColorLiteralThread;
QueueMessageFat messageFat;
QueueMessageFatThread messageFatThread;
QueueMessageColorFat messageColorFat;
QueueMessageColorFatThread messageColorFatThread;
QueueGpuNewContext gpuNewContext;
QueueGpuZoneBegin gpuZoneBegin;
QueueGpuZoneBeginLean gpuZoneBeginLean;
QueueGpuZoneEnd gpuZoneEnd;
QueueGpuTime gpuTime;
QueueGpuCalibration gpuCalibration;
QueueGpuContextName gpuContextName;
QueueGpuContextNameFat gpuContextNameFat;
QueueMemAlloc memAlloc;
QueueMemFree memFree;
QueueMemNamePayload memName;
QueueCallstackFat callstackFat;
QueueCallstackFatThread callstackFatThread;
QueueCallstackAllocFat callstackAllocFat;
QueueCallstackAllocFatThread callstackAllocFatThread;
QueueCallstackSample callstackSample;
QueueCallstackSampleFat callstackSampleFat;
QueueCallstackMemory callstackMemory;
QueueCallstack callstack;
QueueCallstackAlloc callstackAlloc;
QueueCallstackFrameSize callstackFrameSize;
QueueCallstackFrameSizeFat callstackFrameSizeFat;
QueueCallstackFrame callstackFrame;
QueueSymbolInformation symbolInformation;
QueueSymbolInformationFat symbolInformationFat;
QueueCodeInformation codeInformation;
QueueCodeInformationFat codeInformationFat;
QueueCrashReport crashReport;
QueueCrashReportThread crashReportThread;
QueueSysTime sysTime;
QueueContextSwitch contextSwitch;
QueueThreadWakeup threadWakeup;
QueueTidToPid tidToPid;
QueueHwSample hwSample;
QueuePlotConfig plotConfig;
QueueParamSetup paramSetup;
QueueCpuTopology cpuTopology;
QueueExternalNameMetadata externalNameMetadata;
QueueSymbolCodeMetadata symbolCodeMetadata;
QueueFiberEnter fiberEnter;
QueueFiberLeave fiberLeave;
QueueMeshTri meshTri;
};
};
#pragma pack()
@@ -729,116 +359,68 @@ struct QueueItem
enum { QueueItemSize = sizeof( QueueItem ) };
static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ), // zone text
sizeof( QueueHeader ), // zone name
static const size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueZoneText ),
sizeof( QueueHeader ) + sizeof( QueueZoneText ), // zone name
sizeof( QueueHeader ) + sizeof( QueueMessage ),
sizeof( QueueHeader ) + sizeof( QueueMessageColor ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack
sizeof( QueueHeader ), // callstack memory
sizeof( QueueHeader ), // callstack
sizeof( QueueHeader ), // callstack alloc
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ),
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), // context switch
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, callstack
sizeof( QueueHeader ) + sizeof( QueueCallstackMemory ),
sizeof( QueueHeader ) + sizeof( QueueCallstack ),
sizeof( QueueHeader ) + sizeof( QueueCallstackAlloc ),
sizeof( QueueHeader ) + sizeof( QueueFrameImage ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueZoneEnd ),
sizeof( QueueHeader ) + sizeof( QueueLockWait ),
sizeof( QueueHeader ) + sizeof( QueueLockObtain ),
sizeof( QueueHeader ) + sizeof( QueueLockRelease ),
sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared
sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared
sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared
sizeof( QueueHeader ) + sizeof( QueueLockName ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named
sizeof( QueueHeader ) + sizeof( QueueMemFree ),
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location, callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location, callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial
sizeof( QueueHeader ) + sizeof( QueuePlotData ),
sizeof( QueueHeader ) + sizeof( QueueContextSwitch ),
sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ),
sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
sizeof( QueueHeader ) + sizeof( QueueGpuContextName ),
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ),
sizeof( QueueHeader ) + sizeof( QueueCodeInformation ),
sizeof( QueueHeader ), // ExternalNameMetadata - not for wire transfer
sizeof( QueueHeader ), // SymbolCodeMetadata - not for wire transfer
sizeof( QueueHeader ) + sizeof( QueueFiberEnter ),
sizeof( QueueHeader ) + sizeof( QueueFiberLeave ),
// above items must be first
sizeof( QueueHeader ), // terminate
sizeof( QueueHeader ), // keep alive
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
sizeof( QueueHeader ), // crash
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueZoneEnd ),
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
sizeof( QueueHeader ) + sizeof( QueueZoneColor ),
sizeof( QueueHeader ) + sizeof( QueueZoneValue ),
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // end
sizeof( QueueHeader ) + sizeof( QueueSourceLocation ),
sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
sizeof( QueueHeader ) + sizeof( QueueLockTerminate ),
sizeof( QueueHeader ) + sizeof( QueueLockWait ),
sizeof( QueueHeader ) + sizeof( QueueLockObtain ),
sizeof( QueueHeader ) + sizeof( QueueLockRelease ),
sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared
sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared
sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared
sizeof( QueueHeader ) + sizeof( QueueLockMark ),
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ),
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ),
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack
sizeof( QueueHeader ) + sizeof( QueuePlotData ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
sizeof( QueueHeader ) + sizeof( QueueMemFree ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
sizeof( QueueHeader ) + sizeof( QueueSysTime ),
sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache reference
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache miss
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch retired
sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch miss
sizeof( QueueHeader ) + sizeof( QueuePlotConfig ),
sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
sizeof( QueueHeader ), // server query acknowledgement
sizeof( QueueHeader ), // source code not available
sizeof( QueueHeader ), // symbol code not available
sizeof( QueueHeader ) + sizeof( QueueCpuTopology ),
sizeof( QueueHeader ), // single string data
sizeof( QueueHeader ), // second string data
sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ),
sizeof( QueueHeader ) + sizeof( QueueMeshTri ),
sizeof( QueueHeader ), // mesh end
// keep all QueueStringTransfer below
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // custom string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack alloc payload
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame image data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external thread name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // symbol code
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // source code
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // fiber name
};
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
@@ -846,6 +428,6 @@ static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType:
static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" );
static_assert( sizeof( void* ) == sizeof( uintptr_t ), "Pointer size != uintptr_t" );
}
};
#endif

View File

@@ -1,5 +1,5 @@
#include <algorithm>
#include <assert.h>
#include <inttypes.h>
#include <new>
#include <stdio.h>
#include <stdlib.h>
@@ -8,12 +8,8 @@
#include "TracyAlloc.hpp"
#include "TracySocket.hpp"
#include "TracySystem.hpp"
#ifdef _WIN32
# ifndef NOMINMAX
# define NOMINMAX
# endif
# include <winsock2.h>
# include <ws2tcpip.h>
# ifdef _MSC_VER
@@ -24,10 +20,6 @@
#else
# include <arpa/inet.h>
# include <sys/socket.h>
# include <sys/param.h>
# include <errno.h>
# include <fcntl.h>
# include <netinet/in.h>
# include <netdb.h>
# include <unistd.h>
# include <poll.h>
@@ -66,15 +58,11 @@ void InitWinSock()
}
#endif
enum { BufSize = 128 * 1024 };
Socket::Socket()
: m_buf( (char*)tracy_malloc( BufSize ) )
, m_bufPtr( nullptr )
, m_sock( -1 )
, m_bufLeft( 0 )
, m_ptr( nullptr )
{
#ifdef _WIN32
InitWinSock();
@@ -86,72 +74,21 @@ Socket::Socket( int sock )
, m_bufPtr( nullptr )
, m_sock( sock )
, m_bufLeft( 0 )
, m_ptr( nullptr )
{
}
Socket::~Socket()
{
tracy_free( m_buf );
if( m_sock.load( std::memory_order_relaxed ) != -1 )
if( m_sock != -1 )
{
Close();
}
if( m_ptr )
{
freeaddrinfo( m_res );
#ifdef _WIN32
closesocket( m_connSock );
#else
close( m_connSock );
#endif
}
}
bool Socket::Connect( const char* addr, uint16_t port )
bool Socket::Connect( const char* addr, const char* port )
{
assert( !IsValid() );
if( m_ptr )
{
const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen );
if( c == -1 )
{
#if defined _WIN32
const auto err = WSAGetLastError();
if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
if( err != WSAEISCONN )
{
freeaddrinfo( m_res );
closesocket( m_connSock );
m_ptr = nullptr;
return false;
}
#else
const auto err = errno;
if( err == EALREADY || err == EINPROGRESS ) return false;
if( err != EISCONN )
{
freeaddrinfo( m_res );
close( m_connSock );
m_ptr = nullptr;
return false;
}
#endif
}
#if defined _WIN32
u_long nonblocking = 0;
ioctlsocket( m_connSock, FIONBIO, &nonblocking );
#else
int flags = fcntl( m_connSock, F_GETFL, 0 );
fcntl( m_connSock, F_SETFL, flags & ~O_NONBLOCK );
#endif
m_sock.store( m_connSock, std::memory_order_relaxed );
freeaddrinfo( m_res );
m_ptr = nullptr;
return true;
}
assert( m_sock == -1 );
struct addrinfo hints;
struct addrinfo *res, *ptr;
@@ -160,82 +97,7 @@ bool Socket::Connect( const char* addr, uint16_t port )
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
char portbuf[32];
sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
int sock = 0;
for( ptr = res; ptr; ptr = ptr->ai_next )
{
if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
#if defined __APPLE__
int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
#if defined _WIN32
u_long nonblocking = 1;
ioctlsocket( sock, FIONBIO, &nonblocking );
#else
int flags = fcntl( sock, F_GETFL, 0 );
fcntl( sock, F_SETFL, flags | O_NONBLOCK );
#endif
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == 0 )
{
break;
}
else
{
#if defined _WIN32
const auto err = WSAGetLastError();
if( err != WSAEWOULDBLOCK )
{
closesocket( sock );
continue;
}
#else
if( errno != EINPROGRESS )
{
close( sock );
continue;
}
#endif
}
m_res = res;
m_ptr = ptr;
m_connSock = sock;
return false;
}
freeaddrinfo( res );
if( !ptr ) return false;
#if defined _WIN32
u_long nonblocking = 0;
ioctlsocket( sock, FIONBIO, &nonblocking );
#else
int flags = fcntl( sock, F_GETFL, 0 );
fcntl( sock, F_SETFL, flags & ~O_NONBLOCK );
#endif
m_sock.store( sock, std::memory_order_relaxed );
return true;
}
bool Socket::ConnectBlocking( const char* addr, uint16_t port )
{
assert( !IsValid() );
assert( !m_ptr );
struct addrinfo hints;
struct addrinfo *res, *ptr;
memset( &hints, 0, sizeof( hints ) );
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
char portbuf[32];
sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
if( getaddrinfo( addr, port, &hints, &res ) != 0 ) return false;
int sock = 0;
for( ptr = res; ptr; ptr = ptr->ai_next )
{
@@ -258,31 +120,29 @@ bool Socket::ConnectBlocking( const char* addr, uint16_t port )
freeaddrinfo( res );
if( !ptr ) return false;
m_sock.store( sock, std::memory_order_relaxed );
m_sock = sock;
return true;
}
void Socket::Close()
{
const auto sock = m_sock.load( std::memory_order_relaxed );
assert( sock != -1 );
assert( m_sock != -1 );
#ifdef _WIN32
closesocket( sock );
closesocket( m_sock );
#else
close( sock );
close( m_sock );
#endif
m_sock.store( -1, std::memory_order_relaxed );
m_sock = -1;
}
int Socket::Send( const void* _buf, int len )
{
const auto sock = m_sock.load( std::memory_order_relaxed );
auto buf = (const char*)_buf;
assert( sock != -1 );
assert( m_sock != -1 );
auto start = buf;
while( len > 0 )
{
auto ret = send( sock, buf, len, MSG_NOSIGNAL );
auto ret = send( m_sock, buf, len, MSG_NOSIGNAL );
if( ret == -1 ) return -1;
len -= ret;
buf += ret;
@@ -292,14 +152,13 @@ int Socket::Send( const void* _buf, int len )
int Socket::GetSendBufSize()
{
const auto sock = m_sock.load( std::memory_order_relaxed );
int bufSize;
#if defined _WIN32
#if defined _WIN32 || defined __CYGWIN__
int sz = sizeof( bufSize );
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
getsockopt( m_sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
#else
socklen_t sz = sizeof( bufSize );
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz );
getsockopt( m_sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz );
#endif
return bufSize;
}
@@ -327,7 +186,7 @@ int Socket::RecvBuffered( void* buf, int len, int timeout )
m_bufLeft = Recv( m_buf, BufSize, timeout );
if( m_bufLeft <= 0 ) return m_bufLeft;
const auto sz = len < m_bufLeft ? len : m_bufLeft;
const auto sz = std::min( len, m_bufLeft );
memcpy( buf, m_buf, sz );
m_bufPtr = m_buf + sz;
m_bufLeft -= sz;
@@ -336,16 +195,15 @@ int Socket::RecvBuffered( void* buf, int len, int timeout )
int Socket::Recv( void* _buf, int len, int timeout )
{
const auto sock = m_sock.load( std::memory_order_relaxed );
auto buf = (char*)_buf;
struct pollfd fd;
fd.fd = (socket_t)sock;
fd.fd = (socket_t)m_sock;
fd.events = POLLIN;
if( poll( &fd, 1, timeout ) > 0 )
{
return recv( sock, buf, len, 0 );
return recv( m_sock, buf, len, 0 );
}
else
{
@@ -353,54 +211,33 @@ int Socket::Recv( void* _buf, int len, int timeout )
}
}
int Socket::ReadUpTo( void* _buf, int len, int timeout )
bool Socket::Read( void* _buf, int len, int timeout, std::function<bool()> exitCb )
{
const auto sock = m_sock.load( std::memory_order_relaxed );
auto buf = (char*)_buf;
int rd = 0;
while( len > 0 )
{
const auto res = recv( sock, buf, len, 0 );
if( res == 0 ) break;
if( res == -1 ) return -1;
len -= res;
rd += res;
buf += res;
}
return rd;
}
bool Socket::Read( void* buf, int len, int timeout )
{
auto cbuf = (char*)buf;
while( len > 0 )
{
if( !ReadImpl( cbuf, len, timeout ) ) return false;
}
return true;
}
bool Socket::ReadImpl( char*& buf, int& len, int timeout )
{
const auto sz = RecvBuffered( buf, len, timeout );
switch( sz )
{
case 0:
return false;
case -1:
if( exitCb() ) return false;
const auto sz = RecvBuffered( buf, len, timeout );
switch( sz )
{
case 0:
return false;
case -1:
#ifdef _WIN32
{
auto err = WSAGetLastError();
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
}
{
auto err = WSAGetLastError();
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
}
#endif
break;
default:
len -= sz;
buf += sz;
break;
break;
default:
len -= sz;
buf += sz;
break;
}
}
return true;
}
@@ -419,21 +256,15 @@ bool Socket::ReadRaw( void* _buf, int len, int timeout )
bool Socket::HasData()
{
const auto sock = m_sock.load( std::memory_order_relaxed );
if( m_bufLeft > 0 ) return true;
struct pollfd fd;
fd.fd = (socket_t)sock;
fd.fd = (socket_t)m_sock;
fd.events = POLLIN;
return poll( &fd, 1, 0 ) > 0;
}
bool Socket::IsValid() const
{
return m_sock.load( std::memory_order_relaxed ) >= 0;
}
ListenSocket::ListenSocket()
: m_sock( -1 )
@@ -448,62 +279,30 @@ ListenSocket::~ListenSocket()
if( m_sock != -1 ) Close();
}
static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res )
{
struct addrinfo hints;
memset( &hints, 0, sizeof( hints ) );
hints.ai_family = ai_family;
hints.ai_socktype = SOCK_STREAM;
#ifndef TRACY_ONLY_LOCALHOST
const char* onlyLocalhost = GetEnvVar( "TRACY_ONLY_LOCALHOST" );
if( !onlyLocalhost || onlyLocalhost[0] != '1' )
{
hints.ai_flags = AI_PASSIVE;
}
#endif
char portbuf[32];
sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( nullptr, portbuf, &hints, res ) != 0 ) return -1;
int sock = socket( (*res)->ai_family, (*res)->ai_socktype, (*res)->ai_protocol );
if (sock == -1) freeaddrinfo( *res );
return sock;
}
bool ListenSocket::Listen( uint16_t port, int backlog )
bool ListenSocket::Listen( const char* port, int backlog )
{
assert( m_sock == -1 );
struct addrinfo* res = nullptr;
struct addrinfo* res;
struct addrinfo hints;
#if !defined TRACY_ONLY_IPV4 && !defined TRACY_ONLY_LOCALHOST
const char* onlyIPv4 = GetEnvVar( "TRACY_ONLY_IPV4" );
if( !onlyIPv4 || onlyIPv4[0] != '1' )
{
m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res );
}
#endif
if (m_sock == -1)
{
// IPV6 protocol may not be available/is disabled. Try to create a socket
// with the IPV4 protocol
m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res );
if( m_sock == -1 ) return false;
}
#if defined _WIN32
memset( &hints, 0, sizeof( hints ) );
hints.ai_family = AF_INET6;
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_PASSIVE;
if( getaddrinfo( nullptr, port, &hints, &res ) != 0 ) return false;
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
#if defined _WIN32 || defined __CYGWIN__
unsigned long val = 0;
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
#elif defined BSD
int val = 0;
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
val = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
#else
int val = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
#endif
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
freeaddrinfo( res );
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) return false;
if( listen( m_sock, backlog ) == -1 ) return false;
return true;
}
@@ -560,7 +359,7 @@ UdpBroadcast::~UdpBroadcast()
if( m_sock != -1 ) Close();
}
bool UdpBroadcast::Open( const char* addr, uint16_t port )
bool UdpBroadcast::Open( const char* addr, const char* port )
{
assert( m_sock == -1 );
@@ -571,10 +370,7 @@ bool UdpBroadcast::Open( const char* addr, uint16_t port )
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_DGRAM;
char portbuf[32];
sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
if( getaddrinfo( addr, port, &hints, &res ) != 0 ) return false;
int sock = 0;
for( ptr = res; ptr; ptr = ptr->ai_next )
{
@@ -583,7 +379,7 @@ bool UdpBroadcast::Open( const char* addr, uint16_t port )
int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
#if defined _WIN32
#if defined _WIN32 || defined __CYGWIN__
unsigned long broadcast = 1;
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
#else
@@ -604,7 +400,6 @@ bool UdpBroadcast::Open( const char* addr, uint16_t port )
if( !ptr ) return false;
m_sock = sock;
inet_pton( AF_INET, addr, &m_addr );
return true;
}
@@ -619,13 +414,13 @@ void UdpBroadcast::Close()
m_sock = -1;
}
int UdpBroadcast::Send( uint16_t port, const void* data, int len )
int UdpBroadcast::Send( int port, const void* data, int len )
{
assert( m_sock != -1 );
struct sockaddr_in addr;
addr.sin_family = AF_INET;
addr.sin_port = htons( port );
addr.sin_addr.s_addr = m_addr;
addr.sin_addr.s_addr = INADDR_BROADCAST;
return sendto( m_sock, (const char*)data, len, MSG_NOSIGNAL, (sockaddr*)&addr, sizeof( addr ) );
}
@@ -641,10 +436,8 @@ IpAddress::~IpAddress()
void IpAddress::Set( const struct sockaddr& addr )
{
#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 )
struct sockaddr_in tmp;
memcpy( &tmp, &addr, sizeof( tmp ) );
auto ai = &tmp;
#if __MINGW32__
auto ai = (struct sockaddr_in*)&addr;
#else
auto ai = (const struct sockaddr_in*)&addr;
#endif
@@ -665,7 +458,7 @@ UdpListen::~UdpListen()
if( m_sock != -1 ) Close();
}
bool UdpListen::Listen( uint16_t port )
bool UdpListen::Listen( int port )
{
assert( m_sock == -1 );
@@ -676,14 +469,14 @@ bool UdpListen::Listen( uint16_t port )
int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
#if defined _WIN32
#if defined _WIN32 || defined __CYGWIN__
unsigned long reuse = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) );
#else
int reuse = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) );
#endif
#if defined _WIN32
#if defined _WIN32 || defined __CYGWIN__
unsigned long broadcast = 1;
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
#else
@@ -729,14 +522,14 @@ void UdpListen::Close()
m_sock = -1;
}
const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout )
const char* UdpListen::Read( size_t& len, IpAddress& addr )
{
static char buf[2048];
struct pollfd fd;
fd.fd = (socket_t)m_sock;
fd.events = POLLIN;
if( poll( &fd, 1, timeout ) <= 0 ) return nullptr;
if( poll( &fd, 1, 10 ) <= 0 ) return nullptr;
sockaddr sa;
socklen_t salen = sizeof( struct sockaddr );

View File

@@ -1,13 +1,8 @@
#ifndef __TRACYSOCKET_HPP__
#define __TRACYSOCKET_HPP__
#include <atomic>
#include <stddef.h>
#include <stdint.h>
#include <functional>
#include "TracyForceInline.hpp"
struct addrinfo;
struct sockaddr;
namespace tracy
@@ -19,36 +14,22 @@ void InitWinSock();
class Socket
{
enum { BufSize = 128 * 1024 };
public:
Socket();
Socket( int sock );
~Socket();
bool Connect( const char* addr, uint16_t port );
bool ConnectBlocking( const char* addr, uint16_t port );
bool Connect( const char* addr, const char* port );
void Close();
int Send( const void* buf, int len );
int GetSendBufSize();
int ReadUpTo( void* buf, int len, int timeout );
bool Read( void* buf, int len, int timeout );
template<typename ShouldExit>
bool Read( void* buf, int len, int timeout, ShouldExit exitCb )
{
auto cbuf = (char*)buf;
while( len > 0 )
{
if( exitCb() ) return false;
if( !ReadImpl( cbuf, len, timeout ) ) return false;
}
return true;
}
bool Read( void* buf, int len, int timeout, std::function<bool()> exitCb );
bool ReadRaw( void* buf, int len, int timeout );
bool HasData();
bool IsValid() const;
Socket( const Socket& ) = delete;
Socket( Socket&& ) = delete;
@@ -59,16 +40,10 @@ private:
int RecvBuffered( void* buf, int len, int timeout );
int Recv( void* buf, int len, int timeout );
bool ReadImpl( char*& buf, int& len, int timeout );
char* m_buf;
char* m_bufPtr;
std::atomic<int> m_sock;
int m_sock;
int m_bufLeft;
struct addrinfo *m_res;
struct addrinfo *m_ptr;
int m_connSock;
};
class ListenSocket
@@ -77,7 +52,7 @@ public:
ListenSocket();
~ListenSocket();
bool Listen( uint16_t port, int backlog );
bool Listen( const char* port, int backlog );
Socket* Accept();
void Close();
@@ -96,10 +71,10 @@ public:
UdpBroadcast();
~UdpBroadcast();
bool Open( const char* addr, uint16_t port );
bool Open( const char* addr, const char* port );
void Close();
int Send( uint16_t port, const void* data, int len );
int Send( int port, const void* data, int len );
UdpBroadcast( const UdpBroadcast& ) = delete;
UdpBroadcast( UdpBroadcast&& ) = delete;
@@ -108,7 +83,6 @@ public:
private:
int m_sock;
uint32_t m_addr;
};
class IpAddress
@@ -138,10 +112,10 @@ public:
UdpListen();
~UdpListen();
bool Listen( uint16_t port );
bool Listen( int port );
void Close();
const char* Read( size_t& len, IpAddress& addr, int timeout );
const char* Read( size_t& len, IpAddress& addr );
UdpListen( const UdpListen& ) = delete;
UdpListen( UdpListen&& ) = delete;

View File

@@ -1,122 +0,0 @@
#include "TracyStackFrames.hpp"
namespace tracy
{
const char* s_tracyStackFrames_[] = {
"tracy::Callstack",
"tracy::Callstack(int)",
"tracy::GpuCtxScope::{ctor}",
"tracy::Profiler::SendCallstack",
"tracy::Profiler::SendCallstack(int)",
"tracy::Profiler::SendCallstack(int, unsigned long)",
"tracy::Profiler::MemAllocCallstack",
"tracy::Profiler::MemAllocCallstack(void const*, unsigned long, int)",
"tracy::Profiler::MemFreeCallstack",
"tracy::Profiler::MemFreeCallstack(void const*, int)",
"tracy::ScopedZone::{ctor}",
"tracy::ScopedZone::ScopedZone(tracy::SourceLocationData const*, int, bool)",
"tracy::Profiler::Message",
nullptr
};
const char** s_tracyStackFrames = s_tracyStackFrames_;
const StringMatch s_tracySkipSubframes_[] = {
{ "/include/arm_neon.h", 19 },
{ "/include/adxintrin.h", 20 },
{ "/include/ammintrin.h", 20 },
{ "/include/amxbf16intrin.h", 24 },
{ "/include/amxint8intrin.h", 24 },
{ "/include/amxtileintrin.h", 24 },
{ "/include/avx2intrin.h", 21 },
{ "/include/avx5124fmapsintrin.h", 29 },
{ "/include/avx5124vnniwintrin.h", 29 },
{ "/include/avx512bf16intrin.h", 27 },
{ "/include/avx512bf16vlintrin.h", 29 },
{ "/include/avx512bitalgintrin.h", 29 },
{ "/include/avx512bwintrin.h", 25 },
{ "/include/avx512cdintrin.h", 25 },
{ "/include/avx512dqintrin.h", 25 },
{ "/include/avx512erintrin.h", 25 },
{ "/include/avx512fintrin.h", 24 },
{ "/include/avx512ifmaintrin.h", 27 },
{ "/include/avx512ifmavlintrin.h", 29 },
{ "/include/avx512pfintrin.h", 25 },
{ "/include/avx512vbmi2intrin.h", 28 },
{ "/include/avx512vbmi2vlintrin.h", 30 },
{ "/include/avx512vbmiintrin.h", 27 },
{ "/include/avx512vbmivlintrin.h", 29 },
{ "/include/avx512vlbwintrin.h", 27 },
{ "/include/avx512vldqintrin.h", 27 },
{ "/include/avx512vlintrin.h", 25 },
{ "/include/avx512vnniintrin.h", 27 },
{ "/include/avx512vnnivlintrin.h", 29 },
{ "/include/avx512vp2intersectintrin.h", 35 },
{ "/include/avx512vp2intersectvlintrin.h", 37 },
{ "/include/avx512vpopcntdqintrin.h", 32 },
{ "/include/avx512vpopcntdqvlintrin.h", 34 },
{ "/include/avxintrin.h", 20 },
{ "/include/avxvnniintrin.h", 24 },
{ "/include/bmi2intrin.h", 21 },
{ "/include/bmiintrin.h", 20 },
{ "/include/bmmintrin.h", 20 },
{ "/include/cetintrin.h", 20 },
{ "/include/cldemoteintrin.h", 25 },
{ "/include/clflushoptintrin.h", 27 },
{ "/include/clwbintrin.h", 21 },
{ "/include/clzerointrin.h", 23 },
{ "/include/emmintrin.h", 20 },
{ "/include/enqcmdintrin.h", 23 },
{ "/include/f16cintrin.h", 21 },
{ "/include/fma4intrin.h", 21 },
{ "/include/fmaintrin.h", 20 },
{ "/include/fxsrintrin.h", 21 },
{ "/include/gfniintrin.h", 21 },
{ "/include/hresetintrin.h", 23 },
{ "/include/ia32intrin.h", 21 },
{ "/include/immintrin.h", 20 },
{ "/include/keylockerintrin.h", 26 },
{ "/include/lwpintrin.h", 20 },
{ "/include/lzcntintrin.h", 22 },
{ "/include/mmintrin.h", 19 },
{ "/include/movdirintrin.h", 23 },
{ "/include/mwaitxintrin.h", 23 },
{ "/include/nmmintrin.h", 20 },
{ "/include/pconfigintrin.h", 24 },
{ "/include/pkuintrin.h", 20 },
{ "/include/pmmintrin.h", 20 },
{ "/include/popcntintrin.h", 23 },
{ "/include/prfchwintrin.h", 23 },
{ "/include/rdseedintrin.h", 23 },
{ "/include/rtmintrin.h", 20 },
{ "/include/serializeintrin.h", 26 },
{ "/include/sgxintrin.h", 20 },
{ "/include/shaintrin.h", 20 },
{ "/include/smmintrin.h", 20 },
{ "/include/tbmintrin.h", 20 },
{ "/include/tmmintrin.h", 20 },
{ "/include/tsxldtrkintrin.h", 25 },
{ "/include/uintrintrin.h", 22 },
{ "/include/vaesintrin.h", 21 },
{ "/include/vpclmulqdqintrin.h", 27 },
{ "/include/waitpkgintrin.h", 24 },
{ "/include/wbnoinvdintrin.h", 25 },
{ "/include/wmmintrin.h", 20 },
{ "/include/x86gprintrin.h", 23 },
{ "/include/x86intrin.h", 20 },
{ "/include/xmmintrin.h", 20 },
{ "/include/xopintrin.h", 20 },
{ "/include/xsavecintrin.h", 23 },
{ "/include/xsaveintrin.h", 22 },
{ "/include/xsaveoptintrin.h", 25 },
{ "/include/xsavesintrin.h", 23 },
{ "/include/xtestintrin.h", 22 },
{ "/bits/atomic_base.h", 19 },
{ "/atomic", 7 },
{}
};
const StringMatch* s_tracySkipSubframes = s_tracySkipSubframes_;
}

View File

@@ -1,22 +0,0 @@
#ifndef __TRACYSTACKFRAMES_HPP__
#define __TRACYSTACKFRAMES_HPP__
#include <stddef.h>
namespace tracy
{
struct StringMatch
{
const char* str;
size_t len;
};
extern const char** s_tracyStackFrames;
extern const StringMatch* s_tracySkipSubframes;
static constexpr int s_tracySkipSubframesMinLen = 7;
}
#endif

View File

@@ -1,16 +1,13 @@
#ifdef _MSC_VER
# pragma warning(disable:4996)
#if defined _MSC_VER || defined __CYGWIN__ || defined _WIN32
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
# ifndef NOMINMAX
# define NOMINMAX
# endif
#endif
#if defined _WIN32
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
# ifndef NOMINMAX
# define NOMINMAX
# endif
#ifdef _WIN32
# include <windows.h>
# include <malloc.h>
# include "TracyUwp.hpp"
#else
# include <pthread.h>
# include <string.h>
@@ -18,16 +15,10 @@
#endif
#ifdef __linux__
# ifdef __ANDROID__
# include <sys/types.h>
# else
# include <sys/syscall.h>
# endif
# include <fcntl.h>
#elif defined __FreeBSD__
# include <sys/thr.h>
#elif defined __NetBSD__ || defined __DragonFly__
# include <sys/lwp.h>
# ifndef __ANDROID__
# include <syscall.h>
# endif
# include <fcntl.h>
#endif
#ifdef __MINGW32__
@@ -35,16 +26,10 @@
#endif
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include "TracySystem.hpp"
#if defined _WIN32
extern "C" typedef HRESULT (WINAPI *t_SetThreadDescription)( HANDLE, PCWSTR );
extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
#endif
#ifdef TRACY_ENABLE
#ifdef TRACY_COLLECT_THREAD_NAMES
# include <atomic>
# include "TracyAlloc.hpp"
#endif
@@ -52,136 +37,93 @@ extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
namespace tracy
{
namespace detail
{
TRACY_API uint32_t GetThreadHandleImpl()
{
#if defined _WIN32
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint32_t ), "Thread handle too big to fit in protocol" );
return uint32_t( GetCurrentThreadId() );
#elif defined __APPLE__
uint64_t id;
pthread_threadid_np( pthread_self(), &id );
return uint32_t( id );
#elif defined __ANDROID__
return (uint32_t)gettid();
#elif defined __linux__
return (uint32_t)syscall( SYS_gettid );
#elif defined __FreeBSD__
long id;
thr_self( &id );
return id;
#elif defined __NetBSD__
return _lwp_self();
#elif defined __DragonFly__
return lwp_gettid();
#elif defined __OpenBSD__
return getthrid();
#else
// To add support for a platform, retrieve and return the kernel thread identifier here.
//
// Note that pthread_t (as for example returned by pthread_self()) is *not* a kernel
// thread identifier. It is a pointer to a library-allocated data structure instead.
// Such pointers will be reused heavily, making the pthread_t non-unique. Additionally
// a 64-bit pointer cannot be reliably truncated to 32 bits.
#error "Unsupported platform!"
#endif
}
}
#ifdef TRACY_ENABLE
#ifdef TRACY_COLLECT_THREAD_NAMES
struct ThreadNameData
{
uint32_t id;
uint64_t id;
const char* name;
ThreadNameData* next;
};
std::atomic<ThreadNameData*>& GetThreadNameData();
TRACY_API std::atomic<ThreadNameData*>& GetThreadNameData();
TRACY_API void InitRPMallocThread();
#endif
#ifdef _MSC_VER
# pragma pack( push, 8 )
struct THREADNAME_INFO
void SetThreadName( std::thread& thread, const char* name )
{
DWORD dwType;
LPCSTR szName;
DWORD dwThreadID;
DWORD dwFlags;
};
# pragma pack(pop)
SetThreadName( thread.native_handle(), name );
}
void ThreadNameMsvcMagic( const THREADNAME_INFO& info )
void SetThreadName( std::thread::native_handle_type handle, const char* name )
{
#if defined _WIN32 && !defined PTW32_VERSION && !defined __WINPTHREADS_VERSION
# if defined NTDDI_WIN10_RS2 && NTDDI_VERSION >= NTDDI_WIN10_RS2
wchar_t buf[256];
mbstowcs( buf, name, 256 );
SetThreadDescription( static_cast<HANDLE>( handle ), buf );
# else
const DWORD MS_VC_EXCEPTION=0x406D1388;
# pragma pack( push, 8 )
struct THREADNAME_INFO
{
DWORD dwType;
LPCSTR szName;
DWORD dwThreadID;
DWORD dwFlags;
};
# pragma pack(pop)
DWORD ThreadId = GetThreadId( static_cast<HANDLE>( handle ) );
THREADNAME_INFO info;
info.dwType = 0x1000;
info.szName = name;
info.dwThreadID = ThreadId;
info.dwFlags = 0;
__try
{
RaiseException( 0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
}
__except(EXCEPTION_EXECUTE_HANDLER)
{
}
}
#endif
TRACY_API void SetThreadName( const char* name )
{
#if defined _WIN32
# ifdef TRACY_UWP
static auto _SetThreadDescription = &::SetThreadDescription;
# else
static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" );
# endif
if( _SetThreadDescription )
{
wchar_t buf[256];
mbstowcs( buf, name, 256 );
_SetThreadDescription( GetCurrentThread(), buf );
}
else
{
# if defined _MSC_VER
THREADNAME_INFO info;
info.dwType = 0x1000;
info.szName = name;
info.dwThreadID = GetCurrentThreadId();
info.dwFlags = 0;
ThreadNameMsvcMagic( info );
# endif
}
#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__
{
const auto sz = strlen( name );
if( sz <= 15 )
{
#if defined __APPLE__
pthread_setname_np( name );
#else
pthread_setname_np( pthread_self(), name );
#endif
pthread_setname_np( handle, name );
}
else
{
char buf[16];
memcpy( buf, name, 15 );
buf[15] = '\0';
#if defined __APPLE__
pthread_setname_np( buf );
#else
pthread_setname_np( pthread_self(), buf );
#endif
pthread_setname_np( handle, buf );
}
}
#endif
#ifdef TRACY_ENABLE
#ifdef TRACY_COLLECT_THREAD_NAMES
{
InitRPMallocThread();
const auto sz = strlen( name );
char* buf = (char*)tracy_malloc( sz+1 );
memcpy( buf, name, sz );
buf[sz] = '\0';
auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) );
data->id = detail::GetThreadHandleImpl();
buf[sz+1] = '\0';
auto data = (ThreadNameData*)tracy_malloc( sizeof( ThreadNameData ) );
# ifdef _WIN32
# if defined PTW32_VERSION
data->id = pthread_getw32threadid_np( static_cast<pthread_t>( handle ) );
# elif defined __WINPTHREADS_VERSION
data->id = GetThreadId( pthread_gethandle( static_cast<pthread_t>( handle ) ) );
# else
data->id = GetThreadId( static_cast<HANDLE>( handle ) );
# endif
# elif defined __APPLE__
pthread_threadid_np( handle, &data->id );
# else
data->id = (uint64_t)handle;
# endif
data->name = buf;
data->next = GetThreadNameData().load( std::memory_order_relaxed );
while( !GetThreadNameData().compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {}
@@ -189,10 +131,10 @@ TRACY_API void SetThreadName( const char* name )
#endif
}
TRACY_API const char* GetThreadName( uint32_t id )
const char* GetThreadName( uint64_t id )
{
static char buf[256];
#ifdef TRACY_ENABLE
#ifdef TRACY_COLLECT_THREAD_NAMES
auto ptr = GetThreadNameData().load( std::memory_order_relaxed );
while( ptr )
{
@@ -203,27 +145,26 @@ TRACY_API const char* GetThreadName( uint32_t id )
ptr = ptr->next;
}
#else
# if defined _WIN32
# ifdef TRACY_UWP
static auto _GetThreadDescription = &::GetThreadDescription;
# else
static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
# endif
if( _GetThreadDescription )
# ifdef _WIN32
# if defined NTDDI_WIN10_RS2 && NTDDI_VERSION >= NTDDI_WIN10_RS2
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
if( hnd != 0 )
{
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
if( hnd != 0 )
PWSTR tmp;
GetThreadDescription( hnd, &tmp );
auto ret = wcstombs( buf, tmp, 256 );
CloseHandle( hnd );
if( ret != 0 )
{
PWSTR tmp;
_GetThreadDescription( hnd, &tmp );
auto ret = wcstombs( buf, tmp, 256 );
CloseHandle( hnd );
if( ret != 0 )
{
return buf;
}
return buf;
}
}
# endif
# elif defined __GLIBC__ && !defined __ANDROID__ && !defined __EMSCRIPTEN__
if( pthread_getname_np( (pthread_t)id, buf, 256 ) == 0 )
{
return buf;
}
# elif defined __linux__
int cs, fd;
char path[32];
@@ -233,7 +174,7 @@ TRACY_API const char* GetThreadName( uint32_t id )
int tid = (int) syscall( SYS_gettid );
# endif
snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid );
sprintf( buf, "%" PRIu32, id );
sprintf( buf, "%" PRIu64, id );
# ifndef __ANDROID__
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs );
# endif
@@ -255,50 +196,8 @@ TRACY_API const char* GetThreadName( uint32_t id )
return buf;
# endif
#endif
sprintf( buf, "%" PRIu32, id );
sprintf( buf, "%" PRIu64, id );
return buf;
}
TRACY_API const char* GetEnvVar( const char* name )
{
#if defined _WIN32
// unfortunately getenv() on Windows is just fundamentally broken. It caches the entire
// environment block once on startup, then never refreshes it again. If any environment
// strings are added or modified after startup of the CRT, those changes will not be
// seen by getenv(). This removes the possibility of an app using this SDK from
// programmatically setting any of the behaviour controlling envvars here.
//
// To work around this, we'll instead go directly to the Win32 environment strings APIs
// to get the current value.
static char buffer[1024];
DWORD const kBufferSize = DWORD(sizeof(buffer) / sizeof(buffer[0]));
DWORD count = GetEnvironmentVariableA(name, buffer, kBufferSize);
if( count == 0 )
return nullptr;
if( count >= kBufferSize )
{
char* buf = reinterpret_cast<char*>(_alloca(count + 1));
count = GetEnvironmentVariableA(name, buf, count + 1);
memcpy(buffer, buf, kBufferSize);
buffer[kBufferSize - 1] = 0;
}
return buffer;
#else
return getenv(name);
#endif
}
}
#ifdef __cplusplus
extern "C" {
#endif
TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); }
#ifdef __cplusplus
}
#endif

View File

@@ -1,7 +1,22 @@
#ifndef __TRACYSYSTEM_HPP__
#define __TRACYSYSTEM_HPP__
#ifdef TRACY_ENABLE
# if defined __ANDROID__ || defined __CYGWIN__ || defined __APPLE__ || defined _GNU_SOURCE || ( defined _WIN32 && ( !defined NTDDI_WIN10_RS2 || NTDDI_VERSION < NTDDI_WIN10_RS2 ) )
# define TRACY_COLLECT_THREAD_NAMES
# endif
#endif
#ifdef _WIN32
# ifndef _WINDOWS_
extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
# endif
#else
# include <pthread.h>
#endif
#include <stdint.h>
#include <thread>
#include "TracyApi.h"
@@ -10,22 +25,34 @@ namespace tracy
namespace detail
{
TRACY_API uint32_t GetThreadHandleImpl();
static inline uint64_t GetThreadHandleImpl()
{
#ifdef _WIN32
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
return uint64_t( GetCurrentThreadId() );
#elif defined __APPLE__
uint64_t id;
pthread_threadid_np( pthread_self(), &id );
return id;
#else
static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
return uint64_t( pthread_self() );
#endif
}
}
#ifdef TRACY_ENABLE
TRACY_API uint32_t GetThreadHandle();
TRACY_API uint64_t GetThreadHandle();
#else
static inline uint32_t GetThreadHandle()
static inline uint64_t GetThreadHandle()
{
return detail::GetThreadHandleImpl();
}
#endif
TRACY_API void SetThreadName( const char* name );
TRACY_API const char* GetThreadName( uint32_t id );
TRACY_API const char* GetEnvVar(const char* name);
void SetThreadName( std::thread& thread, const char* name );
void SetThreadName( std::thread::native_handle_type handle, const char* name );
const char* GetThreadName( uint64_t id );
}

View File

@@ -1,11 +0,0 @@
#ifndef __TRACYUWP_HPP__
#define __TRACYUWP_HPP__
#ifdef _WIN32
# include <winapifamily.h>
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
# define TRACY_UWP
# endif
#endif
#endif

View File

@@ -1,28 +0,0 @@
#ifndef __TRACYYIELD_HPP__
#define __TRACYYIELD_HPP__
#if defined __SSE2__ || defined _M_AMD64 || _M_IX86_FP == 2
# include <emmintrin.h>
#else
# include <thread>
#endif
#include "TracyForceInline.hpp"
namespace tracy
{
static tracy_force_inline void YieldThread()
{
#if defined __SSE2__ || defined _M_AMD64 || _M_IX86_FP == 2
_mm_pause();
#elif defined __aarch64__
asm volatile( "isb" : : );
#else
std::this_thread::yield();
#endif
}
}
#endif

View File

@@ -1,21 +0,0 @@
# Extract the actual list of source files from a sibling Visual Studio project.
# Ensure these are simply-substituted variables, without changing their values.
SRC := $(SRC)
SRC2 := $(SRC2)
SRC3 := $(SRC3)
SRC4 := $(SRC4)
# Paths here are relative to the directory in which make was invoked, not to
# this file, so ../win32/$(PROJECT).vcxproj refers to the Visual Studio project
# of whichever tool is including this makefile fragment.
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
BASE4 := $(shell egrep 'None.*S"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
# The tool-specific makefile may request that certain files be omitted.
SRC += $(filter-out $(FILTER),$(BASE))
SRC2 += $(filter-out $(FILTER),$(BASE2))
SRC3 += $(filter-out $(FILTER),$(BASE3))
SRC4 += $(filter-out $(FILTER),$(BASE4))

68
common/tracy_benaphore.h Normal file
View File

@@ -0,0 +1,68 @@
// Copyright (c) 2015 Jeff Preshing
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#ifndef __TRACY_CPP11OM_BENAPHORE_H__
#define __TRACY_CPP11OM_BENAPHORE_H__
#include <cassert>
#include <thread>
#include <atomic>
#include "tracy_sema.h"
namespace tracy
{
class NonRecursiveBenaphore
{
private:
std::atomic<int> m_contentionCount;
DefaultSemaphoreType m_sema;
public:
NonRecursiveBenaphore() : m_contentionCount(0) {}
void lock()
{
if (m_contentionCount.fetch_add(1, std::memory_order_acquire) > 0)
{
m_sema.wait();
}
}
bool try_lock()
{
if (m_contentionCount.load(std::memory_order_relaxed) != 0)
return false;
int expected = 0;
return m_contentionCount.compare_exchange_strong(expected, 1, std::memory_order_acquire);
}
void unlock()
{
int oldCount = m_contentionCount.fetch_sub(1, std::memory_order_release);
assert(oldCount > 0);
if (oldCount > 1)
{
m_sema.signal();
}
}
};
}
#endif // __CPP11OM_BENAPHORE_H__

File diff suppressed because it is too large Load Diff

View File

@@ -33,6 +33,7 @@
- LZ4 source repository : https://github.com/lz4/lz4
*/
#ifndef TRACY_LZ4_H_2983827168210
#define TRACY_LZ4_H_2983827168210
@@ -40,11 +41,13 @@
#include <stddef.h> /* size_t */
#include <stdint.h>
namespace tracy
{
/**
Introduction
LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core,
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
@@ -56,19 +59,16 @@
- unbounded multiple steps (described as Streaming compression)
lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
Decompressing such a compressed block requires additional metadata.
Exact metadata depends on exact decompression function.
For the typical case of LZ4_decompress_safe(),
metadata includes block's compressed size, and maximum bound of decompressed size.
Decompressing a block requires additional metadata, such as its compressed size.
Each application is free to encode and pass such metadata in whichever way it wants.
lz4.h only handle blocks, it can not generate Frames.
Blocks are different from Frames (doc/lz4_Frame_format.md).
Frames bundle both blocks and metadata in a specified manner.
Embedding metadata is required for compressed data to be self-contained and portable.
This are required for compressed data to be self-contained and portable.
Frame format is delivered through a companion API, declared in lz4frame.h.
The `lz4` CLI can only manage frames.
Note that the `lz4` CLI can only manage frames.
*/
/*^***************************************************************
@@ -98,7 +98,7 @@
/*------ Version ------*/
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
@@ -107,9 +107,6 @@
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
namespace tracy
{
LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */
LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */
@@ -125,7 +122,7 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string;
* Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
*/
#ifndef LZ4_MEMORY_USAGE
# define LZ4_MEMORY_USAGE 14
# define LZ4_MEMORY_USAGE 12
#endif
@@ -133,35 +130,29 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string;
* Simple Functions
**************************************/
/*! LZ4_compress_default() :
* Compresses 'srcSize' bytes from buffer 'src'
* into already allocated 'dst' buffer of size 'dstCapacity'.
* Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
* It also runs faster, so it's a recommended setting.
* If the function cannot compress 'src' into a more limited 'dst' budget,
* compression stops *immediately*, and the function result is zero.
* In which case, 'dst' content is undefined (invalid).
* srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
* dstCapacity : size of buffer 'dst' (which must be already allocated)
* @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
* or 0 if compression fails
* Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
*/
Compresses 'srcSize' bytes from buffer 'src'
into already allocated 'dst' buffer of size 'dstCapacity'.
Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
It also runs faster, so it's a recommended setting.
If the function cannot compress 'src' into a more limited 'dst' budget,
compression stops *immediately*, and the function result is zero.
In which case, 'dst' content is undefined (invalid).
srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
dstCapacity : size of buffer 'dst' (which must be already allocated)
@return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
or 0 if compression fails
Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
*/
LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
/*! LZ4_decompress_safe() :
* compressedSize : is the exact complete size of the compressed block.
* dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
* @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
* If destination buffer is not large enough, decoding will stop and output an error code (negative value).
* If the source stream is detected malformed, the function will stop decoding and return a negative result.
* Note 1 : This function is protected against malicious data packets :
* it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
* even if the compressed block is maliciously modified to order the decoder to do these actions.
* In such case, the decoder stops immediately, and considers the compressed block malformed.
* Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
* The implementation is free to send / store / derive this information in whichever way is most beneficial.
* If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
*/
compressedSize : is the exact complete size of the compressed block.
dstCapacity : is the size of destination buffer, which must be already allocated.
@return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
If destination buffer is not large enough, decoding will stop and output an error code (negative value).
If the source stream is detected malformed, the function will stop decoding and return a negative result.
Note : This function is protected against malicious data packets (never writes outside 'dst' buffer, nor read outside 'source' buffer).
*/
LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
@@ -187,8 +178,7 @@ LZ4LIB_API int LZ4_compressBound(int inputSize);
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
An acceleration value of "1" is the same as regular LZ4_compress_default()
Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
*/
LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
@@ -214,18 +204,7 @@ LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* d
* New value is necessarily <= input value.
* @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
* or 0 if compression fails.
*
* Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+):
* the produced compressed content could, in specific circumstances,
* require to be decompressed into a destination buffer larger
* by at least 1 byte than the content to decompress.
* If an application uses `LZ4_compress_destSize()`,
* it's highly recommended to update liblz4 to v1.9.2 or better.
* If this can't be done or ensured,
* the receiving decompression function should provide
* a dstCapacity which is > decompressedSize, by at least 1 byte.
* See https://github.com/lz4/lz4/issues/859 for details
*/
*/
LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
@@ -233,35 +212,25 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt
* Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
* into destination buffer 'dst' of size 'dstCapacity'.
* Up to 'targetOutputSize' bytes will be decoded.
* The function stops decoding on reaching this objective.
* This can be useful to boost performance
* whenever only the beginning of a block is required.
* The function stops decoding on reaching this objective,
* which can boost performance when only the beginning of a block is required.
*
* @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
* @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
* If source stream is detected malformed, function returns a negative result.
*
* Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
* Note : @return can be < targetOutputSize, if compressed block contains less data.
*
* Note 2 : targetOutputSize must be <= dstCapacity
*
* Note 3 : this function effectively stops decoding on reaching targetOutputSize,
* Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
* and expects targetOutputSize <= dstCapacity.
* It effectively stops decoding on reaching targetOutputSize,
* so dstCapacity is kind of redundant.
* This is because in older versions of this function,
* decoding operation would still write complete sequences.
* Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
* This is because in a previous version of this function,
* decoding operation would not "break" a sequence in the middle.
* As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
* it could write more bytes, though only up to dstCapacity.
* Some "margin" used to be required for this operation to work properly.
* Thankfully, this is no longer necessary.
* The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
*
* Note 4 : If srcSize is the exact size of the block,
* then targetOutputSize can be any value,
* including larger than the block's decompressed size.
* The function will, at most, generate block's decompressed size.
*
* Note 5 : If srcSize is _larger_ than block's compressed size,
* then targetOutputSize **MUST** be <= block's decompressed size.
* Otherwise, *silent corruption will occur*.
* This is no longer necessary.
* The function nonetheless keeps its signature, in an effort to not break API.
*/
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
@@ -420,10 +389,6 @@ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecod
*/
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
}
#endif /* LZ4_H_2983827168210 */
/*^*************************************
* !!!!!! STATIC LINKING ONLY !!!!!!
@@ -449,19 +414,14 @@ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int sr
* define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
******************************************************************************/
#ifdef LZ4_STATIC_LINKING_ONLY
#ifndef TRACY_LZ4_STATIC_3504398509
#define TRACY_LZ4_STATIC_3504398509
#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
#define LZ4LIB_STATIC_API LZ4LIB_API
#else
#define LZ4LIB_STATIC_API
#endif
namespace tracy
{
#ifdef LZ4_STATIC_LINKING_ONLY
/*! LZ4_compress_fast_extState_fastReset() :
* A variant of LZ4_compress_fast_extState().
@@ -503,137 +463,78 @@ LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const c
*/
LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
/*! In-place compression and decompression
*
* It's possible to have input and output sharing the same buffer,
* for highly contrained memory environments.
* In both cases, it requires input to lay at the end of the buffer,
* and decompression to start at beginning of the buffer.
* Buffer size must feature some margin, hence be larger than final size.
*
* |<------------------------buffer--------------------------------->|
* |<-----------compressed data--------->|
* |<-----------decompressed size------------------>|
* |<----margin---->|
*
* This technique is more useful for decompression,
* since decompressed size is typically larger,
* and margin is short.
*
* In-place decompression will work inside any buffer
* which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
* This presumes that decompressedSize > compressedSize.
* Otherwise, it means compression actually expanded data,
* and it would be more efficient to store such data with a flag indicating it's not compressed.
* This can happen when data is not compressible (already compressed, or encrypted).
*
* For in-place compression, margin is larger, as it must be able to cope with both
* history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
* and data expansion, which can happen when input is not compressible.
* As a consequence, buffer size requirements are much higher,
* and memory savings offered by in-place compression are more limited.
*
* There are ways to limit this cost for compression :
* - Reduce history size, by modifying LZ4_DISTANCE_MAX.
* Note that it is a compile-time constant, so all compressions will apply this limit.
* Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
* so it's a reasonable trick when inputs are known to be small.
* - Require the compressor to deliver a "maximum compressed size".
* This is the `dstCapacity` parameter in `LZ4_compress*()`.
* When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
* in which case, the return code will be 0 (zero).
* The caller must be ready for these cases to happen,
* and typically design a backup scheme to send data uncompressed.
* The combination of both techniques can significantly reduce
* the amount of margin required for in-place compression.
*
* In-place compression can work in any buffer
* which size is >= (maxCompressedSize)
* with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
* LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
* so it's possible to reduce memory requirements by playing with them.
*/
#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
#endif
#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
}
#endif /* LZ4_STATIC_3504398509 */
#endif /* LZ4_STATIC_LINKING_ONLY */
#ifndef TRACY_LZ4_H_98237428734687
#define TRACY_LZ4_H_98237428734687
namespace tracy
{
/*-************************************************************
* Private Definitions
* PRIVATE DEFINITIONS
**************************************************************
* Do not use these definitions directly.
* They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
* Accessing members will expose user code to API and/or ABI break in future versions of the library.
* Accessing members will expose code to API and/or ABI break in future versions of the library.
**************************************************************/
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
typedef int8_t LZ4_i8;
typedef uint8_t LZ4_byte;
typedef uint16_t LZ4_u16;
typedef uint32_t LZ4_u32;
#else
typedef signed char LZ4_i8;
typedef unsigned char LZ4_byte;
typedef unsigned short LZ4_u16;
typedef unsigned int LZ4_u32;
#endif
#include <stdint.h>
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
struct LZ4_stream_t_internal {
LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
LZ4_u32 currentOffset;
LZ4_u32 tableType;
const LZ4_byte* dictionary;
uint32_t hashTable[LZ4_HASH_SIZE_U32];
uint32_t currentOffset;
uint16_t dirty;
uint16_t tableType;
const uint8_t* dictionary;
const LZ4_stream_t_internal* dictCtx;
LZ4_u32 dictSize;
uint32_t dictSize;
};
typedef struct {
const LZ4_byte* externalDict;
const uint8_t* externalDict;
size_t extDictSize;
const LZ4_byte* prefixEnd;
const uint8_t* prefixEnd;
size_t prefixSize;
} LZ4_streamDecode_t_internal;
#else
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
struct LZ4_stream_t_internal {
unsigned int hashTable[LZ4_HASH_SIZE_U32];
unsigned int currentOffset;
unsigned short dirty;
unsigned short tableType;
const unsigned char* dictionary;
const LZ4_stream_t_internal* dictCtx;
unsigned int dictSize;
};
typedef struct {
const unsigned char* externalDict;
const unsigned char* prefixEnd;
size_t extDictSize;
size_t prefixSize;
} LZ4_streamDecode_t_internal;
#endif
/*! LZ4_stream_t :
* Do not use below internal definitions directly !
* Declare or allocate an LZ4_stream_t instead.
* information structure to track an LZ4 stream.
* LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
* The structure definition can be convenient for static allocation
* (on stack, or as part of larger structure).
* Init this structure with LZ4_initStream() before first use.
* note : only use this definition in association with static linking !
* this definition is not API/ABI safe, and may change in future versions.
* this definition is not API/ABI safe, and may change in a future version.
*/
#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */
#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) /*AS-400*/ )
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
union LZ4_stream_u {
void* table[LZ4_STREAMSIZE_VOIDP];
unsigned long long table[LZ4_STREAMSIZE_U64];
LZ4_stream_t_internal internal_donotuse;
}; /* previously typedef'd to LZ4_stream_t */
} ; /* previously typedef'd to LZ4_stream_t */
/*! LZ4_initStream() : v1.9.0+
* An LZ4_stream_t structure must be initialized at least once.
@@ -667,7 +568,6 @@ union LZ4_streamDecode_u {
} ; /* previously typedef'd to LZ4_streamDecode_t */
/*-************************************
* Obsolete Functions
**************************************/
@@ -686,34 +586,34 @@ union LZ4_streamDecode_u {
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
#else
# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define LZ4_DEPRECATED(message) [[deprecated(message)]]
# elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
# elif (LZ4_GCC_VERSION >= 301)
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
# elif defined(_MSC_VER)
# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
# else
# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
# define LZ4_DEPRECATED(message) /* disabled */
# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
# define LZ4_DEPRECATED(message)
# endif
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
/*! Obsolete compression functions (since v1.7.3) */
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
/* Obsolete compression functions */
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* source, char* dest, int sourceSize);
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
/*! Obsolete decompression functions (since v1.8.0) */
/* Obsolete decompression functions */
LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
/* Obsolete streaming functions (since v1.7.0)
* degraded functionality; do not use!
/* Obsolete streaming functions; degraded functionality; do not use!
*
* In order to perform streaming compression, these functions depended on data
* that is no longer tracked in the state. They have been preserved as well as
@@ -727,22 +627,23 @@ LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStre
LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer);
LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
/*! Obsolete streaming decoding functions (since v1.7.0) */
/* Obsolete streaming decoding functions */
LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
/*! LZ4_decompress_fast() : **unsafe!**
* These functions used to be faster than LZ4_decompress_safe(),
* but this is no longer the case. They are now slower.
* but it has changed, and they are now slower than LZ4_decompress_safe().
* This is because LZ4_decompress_fast() doesn't know the input size,
* and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
* and therefore must progress more cautiously in the input buffer to not read beyond the end of block.
* On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
* As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
*
* The last remaining LZ4_decompress_fast() specificity is that
* it can decompress a block without knowing its compressed size.
* Such functionality can be achieved in a more secure manner
* by employing LZ4_decompress_safe_partial().
* Such functionality could be achieved in a more secure manner,
* by also providing the maximum size of input buffer,
* but it would require new prototypes, and adaptation of the implementation to this new use case.
*
* Parameters:
* originalSize : is the uncompressed size to regenerate.
@@ -757,6 +658,7 @@ LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4
* But they may happen if input data is invalid (error or intentional tampering).
* As a consequence, use these functions in trusted environments with trusted data **only**.
*/
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
@@ -774,4 +676,4 @@ LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
}
#endif /* LZ4_H_98237428734687 */
#endif /* LZ4_H_2983827168210 */

View File

@@ -53,7 +53,7 @@
#include "tracy_lz4hc.hpp"
/*=== Common definitions ===*/
/*=== Common LZ4 definitions ===*/
#if defined(__GNUC__)
# pragma GCC diagnostic ignored "-Wunused-function"
#endif
@@ -61,15 +61,21 @@
# pragma clang diagnostic ignored "-Wunused-function"
#endif
namespace tracy
{
/*=== Enums ===*/
typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
}
#define LZ4_COMMONDEFS_ONLY
#ifndef LZ4_SRC_INCLUDED
#include "tracy_lz4.cpp" /* LZ4_count, constants, mem */
#endif
/*=== Enums ===*/
typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
namespace tracy
{
/*=== Constants ===*/
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
@@ -85,9 +91,6 @@ typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
namespace tracy
{
static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
@@ -96,7 +99,7 @@ static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)
**************************************/
static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
{
MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable));
MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
}
@@ -155,28 +158,13 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
return back;
}
#if defined(_MSC_VER)
# define LZ4HC_rotl32(x,r) _rotl(x,r)
#else
# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif
static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
{
size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
if (bitsToRotate == 0) return pattern;
return LZ4HC_rotl32(pattern, (int)bitsToRotate);
}
/* LZ4HC_countPattern() :
* pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
static unsigned
LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
{
const BYTE* const iStart = ip;
reg_t const pattern = (sizeof(pattern)==8) ?
(reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32;
reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32;
while (likely(ip < iEnd-(sizeof(pattern)-1))) {
reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
@@ -222,16 +210,6 @@ LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
return (unsigned)(iStart - ip);
}
/* LZ4HC_protectDictEnd() :
* Checks if the match is in the last 3 bytes of the dictionary, so reading the
* 4 byte MINMATCH would overflow.
* @returns true if the match index is okay.
*/
static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
{
return ((U32)((dictLimit - 1) - matchIndex) >= 3);
}
typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
@@ -257,7 +235,7 @@ LZ4HC_InsertAndGetWiderMatch (
const U32 dictLimit = hc4->dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const U32 ipIndex = (U32)(ip - base);
const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
const BYTE* const dictBase = hc4->dictBase;
int const lookBackLength = (int)(ip-iLowLimit);
int nbAttempts = maxNbAttempts;
@@ -274,7 +252,7 @@ LZ4HC_InsertAndGetWiderMatch (
DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
matchIndex, lowestMatchIndex);
while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) {
int matchLength=0;
nbAttempts--;
assert(matchIndex < ipIndex);
@@ -316,21 +294,14 @@ LZ4HC_InsertAndGetWiderMatch (
if (chainSwap && matchLength==longest) { /* better match => select a better chain */
assert(lookBackLength==0); /* search forward only */
if (matchIndex + (U32)longest <= ipIndex) {
int const kTrigger = 4;
U32 distanceToNextMatch = 1;
int const end = longest - MINMATCH + 1;
int step = 1;
int accel = 1 << kTrigger;
int pos;
for (pos = 0; pos < end; pos += step) {
for (pos = 0; pos <= longest - MINMATCH; pos++) {
U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
step = (accel++ >> kTrigger);
if (candidateDist > distanceToNextMatch) {
distanceToNextMatch = candidateDist;
matchChainPos = (U32)pos;
accel = 1 << kTrigger;
}
}
} }
if (distanceToNextMatch > 1) {
if (distanceToNextMatch > matchIndex) break; /* avoid overflow */
matchIndex -= distanceToNextMatch;
@@ -349,61 +320,34 @@ LZ4HC_InsertAndGetWiderMatch (
} else {
repeat = rep_not;
} }
if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
&& LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) {
const int extDict = matchCandidateIdx < dictLimit;
const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx;
if ( (repeat == rep_confirmed)
&& (matchCandidateIdx >= dictLimit) ) { /* same segment only */
const BYTE* const matchPtr = base + matchCandidateIdx;
if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
const BYTE* const dictStart = dictBase + hc4->lowLimit;
const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit;
size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
if (extDict && matchPtr + forwardPatternLength == iLimit) {
U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
}
{ const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
size_t currentSegmentLength;
if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) {
U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
}
/* Limit backLength not go further than lowestMatchIndex */
backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
assert(matchCandidateIdx - backLength >= lowestMatchIndex);
currentSegmentLength = backLength + forwardPatternLength;
/* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex))
matchIndex = newMatchIndex;
else {
/* Can only happen if started in the prefix */
assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
matchIndex = dictLimit;
size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
const BYTE* const lowestMatchPtr = (lowPrefixPtr + LZ4_DISTANCE_MAX >= ip) ? lowPrefixPtr : ip - LZ4_DISTANCE_MAX;
size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
size_t const currentSegmentLength = backLength + forwardPatternLength;
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
} else {
matchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
if (lookBackLength==0) { /* no back possible */
size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
if ((size_t)longest < maxML) {
assert(base + matchIndex < ip);
if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
assert(maxML < 2 GB);
longest = (int)maxML;
*matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
*startpos = ip;
}
} else {
U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) {
assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
matchIndex = dictLimit;
} else {
matchIndex = newMatchIndex;
if (lookBackLength==0) { /* no back possible */
size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
if ((size_t)longest < maxML) {
assert(base + matchIndex != ip);
if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break;
assert(maxML < 2 GB);
longest = (int)maxML;
*matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
*startpos = ip;
}
{ U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
if (distToNextPattern > matchIndex) break; /* avoid overflow */
matchIndex -= distToNextPattern;
} } } } }
{ U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
if (distToNextPattern > matchIndex) break; /* avoid overflow */
matchIndex -= distToNextPattern;
} } }
continue;
} }
} } /* PA optimization */
@@ -414,7 +358,7 @@ LZ4HC_InsertAndGetWiderMatch (
} /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
if ( dict == usingDictCtxHc
&& nbAttempts > 0
&& nbAttempts
&& ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
@@ -464,90 +408,74 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl
* @return : 0 if ok,
* 1 if buffer issue detected */
LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
const BYTE** _ip,
BYTE** _op,
const BYTE** _anchor,
const BYTE** ip,
BYTE** op,
const BYTE** anchor,
int matchLength,
const BYTE* const match,
limitedOutput_directive limit,
BYTE* oend)
{
#define ip (*_ip)
#define op (*_op)
#define anchor (*_anchor)
size_t length;
BYTE* const token = op++;
BYTE* const token = (*op)++;
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
static const BYTE* start = NULL;
static U32 totalCost = 0;
U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
U32 const ll = (U32)(ip - anchor);
U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start);
U32 const ll = (U32)(*ip - *anchor);
U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
if (start==NULL) start = anchor; /* only works for single segment */
if (start==NULL) start = *anchor; /* only works for single segment */
/* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u",
pos,
(U32)(ip - anchor), matchLength, (U32)(ip-match),
(U32)(*ip - *anchor), matchLength, (U32)(*ip-match),
cost, totalCost);
totalCost += cost;
#endif
/* Encode Literal length */
length = (size_t)(ip - anchor);
LZ4_STATIC_ASSERT(notLimited == 0);
/* Check output limit */
if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
(int)length, (int)(oend - op));
return 1;
}
length = (size_t)(*ip - *anchor);
if ((limit) && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */
if (length >= RUN_MASK) {
size_t len = length - RUN_MASK;
*token = (RUN_MASK << ML_BITS);
for(; len >= 255 ; len -= 255) *op++ = 255;
*op++ = (BYTE)len;
for(; len >= 255 ; len -= 255) *(*op)++ = 255;
*(*op)++ = (BYTE)len;
} else {
*token = (BYTE)(length << ML_BITS);
}
/* Copy Literals */
LZ4_wildCopy8(op, anchor, op + length);
op += length;
LZ4_wildCopy8(*op, *anchor, (*op) + length);
*op += length;
/* Encode Offset */
assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
assert( (*ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
LZ4_writeLE16(*op, (U16)(*ip-match)); *op += 2;
/* Encode MatchLength */
assert(matchLength >= MINMATCH);
length = (size_t)matchLength - MINMATCH;
if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
DEBUGLOG(6, "Not enough room to write match length");
return 1; /* Check output limit */
}
if ((limit) && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
if (length >= ML_MASK) {
*token += ML_MASK;
length -= ML_MASK;
for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
if (length >= 255) { length -= 255; *op++ = 255; }
*op++ = (BYTE)length;
for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; }
if (length >= 255) { length -= 255; *(*op)++ = 255; }
*(*op)++ = (BYTE)length;
} else {
*token += (BYTE)(length);
}
/* Prepare next loop */
ip += matchLength;
anchor = ip;
*ip += matchLength;
*anchor = *ip;
return 0;
}
#undef ip
#undef op
#undef anchor
LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
LZ4HC_CCtx_internal* const ctx,
@@ -555,7 +483,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
char* const dest,
int* srcSizePtr,
int const maxOutputSize,
int maxNbAttempts,
unsigned maxNbAttempts,
const limitedOutput_directive limit,
const dictCtx_directive dict
)
@@ -585,7 +513,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
/* init */
*srcSizePtr = 0;
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
/* Main Loop */
while (ip <= mflimit) {
@@ -657,11 +585,7 @@ _Search3:
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
ip = start2;
optr = op;
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
ml = ml2;
ref = ref2;
goto _dest_overflow;
}
if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) goto _dest_overflow;
continue;
}
@@ -733,18 +657,17 @@ _Search3:
_last_literals:
/* Encode Last Literals */
{ size_t lastRunSize = (size_t)(iend - anchor); /* literals */
size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
size_t const totalSize = 1 + llAdd + lastRunSize;
size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
size_t const totalSize = 1 + litLength + lastRunSize;
if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
if (limit && (op + totalSize > oend)) {
if (limit == limitedOutput) return 0;
if (limit == limitedOutput) return 0; /* Check output limit */
/* adapt lastRunSize to fill 'dest' */
lastRunSize = (size_t)(oend - op) - 1 /*token*/;
llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
lastRunSize -= llAdd;
lastRunSize = (size_t)(oend - op) - 1;
litLength = (lastRunSize + 255 - RUN_MASK) / 255;
lastRunSize -= litLength;
}
DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
ip = anchor + lastRunSize;
if (lastRunSize >= RUN_MASK) {
size_t accumulator = lastRunSize - RUN_MASK;
@@ -764,25 +687,9 @@ _last_literals:
_dest_overflow:
if (limit == fillOutput) {
/* Assumption : ip, anchor, ml and ref must be set correctly */
size_t const ll = (size_t)(ip - anchor);
size_t const ll_addbytes = (ll + 240) / 255;
size_t const ll_totalCost = 1 + ll_addbytes + ll;
BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
DEBUGLOG(6, "Last sequence overflowing");
op = optr; /* restore correct out pointer */
if (op + ll_totalCost <= maxLitPos) {
/* ll validated; now adjust match length */
size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
assert(maxMlSize < INT_MAX); assert(ml >= 0);
if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend);
} }
goto _last_literals;
}
/* compression failed */
return 0;
}
@@ -793,7 +700,7 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
int const nbSearches, size_t sufficient_len,
const limitedOutput_directive limit, int const fullUpdate,
const dictCtx_directive dict,
const HCfavor_e favorDecSpeed);
HCfavor_e favorDecSpeed);
LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
@@ -810,7 +717,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
typedef struct {
lz4hc_strat_e strat;
int nbSearches;
U32 nbSearches;
U32 targetLength;
} cParams_t;
static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
@@ -829,8 +736,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
{ lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
};
DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
ctx, src, *srcSizePtr, limit);
DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d)", ctx, src, *srcSizePtr);
if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */
if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
@@ -850,7 +756,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
assert(cParam.strat == lz4opt);
result = LZ4HC_compress_optimal(ctx,
src, dst, srcSizePtr, dstCapacity,
cParam.nbSearches, cParam.targetLength, limit,
(int)cParam.nbSearches, cParam.targetLength, limit,
cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */
dict, favor);
}
@@ -923,22 +829,27 @@ LZ4HC_compress_generic (
int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); }
#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
* it reports an aligment of 8-bytes,
* while actually aligning LZ4_streamHC_t on 4 bytes. */
static size_t LZ4_streamHC_t_alignment(void)
{
#if LZ4_ALIGN_TEST
typedef struct { char c; LZ4_streamHC_t t; } t_a;
return sizeof(t_a) - sizeof(LZ4_streamHC_t);
#else
return 1; /* effectively disabled */
#endif
struct { char c; LZ4_streamHC_t t; } t_a;
return sizeof(t_a) - sizeof(t_a.t);
}
#endif
/* state is presumed correctly initialized,
* in which case its size and alignment have already been validate */
int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
{
LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0;
#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
* it reports an aligment of 8-bytes,
* while actually aligning LZ4_streamHC_t on 4 bytes. */
assert(((size_t)state & (LZ4_streamHC_t_alignment() - 1)) == 0); /* check alignment */
#endif
if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */
LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel);
LZ4HC_init_internal (ctx, (const BYTE*)src);
if (dstCapacity < LZ4_compressBound(srcSize))
@@ -987,11 +898,10 @@ int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* s
/* allocation */
LZ4_streamHC_t* LZ4_createStreamHC(void)
{
LZ4_streamHC_t* const state =
(LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t));
if (state == NULL) return NULL;
LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT);
return state;
LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
if (LZ4_streamHCPtr==NULL) return NULL;
LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); /* full initialization, malloc'ed buffer can be full of garbage */
return LZ4_streamHCPtr;
}
int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
@@ -1006,16 +916,22 @@ int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
{
LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer;
/* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size);
/* check conditions */
if (buffer == NULL) return NULL;
if (size < sizeof(LZ4_streamHC_t)) return NULL;
if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL;
/* init */
{ LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse);
MEM_INIT(hcstate, 0, sizeof(*hcstate)); }
#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
* it reports an aligment of 8-bytes,
* while actually aligning LZ4_streamHC_t on 4 bytes. */
if (((size_t)buffer) & (LZ4_streamHC_t_alignment() - 1)) return NULL; /* alignment check */
#endif
/* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", LZ4_streamHCPtr, (unsigned)size);
/* end-base will trigger a clearTable on starting compression */
LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1;
LZ4_streamHCPtr->internal_donotuse.base = NULL;
LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0;
LZ4_streamHCPtr->internal_donotuse.dirty = 0;
LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT);
return LZ4_streamHCPtr;
}
@@ -1060,7 +976,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
const char* dictionary, int dictSize)
{
LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize);
assert(LZ4_streamHCPtr != NULL);
if (dictSize > 64 KB) {
dictionary += (size_t)dictSize - 64 KB;
@@ -1096,20 +1012,16 @@ static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBl
ctxPtr->base = newBlock - ctxPtr->dictLimit;
ctxPtr->end = newBlock;
ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
/* cannot reference an extDict and a dictCtx at the same time */
ctxPtr->dictCtx = NULL;
}
static int
LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
const char* src, char* dst,
int* srcSizePtr, int dstCapacity,
limitedOutput_directive limit)
static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
const char* src, char* dst,
int* srcSizePtr, int dstCapacity,
limitedOutput_directive limit)
{
LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
LZ4_streamHCPtr, src, *srcSizePtr, limit);
DEBUGLOG(4, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d)",
LZ4_streamHCPtr, src, *srcSizePtr);
assert(ctxPtr != NULL);
/* auto-init if forgotten */
if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
@@ -1133,7 +1045,8 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
if (sourceEnd > dictEnd) sourceEnd = dictEnd;
ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
} }
}
}
return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
}
@@ -1153,30 +1066,23 @@ int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const ch
/* LZ4_saveDictHC :
* save history content
* into a user-provided buffer
* which is then used to continue compression
*/
/* dictionary saving */
int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
{
LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
assert(prefixSize >= 0);
DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
if (dictSize > 64 KB) dictSize = 64 KB;
if (dictSize < 4) dictSize = 0;
if (dictSize > prefixSize) dictSize = prefixSize;
if (safeBuffer == NULL) assert(dictSize == 0);
if (dictSize > 0)
memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
streamPtr->end = (const BYTE*)safeBuffer + dictSize;
streamPtr->base = streamPtr->end - endIndex;
streamPtr->dictLimit = endIndex - (U32)dictSize;
streamPtr->lowLimit = endIndex - (U32)dictSize;
if (streamPtr->nextToUpdate < streamPtr->dictLimit)
streamPtr->nextToUpdate = streamPtr->dictLimit;
if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit;
}
return dictSize;
}
@@ -1326,13 +1232,8 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
const dictCtx_directive dict,
const HCfavor_e favorDecSpeed)
{
int retval = 0;
#define TRAILING_LITERALS 3
#ifdef LZ4HC_HEAPMODE
LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
#else
LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */
#endif
const BYTE* ip = (const BYTE*) source;
const BYTE* anchor = ip;
@@ -1342,19 +1243,15 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
BYTE* op = (BYTE*) dst;
BYTE* opSaved = (BYTE*) dst;
BYTE* oend = op + dstCapacity;
int ovml = MINMATCH; /* overflow - last sequence */
const BYTE* ovref = NULL;
/* init */
#ifdef LZ4HC_HEAPMODE
if (opt == NULL) goto _return_label;
#endif
DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
*srcSizePtr = 0;
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
/* Main Loop */
assert(ip - anchor < LZ4_MAX_INPUT_SIZE);
while (ip <= mflimit) {
int const llen = (int)(ip - anchor);
int best_mlen, best_off;
@@ -1368,11 +1265,8 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
int const firstML = firstMatch.len;
const BYTE* const matchPos = ip - firstMatch.off;
opSaved = op;
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */
ovml = firstML;
ovref = matchPos;
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */
goto _dest_overflow;
}
continue;
}
@@ -1514,7 +1408,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
best_off = opt[last_match_pos].off;
cur = last_match_pos - best_mlen;
encode: /* cur, last_match_pos, best_mlen, best_off must be set */
encode: /* cur, last_match_pos, best_mlen, best_off must be set */
assert(cur < LZ4_OPT_NUM);
assert(last_match_pos >= 1); /* == 1 when only one candidate */
DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
@@ -1544,31 +1438,25 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
assert(ml >= MINMATCH);
assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
opSaved = op;
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */
ovml = ml;
ovref = ip - offset;
if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */
goto _dest_overflow;
} } }
} }
} /* while (ip <= mflimit) */
_last_literals:
_last_literals:
/* Encode Last Literals */
{ size_t lastRunSize = (size_t)(iend - anchor); /* literals */
size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
size_t const totalSize = 1 + llAdd + lastRunSize;
size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
size_t const totalSize = 1 + litLength + lastRunSize;
if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
if (limit && (op + totalSize > oend)) {
if (limit == limitedOutput) { /* Check output limit */
retval = 0;
goto _return_label;
}
if (limit == limitedOutput) return 0; /* Check output limit */
/* adapt lastRunSize to fill 'dst' */
lastRunSize = (size_t)(oend - op) - 1 /*token*/;
llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
lastRunSize -= llAdd;
lastRunSize = (size_t)(oend - op) - 1;
litLength = (lastRunSize + 255 - RUN_MASK) / 255;
lastRunSize -= litLength;
}
DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
ip = anchor + lastRunSize;
if (lastRunSize >= RUN_MASK) {
size_t accumulator = lastRunSize - RUN_MASK;
@@ -1584,37 +1472,14 @@ _last_literals:
/* End */
*srcSizePtr = (int) (((const char*)ip) - source);
retval = (int) ((char*)op-dst);
goto _return_label;
return (int) ((char*)op-dst);
_dest_overflow:
if (limit == fillOutput) {
/* Assumption : ip, anchor, ovml and ovref must be set correctly */
size_t const ll = (size_t)(ip - anchor);
size_t const ll_addbytes = (ll + 240) / 255;
size_t const ll_totalCost = 1 + ll_addbytes + ll;
BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved));
op = opSaved; /* restore correct out pointer */
if (op + ll_totalCost <= maxLitPos) {
/* ll validated; now adjust match length */
size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
assert(maxMlSize < INT_MAX); assert(ovml >= 0);
if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize;
if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
} }
goto _last_literals;
}
_return_label:
#ifdef LZ4HC_HEAPMODE
FREEMEM(opt);
#endif
return retval;
}
_dest_overflow:
if (limit == fillOutput) {
op = opSaved; /* restore correct out pointer */
goto _last_literals;
}
return 0;
}
}
}

View File

@@ -38,6 +38,8 @@
/* note : lz4hc requires lz4.h/lz4.c for compilation */
#include "tracy_lz4.hpp" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
namespace tracy
{
/* --- Useful constants --- */
#define LZ4HC_CLEVEL_MIN 3
@@ -45,8 +47,6 @@
#define LZ4HC_CLEVEL_OPT_MIN 10
#define LZ4HC_CLEVEL_MAX 12
namespace tracy
{
/*-************************************
* Block Compression
@@ -196,32 +196,57 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#include <stdint.h>
typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
struct LZ4HC_CCtx_internal
{
LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE];
LZ4_u16 chainTable[LZ4HC_MAXD];
const LZ4_byte* end; /* next block here to continue on current prefix */
const LZ4_byte* base; /* All index relative to this position */
const LZ4_byte* dictBase; /* alternate base for extDict */
LZ4_u32 dictLimit; /* below that point, need extDict */
LZ4_u32 lowLimit; /* below that point, no more dict */
LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */
short compressionLevel;
LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set,
otherwise, favor compression ratio */
LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */
uint32_t hashTable[LZ4HC_HASHTABLESIZE];
uint16_t chainTable[LZ4HC_MAXD];
const uint8_t* end; /* next block here to continue on current prefix */
const uint8_t* base; /* All index relative to this position */
const uint8_t* dictBase; /* alternate base for extDict */
uint32_t dictLimit; /* below that point, need extDict */
uint32_t lowLimit; /* below that point, no more dict */
uint32_t nextToUpdate; /* index from which to continue dictionary update */
short compressionLevel;
int8_t favorDecSpeed; /* favor decompression speed if this flag set,
otherwise, favor compression ratio */
int8_t dirty; /* stream has to be fully reset if this flag is set */
const LZ4HC_CCtx_internal* dictCtx;
};
#else
typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
struct LZ4HC_CCtx_internal
{
unsigned int hashTable[LZ4HC_HASHTABLESIZE];
unsigned short chainTable[LZ4HC_MAXD];
const unsigned char* end; /* next block here to continue on current prefix */
const unsigned char* base; /* All index relative to this position */
const unsigned char* dictBase; /* alternate base for extDict */
unsigned int dictLimit; /* below that point, need extDict */
unsigned int lowLimit; /* below that point, no more dict */
unsigned int nextToUpdate; /* index from which to continue dictionary update */
short compressionLevel;
char favorDecSpeed; /* favor decompression speed if this flag set,
otherwise, favor compression ratio */
char dirty; /* stream has to be fully reset if this flag is set */
const LZ4HC_CCtx_internal* dictCtx;
};
#endif
/* Do not use these definitions directly !
* Declare or allocate an LZ4_streamHC_t instead.
*/
#define LZ4_STREAMHCSIZE 262200 /* static size, for inter-version compatibility */
#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*))
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56 + ((sizeof(void*)==16) ? 56 : 0) /* AS400*/ ) /* 262200 or 262256*/
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
union LZ4_streamHC_u {
void* table[LZ4_STREAMHCSIZE_VOIDP];
size_t table[LZ4_STREAMHCSIZE_SIZET];
LZ4HC_CCtx_internal internal_donotuse;
}; /* previously typedef'd to LZ4_streamHC_t */
@@ -289,6 +314,7 @@ LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStre
*/
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
}
#endif /* LZ4_HC_H_19834876238432 */
@@ -303,11 +329,8 @@ LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionL
* after successfull usage in real-life scenarios.
***************************************************/
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
#ifndef TRACY_LZ4_HC_SLO_098092834
#define TRACY_LZ4_HC_SLO_098092834
#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
#include "tracy_lz4.hpp"
#ifndef LZ4_HC_SLO_098092834
#define LZ4_HC_SLO_098092834
namespace tracy
{

255
common/tracy_sema.h Normal file
View File

@@ -0,0 +1,255 @@
// Copyright (c) 2015 Jeff Preshing
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#ifndef __TRACY_CPP11OM_SEMAPHORE_H__
#define __TRACY_CPP11OM_SEMAPHORE_H__
#include <atomic>
#include <cassert>
#if defined(__MACH__)
#include <mach/mach.h>
#elif defined(__unix__)
#include <semaphore.h>
#endif
namespace tracy
{
#if defined(_WIN32)
//---------------------------------------------------------
// Semaphore (Windows)
//---------------------------------------------------------
#ifndef MAXLONG
enum { MAXLONG = 0x7fffffff };
#endif
#ifndef INFINITE
enum { INFINITE = 0xFFFFFFFF };
#endif
#ifndef _WINDOWS_
typedef void* HANDLE;
extern "C" __declspec(dllimport) HANDLE __stdcall CreateSemaphoreA( void*, long, long, const char* );
extern "C" __declspec(dllimport) int __stdcall CloseHandle( HANDLE );
extern "C" __declspec(dllimport) unsigned long __stdcall WaitForSingleObject( HANDLE, unsigned long );
extern "C" __declspec(dllimport) int __stdcall ReleaseSemaphore( HANDLE, long, long* );
#endif
class Semaphore
{
private:
HANDLE m_hSema;
Semaphore(const Semaphore& other) = delete;
Semaphore& operator=(const Semaphore& other) = delete;
public:
Semaphore(int initialCount = 0)
{
assert(initialCount >= 0);
m_hSema = CreateSemaphoreA(NULL, initialCount, MAXLONG, NULL);
}
~Semaphore()
{
CloseHandle(m_hSema);
}
void wait()
{
WaitForSingleObject(m_hSema, INFINITE);
}
void signal(int count = 1)
{
ReleaseSemaphore(m_hSema, count, NULL);
}
};
#elif defined(__MACH__)
//---------------------------------------------------------
// Semaphore (Apple iOS and OSX)
// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
//---------------------------------------------------------
class Semaphore
{
private:
semaphore_t m_sema;
Semaphore(const Semaphore& other) = delete;
Semaphore& operator=(const Semaphore& other) = delete;
public:
Semaphore(int initialCount = 0)
{
assert(initialCount >= 0);
semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
}
~Semaphore()
{
semaphore_destroy(mach_task_self(), m_sema);
}
void wait()
{
semaphore_wait(m_sema);
}
void signal()
{
semaphore_signal(m_sema);
}
void signal(int count)
{
while (count-- > 0)
{
semaphore_signal(m_sema);
}
}
};
#elif defined(__unix__)
//---------------------------------------------------------
// Semaphore (POSIX, Linux)
//---------------------------------------------------------
class Semaphore
{
private:
sem_t m_sema;
Semaphore(const Semaphore& other) = delete;
Semaphore& operator=(const Semaphore& other) = delete;
public:
Semaphore(int initialCount = 0)
{
assert(initialCount >= 0);
sem_init(&m_sema, 0, initialCount);
}
~Semaphore()
{
sem_destroy(&m_sema);
}
void wait()
{
// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
int rc;
do
{
rc = sem_wait(&m_sema);
}
while (rc == -1 && errno == EINTR);
}
void signal()
{
sem_post(&m_sema);
}
void signal(int count)
{
while (count-- > 0)
{
sem_post(&m_sema);
}
}
};
#else
#error Unsupported platform!
#endif
//---------------------------------------------------------
// LightweightSemaphore
//---------------------------------------------------------
class LightweightSemaphore
{
private:
std::atomic<int> m_count;
Semaphore m_sema;
void waitWithPartialSpinning()
{
int oldCount;
// Is there a better way to set the initial spin count?
// If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
// as threads start hitting the kernel semaphore.
int spin = 10000;
while (spin--)
{
oldCount = m_count.load(std::memory_order_relaxed);
if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire))
return;
std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop.
}
oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
if (oldCount <= 0)
{
m_sema.wait();
}
}
public:
LightweightSemaphore(int initialCount = 0) : m_count(initialCount)
{
assert(initialCount >= 0);
}
bool tryWait()
{
int oldCount = m_count.load(std::memory_order_relaxed);
return (oldCount > 0 && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire));
}
void wait()
{
if (!tryWait())
waitWithPartialSpinning();
}
void signal(int count = 1)
{
int oldCount = m_count.fetch_add(count, std::memory_order_release);
int toRelease = -oldCount < count ? -oldCount : count;
if (toRelease > 0)
{
m_sema.signal(toRelease);
}
}
};
typedef LightweightSemaphore DefaultSemaphoreType;
}
#endif // __CPP11OM_SEMAPHORE_H__

View File

@@ -1,13 +0,0 @@
ARCH := $(shell uname -m)
ifeq (1,$(shell $(CC) --version | grep clang > /dev/null && echo 1 || echo 0))
ifeq (1,$(shell ld.mold --version > /dev/null 2> /dev/null && echo 1 || echo 0))
LDFLAGS := -fuse-ld=mold
endif
endif
ifndef TRACY_NO_ISA_EXTENSIONS
ifeq ($(ARCH),x86_64)
CFLAGS += -msse4.1
endif
endif

View File

@@ -1,19 +0,0 @@
ARCH := $(shell uname -m)
ifeq (0,$(shell $(CC) --version | grep clang > /dev/null && echo 1 || echo 0))
CFLAGS += -s
else
ifeq (1,$(shell ld.mold --version > /dev/null 2> /dev/null && echo 1 || echo 0))
LDFLAGS := -s -fuse-ld=mold
else
LDFLAGS := -s
endif
endif
ifndef TRACY_NO_ISA_EXTENSIONS
ifneq (,$(filter $(ARCH),aarch64 arm64))
CFLAGS += -mcpu=native
else
CFLAGS += -march=native
endif
endif

View File

@@ -1,82 +0,0 @@
# Common code needed by most Tracy Unix Makefiles.
# Ensure these are simply-substituted variables, without changing their values.
LIBS := $(LIBS)
# Tracy does not use TBB directly, but the implementation of parallel algorithms
# in some versions of libstdc++ depends on TBB. When it does, you must
# explicitly link against -ltbb.
#
# Some distributions have pgk-config files for TBB, others don't.
ifeq (0,$(shell pkg-config --libs tbb >/dev/null 2>&1; echo $$?))
LIBS += $(shell pkg-config --libs tbb)
else ifeq (0,$(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?))
LIBS += -ltbb
endif
OBJDIRBASE := obj/$(BUILD)
OBJDIR := $(OBJDIRBASE)/o/o/o
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
OBJ3 := $(addprefix $(OBJDIR)/,$(SRC3:%.m=%.o))
OBJ4 := $(addprefix $(OBJDIR)/,$(SRC4:%.S=%.o))
all: $(IMAGE)
$(OBJDIR)/%.o: %.cpp
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.cpp
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(OBJDIR)/%.o: %.c
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.c
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(OBJDIR)/%.o: %.m
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.m
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(OBJDIR)/%.o: %.S
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.S
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
ifeq (yes,$(SHARED_LIBRARY))
$(IMAGE): $(OBJ) $(OBJ2) $(OBJ4)
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ4) $(LIBS) -shared -o $@
else
$(IMAGE): $(OBJ) $(OBJ2) $(OBJ3) $(OBJ4)
$(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ3) $(OBJ4) $(LIBS) -o $@
endif
ifneq "$(MAKECMDGOALS)" "clean"
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d)) $(addprefix $(OBJDIR)/,$(SRC3:.m=.d)) $(addprefix $(OBJDIR)/,$(SRC4:.S=.d))
endif
clean:
rm -rf $(OBJDIRBASE) $(IMAGE)*
.PHONY: clean all

View File

@@ -1,16 +0,0 @@
all: release
debug:
@+make -f debug.mk all
release:
@+make -f release.mk all
clean:
@+make -f build.mk clean
db: clean
@bear -- $(MAKE) -f debug.mk all
@mv -f compile_commands.json ../../../
.PHONY: all clean debug release db

View File

@@ -1,12 +0,0 @@
CFLAGS +=
CXXFLAGS := $(CFLAGS) -std=gnu++17
# DEFINES += -DTRACY_NO_STATISTICS
INCLUDES := $(shell pkg-config --cflags capstone)
LIBS := $(shell pkg-config --libs capstone) -lpthread
PROJECT := csvexport
IMAGE := $(PROJECT)-$(BUILD)
FILTER :=
include ../../../common/src-from-vcxproj.mk
include ../../../common/unix.mk

View File

@@ -1,6 +0,0 @@
CFLAGS := -g3 -Wall
DEFINES := -DDEBUG
BUILD := debug
include ../../../common/unix-debug.mk
include build.mk

View File

@@ -1,9 +0,0 @@
CFLAGS := -O3
ifndef TRACY_NO_LTO
CFLAGS += -flto
endif
DEFINES := -DNDEBUG
BUILD := release
include ../../../common/unix-release.mk
include build.mk

View File

@@ -1,25 +0,0 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30907.101
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "csvexport", "csvexport.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.ActiveCfg = Debug|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.Build.0 = Debug|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.ActiveCfg = Release|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {3E51386C-43EA-44AC-9F24-AFAFE4D63ADE}
EndGlobalSection
EndGlobal

View File

@@ -1,206 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
<RootNamespace>capture</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
<VcpkgTriplet>x64-windows-static</VcpkgTriplet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<PropertyGroup Label="Vcpkg">
<VcpkgEnableManifest>true</VcpkgEnableManifest>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<PreprocessorDefinitions>_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<LanguageStandard>stdcpplatest</LanguageStandard>
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\debug\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<LanguageStandard>stdcpplatest</LanguageStandard>
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\TracySocket.cpp" />
<ClCompile Include="..\..\..\common\TracyStackFrames.cpp" />
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
<ClCompile Include="..\..\..\getopt\getopt.c" />
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
<ClCompile Include="..\..\..\server\TracyMmap.cpp" />
<ClCompile Include="..\..\..\server\TracyPrint.cpp" />
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp" />
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp" />
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp" />
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
<ClCompile Include="..\..\..\zstd\common\debug.c" />
<ClCompile Include="..\..\..\zstd\common\entropy_common.c" />
<ClCompile Include="..\..\..\zstd\common\error_private.c" />
<ClCompile Include="..\..\..\zstd\common\fse_decompress.c" />
<ClCompile Include="..\..\..\zstd\common\pool.c" />
<ClCompile Include="..\..\..\zstd\common\threading.c" />
<ClCompile Include="..\..\..\zstd\common\xxhash.c" />
<ClCompile Include="..\..\..\zstd\common\zstd_common.c" />
<ClCompile Include="..\..\..\zstd\compress\fse_compress.c" />
<ClCompile Include="..\..\..\zstd\compress\hist.c" />
<ClCompile Include="..\..\..\zstd\compress\huf_compress.c" />
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_compress.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_literals.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_sequences.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_superblock.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_double_fast.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_fast.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_lazy.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_ldm.c" />
<ClCompile Include="..\..\..\zstd\compress\zstd_opt.c" />
<ClCompile Include="..\..\..\zstd\decompress\huf_decompress.c" />
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c" />
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c" />
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
<ClCompile Include="..\..\src\csvexport.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
<ClInclude Include="..\..\..\common\TracyAlloc.hpp" />
<ClInclude Include="..\..\..\common\TracyColor.hpp" />
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
<ClInclude Include="..\..\..\common\TracyStackFrames.hpp" />
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
<ClInclude Include="..\..\..\getopt\getopt.h" />
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
<ClInclude Include="..\..\..\server\TracyFileRead.hpp" />
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
<ClInclude Include="..\..\..\server\TracyMmap.hpp" />
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
<ClInclude Include="..\..\..\server\TracyPrint.hpp" />
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp" />
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp" />
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp" />
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
<ClInclude Include="..\..\..\zstd\common\bitstream.h" />
<ClInclude Include="..\..\..\zstd\common\compiler.h" />
<ClInclude Include="..\..\..\zstd\common\cpu.h" />
<ClInclude Include="..\..\..\zstd\common\debug.h" />
<ClInclude Include="..\..\..\zstd\common\error_private.h" />
<ClInclude Include="..\..\..\zstd\common\fse.h" />
<ClInclude Include="..\..\..\zstd\common\huf.h" />
<ClInclude Include="..\..\..\zstd\common\mem.h" />
<ClInclude Include="..\..\..\zstd\common\pool.h" />
<ClInclude Include="..\..\..\zstd\common\portability_macros.h" />
<ClInclude Include="..\..\..\zstd\common\threading.h" />
<ClInclude Include="..\..\..\zstd\common\xxhash.h" />
<ClInclude Include="..\..\..\zstd\common\zstd_deps.h" />
<ClInclude Include="..\..\..\zstd\common\zstd_internal.h" />
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h" />
<ClInclude Include="..\..\..\zstd\compress\clevels.h" />
<ClInclude Include="..\..\..\zstd\compress\hist.h" />
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_internal.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_literals.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_sequences.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_superblock.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_cwksp.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_double_fast.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_fast.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_lazy.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm_geartab.h" />
<ClInclude Include="..\..\..\zstd\compress\zstd_opt.h" />
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h" />
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h" />
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h" />
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h" />
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h" />
<ClInclude Include="..\..\..\zstd\zdict.h" />
<ClInclude Include="..\..\..\zstd\zstd.h" />
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@@ -1,359 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="src">
<UniqueIdentifier>{729c80ee-4d26-4a5e-8f1f-6c075783eb56}</UniqueIdentifier>
</Filter>
<Filter Include="server">
<UniqueIdentifier>{cf23ef7b-7694-4154-830b-00cf053350ea}</UniqueIdentifier>
</Filter>
<Filter Include="common">
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
</Filter>
<Filter Include="getopt">
<UniqueIdentifier>{ee9737d2-69c7-44da-b9c7-539d18f9d4b4}</UniqueIdentifier>
</Filter>
<Filter Include="zstd">
<UniqueIdentifier>{44ea1742-0fd9-40ba-879c-031868509600}</UniqueIdentifier>
</Filter>
<Filter Include="zstd\common">
<UniqueIdentifier>{2d065bba-d78e-4e44-87f7-b44cf3248260}</UniqueIdentifier>
</Filter>
<Filter Include="zstd\compress">
<UniqueIdentifier>{a19ca3bc-2a17-49c5-a0d9-5916213de774}</UniqueIdentifier>
</Filter>
<Filter Include="zstd\decompress">
<UniqueIdentifier>{d4181058-2198-4931-ae31-b7eda0312458}</UniqueIdentifier>
</Filter>
<Filter Include="zstd\dictBuilder">
<UniqueIdentifier>{873c22fe-b4d7-480d-ad67-48271296f4c1}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\TracySocket.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\TracySystem.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyMemory.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyWorker.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyPrint.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyThreadCompress.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyTaskDispatch.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyMmap.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyTextureCompression.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\getopt\getopt.c">
<Filter>getopt</Filter>
</ClCompile>
<ClCompile Include="..\..\src\csvexport.cpp">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\debug.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\entropy_common.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\error_private.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\fse_decompress.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\pool.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\threading.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\xxhash.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\common\zstd_common.c">
<Filter>zstd\common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\decompress\huf_decompress.c">
<Filter>zstd\decompress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\decompress\zstd_ddict.c">
<Filter>zstd\decompress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress.c">
<Filter>zstd\decompress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\decompress\zstd_decompress_block.c">
<Filter>zstd\decompress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\fse_compress.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\hist.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\huf_compress.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_compress.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_literals.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_sequences.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_compress_superblock.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_double_fast.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_fast.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_lazy.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_ldm.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstd_opt.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\compress\zstdmt_compress.c">
<Filter>zstd\compress</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\dictBuilder\cover.c">
<Filter>zstd\dictBuilder</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c">
<Filter>zstd\dictBuilder</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c">
<Filter>zstd\dictBuilder</Filter>
</ClCompile>
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c">
<Filter>zstd\dictBuilder</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\TracyStackFrames.cpp">
<Filter>common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyAlloc.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyColor.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyForceInline.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyProtocol.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyQueue.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracySocket.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracySystem.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyEvent.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyMemory.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracySlab.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyVector.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyPrint.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyThreadCompress.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyTaskDispatch.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyFileRead.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyMmap.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyTextureCompression.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\getopt\getopt.h">
<Filter>getopt</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zstd_errors.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\bitstream.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\compiler.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\cpu.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\debug.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\error_private.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\fse.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\huf.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\mem.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\pool.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\threading.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\xxhash.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\zstd_deps.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\zstd_internal.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\zstd_trace.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\decompress\zstd_ddict.h">
<Filter>zstd\decompress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_block.h">
<Filter>zstd\decompress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\decompress\zstd_decompress_internal.h">
<Filter>zstd\decompress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\hist.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_internal.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_literals.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_sequences.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_compress_superblock.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_cwksp.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_double_fast.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_fast.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_lazy.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_ldm_geartab.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstd_opt.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\zstdmt_compress.h">
<Filter>zstd\compress</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\zdict.h">
<Filter>zstd</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\dictBuilder\cover.h">
<Filter>zstd\dictBuilder</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\dictBuilder\divsufsort.h">
<Filter>zstd\dictBuilder</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyStackFrames.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\common\portability_macros.h">
<Filter>zstd\common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\zstd\compress\clevels.h">
<Filter>zstd\compress</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S">
<Filter>zstd\decompress</Filter>
</None>
</ItemGroup>
</Project>

View File

@@ -1,311 +0,0 @@
#ifdef _WIN32
# include <windows.h>
#endif
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include "../../server/TracyFileRead.hpp"
#include "../../server/TracyWorker.hpp"
#include "../../getopt/getopt.h"
void print_usage_exit(int e)
{
fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
fprintf(stderr, "Usage:\n");
fprintf(stderr, " extract [OPTION...] <trace file>\n");
fprintf(stderr, "\n");
fprintf(stderr, " -h, --help Print usage\n");
fprintf(stderr, " -f, --filter arg Filter zone names (default: "")\n");
fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
fprintf(stderr, " -c, --case Case sensitive filtering\n");
fprintf(stderr, " -e, --self Get self times\n");
fprintf(stderr, " -u, --unwrap Report each zone event\n");
exit(e);
}
struct Args {
const char* filter;
const char* separator;
const char* trace_file;
bool case_sensitive;
bool self_time;
bool unwrap;
};
Args parse_args(int argc, char** argv)
{
if (argc == 1)
{
print_usage_exit(1);
}
Args args = { "", ",", "", false, false, false };
struct option long_opts[] = {
{ "help", no_argument, NULL, 'h' },
{ "filter", optional_argument, NULL, 'f' },
{ "sep", optional_argument, NULL, 's' },
{ "case", no_argument, NULL, 'c' },
{ "self", no_argument, NULL, 'e' },
{ "unwrap", no_argument, NULL, 'u' },
{ NULL, 0, NULL, 0 }
};
int c;
while ((c = getopt_long(argc, argv, "hf:s:ceu", long_opts, NULL)) != -1)
{
switch (c)
{
case 'h':
print_usage_exit(0);
break;
case 'f':
args.filter = optarg;
break;
case 's':
args.separator = optarg;
break;
case 'c':
args.case_sensitive = true;
break;
case 'e':
args.self_time = true;
break;
case 'u':
args.unwrap = true;
break;
default:
print_usage_exit(1);
break;
}
}
if (argc != optind + 1)
{
print_usage_exit(1);
}
args.trace_file = argv[optind];
return args;
}
bool is_substring(
const char* term,
const char* s,
bool case_sensitive = false
){
auto new_term = std::string(term);
auto new_s = std::string(s);
if (!case_sensitive) {
std::transform(
new_term.begin(),
new_term.end(),
new_term.begin(),
[](unsigned char c){ return std::tolower(c); }
);
std::transform(
new_s.begin(),
new_s.end(),
new_s.begin(),
[](unsigned char c){ return std::tolower(c); }
);
}
return new_s.find(new_term) != std::string::npos;
}
const char* get_name(int32_t id, const tracy::Worker& worker)
{
auto& srcloc = worker.GetSourceLocation(id);
return worker.GetString(srcloc.name.active ? srcloc.name : srcloc.function);
}
template <typename T>
std::string join(const T& v, const char* sep) {
std::ostringstream s;
for (const auto& i : v) {
if (&i != &v[0]) {
s << sep;
}
s << i;
}
return s.str();
}
// From TracyView.cpp
int64_t GetZoneChildTimeFast(
const tracy::Worker& worker,
const tracy::ZoneEvent& zone
){
int64_t time = 0;
if( zone.HasChildren() )
{
auto& children = worker.GetZoneChildren( zone.Child() );
if( children.is_magic() )
{
auto& vec = *(tracy::Vector<tracy::ZoneEvent>*)&children;
for( auto& v : vec )
{
assert( v.IsEndValid() );
time += v.End() - v.Start();
}
}
else
{
for( auto& v : children )
{
assert( v->IsEndValid() );
time += v->End() - v->Start();
}
}
}
return time;
}
int main(int argc, char** argv)
{
#ifdef _WIN32
if (!AttachConsole(ATTACH_PARENT_PROCESS))
{
AllocConsole();
SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
}
#endif
Args args = parse_args(argc, argv);
auto f = std::unique_ptr<tracy::FileRead>(
tracy::FileRead::Open(args.trace_file)
);
if (!f)
{
fprintf(stderr, "Could not open file %s\n", args.trace_file);
return 1;
}
auto worker = tracy::Worker(*f);
while (!worker.AreSourceLocationZonesReady())
{
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
auto& slz = worker.GetSourceLocationZones();
tracy::Vector<decltype(slz.begin())> slz_selected;
slz_selected.reserve(slz.size());
uint32_t total_cnt = 0;
for(auto it = slz.begin(); it != slz.end(); ++it)
{
if(it->second.total != 0)
{
++total_cnt;
if(args.filter[0] == '\0')
{
slz_selected.push_back_no_space_check(it);
}
else
{
auto name = get_name(it->first, worker);
if(is_substring(args.filter, name, args.case_sensitive))
{
slz_selected.push_back_no_space_check(it);
}
}
}
}
std::vector<const char*> columns;
if (args.unwrap)
{
columns = {
"name", "src_file", "src_line", "ns_since_start", "exec_time_ns"
};
}
else
{
columns = {
"name", "src_file", "src_line", "total_ns", "total_perc",
"counts", "mean_ns", "min_ns", "max_ns", "std_ns"
};
}
std::string header = join(columns, args.separator);
printf("%s\n", header.data());
const auto last_time = worker.GetLastTime();
for(auto& it : slz_selected)
{
std::vector<std::string> values(columns.size());
values[0] = get_name(it->first, worker);
const auto& srcloc = worker.GetSourceLocation(it->first);
values[1] = worker.GetString(srcloc.file);
values[2] = std::to_string(srcloc.line);
const auto& zone_data = it->second;
if (args.unwrap)
{
int i = 0;
for (const auto& zone_thread_data : zone_data.zones) {
const auto zone_event = zone_thread_data.Zone();
const auto start = zone_event->Start();
const auto end = zone_event->End();
values[3] = std::to_string(start);
auto timespan = end - start;
if (args.self_time) {
timespan -= GetZoneChildTimeFast(worker, *zone_event);
}
values[4] = std::to_string(timespan);
std::string row = join(values, args.separator);
printf("%s\n", row.data());
}
}
else
{
const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
values[3] = std::to_string(time);
values[4] = std::to_string(100. * time / last_time);
values[5] = std::to_string(zone_data.zones.size());
const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
/ zone_data.zones.size();
values[6] = std::to_string(avg);
const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
values[7] = std::to_string(tmin);
values[8] = std::to_string(tmax);
const auto sz = zone_data.zones.size();
const auto ss = zone_data.sumSq
- 2. * zone_data.total * avg
+ avg * avg * sz;
const auto std = sqrt(ss / (sz - 1));
values[9] = std::to_string(std);
std::string row = join(values, args.separator);
printf("%s\n", row.data());
}
}
return 0;
}

BIN
doc/compare.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
doc/histogram.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

BIN
doc/locks.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.3 KiB

BIN
doc/messages.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 KiB

BIN
doc/plot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 261 KiB

After

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 156 KiB

Some files were not shown because too many files have changed in this diff Show More