Compare commits

..

1 Commits

Author SHA1 Message Date
Bartosz Taudul
9618131ce9 Histogram zooming prototype. 2018-09-03 23:18:26 +02:00
548 changed files with 75519 additions and 472966 deletions

19
.appveyor.yml Normal file
View File

@@ -0,0 +1,19 @@
version: '{build}'
platform:
- x64
image:
- Visual Studio 2017
- Ubuntu
before_build:
- cmd: cd profiler\build\win32
- cmd: nuget restore
- cmd: cd ..\..\..
build_script:
- cmd: msbuild .\update\build\win32\update.vcxproj
- cmd: msbuild .\profiler\build\win32\Tracy.vcxproj
- cmd: msbuild .\capture\build\win32\capture.vcxproj
- sh: sudo apt-get update && sudo apt-get -y install libglfw3-dev libgtk2.0-dev
- sh: make -C update/build/unix debug release
- sh: make -C profiler/build/unix debug release
- sh: make -C capture/build/unix debug release
test: off

View File

@@ -1,62 +0,0 @@
---
Checks:
'
clang-diagnostic-*,
clang-analyzer-*,
bugprone-*,
google-*,
misc-*,
modernize-*,
performance-*,
readability-*,
-bugprone-easily-swappable-parameters,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-reserved-identifier,
-google-readability-braces-around-statements,
-google-readability-casting,
-google-readability-function-size,
-google-readability-todo,
-google-readability-namespace-comments,
-misc-confusable-identifiers,
-misc-no-recursion,
-modernize-avoid-c-arrays,
-modernize-deprecated-headers,
-modernize-use-default-member-init,
-modernize-use-trailing-return-type,
-performance-no-int-to-ptr,
-readability-braces-around-statements,
-readability-else-after-return,
-readability-function-cognitive-complexity,
-readability-identifier-length,
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
-readability-qualified-auto,
-readability-uppercase-literal-suffix
'
WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
FormatStyle: none
CheckOptions:
llvm-else-after-return.WarnOnConditionVariables: 'false'
modernize-loop-convert.MinConfidence: reasonable
modernize-replace-auto-ptr.IncludeStyle: llvm
modernize-pass-by-value.IncludeStyle: llvm
google-readability-namespace-comments.ShortNamespaceLines: '10'
google-readability-namespace-comments.SpacesBeforeComments: '2'
cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: 'true'
google-readability-braces-around-statements.ShortStatementLines: '1'
cert-err33-c.CheckedFunctions: '::aligned_alloc;::asctime_s;::at_quick_exit;::atexit;::bsearch;::bsearch_s;::btowc;::c16rtomb;::c32rtomb;::calloc;::clock;::cnd_broadcast;::cnd_init;::cnd_signal;::cnd_timedwait;::cnd_wait;::ctime_s;::fclose;::fflush;::fgetc;::fgetpos;::fgets;::fgetwc;::fopen;::fopen_s;::fprintf;::fprintf_s;::fputc;::fputs;::fputwc;::fputws;::fread;::freopen;::freopen_s;::fscanf;::fscanf_s;::fseek;::fsetpos;::ftell;::fwprintf;::fwprintf_s;::fwrite;::fwscanf;::fwscanf_s;::getc;::getchar;::getenv;::getenv_s;::gets_s;::getwc;::getwchar;::gmtime;::gmtime_s;::localtime;::localtime_s;::malloc;::mbrtoc16;::mbrtoc32;::mbsrtowcs;::mbsrtowcs_s;::mbstowcs;::mbstowcs_s;::memchr;::mktime;::mtx_init;::mtx_lock;::mtx_timedlock;::mtx_trylock;::mtx_unlock;::printf_s;::putc;::putwc;::raise;::realloc;::remove;::rename;::scanf;::scanf_s;::setlocale;::setvbuf;::signal;::snprintf;::snprintf_s;::sprintf;::sprintf_s;::sscanf;::sscanf_s;::strchr;::strerror_s;::strftime;::strpbrk;::strrchr;::strstr;::strtod;::strtof;::strtoimax;::strtok;::strtok_s;::strtol;::strtold;::strtoll;::strtoul;::strtoull;::strtoumax;::strxfrm;::swprintf;::swprintf_s;::swscanf;::swscanf_s;::thrd_create;::thrd_detach;::thrd_join;::thrd_sleep;::time;::timespec_get;::tmpfile;::tmpfile_s;::tmpnam;::tmpnam_s;::tss_create;::tss_get;::tss_set;::ungetc;::ungetwc;::vfprintf;::vfprintf_s;::vfscanf;::vfscanf_s;::vfwprintf;::vfwprintf_s;::vfwscanf;::vfwscanf_s;::vprintf_s;::vscanf;::vscanf_s;::vsnprintf;::vsnprintf_s;::vsprintf;::vsprintf_s;::vsscanf;::vsscanf_s;::vswprintf;::vswprintf_s;::vswscanf;::vswscanf_s;::vwprintf_s;::vwscanf;::vwscanf_s;::wcrtomb;::wcschr;::wcsftime;::wcspbrk;::wcsrchr;::wcsrtombs;::wcsrtombs_s;::wcsstr;::wcstod;::wcstof;::wcstoimax;::wcstok;::wcstok_s;::wcstol;::wcstold;::wcstoll;::wcstombs;::wcstombs_s;::wcstoul;::wcstoull;::wcstoumax;::wcsxfrm;::wctob;::wctrans;::wctype;::wmemchr;::wprintf_s;::wscanf;::wscanf_s;'
modernize-loop-convert.MaxCopySize: '16'
cert-dcl16-c.NewSuffixes: 'L;LL;LU;LLU'
cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField: 'false'
cert-str34-c.DiagnoseSignedUnsignedCharComparisons: 'false'
modernize-use-nullptr.NullMacros: 'NULL'
llvm-qualified-auto.AddConstToQualified: 'false'
modernize-loop-convert.NamingStyle: CamelCase
llvm-else-after-return.WarnOnUnfixable: 'false'
google-readability-function-size.StatementThreshold: '800'
...

1
.github/FUNDING.yml vendored
View File

@@ -1 +0,0 @@
github: wolfpld

BIN
.github/sponsor.png vendored

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

View File

@@ -1,82 +0,0 @@
name: build
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
strategy:
matrix:
os: [ windows-latest, macos-latest ]
runs-on: ${{ matrix.os }}
continue-on-error: true
steps:
- uses: actions/checkout@v4
- if: startsWith(matrix.os, 'windows')
uses: microsoft/setup-msbuild@v2
- if: startsWith(matrix.os, 'windows')
uses: actions/setup-python@v2
with:
python-version: '3.x'
- if: startsWith(matrix.os, 'windows')
run: pip install meson ninja
- if: startsWith(matrix.os, 'macos')
name: Install macos dependencies
run: brew install pkg-config glfw meson
- name: Profiler GUI
run: |
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release
cmake --build profiler/build --parallel --config Release
- name: Update utility
run: |
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
cmake --build update/build --parallel --config Release
- name: Capture utility
run: |
cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
cmake --build capture/build --parallel --config Release
- name: Csvexport utility
run: |
cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
cmake --build csvexport/build --parallel --config Release
- name: Import-chrome utility
run: |
cmake -B import-chrome/build -S import-chrome -DCMAKE_BUILD_TYPE=Release
cmake --build import-chrome/build --parallel --config Release
- name: Import-fuchsia utility
run: |
cmake -B import-fuchsia/build -S import-fuchsia -DCMAKE_BUILD_TYPE=Release
cmake --build import-fuchsia/build --parallel --config Release
- if: ${{ !startsWith(matrix.os, 'windows') }}
name: Library
run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
- if: ${{ !startsWith(matrix.os, 'windows') }}
name: Find Artifacts
id: find_artifacts
run: |
mkdir -p bin
cp profiler/build/tracy-profiler bin
cp update/build/tracy-update bin
cp capture/build/tracy-capture bin
cp csvexport/build/tracy-csvexport bin
cp import-chrome/build/tracy-import-chrome bin
cp import-fuchsia/build/tracy-import-fuchsia bin
- if: startsWith(matrix.os, 'windows')
name: Find Artifacts
id: find_artifacts_windows
run: |
mkdir bin
copy profiler\build\Release\tracy-profiler.exe bin
copy update\build\Release\tracy-update.exe bin
copy capture\build\Release\tracy-capture.exe bin
copy csvexport\build\Release\tracy-csvexport.exe bin
copy import-chrome\build\Release\tracy-import-chrome.exe bin
copy import-fuchsia\build\Release\tracy-import-fuchsia.exe bin
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.os }}
path: bin

View File

@@ -1,27 +0,0 @@
name: Manual
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Fix stupidity
run: |
cp LICENSE LICENSE.
- name: Compile LaTeX
uses: xu-cheng/latex-action@v3
with:
working_directory: manual
root_file: tracy.tex
- uses: actions/upload-artifact@v4
with:
name: manual
path: manual/tracy.pdf

View File

@@ -1,79 +0,0 @@
name: linux
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
container: archlinux:base-devel
steps:
- name: Install dependencies
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 tbb debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
- uses: actions/checkout@v4
- name: Profiler GUI
run: |
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release
cmake --build profiler/build --parallel
- name: Update utility
run: |
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
cmake --build update/build --parallel
- name: Capture utility
run: |
cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
cmake --build capture/build --parallel
- name: Csvexport utility
run: |
cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
cmake --build csvexport/build --parallel
- name: Import-chrome utility
run: |
cmake -B import-chrome/build -S import-chrome -DCMAKE_BUILD_TYPE=Release
cmake --build import-chrome/build --parallel
- name: Import-fuchsia utility
run: |
cmake -B import-fuchsia/build -S import-fuchsia -DCMAKE_BUILD_TYPE=Release
cmake --build import-fuchsia/build --parallel
- name: Library
run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
- name: Test application
run: |
# test compilation with different flags
# we clean the build folder to reset cached variables between runs
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release
cmake --build test/build --parallel
rm -rf test/build
# same with TRACY_ON_DEMAND
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_ON_DEMAND=ON .
cmake --build test/build --parallel
rm -rf test/build
# same with TRACY_DELAYED_INIT TRACY_MANUAL_LIFETIME
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DELAYED_INIT=ON -DTRACY_MANUAL_LIFETIME=ON .
cmake --build test/build --parallel
rm -rf test/build
# same with TRACY_DEMANGLE
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DEMANGLE=ON .
cmake --build test/build --parallel
rm -rf test/build
- name: Find Artifacts
id: find_artifacts
run: |
mkdir -p bin
cp profiler/build/tracy-profiler bin
cp update/build/tracy-update bin
cp capture/build/tracy-capture bin
cp csvexport/build/tracy-csvexport bin
cp import-chrome/build/tracy-import-chrome bin
cp import-fuchsia/build/tracy-import-fuchsia bin
strip bin/tracy-*
- uses: actions/upload-artifact@v4
with:
name: arch-linux
path: bin

39
.gitignore vendored
View File

@@ -1,36 +1,21 @@
.vs
_build
_compiler
tools/*
*.opendb
*.db
*.vcxproj.user
x64
Release
Debug
*.d
*.o
*.so
*.swp
*.obj
imgui.ini
test/tracy_test
test/tracy_test.exe
*/build/unix/*-*
manual/t*.aux
manual/t*.log
manual/t*.out
manual/t*.pdf
manual/t*.synctex.gz
manual/t*.toc
manual/t*.bbl
manual/t*.blg
manual/t*.fdb_latexmk
manual/t*.fls
manual/tracy.aux
manual/tracy.log
manual/tracy.out
manual/tracy.pdf
manual/tracy.synctex.gz
manual/tracy.toc
profiler/build/win32/packages
profiler/build/win32/Tracy.aps
.deps/
.dirstamp
/_*/**
/**/__pycache__/**
extra/vswhere.exe
extra/tracy-build
.cache
compile_commands.json
profiler/build/wasm/Tracy-release.*
profiler/build/wasm/Tracy-debug.*
profiler/build/wasm/embed.tracy

View File

@@ -1,7 +0,0 @@
{
"recommendations": [
"llvm-vs-code-extensions.vscode-clangd",
"vadimcn.vscode-lldb",
"ms-vscode.cmake-tools"
]
}

14
.vscode/launch.json vendored
View File

@@ -1,14 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Launch",
"type": "lldb",
"request": "launch",
"program": "${command:cmake.launchTargetPath}",
"args": [],
"cwd": "${workspaceFolder}",
"terminal": "console"
}
]
}

21
.vscode/settings.json vendored
View File

@@ -1,21 +0,0 @@
{
"cmake.configureOnOpen": true,
"cmake.sourceDirectory": [
"${workspaceFolder}/profiler",
"${workspaceFolder}/capture",
"${workspaceFolder}/csvexport",
"${workspaceFolder}/import-chrome",
"${workspaceFolder}/import-fuchsia",
"${workspaceFolder}/update",
"${workspaceFolder}/test",
"${workspaceFolder}",
],
"cmake.buildDirectory": "${sourceDirectory}/build",
"cmake.autoSelectActiveFolder": false,
"cmake.options.advanced": {
"folder": { "statusBarVisibility": "visible" },
"variant": { "statusBarVisibility": "compact" }
},
"cmake.copyCompileCommands": "${workspaceFolder}/compile_commands.json",
"lldb.launch.initCommands": ["command script import ${workspaceRoot}/extra/natvis.py"],
}

6
AUTHORS Normal file
View File

@@ -0,0 +1,6 @@
Bartosz Taudul <wolf.pld@gmail.com>
Kamil Klimek <kamil.klimek@sharkbits.com>
Bartosz Szreder <zgredder@gmail.com>
Arvid Gerstmann <dev@arvid-g.de>
Rokas Kupstys <rokups@zoho.com>
Till Rathmann <till.rathmann@gmx.de>

View File

@@ -1,181 +0,0 @@
cmake_minimum_required(VERSION 3.10)
# Run version helper script
include(cmake/version.cmake)
project(Tracy LANGUAGES CXX VERSION ${TRACY_VERSION_STRING})
file(GENERATE OUTPUT .gitignore CONTENT "*")
if(${BUILD_SHARED_LIBS})
set(DEFAULT_STATIC OFF)
else()
set(DEFAULT_STATIC ON)
endif()
option(TRACY_STATIC "Whether to build Tracy as a static library" ${DEFAULT_STATIC})
find_package(Threads REQUIRED)
set(TRACY_PUBLIC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/public)
if(TRACY_STATIC)
set(TRACY_VISIBILITY "STATIC")
else()
set(TRACY_VISIBILITY "SHARED")
endif()
add_library(TracyClient ${TRACY_VISIBILITY} "${TRACY_PUBLIC_DIR}/TracyClient.cpp")
target_compile_features(TracyClient PUBLIC cxx_std_11)
target_include_directories(TracyClient SYSTEM PUBLIC
$<BUILD_INTERFACE:${TRACY_PUBLIC_DIR}>
$<INSTALL_INTERFACE:include>)
target_link_libraries(
TracyClient
PUBLIC
Threads::Threads
${CMAKE_DL_LIBS}
)
# Public dependency on some libraries required when using Mingw
if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} MATCHES "GNU")
target_link_libraries(TracyClient PUBLIC ws2_32 dbghelp)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
find_library(EXECINFO_LIBRARY NAMES execinfo REQUIRED)
target_link_libraries(TracyClient PUBLIC ${EXECINFO_LIBRARY})
endif()
add_library(Tracy::TracyClient ALIAS TracyClient)
macro(set_option option help value)
option(${option} ${help} ${value})
if(${option})
message(STATUS "${option}: ON")
target_compile_definitions(TracyClient PUBLIC ${option})
else()
message(STATUS "${option}: OFF")
endif()
endmacro()
set_option(TRACY_ENABLE "Enable profiling" ON)
set_option(TRACY_ON_DEMAND "On-demand profiling" OFF)
set_option(TRACY_CALLSTACK "Enforce callstack collection for tracy regions" OFF)
set_option(TRACY_NO_CALLSTACK "Disable all callstack related functionality" OFF)
set_option(TRACY_NO_CALLSTACK_INLINES "Disables the inline functions in callstacks" OFF)
set_option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" OFF)
set_option(TRACY_NO_BROADCAST "Disable client discovery by broadcast to local network" OFF)
set_option(TRACY_ONLY_IPV4 "Tracy will only accept connections on IPv4 addresses (disable IPv6)" OFF)
set_option(TRACY_NO_CODE_TRANSFER "Disable collection of source code" OFF)
set_option(TRACY_NO_CONTEXT_SWITCH "Disable capture of context switches" OFF)
set_option(TRACY_NO_EXIT "Client executable does not exit until all profile data is sent to server" OFF)
set_option(TRACY_NO_SAMPLING "Disable call stack sampling" OFF)
set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF)
set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF)
set_option(TRACY_PATCHABLE_NOPSLEDS "Enable nopsleds for efficient patching by system-level tools (e.g. rr)" OFF)
set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
set_option(TRACY_FIBERS "Enable fibers support" OFF)
set_option(TRACY_NO_CRASH_HANDLER "Disable crash handling" OFF)
set_option(TRACY_TIMER_FALLBACK "Use lower resolution timers" OFF)
set_option(TRACY_LIBUNWIND_BACKTRACE "Use libunwind backtracing where supported" OFF)
set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resolution, only resolve the image path and offset to enable offline symbol resolution" OFF)
set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution resolution after the first symbol resolve operation" OFF)
# advanced
set_option(TRACY_DEMANGLE "[advanced] Don't use default demangling function - You'll need to provide your own" OFF)
mark_as_advanced(TRACY_DEMANGLE)
if(NOT TRACY_STATIC)
target_compile_definitions(TracyClient PRIVATE TRACY_EXPORTS)
target_compile_definitions(TracyClient PUBLIC TRACY_IMPORTS)
endif()
include(CMakePackageConfigHelpers)
include(GNUInstallDirs)
set_target_properties(TracyClient PROPERTIES VERSION ${PROJECT_VERSION})
set(tracy_includes
${TRACY_PUBLIC_DIR}/tracy/TracyC.h
${TRACY_PUBLIC_DIR}/tracy/Tracy.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyD3D11.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyD3D12.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyLua.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyOpenCL.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyOpenGL.hpp
${TRACY_PUBLIC_DIR}/tracy/TracyVulkan.hpp)
set(client_includes
${TRACY_PUBLIC_DIR}/client/tracy_concurrentqueue.h
${TRACY_PUBLIC_DIR}/client/tracy_rpmalloc.hpp
${TRACY_PUBLIC_DIR}/client/tracy_SPSCQueue.h
${TRACY_PUBLIC_DIR}/client/TracyKCore.hpp
${TRACY_PUBLIC_DIR}/client/TracyArmCpuTable.hpp
${TRACY_PUBLIC_DIR}/client/TracyCallstack.h
${TRACY_PUBLIC_DIR}/client/TracyCallstack.hpp
${TRACY_PUBLIC_DIR}/client/TracyCpuid.hpp
${TRACY_PUBLIC_DIR}/client/TracyDebug.hpp
${TRACY_PUBLIC_DIR}/client/TracyDxt1.hpp
${TRACY_PUBLIC_DIR}/client/TracyFastVector.hpp
${TRACY_PUBLIC_DIR}/client/TracyLock.hpp
${TRACY_PUBLIC_DIR}/client/TracyProfiler.hpp
${TRACY_PUBLIC_DIR}/client/TracyRingBuffer.hpp
${TRACY_PUBLIC_DIR}/client/TracyScoped.hpp
${TRACY_PUBLIC_DIR}/client/TracyStringHelpers.hpp
${TRACY_PUBLIC_DIR}/client/TracySysPower.hpp
${TRACY_PUBLIC_DIR}/client/TracySysTime.hpp
${TRACY_PUBLIC_DIR}/client/TracySysTrace.hpp
${TRACY_PUBLIC_DIR}/client/TracyThread.hpp)
set(common_includes
${TRACY_PUBLIC_DIR}/common/tracy_lz4.hpp
${TRACY_PUBLIC_DIR}/common/tracy_lz4hc.hpp
${TRACY_PUBLIC_DIR}/common/TracyAlign.hpp
${TRACY_PUBLIC_DIR}/common/TracyAlloc.hpp
${TRACY_PUBLIC_DIR}/common/TracyApi.h
${TRACY_PUBLIC_DIR}/common/TracyColor.hpp
${TRACY_PUBLIC_DIR}/common/TracyForceInline.hpp
${TRACY_PUBLIC_DIR}/common/TracyMutex.hpp
${TRACY_PUBLIC_DIR}/common/TracyProtocol.hpp
${TRACY_PUBLIC_DIR}/common/TracyQueue.hpp
${TRACY_PUBLIC_DIR}/common/TracySocket.hpp
${TRACY_PUBLIC_DIR}/common/TracyStackFrames.hpp
${TRACY_PUBLIC_DIR}/common/TracySystem.hpp
${TRACY_PUBLIC_DIR}/common/TracyUwp.hpp
${TRACY_PUBLIC_DIR}/common/TracyYield.hpp)
install(TARGETS TracyClient
EXPORT TracyConfig
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
install(FILES ${tracy_includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tracy)
install(FILES ${client_includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/client)
install(FILES ${common_includes}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common)
install(EXPORT TracyConfig
NAMESPACE Tracy::
FILE TracyTargets.cmake
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
include(CMakePackageConfigHelpers)
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/TracyConfig.cmake"
INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TracyConfig.cmake
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/Tracy)
option(TRACY_CLIENT_PYTHON "Whether to build Tracy python client library" OFF)
if(TRACY_CLIENT_PYTHON)
if(TRACY_STATIC)
message(FATAL_ERROR "Python-bindings require a shared client library")
endif()
add_subdirectory(python)
endif()

View File

@@ -1,6 +0,0 @@
@PACKAGE_INIT@
include(CMakeFindDependencyMacro)
find_dependency(Threads REQUIRED)
include("${CMAKE_CURRENT_LIST_DIR}/TracyTargets.cmake")

77
FAQ.md Normal file
View File

@@ -0,0 +1,77 @@
# A quick tracy FAQ
### I already use VTune/perf/Very Sleepy/callgrind/MSVC profiler.
These are statistical profilers, which can be used to find hot spots in the code. This is very useful, but it won't show you the underlying reason for semi-random frame stutter that may occur every couple of seconds.
### You can use Telemetry for that.
Telemetry license costs about 8000 $ per year. Tracy is open source software. Telemetry doesn't have Lua bindings.
### You can use the free Brofiler. Crytek does use it, so it has to be good.
After a cursory look at the Brofiler code I can tell that the timer resolution there is at 300 ns. Tracy can achieve 5 ns timer resolution. Brofiler event logging infrastructure seems to be over-engineered. Brofiler can't track lock contention, nor does it have Lua bindings.
### So tracy is supposedly faster?
My measurements show that logging a single zone with tracy takes only 15 ns. In theory, if the program was doing nothing else, tracy should be able to log 66 million zones per second.
### Bullshit, RAD is advertising that they are able only to log about a million zones, over the network nevertheless: "Capture over a million timing zones per second in real-time!"
Tracy can perform network transfer of 15 million zones per second. Should the client and server be on separate machines, this number will be even higher, but you will need more than a gigabit link to achieve the maximum throughput. [Click here for a video of a max-throughput capture.](https://www.youtube.com/watch?v=DSMIHShKGAc)
### Can I connect to my application at any time and start profiling at this moment?
By default no, all events are registered from the beginning of program execution and are waiting in a queue. There's a separate on-demand mode, enabled by using a `TRACY_ON_DEMAND` macro.
### Am I seeing correctly that the profiler allocates one gigabyte of memory per second?
Only in extreme cases. Normal usage has much lower memory pressure.
### Why do you do magic with the static initialization order? Everyone says that's a bad practice.
It allows tracking construction of static objects and memory allocations performed before main() is entered.
### There's no support for consoles.
Welp. But there's mobile support.
### I do need console support.
The code is open. Write your own, then send a patch.
### I don't believe you can capture a zone in 15 ns. Show me the code!
Following is the annotated assembly code (generated from C++ sources) that's responsible for logging start of the zone:
```
call qword ptr [__imp_GetCurrentThreadId]
mov r14d,eax
mov qword ptr [rsp+0F0h],r14 // save thread id for later use
mov r12d,10h
mov rax,qword ptr gs:[58h] // TLS
mov r15,qword ptr [rax] // queue address
mov rdi,qword ptr [r12+r15] // data address
mov rbp,qword ptr [rdi+20h] // buffer counter
mov rbx,rbp
and ebx,7Fh // 128 item buffer
jne Application::InnerLoop+66h --+
mov rdx,rbp |
mov rcx,rdi |
call enqueue_begin_alloc | // reclaim/alloc next buffer
shl rbx,5 <---------------------+ // buffer items are 32 bytes
add rbx,qword ptr [rdi+40h]
mov byte ptr [rbx],4 // queue item type
rdtscp
mov dword ptr [rbx+19h],ecx // cpu id
shl rdx,20h
or rax,rdx // 64 bit timestamp
mov qword ptr [rbx+1],rax
mov qword ptr [rbx+9],r14 // thread id
lea rax,[__tracy_source_location] // static struct address
mov qword ptr [rbx+11h],rax
lea rax,[rbp+1] // increment buffer counter
mov qword ptr [rdi+20h],rax
```
There's also a second code block, for the end of the zone. It's similar, but a bit smaller, as it can use some of the variables that were retrieved above.

View File

@@ -1,7 +1,7 @@
Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
Tracy Profiler (https://bitbucket.org/wolfpld/tracy) is licensed under the
3-clause BSD license.
Copyright (c) 2017-2024, Bartosz Taudul <wolf@nereid.pl>
Copyright (c) 2017-2018, Bartosz Taudul <wolf.pld@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without

995
NEWS

File diff suppressed because it is too large Load Diff

View File

@@ -1,28 +1,68 @@
# Tracy Profiler
[![Sponsor](.github/sponsor.png)](https://github.com/sponsors/wolfpld/)
[![Build status](https://ci.appveyor.com/api/projects/status/968a88arq06gm3el/branch/master?svg=true)](https://ci.appveyor.com/project/wolfpld/tracy/branch/master)
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
Tracy supports profiling CPU (Direct support is provided for C, C++, Lua and Python integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/nektro/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, OpenCL.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
- [Changelog](NEWS)
- [Interactive demo](https://tracy.nereid.pl/)
Tracy is a real time, nanosecond resolution frame profiler that can be used for remote or embedded telemetry of your application. It can profile CPU (C++, Lua), GPU (OpenGL, Vulkan) and memory. It also can display locks held by threads and their interactions with each other.
![](doc/profiler.png)
![](doc/profiler2.png)
Tracy requires compiler support for C++11, Thread Local Storage and a way to workaround static initialization order fiasco. There are no other requirements. The following platforms are confirmed to be working (this is not a complete list):
![](doc/profiler3.png)
- Windows (x86, x64)
- Linux (x86, x64, ARM, ARM64)
- Android (ARM, x86)
- FreeBSD (x64)
- Cygwin (x64)
- WSL (x64)
- OSX (x64)
[An Introduction to Tracy Profiler in C++ - Marcos Slomp - CppCon 2023](https://youtu.be/ghXk3Bk5F2U?t=37)
The following compilers are supported:
- MSVC
- gcc
- clang
[Introduction to Tracy Profiler v0.2](https://www.youtube.com/watch?v=fB5B46lbapc)
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
[New features in Tracy Profiler v0.7](https://www.youtube.com/watch?v=_hU7vw00MZ4)
[New features in Tracy Profiler v0.8](https://www.youtube.com/watch?v=30wpRpHTTag)
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
[A quick FAQ.](FAQ.md)
[List of changes.](NEWS)
### High-level overview
![](doc/design.svg)
Tracy is split into client and server side. The client side collects events using a high-efficiency queue and awaits for an incoming connection. The server part connects to client and receives collected data from the client, which is then reconstructed into a viewable timeline. The transfer is performed using a TCP connection.
### Performance impact
To check how much slowdown is introduced by using Tracy, I have profiled [etcpak](https://bitbucket.org/wolfpld/etcpak), which is the fastest ETC texture compression utility there is. I used an 8192×8192 test image as input data and instrumented everything down to the 4×4 pixel block compression function (that's 4 million blocks to compress). It should be noted that Tracy needs to calibrate its internal timers at each run. This introduces a delay of 115 ms (on my machine), which is negligible when doing lengthy profiling runs, but it skews the results of etcpak timing. The following times have this delay subtracted, to give focus on zone collection impact, which is the thing that really matters here.
| Scenario | Zones | Clean run | Profiling run | Difference |
|-------------------------------------------------------|---------|-----------|---------------|------------|
| Compression of an image to ETC1 format | 4194568 | 0.94 s | 1.003 s | +0.063 s |
| Compression of an image to ETC2 format, with mip-maps | 5592822 | 1.034 s | 1.119 s | +0.085 s |
In both scenarios the per-zone time cost is at ~15 ns. This is in line with the measured 8 ns single event collection time (each zone has to report start and end event).
## Usage instructions
The user manual for Tracy is available [at the following address](https://bitbucket.org/wolfpld/tracy/downloads/tracy.pdf). It provides information about the integration process, required code markup and so on.
## Features
#### Marking locks
![](doc/locks.png)
#### Plotting data
![](doc/plot.png)
#### Message log
![](doc/messages.png)
#### Approximation of capture cost
![](doc/cost.png)

126
Tracy.hpp Normal file
View File

@@ -0,0 +1,126 @@
#ifndef __TRACY_HPP__
#define __TRACY_HPP__
#include "common/TracyColor.hpp"
#include "common/TracySystem.hpp"
#ifndef TRACY_ENABLE
#define ZoneNamed(x,y)
#define ZoneNamedN(x,y,z)
#define ZoneNamedC(x,y,z)
#define ZoneNamedNC(x,y,z,w)
#define ZoneScoped
#define ZoneScopedN(x)
#define ZoneScopedC(x)
#define ZoneScopedNC(x,y)
#define ZoneText(x,y)
#define ZoneName(x,y)
#define FrameMark
#define FrameMarkNamed(x)
#define FrameMarkStart(x)
#define FrameMarkEnd(x)
#define TracyLockable( type, varname ) type varname;
#define TracyLockableN( type, varname, desc ) type varname;
#define TracySharedLockable( type, varname ) type varname;
#define TracySharedLockableN( type, varname, desc ) type varname;
#define LockableBase( type ) type
#define SharedLockableBase( type ) type
#define LockMark(x) (void)x;
#define TracyPlot(x,y)
#define TracyMessage(x,y)
#define TracyMessageL(x)
#define TracyAlloc(x,y)
#define TracyFree(x)
#define ZoneNamedS(x,y,z)
#define ZoneNamedNS(x,y,z,w)
#define ZoneNamedCS(x,y,z,w)
#define ZoneNamedNCS(x,y,z,w,a)
#define ZoneScopedS(x)
#define ZoneScopedNS(x,y)
#define ZoneScopedCS(x,y)
#define ZoneScopedNCS(x,y,z)
#define TracyAllocS(x,y,z)
#define TracyFreeS(x,y)
#else
#include "client/TracyLock.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyScoped.hpp"
#define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
#define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
#define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
#define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
#define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true )
#define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true )
#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true )
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size );
#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size );
#define FrameMark tracy::Profiler::SendFrameMark();
#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsg );
#define FrameMarkStart( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart );
#define FrameMarkEnd( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd );
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define LockableBase( type ) tracy::Lockable<type>
#define SharedLockableBase( type ) tracy::SharedLockable<type>
#define LockMark( varname ) static const tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val );
#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size );
#define TracyMessageL( txt ) tracy::Profiler::Message( txt );
#define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size );
#define TracyFree( ptr ) tracy::Profiler::MemFree( ptr );
#ifdef TRACY_HAS_CALLSTACK
# define ZoneNamedS( varname, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedCS( varname, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color depth, true )
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth );
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth );
#else
# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active )
# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active )
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
# define ZoneScopedS( depth ) ZoneScoped
# define ZoneScopedNS( name, depth ) ZoneScopedN( name )
# define ZoneScopedCS( color, depth ) ZoneScopedC( color )
# define ZoneScopedNCS( name, color, depth ) ZoneScopedNC( name, color )
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
# define TracyFreeS( ptr, depth ) TracyFree( ptr )
#endif
#endif
#endif

29
TracyClient.cpp Normal file
View File

@@ -0,0 +1,29 @@
//
// Tracy profiler
// ----------------
//
// For fast integration, compile and
// link with this source file (and none
// other) in your executable (or in the
// main DLL / shared object on multi-DLL
// projects).
//
// Define TRACY_ENABLE to enable profiler.
#include "common/TracySystem.cpp"
#ifdef TRACY_ENABLE
#include "client/TracyProfiler.cpp"
#include "client/TracyCallstack.cpp"
#include "common/tracy_lz4.cpp"
#include "common/TracySocket.cpp"
#include "client/tracy_rpmalloc.cpp"
#ifdef _MSC_VER
# pragma comment(lib, "ws2_32.lib")
# pragma comment(lib, "dbghelp.lib")
#endif
#endif

73
TracyClientDLL.cpp Normal file
View File

@@ -0,0 +1,73 @@
//
// Tracy profiler
// ----------------
//
// On multi-DLL projects compile and
// link with this source file (and none
// other) in the executable and in
// DLLs / shared objects that link to
// the main DLL.
//
// Define TRACY_ENABLE to enable profiler.
#include "common/TracySystem.cpp"
#ifdef TRACY_ENABLE
#include "client/TracyProfiler.hpp"
#include "client/concurrentqueue.h"
#include "common/TracyQueue.hpp"
namespace tracy
{
#ifdef _MSC_VER
# define DLL_IMPORT __declspec(dllimport)
#else
# define DLL_IMPORT
#endif
DLL_IMPORT moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* get_token();
DLL_IMPORT void*(*get_rpmalloc())(size_t size);
DLL_IMPORT void(*get_rpfree())(void* ptr);
DLL_IMPORT Profiler& get_profiler();
#if defined TRACY_HW_TIMER && __ARM_ARCH >= 6
DLL_IMPORT int64_t(*get_GetTimeImpl())();
int64_t(*GetTimeImpl)() = get_GetTimeImpl();
#endif
#ifdef TRACY_COLLECT_THREAD_NAMES
DLL_IMPORT std::atomic<ThreadNameData*>& get_threadNameData();
DLL_IMPORT void(*get_rpmalloc_thread_initialize())();
std::atomic<ThreadNameData*>& s_threadNameData = get_threadNameData();
void(*rpmalloc_thread_initialize_fpt)() = get_rpmalloc_thread_initialize();
void rpmalloc_thread_initialize(void)
{
rpmalloc_thread_initialize_fpt();
}
#endif
static void*(*rpmalloc_fpt)(size_t size) = get_rpmalloc();
static void(*rpfree_fpt)(void* ptr) = get_rpfree();
RPMALLOC_RESTRICT void* rpmalloc(size_t size)
{
return rpmalloc_fpt(size);
}
void rpfree(void* ptr)
{
rpfree_fpt(ptr);
}
Profiler& s_profiler = get_profiler();
thread_local ProducerWrapper s_token { get_token() };
}
#endif

376
TracyLua.hpp Normal file
View File

@@ -0,0 +1,376 @@
#ifndef __TRACYLUA_HPP__
#define __TRACYLUA_HPP__
// Include this file after you include lua headers.
#ifndef TRACY_ENABLE
#include <string.h>
namespace tracy
{
namespace detail
{
static inline int noop( lua_State* L ) { return 0; }
}
static inline void LuaRegister( lua_State* L )
{
lua_newtable( L );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneBegin" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneBeginN" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneEnd" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneText" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "ZoneName" );
lua_pushcfunction( L, detail::noop );
lua_setfield( L, -2, "Message" );
lua_setglobal( L, "tracy" );
}
static inline char* FindEnd( char* ptr )
{
unsigned int cnt = 1;
while( cnt != 0 )
{
if( *ptr == '(' ) cnt++;
else if( *ptr == ')' ) cnt--;
ptr++;
}
return ptr;
}
static inline void LuaRemove( char* script )
{
while( *script )
{
if( strncmp( script, "tracy.", 6 ) == 0 )
{
if( strncmp( script + 6, "Zone", 4 ) == 0 )
{
if( strncmp( script + 10, "End()", 5 ) == 0 )
{
memset( script, ' ', 15 );
script += 15;
}
else if( strncmp( script + 10, "Begin()", 7 ) == 0 )
{
memset( script, ' ', 17 );
script += 17;
}
else if( strncmp( script + 10, "Text(", 5 ) == 0 )
{
auto end = FindEnd( script + 15 );
memset( script, ' ', end - script );
script = end;
}
else if( strncmp( script + 10, "Name(", 5 ) == 0 )
{
auto end = FindEnd( script + 15 );
memset( script, ' ', end - script );
script = end;
}
else if( strncmp( script + 10, "BeginN(", 7 ) == 0 )
{
auto end = FindEnd( script + 17 );
memset( script, ' ', end - script );
script = end;
}
else
{
script += 10;
}
}
else if( strncmp( script + 6, "Message(", 8 ) == 0 )
{
auto end = FindEnd( script + 14 );
memset( script, ' ', end - script );
script = end;
}
else
{
script += 6;
}
}
else
{
script++;
}
}
}
}
#else
#include <assert.h>
#include "common/TracyColor.hpp"
#include "common/TracyAlign.hpp"
#include "common/TracySystem.hpp"
#include "client/TracyProfiler.hpp"
namespace tracy
{
#ifdef TRACY_ON_DEMAND
extern thread_local LuaZoneState s_luaZoneState;
#endif
namespace detail
{
static inline int LuaZoneBegin( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
const auto zoneCnt = s_luaZoneState.counter++;
if( zoneCnt != 0 && !s_luaZoneState.active ) return 0;
s_luaZoneState.active = s_profiler.IsConnected();
if( !s_luaZoneState.active ) return 0;
#endif
const uint32_t color = Color::DeepSkyBlue3;
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
const uint32_t line = dbg.currentline;
const auto func = dbg.name ? dbg.name : dbg.short_src;
const auto fsz = strlen( func );
const auto ssz = strlen( dbg.source );
// Data layout:
// 4b payload size
// 4b color
// 4b source line
// fsz function name
// 1b null terminator
// ssz source file name
// 1b null terminator
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memcpy( ptr + 4, &color, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, func, fsz+1 );
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
MemWrite( &item->zoneBegin.thread, GetThreadHandle() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
}
static inline int LuaZoneBeginN( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
const auto zoneCnt = s_luaZoneState.counter++;
if( zoneCnt != 0 && !s_luaZoneState.active ) return 0;
s_luaZoneState.active = s_profiler.IsConnected();
if( !s_luaZoneState.active ) return 0;
#endif
const uint32_t color = Color::DeepSkyBlue3;
lua_Debug dbg;
lua_getstack( L, 1, &dbg );
lua_getinfo( L, "Snl", &dbg );
const uint32_t line = dbg.currentline;
const auto func = dbg.name ? dbg.name : dbg.short_src;
size_t nsz;
const auto name = lua_tolstring( L, 1, &nsz );
const auto fsz = strlen( func );
const auto ssz = strlen( dbg.source );
// Data layout:
// 4b payload size
// 4b color
// 4b source line
// fsz function name
// 1b null terminator
// ssz source file name
// 1b null terminator
// nsz zone name
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nsz );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memcpy( ptr + 4, &color, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, func, fsz+1 );
memcpy( ptr + 12 + fsz + 1, dbg.source, ssz + 1 );
memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nsz );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
MemWrite( &item->zoneBegin.thread, GetThreadHandle() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
}
static inline int LuaZoneEnd( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
assert( s_luaZoneState.counter != 0 );
s_luaZoneState.counter--;
if( !s_luaZoneState.active ) return 0;
if( !s_profiler.IsConnected() )
{
s_luaZoneState.active = false;
return 0;
}
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneEnd.time, Profiler::GetTime( item->zoneEnd.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneEnd.cpu, cpu );
#endif
MemWrite( &item->zoneEnd.thread, GetThreadHandle() );
tail.store( magic + 1, std::memory_order_release );
return 0;
}
static inline int LuaZoneText( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
if( !s_luaZoneState.active ) return 0;
if( !s_profiler.IsConnected() )
{
s_luaZoneState.active = false;
return 0;
}
#endif
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
Magic magic;
auto& token = s_token.ptr;
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneText );
MemWrite( &item->zoneText.thread, GetThreadHandle() );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
}
static inline int LuaZoneName( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
if( !s_luaZoneState.active ) return 0;
if( !s_profiler.IsConnected() )
{
s_luaZoneState.active = false;
return 0;
}
#endif
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
Magic magic;
auto& token = s_token.ptr;
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneName );
MemWrite( &item->zoneText.thread, GetThreadHandle() );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
}
static inline int LuaMessage( lua_State* L )
{
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return 0;
#endif
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
Magic magic;
auto& token = s_token.ptr;
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::Message );
MemWrite( &item->message.time, Profiler::GetTime() );
MemWrite( &item->message.thread, GetThreadHandle() );
MemWrite( &item->message.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
return 0;
}
}
static inline void LuaRegister( lua_State* L )
{
lua_newtable( L );
lua_pushcfunction( L, detail::LuaZoneBegin );
lua_setfield( L, -2, "ZoneBegin" );
lua_pushcfunction( L, detail::LuaZoneBeginN );
lua_setfield( L, -2, "ZoneBeginN" );
lua_pushcfunction( L, detail::LuaZoneEnd );
lua_setfield( L, -2, "ZoneEnd" );
lua_pushcfunction( L, detail::LuaZoneText );
lua_setfield( L, -2, "ZoneText" );
lua_pushcfunction( L, detail::LuaZoneName );
lua_setfield( L, -2, "ZoneName" );
lua_pushcfunction( L, detail::LuaMessage );
lua_setfield( L, -2, "Message" );
lua_setglobal( L, "tracy" );
}
static inline void LuaRemove( char* script ) {}
}
#endif
#endif

265
TracyOpenGL.hpp Normal file
View File

@@ -0,0 +1,265 @@
#ifndef __TRACYOPENGL_HPP__
#define __TRACYOPENGL_HPP__
// Include this file after you include OpenGL 3.2 headers.
#if !defined TRACY_ENABLE || defined __APPLE__
#define TracyGpuContext
#define TracyGpuNamedZone(x,y)
#define TracyGpuNamedZoneC(x,y,z)
#define TracyGpuZone(x)
#define TracyGpuZoneC(x,y)
#define TracyGpuCollect
#define TracyGpuNamedZoneS(x,y,z)
#define TracyGpuNamedZoneCS(x,y,z,w)
#define TracyGpuZoneS(x,y)
#define TracyGpuZoneCS(x,y,z)
#else
#include <atomic>
#include <assert.h>
#include <stdlib.h>
#include "Tracy.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyCallstack.hpp"
#include "common/TracyAlign.hpp"
#include "common/TracyAlloc.hpp"
#define TracyGpuContext tracy::s_gpuCtx.ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::s_gpuCtx.ptr) tracy::GpuCtx;
#define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
#define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
#define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name )
#define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color )
#define TracyGpuCollect tracy::s_gpuCtx.ptr->Collect();
#ifdef TRACY_HAS_CALLSTACK
# define TracyGpuNamedZoneS( varname, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
# define TracyGpuNamedZoneCS( varname, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth )
#else
# define TracyGpuNamedZoneS( varname, name, depth ) TracyGpuNamedZone( varname, name )
# define TracyGpuNamedZoneCS( varname, name, color, depth ) TracyGpuNamedZoneC( varname, name, color )
# define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
#endif
namespace tracy
{
extern std::atomic<uint8_t> s_gpuCtxCounter;
class GpuCtx
{
friend class GpuCtxScope;
enum { QueryCount = 64 * 1024 };
public:
GpuCtx()
: m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
{
assert( m_context != 255 );
glGenQueries( QueryCount, m_query );
int64_t tgpu;
glGetInteger64v( GL_TIMESTAMP, &tgpu );
int64_t tcpu = Profiler::GetTime();
GLint bits;
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
const float period = 1.f;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
MemWrite( &item->gpuNewContext.thread, GetThreadHandle() );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );
#ifdef TRACY_ON_DEMAND
s_profiler.DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
}
void Collect()
{
ZoneScopedC( Color::Red4 );
if( m_tail == m_head ) return;
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() )
{
m_head = m_tail = 0;
return;
}
#endif
auto start = m_tail;
auto end = m_head + QueryCount;
auto cnt = ( end - start ) % QueryCount;
while( cnt > 1 )
{
auto mid = start + cnt / 2;
GLint available;
glGetQueryObjectiv( m_query[mid % QueryCount], GL_QUERY_RESULT_AVAILABLE, &available );
if( available )
{
start = mid;
}
else
{
end = mid;
}
cnt = ( end - start ) % QueryCount;
}
start %= QueryCount;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
while( m_tail != start )
{
uint64_t time;
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
MemWrite( &item->gpuTime.context, m_context );
tail.store( magic + 1, std::memory_order_release );
m_tail = ( m_tail + 1 ) % QueryCount;
}
}
private:
tracy_force_inline unsigned int NextQueryId()
{
const auto id = m_head;
m_head = ( m_head + 1 ) % QueryCount;
assert( m_head != m_tail );
return id;
}
tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id )
{
return m_query[id];
}
tracy_force_inline uint8_t GetId() const
{
return m_context;
}
unsigned int m_query[QueryCount];
uint8_t m_context;
unsigned int m_head;
unsigned int m_tail;
};
extern thread_local GpuCtxWrapper s_gpuCtx;
class GpuCtxScope
{
public:
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc )
#ifdef TRACY_ON_DEMAND
: m_active( s_profiler.IsConnected() )
#endif
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
const auto queryId = s_gpuCtx.ptr->NextQueryId();
glQueryCounter( s_gpuCtx.ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBegin );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, s_gpuCtx.ptr->GetId() );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth )
#ifdef TRACY_ON_DEMAND
: m_active( s_profiler.IsConnected() )
#endif
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
const auto queryId = s_gpuCtx.ptr->NextQueryId();
glQueryCounter( s_gpuCtx.ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
const auto thread = GetThreadHandle();
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, thread );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, s_gpuCtx.ptr->GetId() );
tail.store( magic + 1, std::memory_order_release );
s_profiler.SendCallstack( depth, thread );
}
tracy_force_inline ~GpuCtxScope()
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
const auto queryId = s_gpuCtx.ptr->NextQueryId();
glQueryCounter( s_gpuCtx.ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, s_gpuCtx.ptr->GetId() );
tail.store( magic + 1, std::memory_order_release );
}
private:
#ifdef TRACY_ON_DEMAND
const bool m_active;
#endif
};
}
#endif
#endif

305
TracyVulkan.hpp Normal file
View File

@@ -0,0 +1,305 @@
#ifndef __TRACYVULKAN_HPP__
#define __TRACYVULKAN_HPP__
#if !defined TRACY_ENABLE
#define TracyVkContext(x,y,z,w)
#define TracyVkDestroy
#define TracyVkNamedZone(x,y,z)
#define TracyVkNamedZoneC(x,y,z,w)
#define TracyVkZone(x,y)
#define TracyVkZoneC(x,y,z)
#define TracyVkCollect(x)
#define TracyVkNamedZoneS(x,y,z,w)
#define TracyVkNamedZoneCS(x,y,z,w,v)
#define TracyVkZoneS(x,y,z)
#define TracyVkZoneCS(x,y,z,w)
#else
#include <assert.h>
#include <stdlib.h>
#include <vulkan/vulkan.h>
#include "Tracy.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyCallstack.hpp"
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::s_vkCtx.ptr = (tracy::VkCtx*)tracy::tracy_malloc( sizeof( tracy::VkCtx ) ); new(tracy::s_vkCtx.ptr) tracy::VkCtx( physdev, device, queue, cmdbuf );
#define TracyVkDestroy tracy::s_vkCtx.ptr->~VkCtx(); tracy::tracy_free( tracy::s_vkCtx.ptr ); tracy::s_vkCtx.ptr = nullptr;
#define TracyVkNamedZone( varname, cmdbuf, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf );
#define TracyVkNamedZoneC( varname, cmdbuf, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf );
#define TracyVkZone( cmdbuf, name ) TracyVkNamedZone( ___tracy_gpu_zone, cmdbuf, name )
#define TracyVkZoneC( cmdbuf, name, color ) TracyVkNamedZoneC( ___tracy_gpu_zone, cmdbuf, name, color )
#define TracyVkCollect( cmdbuf ) tracy::s_vkCtx.ptr->Collect( cmdbuf );
#ifdef TRACY_HAS_CALLSTACK
# define TracyVkNamedZoneS( varname, cmdbuf, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth );
# define TracyVkNamedZoneCS( varname, cmdbuf, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth );
# define TracyVkZoneS( cmdbuf, name, depth ) TracyVkNamedZoneS( ___tracy_gpu_zone, cmdbuf, name, depth )
# define TracyVkZoneCS( cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ___tracy_gpu_zone, cmdbuf, name, color, depth )
#else
# define TracyVkNamedZoneS( varname, cmdbuf, name, depth ) TracyVkNamedZone( varname, cmdbuf, name )
# define TracyVkNamedZoneCS( varname, cmdbuf, name, color, depth ) TracyVkNamedZoneC( varname, cmdbuf, name, color )
# define TracyVkZoneS( cmdbuf, name, depth ) TracyVkZone( cmdbuf, name )
# define TracyVkZoneCS( cmdbuf, name, color, depth ) TracyVkZoneC( cmdbuf, name, color )
#endif
namespace tracy
{
extern std::atomic<uint8_t> s_gpuCtxCounter;
class VkCtx
{
friend class VkCtxScope;
enum { QueryCount = 64 * 1024 };
public:
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
: m_device( device )
, m_queue( queue )
, m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
, m_oldCnt( 0 )
{
assert( m_context != 255 );
VkPhysicalDeviceProperties prop;
vkGetPhysicalDeviceProperties( physdev, &prop );
const float period = prop.limits.timestampPeriod;
VkQueryPoolCreateInfo poolInfo = {};
poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
poolInfo.queryCount = QueryCount;
poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
vkCreateQueryPool( device, &poolInfo, nullptr, &m_query );
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &cmdbuf;
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, QueryCount );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
int64_t tcpu = Profiler::GetTime();
int64_t tgpu;
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
#ifdef TRACY_ON_DEMAND
s_profiler.DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
}
~VkCtx()
{
vkDestroyQueryPool( m_device, m_query, nullptr );
}
void Collect( VkCommandBuffer cmdbuf )
{
ZoneScopedC( Color::Red4 );
if( m_tail == m_head ) return;
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() )
{
vkCmdResetQueryPool( cmdbuf, m_query, 0, QueryCount );
m_head = m_tail = 0;
return;
}
#endif
unsigned int cnt;
if( m_oldCnt != 0 )
{
cnt = m_oldCnt;
m_oldCnt = 0;
}
else
{
cnt = m_head < m_tail ? QueryCount - m_tail : m_head - m_tail;
}
int64_t res[QueryCount];
if( vkGetQueryPoolResults( m_device, m_query, m_tail, cnt, sizeof( res ), res, sizeof( *res ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY )
{
m_oldCnt = cnt;
return;
}
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
for( unsigned int idx=0; idx<cnt; idx++ )
{
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, res[idx] );
MemWrite( &item->gpuTime.queryId, uint16_t( m_tail + idx ) );
MemWrite( &item->gpuTime.context, m_context );
tail.store( magic + 1, std::memory_order_release );
}
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
m_tail += cnt;
if( m_tail == QueryCount ) m_tail = 0;
}
private:
tracy_force_inline unsigned int NextQueryId()
{
const auto id = m_head;
m_head = ( m_head + 1 ) % QueryCount;
assert( m_head != m_tail );
return id;
}
tracy_force_inline uint8_t GetId() const
{
return m_context;
}
VkDevice m_device;
VkQueue m_queue;
VkQueryPool m_query;
uint8_t m_context;
unsigned int m_head;
unsigned int m_tail;
unsigned int m_oldCnt;
};
extern VkCtxWrapper s_vkCtx;
class VkCtxScope
{
public:
tracy_force_inline VkCtxScope( const SourceLocationData* srcloc, VkCommandBuffer cmdbuf )
: m_cmdbuf( cmdbuf )
#ifdef TRACY_ON_DEMAND
, m_active( s_profiler.IsConnected() )
#endif
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
auto ctx = s_vkCtx.ptr;
const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBegin );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline VkCtxScope( const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth )
: m_cmdbuf( cmdbuf )
#ifdef TRACY_ON_DEMAND
, m_active( s_profiler.IsConnected() )
#endif
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
const auto thread = GetThreadHandle();
auto ctx = s_vkCtx.ptr;
const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, thread );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
tail.store( magic + 1, std::memory_order_release );
s_profiler.SendCallstack( depth, thread );
}
tracy_force_inline ~VkCtxScope()
{
#ifdef TRACY_ON_DEMAND
if( !m_active ) return;
#endif
auto ctx = s_vkCtx.ptr;
const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, ctx->m_query, queryId );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, ctx->GetId() );
tail.store( magic + 1, std::memory_order_release );
}
private:
VkCommandBuffer m_cmdbuf;
#ifdef TRACY_ON_DEMAND
const bool m_active;
#endif
};
}
#endif
#endif

View File

@@ -1,27 +0,0 @@
cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
set(CMAKE_CXX_STANDARD 20)
project(
tracy-capture
LANGUAGES C CXX
VERSION ${TRACY_VERSION_STRING}
)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)
set(PROGRAM_FILES
src/capture.cpp
)
add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})

View File

@@ -0,0 +1,12 @@
all: debug
debug:
@+make -f debug.mk all
release:
@+make -f release.mk all
clean:
@+make -f build.mk clean
.PHONY: all clean debug release

View File

@@ -0,0 +1,55 @@
CFLAGS +=
CXXFLAGS := $(CFLAGS) -std=gnu++17
DEFINES += -DTRACY_NO_STATISTICS
INCLUDES :=
LIBS := -lpthread
PROJECT := capture
IMAGE := $(PROJECT)-$(BUILD)
FILTER :=
BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
SRC := $(filter-out $(FILTER),$(BASE))
SRC2 := $(filter-out $(FILTER),$(BASE2))
OBJDIRBASE := obj/$(BUILD)
OBJDIR := $(OBJDIRBASE)/o/o/o
OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
all: $(IMAGE)
$(OBJDIR)/%.o: %.cpp
$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.cpp
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(OBJDIR)/%.o: %.c
$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
$(OBJDIR)/%.d : %.c
@echo Resolving dependencies of $<
@mkdir -p $(@D)
@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
rm -f $@.$$$$
$(IMAGE): $(OBJ) $(OBJ2)
$(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -o $@
ifneq "$(MAKECMDGOALS)" "clean"
-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) %(addprefix $(OBJDIR)/,$(SRC2:.c=.d))
endif
clean:
rm -rf $(OBJDIRBASE) $(IMAGE)*
.PHONY: clean all

View File

@@ -4,10 +4,8 @@ CFLAGS := -g3 -Wall
DEFINES := -DDEBUG
BUILD := debug
ifndef TRACY_NO_ISA_EXTENSIONS
ifeq ($(ARCH),x86_64)
CFLAGS += -msse4.1
endif
endif
include build.mk

View File

@@ -1,13 +1,11 @@
ARCH := $(shell uname -m)
CFLAGS := -O3 -s
CFLAGS := -O3 -s -fomit-frame-pointer
DEFINES := -DNDEBUG
BUILD := release
ifndef TRACY_NO_ISA_EXTENSIONS
ifeq ($(ARCH),x86_64)
CFLAGS += -msse4.1
endif
endif
include build.mk

View File

@@ -0,0 +1,25 @@
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.27428.2002
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "capture", "capture.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.ActiveCfg = Debug|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.Build.0 = Debug|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.ActiveCfg = Release|x64
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {3E51386C-43EA-44AC-9F24-AFAFE4D63ADE}
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,168 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>15.0</VCProjectVersion>
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
<RootNamespace>capture</RootNamespace>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<PreprocessorDefinitions>TRACY_NO_STATISTICS;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<LanguageStandard>stdcpplatest</LanguageStandard>
</ClCompile>
<Link>
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<PreprocessorDefinitions>TRACY_NO_STATISTICS;NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<LanguageStandard>stdcpplatest</LanguageStandard>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\TracySocket.cpp" />
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp" />
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
<ClCompile Include="..\..\src\capture.cpp" />
<ClCompile Include="..\..\src\getopt.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\TracyAlign.hpp" />
<ClInclude Include="..\..\..\common\TracyAlloc.hpp" />
<ClInclude Include="..\..\..\common\TracyColor.hpp" />
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
<ClInclude Include="..\..\..\common\tracy_benaphore.h" />
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp" />
<ClInclude Include="..\..\..\common\tracy_sema.h" />
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp" />
<ClInclude Include="..\..\src\getopt.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@@ -0,0 +1,108 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="src">
<UniqueIdentifier>{729c80ee-4d26-4a5e-8f1f-6c075783eb56}</UniqueIdentifier>
</Filter>
<Filter Include="server">
<UniqueIdentifier>{cf23ef7b-7694-4154-830b-00cf053350ea}</UniqueIdentifier>
</Filter>
<Filter Include="common">
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\TracySocket.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\TracySystem.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyMemory.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\..\server\TracyWorker.cpp">
<Filter>server</Filter>
</ClCompile>
<ClCompile Include="..\..\src\capture.cpp">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\src\getopt.c">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\..\common\tracy_lz4hc.cpp">
<Filter>common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyAlloc.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyColor.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyForceInline.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyProtocol.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyQueue.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracySocket.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracySystem.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyEvent.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyMemory.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracySlab.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyVector.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
<Filter>server</Filter>
</ClInclude>
<ClInclude Include="..\..\src\getopt.h">
<Filter>src</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\TracyAlign.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\tracy_benaphore.h">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\tracy_sema.h">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\..\common\tracy_lz4hc.hpp">
<Filter>common</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@@ -1,98 +1,104 @@
#ifdef _WIN32
# include <windows.h>
# include <io.h>
#else
# include <unistd.h>
#endif
#include <atomic>
#include <chrono>
#include <inttypes.h>
#include <mutex>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include "../../public/common/TracyProtocol.hpp"
#include "../../public/common/TracyStackFrames.hpp"
#include "../../server/TracyFileWrite.hpp"
#include "../../server/TracyMemory.hpp"
#include "../../server/TracyPrint.hpp"
#include "../../server/TracySysUtil.hpp"
#include "../../server/TracyWorker.hpp"
#include "getopt.h"
#ifdef _WIN32
# include "../../getopt/getopt.h"
#endif
// This atomic is written by a signal handler (SigInt). Traditionally that would
// have had to be `volatile sig_atomic_t`, and annoyingly, `bool` was
// technically not allowed there, even though in practice it would work.
// The good thing with C++11 atomics is that we can use atomic<bool> instead
// here and be on the actually supported path.
static std::atomic<bool> s_disconnect { false };
void SigInt( int )
static const char* TimeToString( int64_t ns )
{
// Relaxed order is closest to a traditional `volatile` write.
// We don't need stronger ordering since this signal handler doesn't do
// anything else that would need to be ordered relatively to this.
s_disconnect.store(true, std::memory_order_relaxed);
}
enum { Pool = 8 };
static char bufpool[Pool][64];
static int bufsel = 0;
char* buf = bufpool[bufsel];
bufsel = ( bufsel + 1 ) % Pool;
static bool s_isStdoutATerminal = false;
void InitIsStdoutATerminal() {
#ifdef _WIN32
s_isStdoutATerminal = _isatty( fileno( stdout ) );
#else
s_isStdoutATerminal = isatty( fileno( stdout ) );
#endif
}
bool IsStdoutATerminal() { return s_isStdoutATerminal; }
#define ANSI_RESET "\033[0m"
#define ANSI_BOLD "\033[1m"
#define ANSI_BLACK "\033[30m"
#define ANSI_RED "\033[31m"
#define ANSI_GREEN "\033[32m"
#define ANSI_YELLOW "\033[33m"
#define ANSI_BLUE "\033[34m"
#define ANSI_MAGENTA "\033[35m"
#define ANSI_CYAN "\033[36m"
#define ANSI_ERASE_LINE "\033[2K"
// Like printf, but if stdout is a terminal, prepends the output with
// the given `ansiEscape` and appends ANSI_RESET.
void AnsiPrintf( const char* ansiEscape, const char* format, ... ) {
if( IsStdoutATerminal() )
const char* sign = "";
if( ns < 0 )
{
// Prepend ansiEscape and append ANSI_RESET.
char buf[256];
va_list args;
va_start( args, format );
vsnprintf( buf, sizeof buf, format, args );
va_end( args );
printf( "%s%s" ANSI_RESET, ansiEscape, buf );
sign = "-";
ns = -ns;
}
if( ns < 1000 )
{
sprintf( buf, "%s%" PRIi64 " ns", sign, ns );
}
else if( ns < 1000ll * 1000 )
{
sprintf( buf, "%s%.2f us", sign, ns / 1000. );
}
else if( ns < 1000ll * 1000 * 1000 )
{
sprintf( buf, "%s%.2f ms", sign, ns / ( 1000. * 1000. ) );
}
else if( ns < 1000ll * 1000 * 1000 * 60 )
{
sprintf( buf, "%s%.2f s", sign, ns / ( 1000. * 1000. * 1000. ) );
}
else
{
// Just a normal printf.
va_list args;
va_start( args, format );
vfprintf( stdout, format, args );
va_end( args );
const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) );
const auto s = int64_t( ns - m * ( 1000ll * 1000 * 1000 * 60 ) );
sprintf( buf, "%s%" PRIi64 ":%04.1f", sign, m, s / ( 1000. * 1000. * 1000. ) );
}
return buf;
}
[[noreturn]] void Usage()
static const char* RealToString( double val, bool separator )
{
printf( "Usage: capture -o output.tracy [-a address] [-p port] [-f] [-s seconds] [-m memlimit]\n" );
enum { Pool = 8 };
static char bufpool[Pool][64];
static int bufsel = 0;
char* buf = bufpool[bufsel];
bufsel = ( bufsel + 1 ) % Pool;
sprintf( buf, "%f", val );
auto ptr = buf;
if( *ptr == '-' ) ptr++;
const auto vbegin = ptr;
if( separator )
{
while( *ptr != '\0' && *ptr != ',' && *ptr != '.' ) ptr++;
auto end = ptr;
while( *end != '\0' ) end++;
auto sz = end - ptr;
while( ptr - vbegin > 3 )
{
ptr -= 3;
memmove( ptr+1, ptr, sz );
*ptr = ',';
sz += 4;
}
}
while( *ptr != '\0' && *ptr != ',' && *ptr != '.' ) ptr++;
if( *ptr == '\0' ) return buf;
while( *ptr != '\0' ) ptr++;
ptr--;
while( *ptr == '0' && *ptr != ',' && *ptr != '.' ) ptr--;
if( *ptr != '.' && *ptr != ',' ) ptr++;
*ptr = '\0';
return buf;
}
void Usage()
{
printf( "Usage: capture -a address -o output.tracy\n" );
exit( 1 );
}
@@ -106,17 +112,11 @@ int main( int argc, char** argv )
}
#endif
InitIsStdoutATerminal();
bool overwrite = false;
const char* address = "127.0.0.1";
const char* address = nullptr;
const char* output = nullptr;
int port = 8086;
int seconds = -1;
int64_t memoryLimit = -1;
int c;
while( ( c = getopt( argc, argv, "a:o:p:fs:m:" ) ) != -1 )
while( ( c = getopt( argc, argv, "a:o:" ) ) != -1 )
{
switch( c )
{
@@ -126,18 +126,6 @@ int main( int argc, char** argv )
case 'o':
output = optarg;
break;
case 'p':
port = atoi( optarg );
break;
case 'f':
overwrite = true;
break;
case 's':
seconds = atoi(optarg);
break;
case 'm':
memoryLimit = std::clamp( atoll( optarg ), 1ll, 999ll ) * tracy::GetPhysicalMemorySize() / 100;
break;
default:
Usage();
break;
@@ -146,218 +134,46 @@ int main( int argc, char** argv )
if( !address || !output ) Usage();
struct stat st;
if( stat( output, &st ) == 0 && !overwrite )
{
printf( "Output file %s already exists! Use -f to force overwrite.\n", output );
return 4;
}
FILE* test = fopen( output, "wb" );
if( !test )
{
printf( "Cannot open output file %s for writing!\n", output );
return 5;
}
fclose( test );
unlink( output );
printf( "Connecting to %s:%i...", address, port );
printf( "Connecting to %s...", address );
fflush( stdout );
tracy::Worker worker( address, port, memoryLimit );
while( !worker.HasData() )
{
const auto handshake = worker.GetHandshakeStatus();
if( handshake == tracy::HandshakeProtocolMismatch )
{
printf( "\nThe client you are trying to connect to uses incompatible protocol version.\nMake sure you are using the same Tracy version on both client and server.\n" );
return 1;
}
if( handshake == tracy::HandshakeNotAvailable )
{
printf( "\nThe client you are trying to connect to is no longer able to sent profiling data,\nbecause another server was already connected to it.\nYou can do the following:\n\n 1. Restart the client application.\n 2. Rebuild the client application with on-demand mode enabled.\n" );
return 2;
}
if( handshake == tracy::HandshakeDropped )
{
printf( "\nThe client you are trying to connect to has disconnected during the initial\nconnection handshake. Please check your network configuration.\n" );
return 3;
}
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
}
printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );
tracy::Worker worker( address );
while( !worker.HasData() ) std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
printf( "\nQueue delay: %s\nTimer resolution: %s\n", TimeToString( worker.GetDelay() ), TimeToString( worker.GetResolution() ) );
#ifdef _WIN32
signal( SIGINT, SigInt );
#else
struct sigaction sigint, oldsigint;
memset( &sigint, 0, sizeof( sigint ) );
sigint.sa_handler = SigInt;
sigaction( SIGINT, &sigint, &oldsigint );
#endif
const auto firstTime = worker.GetFirstTime();
auto& lock = worker.GetMbpsDataLock();
const auto t0 = std::chrono::high_resolution_clock::now();
while( worker.IsConnected() )
{
// Relaxed order is sufficient here because `s_disconnect` is only ever
// set by this thread or by the SigInt handler, and that handler does
// nothing else than storing `s_disconnect`.
if( s_disconnect.load( std::memory_order_relaxed ) )
{
worker.Disconnect();
// Relaxed order is sufficient because only this thread ever reads
// this value.
s_disconnect.store(false, std::memory_order_relaxed );
break;
}
lock.lock();
const auto mbps = worker.GetMbpsData().back();
const auto compRatio = worker.GetCompRatio();
const auto netTotal = worker.GetDataTransferred();
lock.unlock();
// Output progress info only if destination is a TTY to avoid bloating
// log files (so this is not just about usage of ANSI color codes).
if( IsStdoutATerminal() )
if( mbps < 0.1f )
{
const char* unit = "Mbps";
float unitsPerMbps = 1.f;
if( mbps < 0.1f )
{
unit = "Kbps";
unitsPerMbps = 1000.f;
}
AnsiPrintf( ANSI_ERASE_LINE ANSI_CYAN ANSI_BOLD, "\r%7.2f %s", mbps * unitsPerMbps, unit );
printf( " /");
AnsiPrintf( ANSI_CYAN ANSI_BOLD, "%5.1f%%", compRatio * 100.f );
printf( " =");
AnsiPrintf( ANSI_YELLOW ANSI_BOLD, "%7.2f Mbps", mbps / compRatio );
printf( " | ");
AnsiPrintf( ANSI_YELLOW, "Tx: ");
AnsiPrintf( ANSI_GREEN, "%s", tracy::MemSizeToString( netTotal ) );
printf( " | ");
AnsiPrintf( ANSI_RED ANSI_BOLD, "%s", tracy::MemSizeToString( tracy::memUsage.load( std::memory_order_relaxed ) ) );
if( memoryLimit > 0 )
{
printf( " / " );
AnsiPrintf( ANSI_BLUE ANSI_BOLD, "%s", tracy::MemSizeToString( memoryLimit ) );
}
printf( " | ");
AnsiPrintf( ANSI_RED, "%s", tracy::TimeToString( worker.GetLastTime() - firstTime ) );
fflush( stdout );
printf( "\33[2K\r\033[36;1m%7.2f Kbps", mbps * 1000.f );
}
else
{
printf( "\33[2K\r\033[36;1m%7.2f Mbps", mbps );
}
printf( " \033[0m| Ratio: \033[36;1m%5.1f%% \033[0m| Real: \033[33;1m%7.2f Mbps \033[0m| Mem: \033[31;1m%.2f MB\033[0m", compRatio * 100.f, mbps / compRatio, tracy::memUsage.load( std::memory_order_relaxed ) / ( 1024.f * 1024.f ) );
fflush( stdout );
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
if( seconds != -1 )
{
const auto dur = std::chrono::high_resolution_clock::now() - t0;
if( std::chrono::duration_cast<std::chrono::seconds>(dur).count() >= seconds )
{
// Relaxed order is sufficient because only this thread ever reads
// this value.
s_disconnect.store(true, std::memory_order_relaxed );
}
}
}
const auto t1 = std::chrono::high_resolution_clock::now();
const auto& failure = worker.GetFailureType();
if( failure != tracy::Worker::Failure::None )
{
AnsiPrintf( ANSI_RED ANSI_BOLD, "\nInstrumentation failure: %s", tracy::Worker::GetFailureString( failure ) );
auto& fd = worker.GetFailureData();
if( !fd.message.empty() )
{
printf( "\nContext: %s", fd.message.c_str() );
}
if( fd.callstack != 0 )
{
AnsiPrintf( ANSI_BOLD, "\n%sFailure callstack:%s\n" );
auto& cs = worker.GetCallstack( fd.callstack );
int fidx = 0;
for( auto& entry : cs )
{
auto frameData = worker.GetCallstackFrame( entry );
if( !frameData )
{
printf( "%3i. %p\n", fidx++, (void*)worker.GetCanonicalPointer( entry ) );
}
else
{
const auto fsz = frameData->size;
for( uint8_t f=0; f<fsz; f++ )
{
const auto& frame = frameData->data[f];
auto txt = worker.GetString( frame.name );
if( fidx == 0 && f != fsz-1 )
{
auto test = tracy::s_tracyStackFrames;
bool match = false;
do
{
if( strcmp( txt, *test ) == 0 )
{
match = true;
break;
}
}
while( *++test );
if( match ) continue;
}
if( f == fsz-1 )
{
printf( "%3i. ", fidx++ );
}
else
{
AnsiPrintf( ANSI_BLACK ANSI_BOLD, "inl. " );
}
AnsiPrintf( ANSI_CYAN, "%s ", txt );
txt = worker.GetString( frame.file );
if( frame.line == 0 )
{
AnsiPrintf( ANSI_YELLOW, "(%s)", txt );
}
else
{
AnsiPrintf( ANSI_YELLOW, "(%s:%" PRIu32 ")", txt, frame.line );
}
if( frameData->imageName.Active() )
{
AnsiPrintf( ANSI_MAGENTA, " %s\n", worker.GetString( frameData->imageName ) );
}
else
{
printf( "\n" );
}
}
}
}
}
}
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() - firstTime ), tracy::RealToString( worker.GetZoneCount() ),
tracy::TimeToString( std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count() ) );
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nSaving trace...", worker.GetFrameCount( *worker.GetFramesBase() ), TimeToString( worker.GetLastTime() - worker.GetTimeBegin() ), RealToString( worker.GetZoneCount(), true ) );
fflush( stdout );
auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, tracy::FileCompression::Zstd, 3, 4 ) );
auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output ) );
if( f )
{
worker.Write( *f, false );
AnsiPrintf( ANSI_GREEN ANSI_BOLD, " done!\n" );
f->Finish();
const auto stats = f->GetCompressionStatistics();
printf( "Trace size %s (%.2f%% ratio)\n", tracy::MemSizeToString( stats.second ), 100.f * stats.second / stats.first );
worker.Write( *f );
printf( " \033[32;1mdone!\033[0m\n" );
}
else
{
AnsiPrintf( ANSI_RED ANSI_BOLD, " failed!\n");
printf( " \033[31;1failed!\033[0m\n" );
}
return 0;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2012-2023, Kim Grasman <kim.grasman@gmail.com>
* Copyright (c) 2012-2017, Kim Grasman <kim.grasman@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -159,7 +159,6 @@ int getopt_long(int argc, char* const argv[], const char* optstring,
const struct option* match = NULL;
int num_matches = 0;
size_t argument_name_length = 0;
size_t option_length = 0;
const char* current_argument = NULL;
int retval = -1;
@@ -176,16 +175,6 @@ int getopt_long(int argc, char* const argv[], const char* optstring,
current_argument = argv[optind] + 2;
argument_name_length = strcspn(current_argument, "=");
for (; o->name; ++o) {
/* Check for exact match first. */
option_length = strlen(o->name);
if (option_length == argument_name_length &&
strncmp(o->name, current_argument, option_length) == 0) {
match = o;
num_matches = 1;
break;
}
/* If not exact, count the number of abbreviated matches. */
if (strncmp(o->name, current_argument, argument_name_length) == 0) {
match = o;
++num_matches;

View File

@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2012-2023, Kim Grasman <kim.grasman@gmail.com>
* Copyright (c) 2012-2017, Kim Grasman <kim.grasman@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

253
client/TracyCallstack.cpp Normal file
View File

@@ -0,0 +1,253 @@
#include <stdio.h>
#include "TracyCallstack.hpp"
#ifdef TRACY_HAS_CALLSTACK
#if TRACY_HAS_CALLSTACK == 1
# include <windows.h>
# pragma warning( push )
# pragma warning( disable : 4091 )
# include <dbghelp.h>
# pragma warning( pop )
#elif TRACY_HAS_CALLSTACK >= 2
# include <dlfcn.h>
# include <cxxabi.h>
#endif
namespace tracy
{
#if TRACY_HAS_CALLSTACK == 1
extern "C" t_RtlWalkFrameChain RtlWalkFrameChain = 0;
void InitCallstack()
{
#ifdef UNICODE
RtlWalkFrameChain = (t_RtlWalkFrameChain)GetProcAddress( GetModuleHandle( L"ntdll.dll" ), "RtlWalkFrameChain" );
#else
RtlWalkFrameChain = (t_RtlWalkFrameChain)GetProcAddress( GetModuleHandle( "ntdll.dll" ), "RtlWalkFrameChain" );
#endif
SymInitialize( GetCurrentProcess(), nullptr, true );
SymSetOptions( SYMOPT_LOAD_LINES );
}
CallstackEntry DecodeCallstackPtr( uint64_t ptr )
{
CallstackEntry ret;
const auto proc = GetCurrentProcess();
char buf[sizeof( SYMBOL_INFO ) + 1024];
auto si = (SYMBOL_INFO*)buf;
si->SizeOfStruct = sizeof( SYMBOL_INFO );
si->MaxNameLen = 1024;
if( SymFromAddr( proc, ptr, nullptr, si ) == 0 )
{
memcpy( si->Name, "[unknown]", 10 );
si->NameLen = 9;
}
auto name = (char*)tracy_malloc( si->NameLen + 1 );
memcpy( name, si->Name, si->NameLen );
name[si->NameLen] = '\0';
ret.name = name;
const char* filename;
IMAGEHLP_LINE64 line;
DWORD displacement = 0;
line.SizeOfStruct = sizeof( IMAGEHLP_LINE64 );
if( SymGetLineFromAddr64( proc, ptr, &displacement, &line ) == 0 )
{
filename = "[unknown]";
ret.line = 0;
}
else
{
filename = line.FileName;
ret.line = line.LineNumber;
}
const auto fsz = strlen( filename );
auto file = (char*)tracy_malloc( fsz + 1 );
memcpy( file, filename, fsz );
file[fsz] = '\0';
ret.file = file;
return ret;
}
#elif TRACY_HAS_CALLSTACK == 2
CallstackEntry DecodeCallstackPtr( uint64_t ptr )
{
CallstackEntry ret;
ret.line = 0;
char* demangled = nullptr;
const char* symname = nullptr;
const char* symloc = nullptr;
auto vptr = (void*)ptr;
char** sym = nullptr;
ptrdiff_t symoff = 0;
Dl_info dlinfo;
if( dladdr( vptr, &dlinfo ) )
{
symloc = dlinfo.dli_fname;
symname = dlinfo.dli_sname;
symoff = (char*)ptr - (char*)dlinfo.dli_saddr;
if( symname && symname[0] == '_' )
{
size_t len = 0;
int status;
demangled = abi::__cxa_demangle( symname, nullptr, &len, &status );
if( status == 0 )
{
symname = demangled;
}
}
}
if( !symname )
{
symname = "[unknown]";
}
if( !symloc )
{
symloc = "[unknown]";
}
if( symoff == 0 )
{
const auto namelen = strlen( symname );
auto name = (char*)tracy_malloc( namelen + 1 );
memcpy( name, symname, namelen );
name[namelen] = '\0';
ret.name = name;
}
else
{
char buf[32];
sprintf( buf, " + %td", symoff );
const auto offlen = strlen( buf );
const auto namelen = strlen( symname );
auto name = (char*)tracy_malloc( namelen + offlen + 1 );
memcpy( name, symname, namelen );
memcpy( name + namelen, buf, offlen );
name[namelen + offlen] = '\0';
ret.name = name;
}
char buf[32];
sprintf( buf, " [%p]", (void*)ptr );
const auto addrlen = strlen( buf );
const auto loclen = strlen( symloc );
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
memcpy( loc, symloc, loclen );
memcpy( loc + loclen, buf, addrlen );
loc[loclen + addrlen] = '\0';
ret.file = loc;
if( sym ) free( sym );
if( demangled ) free( demangled );
return ret;
}
#elif TRACY_HAS_CALLSTACK == 3
CallstackEntry DecodeCallstackPtr( uint64_t ptr )
{
CallstackEntry ret;
ret.line = 0;
char* demangled = nullptr;
const char* symname = nullptr;
const char* symloc = nullptr;
auto vptr = (void*)ptr;
char** sym = nullptr;
ptrdiff_t symoff = 0;
Dl_info dlinfo;
if( dladdr( vptr, &dlinfo ) )
{
symloc = dlinfo.dli_fname;
symname = dlinfo.dli_sname;
symoff = (char*)ptr - (char*)dlinfo.dli_saddr;
if( symname && symname[0] == '_' )
{
size_t len = 0;
int status;
demangled = abi::__cxa_demangle( symname, nullptr, &len, &status );
if( status == 0 )
{
symname = demangled;
}
}
}
if( !symname )
{
sym = backtrace_symbols( &vptr, 1 );
if( !sym )
{
symname = "[unknown]";
}
else
{
symname = *sym;
}
}
if( !symloc )
{
symloc = "[unknown]";
}
if( symoff == 0 )
{
const auto namelen = strlen( symname );
auto name = (char*)tracy_malloc( namelen + 1 );
memcpy( name, symname, namelen );
name[namelen] = '\0';
ret.name = name;
}
else
{
char buf[32];
sprintf( buf, " + %td", symoff );
const auto offlen = strlen( buf );
const auto namelen = strlen( symname );
auto name = (char*)tracy_malloc( namelen + offlen + 1 );
memcpy( name, symname, namelen );
memcpy( name + namelen, buf, offlen );
name[namelen + offlen] = '\0';
ret.name = name;
}
char buf[32];
sprintf( buf, " [%p]", (void*)ptr );
const auto addrlen = strlen( buf );
const auto loclen = strlen( symloc );
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
memcpy( loc, symloc, loclen );
memcpy( loc + loclen, buf, addrlen );
loc[loclen + addrlen] = '\0';
ret.file = loc;
if( sym ) free( sym );
if( demangled ) free( demangled );
return ret;
}
#endif
}
#endif

117
client/TracyCallstack.hpp Normal file
View File

@@ -0,0 +1,117 @@
#ifndef __TRACYCALLSTACK_HPP__
#define __TRACYCALLSTACK_HPP__
#if defined _WIN32 || defined __CYGWIN__
# define TRACY_HAS_CALLSTACK 1
extern "C"
{
typedef unsigned long (__stdcall *t_RtlWalkFrameChain)( void**, unsigned long, unsigned long );
extern t_RtlWalkFrameChain RtlWalkFrameChain;
}
#elif defined __ANDROID__
# define TRACY_HAS_CALLSTACK 2
# include <unwind.h>
#elif defined __linux
# if defined _GNU_SOURCE && defined __GLIBC__
# define TRACY_HAS_CALLSTACK 3
# include <execinfo.h>
# else
# define TRACY_HAS_CALLSTACK 2
# include <unwind.h>
# endif
#endif
#ifdef TRACY_HAS_CALLSTACK
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "../common/TracyAlloc.hpp"
#include "../common/TracyForceInline.hpp"
namespace tracy
{
struct CallstackEntry
{
const char* name;
const char* file;
uint32_t line;
};
CallstackEntry DecodeCallstackPtr( uint64_t ptr );
#if TRACY_HAS_CALLSTACK == 1
void InitCallstack();
static tracy_force_inline void* Callstack( int depth )
{
assert( depth >= 1 && depth < 63 );
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
const auto num = RtlWalkFrameChain( (void**)( trace + 1 ), depth, 0 );
*trace = num;
return trace;
}
#elif TRACY_HAS_CALLSTACK == 2
static tracy_force_inline void InitCallstack() {}
struct BacktraceState
{
void** current;
void** end;
};
static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, void* arg )
{
auto state = (BacktraceState*)arg;
uintptr_t pc = _Unwind_GetIP( ctx );
if( pc )
{
if( state->current == state->end ) return _URC_END_OF_STACK;
*state->current++ = (void*)pc;
}
return _URC_NO_REASON;
}
static tracy_force_inline void* Callstack( int depth )
{
assert( depth >= 1 && depth < 63 );
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
BacktraceState state = { (void**)(trace+1), (void**)(trace+1+depth) };
_Unwind_Backtrace( tracy_unwind_callback, &state );
*trace = (uintptr_t*)state.current - trace + 1;
return trace;
}
#elif TRACY_HAS_CALLSTACK == 3
static tracy_force_inline void InitCallstack() {}
static tracy_force_inline void* Callstack( int depth )
{
assert( depth >= 1 );
auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) );
const auto num = backtrace( (void**)(trace+1), depth );
*trace = num;
return trace;
}
#endif
}
#endif
#endif

View File

@@ -1,7 +1,6 @@
#ifndef __TRACYFASTVECTOR_HPP__
#define __TRACYFASTVECTOR_HPP__
#include <assert.h>
#include <stddef.h>
#include "../common/TracyAlloc.hpp"
@@ -22,7 +21,6 @@ public:
, m_write( m_ptr )
, m_end( m_ptr + capacity )
{
assert( capacity != 0 );
}
FastVector( const FastVector& ) = delete;
@@ -98,11 +96,11 @@ public:
private:
tracy_no_inline void AllocMore()
{
const auto cap = size_t( m_end - m_ptr ) * 2;
const auto size = size_t( m_write - m_ptr );
const auto cap = ( m_end - m_ptr ) * 2;
const auto size = m_write - m_ptr;
T* ptr = (T*)tracy_malloc( sizeof( T ) * cap );
memcpy( ptr, m_ptr, size * sizeof( T ) );
tracy_free_fast( m_ptr );
tracy_free( m_ptr );
m_ptr = ptr;
m_write = m_ptr + size;
m_end = m_ptr + cap;

473
client/TracyLock.hpp Normal file
View File

@@ -0,0 +1,473 @@
#ifndef __TRACYLOCK_HPP__
#define __TRACYLOCK_HPP__
#include <atomic>
#include <limits>
#include "../common/TracySystem.hpp"
#include "../common/TracyAlign.hpp"
#include "TracyProfiler.hpp"
namespace tracy
{
extern std::atomic<uint32_t> s_lockCounter;
template<class T>
class Lockable
{
public:
tracy_force_inline Lockable( const SourceLocationData* srcloc )
: m_id( s_lockCounter.fetch_add( 1, std::memory_order_relaxed ) )
#ifdef TRACY_ON_DEMAND
, m_lockCount( 0 )
, m_active( false )
#endif
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
MemWrite( &item->lockAnnounce.type, LockType::Lockable );
#ifdef TRACY_ON_DEMAND
s_profiler.DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
}
Lockable( const Lockable& ) = delete;
Lockable& operator=( const Lockable& ) = delete;
tracy_force_inline void lock()
{
#ifdef TRACY_ON_DEMAND
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = s_profiler.IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue )
{
m_lockable.lock();
return;
}
#endif
const auto thread = GetThreadHandle();
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockWait );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.thread, thread );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::Lockable );
tail.store( magic + 1, std::memory_order_release );
}
m_lockable.lock();
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.thread, thread );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
}
tracy_force_inline void unlock()
{
m_lockable.unlock();
#ifdef TRACY_ON_DEMAND
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
if( !m_active.load( std::memory_order_relaxed ) ) return;
if( !s_profiler.IsConnected() )
{
m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockRelease );
MemWrite( &item->lockRelease.id, m_id );
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline bool try_lock()
{
const auto ret = m_lockable.try_lock();
#ifdef TRACY_ON_DEMAND
if( !ret ) return ret;
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = s_profiler.IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return ret;
#endif
if( ret )
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
return ret;
}
tracy_force_inline void Mark( const SourceLocationData* srcloc )
{
#ifdef TRACY_ON_DEMAND
const auto active = m_active.load( std::memory_order_relaxed );
if( !active ) return;
const auto connected = s_profiler.IsConnected();
if( !connected )
{
if( active ) m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockMark );
MemWrite( &item->lockMark.id, m_id );
MemWrite( &item->lockMark.thread, GetThreadHandle() );
MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
}
private:
T m_lockable;
uint32_t m_id;
#ifdef TRACY_ON_DEMAND
std::atomic<uint32_t> m_lockCount;
std::atomic<bool> m_active;
#endif
};
template<class T>
class SharedLockable
{
public:
tracy_force_inline SharedLockable( const SourceLocationData* srcloc )
: m_id( s_lockCounter.fetch_add( 1, std::memory_order_relaxed ) )
#ifdef TRACY_ON_DEMAND
, m_lockCount( 0 )
, m_active( false )
#endif
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
MemWrite( &item->lockAnnounce.type, LockType::SharedLockable );
#ifdef TRACY_ON_DEMAND
s_profiler.DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
}
SharedLockable( const SharedLockable& ) = delete;
SharedLockable& operator=( const SharedLockable& ) = delete;
tracy_force_inline void lock()
{
#ifdef TRACY_ON_DEMAND
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = s_profiler.IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue )
{
m_lockable.lock();
return;
}
#endif
const auto thread = GetThreadHandle();
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockWait );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.thread, thread );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::SharedLockable );
tail.store( magic + 1, std::memory_order_release );
}
m_lockable.lock();
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.thread, thread );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
}
tracy_force_inline void unlock()
{
m_lockable.unlock();
#ifdef TRACY_ON_DEMAND
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
if( !m_active.load( std::memory_order_relaxed ) ) return;
if( !s_profiler.IsConnected() )
{
m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockRelease );
MemWrite( &item->lockRelease.id, m_id );
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline bool try_lock()
{
const auto ret = m_lockable.try_lock();
#ifdef TRACY_ON_DEMAND
if( !ret ) return ret;
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = s_profiler.IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return ret;
#endif
if( ret )
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
return ret;
}
tracy_force_inline void lock_shared()
{
#ifdef TRACY_ON_DEMAND
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = s_profiler.IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue )
{
m_lockable.lock_shared();
return;
}
#endif
const auto thread = GetThreadHandle();
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockSharedWait );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.thread, thread );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::SharedLockable );
tail.store( magic + 1, std::memory_order_release );
}
m_lockable.lock_shared();
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockSharedObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.thread, thread );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
}
tracy_force_inline void unlock_shared()
{
m_lockable.unlock_shared();
#ifdef TRACY_ON_DEMAND
m_lockCount.fetch_sub( 1, std::memory_order_relaxed );
if( !m_active.load( std::memory_order_relaxed ) ) return;
if( !s_profiler.IsConnected() )
{
m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockSharedRelease );
MemWrite( &item->lockRelease.id, m_id );
MemWrite( &item->lockRelease.thread, GetThreadHandle() );
MemWrite( &item->lockRelease.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline bool try_lock_shared()
{
const auto ret = m_lockable.try_lock_shared();
#ifdef TRACY_ON_DEMAND
if( !ret ) return ret;
bool queue = false;
const auto locks = m_lockCount.fetch_add( 1, std::memory_order_relaxed );
const auto active = m_active.load( std::memory_order_relaxed );
if( locks == 0 || active )
{
const bool connected = s_profiler.IsConnected();
if( active != connected ) m_active.store( connected, std::memory_order_relaxed );
if( connected ) queue = true;
}
if( !queue ) return ret;
#endif
if( ret )
{
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockSharedObtain );
MemWrite( &item->lockObtain.id, m_id );
MemWrite( &item->lockObtain.thread, GetThreadHandle() );
MemWrite( &item->lockObtain.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
}
return ret;
}
tracy_force_inline void Mark( const SourceLocationData* srcloc )
{
#ifdef TRACY_ON_DEMAND
const auto active = m_active.load( std::memory_order_relaxed );
if( !active ) return;
const auto connected = s_profiler.IsConnected();
if( !connected )
{
if( active ) m_active.store( false, std::memory_order_relaxed );
return;
}
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::LockMark );
MemWrite( &item->lockMark.id, m_id );
MemWrite( &item->lockMark.thread, GetThreadHandle() );
MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
}
private:
T m_lockable;
uint32_t m_id;
#ifdef TRACY_ON_DEMAND
std::atomic<uint32_t> m_lockCount;
std::atomic<bool> m_active;
#endif
};
};
#endif

1588
client/TracyProfiler.cpp Normal file

File diff suppressed because it is too large Load Diff

459
client/TracyProfiler.hpp Normal file
View File

@@ -0,0 +1,459 @@
#ifndef __TRACYPROFILER_HPP__
#define __TRACYPROFILER_HPP__
#include <assert.h>
#include <atomic>
#include <chrono>
#include <stdint.h>
#include <string.h>
#include "concurrentqueue.h"
#include "TracyCallstack.hpp"
#include "TracyFastVector.hpp"
#include "../common/tracy_lz4.hpp"
#include "../common/TracyQueue.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#include "../common/TracyMutex.hpp"
#include "../common/TracySystem.hpp"
#if defined _MSC_VER || defined __CYGWIN__
# include <intrin.h>
#endif
#if defined _MSC_VER || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ ) || __ARM_ARCH >= 6
# define TRACY_HW_TIMER
# if defined _MSC_VER || defined __CYGWIN__
// Enable optimization for MSVC __rdtscp() intrin, saving one LHS of a cpu value on the stack.
// This comes at the cost of an unaligned memory write.
# define TRACY_RDTSCP_OPT
# endif
#endif
#define TracyConcat(x,y) TracyConcatIndirect(x,y)
#define TracyConcatIndirect(x,y) x##y
namespace tracy
{
class Socket;
struct SourceLocationData
{
const char* name;
const char* function;
const char* file;
uint32_t line;
uint32_t color;
};
struct ProducerWrapper
{
tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
};
extern thread_local ProducerWrapper s_token;
class GpuCtx;
struct GpuCtxWrapper
{
GpuCtx* ptr;
};
class VkCtx;
struct VkCtxWrapper
{
VkCtx* ptr;
};
#ifdef TRACY_ON_DEMAND
struct LuaZoneState
{
uint32_t counter;
bool active;
};
#endif
using Magic = tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t;
#if __ARM_ARCH >= 6
extern int64_t (*GetTimeImpl)();
#endif
class Profiler;
extern Profiler& s_profiler;
class Profiler
{
public:
Profiler();
~Profiler();
static tracy_force_inline int64_t GetTime( uint32_t& cpu )
{
#ifdef TRACY_HW_TIMER
# if __ARM_ARCH >= 6
cpu = 0xFFFFFFFF;
return GetTimeImpl();
# elif defined _MSC_VER || defined __CYGWIN__
const auto t = int64_t( __rdtscp( &cpu ) );
return t;
# elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
uint32_t eax, edx;
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (cpu) :: );
return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
# endif
#else
cpu = 0xFFFFFFFF;
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
#endif
}
static tracy_force_inline int64_t GetTime()
{
#ifdef TRACY_HW_TIMER
# if __ARM_ARCH >= 6
return GetTimeImpl();
# elif defined _MSC_VER || defined __CYGWIN__
unsigned int dontcare;
const auto t = int64_t( __rdtscp( &dontcare ) );
return t;
# elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
uint32_t eax, edx;
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx) :: "%ecx" );
return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
# endif
#else
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
#endif
}
static tracy_force_inline void SendFrameMark()
{
#ifdef TRACY_ON_DEMAND
s_profiler.m_frameCount.fetch_add( 1, std::memory_order_relaxed );
if( !s_profiler.IsConnected() ) return;
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::FrameMarkMsg );
MemWrite( &item->frameMark.time, GetTime() );
MemWrite( &item->frameMark.name, uint64_t( 0 ) );
tail.store( magic + 1, std::memory_order_release );
}
static tracy_force_inline void SendFrameMark( const char* name, QueueType type )
{
assert( type == QueueType::FrameMarkMsg || type == QueueType::FrameMarkMsgStart || type == QueueType::FrameMarkMsgEnd );
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, type );
MemWrite( &item->frameMark.time, GetTime() );
MemWrite( &item->frameMark.name, uint64_t( name ) );
tail.store( magic + 1, std::memory_order_release );
}
static tracy_force_inline void PlotData( const char* name, int64_t val )
{
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::PlotData );
MemWrite( &item->plotData.name, (uint64_t)name );
MemWrite( &item->plotData.time, GetTime() );
MemWrite( &item->plotData.type, PlotDataType::Int );
MemWrite( &item->plotData.data.i, val );
tail.store( magic + 1, std::memory_order_release );
}
static tracy_force_inline void PlotData( const char* name, float val )
{
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::PlotData );
MemWrite( &item->plotData.name, (uint64_t)name );
MemWrite( &item->plotData.time, GetTime() );
MemWrite( &item->plotData.type, PlotDataType::Float );
MemWrite( &item->plotData.data.f, val );
tail.store( magic + 1, std::memory_order_release );
}
static tracy_force_inline void PlotData( const char* name, double val )
{
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::PlotData );
MemWrite( &item->plotData.name, (uint64_t)name );
MemWrite( &item->plotData.time, GetTime() );
MemWrite( &item->plotData.type, PlotDataType::Double );
MemWrite( &item->plotData.data.d, val );
tail.store( magic + 1, std::memory_order_release );
}
static tracy_force_inline void Message( const char* txt, size_t size )
{
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
#endif
Magic magic;
auto& token = s_token.ptr;
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::Message );
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.thread, GetThreadHandle() );
MemWrite( &item->message.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
}
static tracy_force_inline void Message( const char* txt )
{
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
#endif
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::MessageLiteral );
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.thread, GetThreadHandle() );
MemWrite( &item->message.text, (uint64_t)txt );
tail.store( magic + 1, std::memory_order_release );
}
static tracy_force_inline void MemAlloc( const void* ptr, size_t size )
{
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
#endif
const auto thread = GetThreadHandle();
s_profiler.m_serialLock.lock();
SendMemAlloc( QueueType::MemAlloc, thread, ptr, size );
s_profiler.m_serialLock.unlock();
}
static tracy_force_inline void MemFree( const void* ptr )
{
#ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
#endif
const auto thread = GetThreadHandle();
s_profiler.m_serialLock.lock();
SendMemFree( QueueType::MemFree, thread, ptr );
s_profiler.m_serialLock.unlock();
}
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth )
{
#ifdef TRACY_HAS_CALLSTACK
# ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
rpmalloc_thread_initialize();
auto callstack = Callstack( depth );
s_profiler.m_serialLock.lock();
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
SendCallstackMemory( callstack );
s_profiler.m_serialLock.unlock();
#else
MemAlloc( ptr, size );
#endif
}
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth )
{
#ifdef TRACY_HAS_CALLSTACK
# ifdef TRACY_ON_DEMAND
if( !s_profiler.IsConnected() ) return;
# endif
const auto thread = GetThreadHandle();
rpmalloc_thread_initialize();
auto callstack = Callstack( depth );
s_profiler.m_serialLock.lock();
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
SendCallstackMemory( callstack );
s_profiler.m_serialLock.unlock();
#else
MemFree( ptr );
#endif
}
static tracy_force_inline void SendCallstack( int depth, uint64_t thread )
{
#ifdef TRACY_HAS_CALLSTACK
auto ptr = Callstack( depth );
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::Callstack );
MemWrite( &item->callstack.ptr, ptr );
MemWrite( &item->callstack.thread, thread );
tail.store( magic + 1, std::memory_order_release );
#endif
}
void SendCallstack( int depth, uint64_t thread, const char* skipBefore );
static bool ShouldExit();
#ifdef TRACY_ON_DEMAND
tracy_force_inline bool IsConnected()
{
return m_isConnected.load( std::memory_order_relaxed );
}
tracy_force_inline void DeferItem( const QueueItem& item )
{
m_deferredLock.lock();
auto dst = m_deferredQueue.push_next();
memcpy( dst, &item, sizeof( item ) );
m_deferredLock.unlock();
}
#endif
void RequestShutdown() { m_shutdown.store( true, std::memory_order_relaxed ); m_shutdownManual.store( true, std::memory_order_relaxed ); }
bool HasShutdownFinished() const { return m_shutdownFinished.load( std::memory_order_relaxed ); }
private:
enum DequeueStatus { Success, ConnectionLost, QueueEmpty };
static void LaunchWorker( void* ptr ) { ((Profiler*)ptr)->Worker(); }
void Worker();
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
DequeueStatus DequeueSerial();
bool AppendData( const void* data, size_t len );
bool CommitData();
bool NeedDataSize( size_t len );
tracy_force_inline void AppendDataUnsafe( const void* data, size_t len )
{
memcpy( m_buffer + m_bufferOffset, data, len );
m_bufferOffset += int( len );
}
bool SendData( const char* data, size_t len );
void SendString( uint64_t ptr, const char* str, QueueType type );
void SendSourceLocation( uint64_t ptr );
void SendSourceLocationPayload( uint64_t ptr );
void SendCallstackPayload( uint64_t ptr );
void SendCallstackFrame( uint64_t ptr );
bool HandleServerQuery();
void CalibrateTimer();
void CalibrateDelay();
static tracy_force_inline void SendCallstackMemory( void* ptr )
{
#ifdef TRACY_HAS_CALLSTACK
auto item = s_profiler.m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, QueueType::CallstackMemory );
MemWrite( &item->callstackMemory.ptr, (uint64_t)ptr );
s_profiler.m_serialQueue.commit_next();
#endif
}
static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size )
{
assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack );
auto item = s_profiler.m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, type );
MemWrite( &item->memAlloc.time, GetTime() );
MemWrite( &item->memAlloc.thread, thread );
MemWrite( &item->memAlloc.ptr, (uint64_t)ptr );
if( compile_time_condition<sizeof( size ) == 4>::value )
{
memcpy( &item->memAlloc.size, &size, 4 );
memset( &item->memAlloc.size + 4, 0, 2 );
}
else
{
assert( sizeof( size ) == 8 );
memcpy( &item->memAlloc.size, &size, 6 );
}
s_profiler.m_serialQueue.commit_next();
}
static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr )
{
assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack );
auto item = s_profiler.m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, type );
MemWrite( &item->memFree.time, GetTime() );
MemWrite( &item->memFree.thread, thread );
MemWrite( &item->memFree.ptr, (uint64_t)ptr );
s_profiler.m_serialQueue.commit_next();
}
double m_timerMul;
uint64_t m_resolution;
uint64_t m_delay;
std::atomic<int64_t> m_timeBegin;
uint64_t m_mainThread;
uint64_t m_epoch;
std::atomic<bool> m_shutdown;
std::atomic<bool> m_shutdownManual;
std::atomic<bool> m_shutdownFinished;
Socket* m_sock;
bool m_noExit;
LZ4_stream_t* m_stream;
char* m_buffer;
int m_bufferOffset;
int m_bufferStart;
QueueItem* m_itemBuf;
char* m_lz4Buf;
FastVector<QueueItem> m_serialQueue, m_serialDequeue;
TracyMutex m_serialLock;
#ifdef TRACY_ON_DEMAND
std::atomic<bool> m_isConnected;
std::atomic<uint64_t> m_frameCount;
TracyMutex m_deferredLock;
FastVector<QueueItem> m_deferredQueue;
#endif
};
};
#endif

132
client/TracyScoped.hpp Normal file
View File

@@ -0,0 +1,132 @@
#ifndef __TRACYSCOPED_HPP__
#define __TRACYSCOPED_HPP__
#include <stdint.h>
#include <string.h>
#include "../common/TracySystem.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#include "TracyProfiler.hpp"
namespace tracy
{
class ScopedZone
{
public:
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( s_profiler.IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto thread = GetThreadHandle();
m_thread = thread;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
MemWrite( &item->zoneBegin.thread, thread );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( s_profiler.IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
const auto thread = GetThreadHandle();
m_thread = thread;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginCallstack );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneBegin.time, Profiler::GetTime( item->zoneBegin.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneBegin.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneBegin.cpu, cpu );
#endif
MemWrite( &item->zoneBegin.thread, thread );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
s_profiler.SendCallstack( depth, thread );
}
tracy_force_inline ~ScopedZone()
{
if( !m_active ) return;
Magic magic;
auto& token = s_token.ptr;
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
#ifdef TRACY_RDTSCP_OPT
MemWrite( &item->zoneEnd.time, Profiler::GetTime( item->zoneEnd.cpu ) );
#else
uint32_t cpu;
MemWrite( &item->zoneEnd.time, Profiler::GetTime( cpu ) );
MemWrite( &item->zoneEnd.cpu, cpu );
#endif
MemWrite( &item->zoneEnd.thread, m_thread );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline void Text( const char* txt, size_t size )
{
if( !m_active ) return;
Magic magic;
auto& token = s_token.ptr;
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneText );
MemWrite( &item->zoneText.thread, m_thread );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
}
tracy_force_inline void Name( const char* txt, size_t size )
{
if( !m_active ) return;
Magic magic;
auto& token = s_token.ptr;
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin<tracy::moodycamel::CanAlloc>( magic );
MemWrite( &item->hdr.type, QueueType::ZoneName );
MemWrite( &item->zoneText.thread, m_thread );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
}
private:
uint64_t m_thread;
const bool m_active;
};
}
#endif

View File

@@ -1,36 +1,16 @@
#ifndef __TRACYTHREAD_HPP__
#define __TRACYTHREAD_HPP__
#if defined _WIN32
#ifdef _MSC_VER
# include <windows.h>
#else
# include <pthread.h>
#endif
#ifdef TRACY_MANUAL_LIFETIME
# include "tracy_rpmalloc.hpp"
#endif
namespace tracy
{
#ifdef TRACY_MANUAL_LIFETIME
extern thread_local bool RpThreadInitDone;
#endif
class ThreadExitHandler
{
public:
~ThreadExitHandler()
{
#ifdef TRACY_MANUAL_LIFETIME
rpmalloc_thread_finalize( 1 );
RpThreadInitDone = false;
#endif
}
};
#if defined _WIN32
#ifdef _MSC_VER
class Thread
{

3697
client/concurrentqueue.h Normal file

File diff suppressed because it is too large Load Diff

2096
client/tracy_rpmalloc.cpp Normal file

File diff suppressed because it is too large Load Diff

151
client/tracy_rpmalloc.hpp Normal file
View File

@@ -0,0 +1,151 @@
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson / Rampant Pixels
*
* This library provides a cross-platform lock free thread caching malloc implementation in C11.
* The latest source code is always available at
*
* https://github.com/rampantpixels/rpmalloc
*
* This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
*
*/
#pragma once
#include <stddef.h>
namespace tracy
{
#if defined(__clang__) || defined(__GNUC__)
# define RPMALLOC_ATTRIBUTE __attribute__((__malloc__))
# define RPMALLOC_RESTRICT
# define RPMALLOC_CDECL
#elif defined(_MSC_VER)
# define RPMALLOC_ATTRIBUTE
# define RPMALLOC_RESTRICT __declspec(restrict)
# define RPMALLOC_CDECL __cdecl
#else
# define RPMALLOC_ATTRIBUTE
# define RPMALLOC_RESTRICT
# define RPMALLOC_CDECL
#endif
//! Flag to rpaligned_realloc to not preserve content in reallocation
#define RPMALLOC_NO_PRESERVE 1
typedef struct rpmalloc_global_statistics_t {
//! Current amount of virtual memory mapped (only if ENABLE_STATISTICS=1)
size_t mapped;
//! Current amount of memory in global caches for small and medium sizes (<64KiB)
size_t cached;
//! Total amount of memory mapped (only if ENABLE_STATISTICS=1)
size_t mapped_total;
//! Total amount of memory unmapped (only if ENABLE_STATISTICS=1)
size_t unmapped_total;
} rpmalloc_global_statistics_t;
typedef struct rpmalloc_thread_statistics_t {
//! Current number of bytes available for allocation from active spans
size_t active;
//! Current number of bytes available in thread size class caches
size_t sizecache;
//! Current number of bytes available in thread span caches
size_t spancache;
//! Current number of bytes in pending deferred deallocations
size_t deferred;
//! Total number of bytes transitioned from thread cache to global cache
size_t thread_to_global;
//! Total number of bytes transitioned from global cache to thread cache
size_t global_to_thread;
} rpmalloc_thread_statistics_t;
typedef struct rpmalloc_config_t {
//! Map memory pages for the given number of bytes. The returned address MUST be
// aligned to the rpmalloc span size, which will always be a power of two.
// Optionally the function can store an alignment offset in the offset variable
// in case it performs alignment and the returned pointer is offset from the
// actual start of the memory region due to this alignment. The alignment offset
// will be passed to the memory unmap function. The alignment offset MUST NOT be
// larger than 65535 (storable in an uint16_t), if it is you must use natural
// alignment to shift it into 16 bits.
void* (*memory_map)(size_t size, size_t* offset);
//! Unmap the memory pages starting at address and spanning the given number of bytes.
// If release is set to 1, the unmap is for an entire span range as returned by
// a previous call to memory_map and that the entire range should be released.
// If release is set to 0, the unmap is a partial decommit of a subset of the mapped
// memory range.
void (*memory_unmap)(void* address, size_t size, size_t offset, int release);
//! Size of memory pages. The page size MUST be a power of two in [512,16384] range
// (2^9 to 2^14) unless 0 - set to 0 to use system page size. All memory mapping
// requests to memory_map will be made with size set to a multiple of the page size.
size_t page_size;
//! Size of a span of memory pages. MUST be a multiple of page size, and in [4096,262144]
// range (unless 0 - set to 0 to use the default span size).
size_t span_size;
//! Number of spans to map at each request to map new virtual memory blocks. This can
// be used to minimize the system call overhead at the cost of virtual memory address
// space. The extra mapped pages will not be written until actually used, so physical
// committed memory should not be affected in the default implementation.
size_t span_map_count;
//! Debug callback if memory guards are enabled. Called if a memory overwrite is detected
void (*memory_overwrite)(void* address);
} rpmalloc_config_t;
extern int
rpmalloc_initialize(void);
extern int
rpmalloc_initialize_config(const rpmalloc_config_t* config);
extern const rpmalloc_config_t*
rpmalloc_config(void);
extern void
rpmalloc_finalize(void);
extern void
rpmalloc_thread_initialize(void);
extern void
rpmalloc_thread_finalize(void);
extern void
rpmalloc_thread_collect(void);
extern int
rpmalloc_is_thread_initialized(void);
extern void
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats);
extern void
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats);
extern RPMALLOC_RESTRICT void*
rpmalloc(size_t size) RPMALLOC_ATTRIBUTE;
extern void
rpfree(void* ptr);
extern RPMALLOC_RESTRICT void*
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIBUTE;
extern void*
rprealloc(void* ptr, size_t size);
extern void*
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags);
extern RPMALLOC_RESTRICT void*
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
extern RPMALLOC_RESTRICT void*
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
extern int
rpposix_memalign(void **memptr, size_t alignment, size_t size);
extern size_t
rpmalloc_usable_size(void* ptr);
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,300 +0,0 @@
#.rst:
# ECMFindModuleHelpers
# --------------------
#
# Helper macros for find modules: ecm_find_package_version_check(),
# ecm_find_package_parse_components() and
# ecm_find_package_handle_library_components().
#
# ::
#
# ecm_find_package_version_check(<name>)
#
# Prints warnings if the CMake version or the project's required CMake version
# is older than that required by extra-cmake-modules.
#
# ::
#
# ecm_find_package_parse_components(<name>
# RESULT_VAR <variable>
# KNOWN_COMPONENTS <component1> [<component2> [...]]
# [SKIP_DEPENDENCY_HANDLING])
#
# This macro will populate <variable> with a list of components found in
# <name>_FIND_COMPONENTS, after checking that all those components are in the
# list of KNOWN_COMPONENTS; if there are any unknown components, it will print
# an error or warning (depending on the value of <name>_FIND_REQUIRED) and call
# return().
#
# The order of components in <variable> is guaranteed to match the order they
# are listed in the KNOWN_COMPONENTS argument.
#
# If SKIP_DEPENDENCY_HANDLING is not set, for each component the variable
# <name>_<component>_component_deps will be checked for dependent components.
# If <component> is listed in <name>_FIND_COMPONENTS, then all its (transitive)
# dependencies will also be added to <variable>.
#
# ::
#
# ecm_find_package_handle_library_components(<name>
# COMPONENTS <component> [<component> [...]]
# [SKIP_DEPENDENCY_HANDLING])
# [SKIP_PKG_CONFIG])
#
# Creates an imported library target for each component. The operation of this
# macro depends on the presence of a number of CMake variables.
#
# The <name>_<component>_lib variable should contain the name of this library,
# and <name>_<component>_header variable should contain the name of a header
# file associated with it (whatever relative path is normally passed to
# '#include'). <name>_<component>_header_subdir variable can be used to specify
# which subdirectory of the include path the headers will be found in.
# ecm_find_package_components() will then search for the library
# and include directory (creating appropriate cache variables) and create an
# imported library target named <name>::<component>.
#
# Additional variables can be used to provide additional information:
#
# If SKIP_PKG_CONFIG, the <name>_<component>_pkg_config variable is set, and
# pkg-config is found, the pkg-config module given by
# <name>_<component>_pkg_config will be searched for and used to help locate the
# library and header file. It will also be used to set
# <name>_<component>_VERSION.
#
# Note that if version information is found via pkg-config,
# <name>_<component>_FIND_VERSION can be set to require a particular version
# for each component.
#
# If SKIP_DEPENDENCY_HANDLING is not set, the INTERFACE_LINK_LIBRARIES property
# of the imported target for <component> will be set to contain the imported
# targets for the components listed in <name>_<component>_component_deps.
# <component>_FOUND will also be set to false if any of the components in
# <name>_<component>_component_deps are not found. This requires the components
# in <name>_<component>_component_deps to be listed before <component> in the
# COMPONENTS argument.
#
# The following variables will be set:
#
# ``<name>_TARGETS``
# the imported targets
# ``<name>_LIBRARIES``
# the found libraries
# ``<name>_INCLUDE_DIRS``
# the combined required include directories for the components
# ``<name>_DEFINITIONS``
# the "other" CFLAGS provided by pkg-config, if any
# ``<name>_VERSION``
# the value of ``<name>_<component>_VERSION`` for the first component that
# has this variable set (note that components are searched for in the order
# they are passed to the macro), although if it is already set, it will not
# be altered
#
# Note that these variables are never cleared, so if
# ecm_find_package_handle_library_components() is called multiple times with
# different components (typically because of multiple find_package() calls) then
# ``<name>_TARGETS``, for example, will contain all the targets found in any
# call (although no duplicates).
#
# Since pre-1.0.0.
#=============================================================================
# Copyright 2014 Alex Merry <alex.merry@kde.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
macro(ecm_find_package_version_check module_name)
if(CMAKE_VERSION VERSION_LESS 2.8.12)
message(FATAL_ERROR "CMake 2.8.12 is required by Find${module_name}.cmake")
endif()
if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 2.8.12)
message(AUTHOR_WARNING "Your project should require at least CMake 2.8.12 to use Find${module_name}.cmake")
endif()
endmacro()
macro(ecm_find_package_parse_components module_name)
set(ecm_fppc_options SKIP_DEPENDENCY_HANDLING)
set(ecm_fppc_oneValueArgs RESULT_VAR)
set(ecm_fppc_multiValueArgs KNOWN_COMPONENTS DEFAULT_COMPONENTS)
cmake_parse_arguments(ECM_FPPC "${ecm_fppc_options}" "${ecm_fppc_oneValueArgs}" "${ecm_fppc_multiValueArgs}" ${ARGN})
if(ECM_FPPC_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unexpected arguments to ecm_find_package_parse_components: ${ECM_FPPC_UNPARSED_ARGUMENTS}")
endif()
if(NOT ECM_FPPC_RESULT_VAR)
message(FATAL_ERROR "Missing RESULT_VAR argument to ecm_find_package_parse_components")
endif()
if(NOT ECM_FPPC_KNOWN_COMPONENTS)
message(FATAL_ERROR "Missing KNOWN_COMPONENTS argument to ecm_find_package_parse_components")
endif()
if(NOT ECM_FPPC_DEFAULT_COMPONENTS)
set(ECM_FPPC_DEFAULT_COMPONENTS ${ECM_FPPC_KNOWN_COMPONENTS})
endif()
if(${module_name}_FIND_COMPONENTS)
set(ecm_fppc_requestedComps ${${module_name}_FIND_COMPONENTS})
if(NOT ECM_FPPC_SKIP_DEPENDENCY_HANDLING)
# Make sure deps are included
foreach(ecm_fppc_comp ${ecm_fppc_requestedComps})
foreach(ecm_fppc_dep_comp ${${module_name}_${ecm_fppc_comp}_component_deps})
list(FIND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}" ecm_fppc_index)
if("${ecm_fppc_index}" STREQUAL "-1")
if(NOT ${module_name}_FIND_QUIETLY)
message(STATUS "${module_name}: ${ecm_fppc_comp} requires ${${module_name}_${ecm_fppc_comp}_component_deps}")
endif()
list(APPEND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}")
endif()
endforeach()
endforeach()
else()
message(STATUS "Skipping dependency handling for ${module_name}")
endif()
list(REMOVE_DUPLICATES ecm_fppc_requestedComps)
# This makes sure components are listed in the same order as
# KNOWN_COMPONENTS (potentially important for inter-dependencies)
set(${ECM_FPPC_RESULT_VAR})
foreach(ecm_fppc_comp ${ECM_FPPC_KNOWN_COMPONENTS})
list(FIND ecm_fppc_requestedComps "${ecm_fppc_comp}" ecm_fppc_index)
if(NOT "${ecm_fppc_index}" STREQUAL "-1")
list(APPEND ${ECM_FPPC_RESULT_VAR} "${ecm_fppc_comp}")
list(REMOVE_AT ecm_fppc_requestedComps ${ecm_fppc_index})
endif()
endforeach()
# if there are any left, they are unknown components
if(ecm_fppc_requestedComps)
set(ecm_fppc_msgType STATUS)
if(${module_name}_FIND_REQUIRED)
set(ecm_fppc_msgType FATAL_ERROR)
endif()
if(NOT ${module_name}_FIND_QUIETLY)
message(${ecm_fppc_msgType} "${module_name}: requested unknown components ${ecm_fppc_requestedComps}")
endif()
return()
endif()
else()
set(${ECM_FPPC_RESULT_VAR} ${ECM_FPPC_DEFAULT_COMPONENTS})
endif()
endmacro()
macro(ecm_find_package_handle_library_components module_name)
set(ecm_fpwc_options SKIP_PKG_CONFIG SKIP_DEPENDENCY_HANDLING)
set(ecm_fpwc_oneValueArgs)
set(ecm_fpwc_multiValueArgs COMPONENTS)
cmake_parse_arguments(ECM_FPWC "${ecm_fpwc_options}" "${ecm_fpwc_oneValueArgs}" "${ecm_fpwc_multiValueArgs}" ${ARGN})
if(ECM_FPWC_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unexpected arguments to ecm_find_package_handle_components: ${ECM_FPWC_UNPARSED_ARGUMENTS}")
endif()
if(NOT ECM_FPWC_COMPONENTS)
message(FATAL_ERROR "Missing COMPONENTS argument to ecm_find_package_handle_components")
endif()
include(FindPackageHandleStandardArgs)
find_package(PkgConfig QUIET)
foreach(ecm_fpwc_comp ${ECM_FPWC_COMPONENTS})
set(ecm_fpwc_dep_vars)
set(ecm_fpwc_dep_targets)
if(NOT SKIP_DEPENDENCY_HANDLING)
foreach(ecm_fpwc_dep ${${module_name}_${ecm_fpwc_comp}_component_deps})
list(APPEND ecm_fpwc_dep_vars "${module_name}_${ecm_fpwc_dep}_FOUND")
list(APPEND ecm_fpwc_dep_targets "${module_name}::${ecm_fpwc_dep}")
endforeach()
endif()
if(NOT ECM_FPWC_SKIP_PKG_CONFIG AND ${module_name}_${ecm_fpwc_comp}_pkg_config)
pkg_check_modules(PKG_${module_name}_${ecm_fpwc_comp} QUIET
${${module_name}_${ecm_fpwc_comp}_pkg_config})
endif()
find_path(${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
NAMES ${${module_name}_${ecm_fpwc_comp}_header}
HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_INCLUDE_DIRS}
PATH_SUFFIXES ${${module_name}_${ecm_fpwc_comp}_header_subdir}
)
find_library(${module_name}_${ecm_fpwc_comp}_LIBRARY
NAMES ${${module_name}_${ecm_fpwc_comp}_lib}
HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_LIBRARY_DIRS}
)
set(${module_name}_${ecm_fpwc_comp}_VERSION "${PKG_${module_name}_${ecm_fpwc_comp}_VERSION}")
if(NOT ${module_name}_VERSION)
set(${module_name}_VERSION ${${module_name}_${ecm_fpwc_comp}_VERSION})
endif()
set(_name_mismatched_arg)
if(NOT CMAKE_VERSION VERSION_LESS 3.17)
set(_name_mismatched_arg NAME_MISMATCHED)
endif()
find_package_handle_standard_args(${module_name}_${ecm_fpwc_comp}
FOUND_VAR
${module_name}_${ecm_fpwc_comp}_FOUND
REQUIRED_VARS
${module_name}_${ecm_fpwc_comp}_LIBRARY
${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
${ecm_fpwc_dep_vars}
VERSION_VAR
${module_name}_${ecm_fpwc_comp}_VERSION
${_name_mismatched_arg}
)
mark_as_advanced(
${module_name}_${ecm_fpwc_comp}_LIBRARY
${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
)
if(${module_name}_${ecm_fpwc_comp}_FOUND)
list(APPEND ${module_name}_LIBRARIES
"${${module_name}_${ecm_fpwc_comp}_LIBRARY}")
list(APPEND ${module_name}_INCLUDE_DIRS
"${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}")
set(${module_name}_DEFINITIONS
${${module_name}_DEFINITIONS}
${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS})
if(NOT TARGET ${module_name}::${ecm_fpwc_comp})
add_library(${module_name}::${ecm_fpwc_comp} UNKNOWN IMPORTED)
set_target_properties(${module_name}::${ecm_fpwc_comp} PROPERTIES
IMPORTED_LOCATION "${${module_name}_${ecm_fpwc_comp}_LIBRARY}"
INTERFACE_COMPILE_OPTIONS "${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS}"
INTERFACE_INCLUDE_DIRECTORIES "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}"
INTERFACE_LINK_LIBRARIES "${ecm_fpwc_dep_targets}"
)
endif()
list(APPEND ${module_name}_TARGETS
"${module_name}::${ecm_fpwc_comp}")
endif()
endforeach()
if(${module_name}_LIBRARIES)
list(REMOVE_DUPLICATES ${module_name}_LIBRARIES)
endif()
if(${module_name}_INCLUDE_DIRS)
list(REMOVE_DUPLICATES ${module_name}_INCLUDE_DIRS)
endif()
if(${module_name}_DEFINITIONS)
list(REMOVE_DUPLICATES ${module_name}_DEFINITIONS)
endif()
if(${module_name}_TARGETS)
list(REMOVE_DUPLICATES ${module_name}_TARGETS)
endif()
endmacro()

View File

@@ -1,170 +0,0 @@
#.rst:
# FindWaylandScanner
# ------------------
#
# Try to find wayland-scanner.
#
# If the wayland-scanner executable is not in your PATH, you can provide
# an alternative name or full path location with the ``WaylandScanner_EXECUTABLE``
# variable.
#
# This will define the following variables:
#
# ``WaylandScanner_FOUND``
# True if wayland-scanner is available.
#
# ``WaylandScanner_EXECUTABLE``
# The wayland-scanner executable.
#
# If ``WaylandScanner_FOUND`` is TRUE, it will also define the following imported
# target:
#
# ``Wayland::Scanner``
# The wayland-scanner executable.
#
# This module provides the following functions to generate C protocol
# implementations:
#
# - ``ecm_add_wayland_client_protocol``
# - ``ecm_add_wayland_server_protocol``
#
# ::
#
# ecm_add_wayland_client_protocol(<source_files_var>
# PROTOCOL <xmlfile>
# BASENAME <basename>)
#
# Generate Wayland client protocol files from ``<xmlfile>`` XML
# definition for the ``<basename>`` interface and append those files
# to ``<source_files_var>``.
#
# ::
#
# ecm_add_wayland_server_protocol(<source_files_var>
# PROTOCOL <xmlfile>
# BASENAME <basename>)
#
# Generate Wayland server protocol files from ``<xmlfile>`` XML
# definition for the ``<basename>`` interface and append those files
# to ``<source_files_var>``.
#
# Since 1.4.0.
#=============================================================================
# Copyright 2012-2014 Pier Luigi Fiorini <pierluigi.fiorini@gmail.com>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#=============================================================================
include(${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpers.cmake)
ecm_find_package_version_check(WaylandScanner)
# Find wayland-scanner
find_program(WaylandScanner_EXECUTABLE NAMES wayland-scanner)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(WaylandScanner
FOUND_VAR
WaylandScanner_FOUND
REQUIRED_VARS
WaylandScanner_EXECUTABLE
)
mark_as_advanced(WaylandScanner_EXECUTABLE)
if(NOT TARGET Wayland::Scanner AND WaylandScanner_FOUND)
add_executable(Wayland::Scanner IMPORTED)
set_target_properties(Wayland::Scanner PROPERTIES
IMPORTED_LOCATION "${WaylandScanner_EXECUTABLE}"
)
endif()
include(FeatureSummary)
set_package_properties(WaylandScanner PROPERTIES
URL "https://wayland.freedesktop.org/"
DESCRIPTION "Executable that converts XML protocol files to C code"
)
function(ecm_add_wayland_client_protocol out_var)
# Parse arguments
set(oneValueArgs PROTOCOL BASENAME)
cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN})
if(ARGS_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_client_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"")
endif()
get_filename_component(_infile ${ARGS_PROTOCOL} ABSOLUTE)
set(_client_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-client-protocol.h")
set(_code "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-protocol.c")
set_source_files_properties(${_client_header} GENERATED)
set_source_files_properties(${_code} GENERATED)
set_property(SOURCE ${_client_header} PROPERTY SKIP_AUTOMOC ON)
add_custom_command(OUTPUT "${_client_header}"
COMMAND ${WaylandScanner_EXECUTABLE} client-header ${_infile} ${_client_header}
DEPENDS ${WaylandScanner_EXECUTABLE} ${_infile}
VERBATIM
)
add_custom_command(OUTPUT "${_code}"
COMMAND ${WaylandScanner_EXECUTABLE} private-code ${_infile} ${_code}
DEPENDS ${WaylandScanner_EXECUTABLE} ${_infile} ${_client_header}
VERBATIM
)
list(APPEND ${out_var} "${_client_header}" "${_code}")
set(${out_var} ${${out_var}} PARENT_SCOPE)
endfunction()
function(ecm_add_wayland_server_protocol out_var)
# Parse arguments
set(oneValueArgs PROTOCOL BASENAME)
cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN})
if(ARGS_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_server_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"")
endif()
ecm_add_wayland_client_protocol(${out_var}
PROTOCOL ${ARGS_PROTOCOL}
BASENAME ${ARGS_BASENAME})
get_filename_component(_infile ${ARGS_PROTOCOL} ABSOLUTE)
set(_server_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-server-protocol.h")
set_property(SOURCE ${_server_header} PROPERTY SKIP_AUTOMOC ON)
set_source_files_properties(${_server_header} GENERATED)
add_custom_command(OUTPUT "${_server_header}"
COMMAND ${WaylandScanner_EXECUTABLE} server-header ${_infile} ${_server_header}
DEPENDS ${WaylandScanner_EXECUTABLE} ${_infile}
VERBATIM
)
list(APPEND ${out_var} "${_server_header}")
set(${out_var} ${${out_var}} PARENT_SCOPE)
endfunction()

View File

@@ -1,56 +0,0 @@
if (NOT NO_ISA_EXTENSIONS)
include(CheckCXXCompilerFlag)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
CHECK_CXX_COMPILER_FLAG("-mcpu=native" COMPILER_SUPPORTS_MCPU_NATIVE)
if(COMPILER_SUPPORTS_MARCH_NATIVE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=native")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=native")
endif()
else()
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
if(COMPILER_SUPPORTS_MARCH_NATIVE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
endif()
endif()
if(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2")
endif()
endif()
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT LEGACY)
set(USE_WAYLAND ON)
else()
set(USE_WAYLAND OFF)
endif()
if(WIN32)
add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN)
add_compile_options(/MP)
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
endif()
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
endif()
if(EMSCRIPTEN)
add_compile_options(-pthread)
add_link_options(-pthread)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
find_program(MOLD_LINKER mold)
if(MOLD_LINKER)
set(CMAKE_LINKER_TYPE "MOLD")
endif()
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-eliminate-unused-debug-types")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-eliminate-unused-debug-types")
endif()
endif()
file(GENERATE OUTPUT .gitignore CONTENT "*")

View File

@@ -1,39 +0,0 @@
set(TRACY_COMMON_DIR ${CMAKE_CURRENT_LIST_DIR}/../public/common)
set(TRACY_COMMON_SOURCES
tracy_lz4.cpp
tracy_lz4hc.cpp
TracySocket.cpp
TracyStackFrames.cpp
TracySystem.cpp
)
list(TRANSFORM TRACY_COMMON_SOURCES PREPEND "${TRACY_COMMON_DIR}/")
set(TRACY_SERVER_DIR ${CMAKE_CURRENT_LIST_DIR}/../server)
set(TRACY_SERVER_SOURCES
TracyMemory.cpp
TracyMmap.cpp
TracyPrint.cpp
TracySysUtil.cpp
TracyTaskDispatch.cpp
TracyTextureCompression.cpp
TracyThreadCompress.cpp
TracyWorker.cpp
)
list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/")
add_library(TracyServer STATIC ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES})
target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR})
target_link_libraries(TracyServer PUBLIC TracyCapstone TracyZstd)
if(NO_STATISTICS)
target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS)
endif()
if(NOT NO_PARALLEL_STL AND UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
target_link_libraries(TracyServer PRIVATE TracyTbb)
endif()

View File

@@ -1,238 +0,0 @@
# Vendor Specific CMake
# The Tracy project keeps most vendor source locally
set (ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/../")
# Dependencies are taken from the system first and if not found, they are pulled with CPM and built from source
include(FindPkgConfig)
include(${CMAKE_CURRENT_LIST_DIR}/CPM.cmake)
option(DOWNLOAD_CAPSTONE "Force download capstone" ON)
option(DOWNLOAD_GLFW "Force download glfw" OFF)
option(DOWNLOAD_FREETYPE "Force download freetype" OFF)
# capstone
pkg_check_modules(CAPSTONE capstone)
if(CAPSTONE_FOUND AND NOT DOWNLOAD_CAPSTONE)
message(STATUS "Capstone found: ${CAPSTONE}")
add_library(TracyCapstone INTERFACE)
target_include_directories(TracyCapstone INTERFACE ${CAPSTONE_INCLUDE_DIRS})
target_link_libraries(TracyCapstone INTERFACE ${CAPSTONE_LINK_LIBRARIES})
else()
CPMAddPackage(
NAME capstone
GITHUB_REPOSITORY capstone-engine/capstone
GIT_TAG 5.0.1
)
add_library(TracyCapstone INTERFACE)
target_include_directories(TracyCapstone INTERFACE ${capstone_SOURCE_DIR}/include/capstone)
target_link_libraries(TracyCapstone INTERFACE capstone)
endif()
# GLFW
if(NOT USE_WAYLAND AND NOT EMSCRIPTEN)
pkg_check_modules(GLFW glfw3)
if (GLFW_FOUND AND NOT DOWNLOAD_GLFW)
add_library(TracyGlfw3 INTERFACE)
target_include_directories(TracyGlfw3 INTERFACE ${GLFW_INCLUDE_DIRS})
target_link_libraries(TracyGlfw3 INTERFACE ${GLFW_LINK_LIBRARIES})
else()
CPMAddPackage(
NAME glfw
GITHUB_REPOSITORY glfw/glfw
GIT_TAG 3.4
OPTIONS
"GLFW_BUILD_EXAMPLES OFF"
"GLFW_BUILD_TESTS OFF"
"GLFW_BUILD_DOCS OFF"
"GLFW_INSTALL OFF"
)
add_library(TracyGlfw3 INTERFACE)
target_link_libraries(TracyGlfw3 INTERFACE glfw)
endif()
endif()
# freetype
pkg_check_modules(FREETYPE freetype2)
if (FREETYPE_FOUND AND NOT DOWNLOAD_FREETYPE)
add_library(TracyFreetype INTERFACE)
target_include_directories(TracyFreetype INTERFACE ${FREETYPE_INCLUDE_DIRS})
target_link_libraries(TracyFreetype INTERFACE ${FREETYPE_LINK_LIBRARIES})
else()
CPMAddPackage(
NAME freetype
GITHUB_REPOSITORY freetype/freetype
GIT_TAG VER-2-13-2
OPTIONS
"FT_DISABLE_HARFBUZZ ON"
"FT_WITH_HARFBUZZ OFF"
)
add_library(TracyFreetype INTERFACE)
target_link_libraries(TracyFreetype INTERFACE freetype)
endif()
# zstd
set(ZSTD_DIR "${ROOT_DIR}/zstd")
set(ZSTD_SOURCES
decompress/zstd_ddict.c
decompress/zstd_decompress_block.c
decompress/huf_decompress.c
decompress/zstd_decompress.c
common/zstd_common.c
common/error_private.c
common/xxhash.c
common/entropy_common.c
common/debug.c
common/threading.c
common/pool.c
common/fse_decompress.c
compress/zstd_ldm.c
compress/zstd_compress_superblock.c
compress/zstd_opt.c
compress/zstd_compress_sequences.c
compress/fse_compress.c
compress/zstd_double_fast.c
compress/zstd_compress.c
compress/zstd_compress_literals.c
compress/hist.c
compress/zstdmt_compress.c
compress/zstd_lazy.c
compress/huf_compress.c
compress/zstd_fast.c
dictBuilder/zdict.c
dictBuilder/cover.c
dictBuilder/divsufsort.c
dictBuilder/fastcover.c
)
list(TRANSFORM ZSTD_SOURCES PREPEND "${ZSTD_DIR}/")
set_property(SOURCE ${ZSTD_DIR}/decompress/huf_decompress_amd64.S APPEND PROPERTY COMPILE_OPTIONS "-x" "assembler-with-cpp")
add_library(TracyZstd STATIC ${ZSTD_SOURCES})
target_include_directories(TracyZstd PUBLIC ${ZSTD_DIR})
target_compile_definitions(TracyZstd PRIVATE ZSTD_DISABLE_ASM)
# Diff Template Library
set(DTL_DIR "${ROOT_DIR}/dtl")
file(GLOB_RECURSE DTL_HEADERS CONFIGURE_DEPENDS RELATIVE ${DTL_DIR} "*.hpp")
add_library(TracyDtl INTERFACE)
target_sources(TracyDtl INTERFACE ${DTL_HEADERS})
target_include_directories(TracyDtl INTERFACE ${DTL_DIR})
# Get Opt
set(GETOPT_DIR "${ROOT_DIR}/getopt")
set(GETOPT_SOURCES ${GETOPT_DIR}/getopt.c)
set(GETOPT_HEADERS ${GETOPT_DIR}/getopt.h)
add_library(TracyGetOpt STATIC ${GETOPT_SOURCES} ${GETOPT_HEADERS})
target_include_directories(TracyGetOpt PUBLIC ${GETOPT_DIR})
# ImGui
set(IMGUI_DIR "${ROOT_DIR}/imgui")
set(IMGUI_SOURCES
imgui_widgets.cpp
imgui_draw.cpp
imgui_demo.cpp
imgui.cpp
imgui_tables.cpp
misc/freetype/imgui_freetype.cpp
)
list(TRANSFORM IMGUI_SOURCES PREPEND "${IMGUI_DIR}/")
add_definitions(-DIMGUI_ENABLE_FREETYPE)
add_library(TracyImGui STATIC ${IMGUI_SOURCES})
target_include_directories(TracyImGui PUBLIC ${IMGUI_DIR})
target_link_libraries(TracyImGui PUBLIC TracyFreetype)
# NFD
if (NOT NO_FILESELECTOR AND NOT EMSCRIPTEN)
set(NFD_DIR "${ROOT_DIR}/nfd")
if (WIN32)
set(NFD_SOURCES "${NFD_DIR}/nfd_win.cpp")
elseif (APPLE)
set(NFD_SOURCES "${NFD_DIR}/nfd_cocoa.m")
else()
if (GTK_FILESELECTOR)
set(NFD_SOURCES "${NFD_DIR}/nfd_gtk.cpp")
else()
set(NFD_SOURCES "${NFD_DIR}/nfd_portal.cpp")
endif()
endif()
file(GLOB_RECURSE NFD_HEADERS CONFIGURE_DEPENDS RELATIVE ${NFD_DIR} "*.h")
add_library(TracyNfd STATIC ${NFD_SOURCES} ${NFD_HEADERS})
target_include_directories(TracyNfd PUBLIC ${NFD_DIR})
if (APPLE)
find_library(APPKIT_LIBRARY AppKit)
find_library(UNIFORMTYPEIDENTIFIERS_LIBRARY UniformTypeIdentifiers)
target_link_libraries(TracyNfd PUBLIC ${APPKIT_LIBRARY} ${UNIFORMTYPEIDENTIFIERS_LIBRARY})
elseif (UNIX)
if (GTK_FILESELECTOR)
pkg_check_modules(GTK3 gtk+-3.0)
if (NOT GTK3_FOUND)
message(FATAL_ERROR "GTK3 not found. Please install it or set TRACY_GTK_FILESELECTOR to OFF.")
endif()
add_library(TracyGtk3 INTERFACE)
target_include_directories(TracyGtk3 INTERFACE ${GTK3_INCLUDE_DIRS})
target_link_libraries(TracyGtk3 INTERFACE ${GTK3_LINK_LIBRARIES})
target_link_libraries(TracyNfd PUBLIC TracyGtk3)
else()
pkg_check_modules(DBUS dbus-1)
if (NOT DBUS_FOUND)
message(FATAL_ERROR "D-Bus not found. Please install it or set TRACY_GTK_FILESELECTOR to ON.")
endif()
add_library(TracyDbus INTERFACE)
target_include_directories(TracyDbus INTERFACE ${DBUS_INCLUDE_DIRS})
target_link_libraries(TracyDbus INTERFACE ${DBUS_LINK_LIBRARIES})
target_link_libraries(TracyNfd PUBLIC TracyDbus)
endif()
endif()
endif()
# TBB
if (NO_PARALLEL_STL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_PARALLEL_SORT")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DNO_PARALLEL_SORT")
else()
if (UNIX AND NOT APPLE AND NOT EMSCRIPTEN)
# Tracy does not use TBB directly, but the implementation of parallel algorithms
# in some versions of libstdc++ depends on TBB. When it does, you must
# explicitly link against -ltbb.
#
# Some distributions have pgk-config files for TBB, others don't.
pkg_check_modules(TBB tbb)
if (TBB_FOUND)
add_library(TracyTbb INTERFACE)
target_include_directories(TracyTbb INTERFACE ${TBB_INCLUDE_DIRS})
target_link_libraries(TracyTbb INTERFACE ${TBB_LINK_LIBRARIES})
else()
CPMAddPackage(
NAME tbb
GITHUB_REPOSITORY oneapi-src/oneTBB
GIT_TAG v2021.12.0-rc2
OPTIONS "TBB_TEST OFF"
)
add_library(TracyTbb INTERFACE)
target_link_libraries(TracyTbb INTERFACE tbb)
endif()
endif()
endif()

View File

@@ -1,24 +0,0 @@
cmake_minimum_required(VERSION 3.10)
message("Parsing public/common/TracyVersion.hpp file")
file(READ "${CMAKE_CURRENT_LIST_DIR}/../public/common/TracyVersion.hpp" version)
# Note: This looks for a specific pattern in TracyVersion.hpp, if it changes
# this needs updating.
string(REGEX MATCH "Major = ([0-9]+)" _ ${version})
# This works do to the above () subexpression selection. See
# https://cmake.org/cmake/help/latest/command/string.html#regex-match for more
# details
set(TRACY_VERSION_MAJOR ${CMAKE_MATCH_1})
string(REGEX MATCH "Minor = ([0-9]+)" _ ${version})
set(TRACY_VERSION_MINOR ${CMAKE_MATCH_1})
string(REGEX MATCH "Patch = ([0-9]+)" _ ${version})
set(TRACY_VERSION_PATCH ${CMAKE_MATCH_1})
set(TRACY_VERSION_STRING "${TRACY_VERSION_MAJOR}.${TRACY_VERSION_MINOR}.${TRACY_VERSION_PATCH}")
message("VERSION ${TRACY_VERSION_STRING}")

33
common/TracyAlloc.hpp Normal file
View File

@@ -0,0 +1,33 @@
#ifndef __TRACYALLOC_HPP__
#define __TRACYALLOC_HPP__
#include <cstdlib>
#ifdef TRACY_ENABLE
# include "../client/tracy_rpmalloc.hpp"
#endif
namespace tracy
{
static inline void* tracy_malloc( size_t size )
{
#ifdef TRACY_ENABLE
return rpmalloc( size );
#else
return malloc( size );
#endif
}
static inline void tracy_free( void* ptr )
{
#ifdef TRACY_ENABLE
rpfree( ptr );
#else
free( ptr );
#endif
}
}
#endif

View File

@@ -10,6 +10,15 @@ namespace tracy
using TracyMutex = std::shared_mutex;
}
#elif defined __CYGWIN__
#include "tracy_benaphore.h"
namespace tracy
{
using TracyMutex = NonRecursiveBenaphore;
}
#else
#include <mutex>

62
common/TracyProtocol.hpp Normal file
View File

@@ -0,0 +1,62 @@
#ifndef __TRACYPROTOCOL_HPP__
#define __TRACYPROTOCOL_HPP__
#include <limits>
#include <stdint.h>
#include "../common/tracy_lz4.hpp"
namespace tracy
{
using lz4sz_t = uint32_t;
enum { TargetFrameSize = 256 * 1024 };
enum { LZ4Size = LZ4_COMPRESSBOUND( TargetFrameSize ) };
static_assert( LZ4Size <= std::numeric_limits<lz4sz_t>::max(), "LZ4Size greater than lz4sz_t" );
static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" );
enum ServerQuery : uint8_t
{
ServerQueryTerminate,
ServerQueryString,
ServerQueryThreadString,
ServerQuerySourceLocation,
ServerQueryPlotName,
ServerQueryCallstackFrame,
ServerQueryFrameName,
};
enum { WelcomeMessageProgramNameSize = 64 };
enum { WelcomeMessageHostInfoSize = 1024 };
#pragma pack( 1 )
struct WelcomeMessage
{
double timerMul;
int64_t initBegin;
int64_t initEnd;
uint64_t delay;
uint64_t resolution;
uint64_t epoch;
uint8_t onDemand;
char programName[WelcomeMessageProgramNameSize];
char hostInfo[WelcomeMessageHostInfoSize];
};
enum { WelcomeMessageSize = sizeof( WelcomeMessage ) };
struct OnDemandPayloadMessage
{
uint64_t frames;
};
enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) };
#pragma pack()
}
#endif

346
common/TracyQueue.hpp Normal file
View File

@@ -0,0 +1,346 @@
#ifndef __TRACYQUEUE_HPP__
#define __TRACYQUEUE_HPP__
#include <stdint.h>
namespace tracy
{
enum class QueueType : uint8_t
{
ZoneText,
ZoneName,
Message,
ZoneBeginAllocSrcLoc,
CallstackMemory,
Callstack,
Terminate,
KeepAlive,
Crash,
CrashReport,
ZoneBegin,
ZoneBeginCallstack,
ZoneEnd,
FrameMarkMsg,
FrameMarkMsgStart,
FrameMarkMsgEnd,
SourceLocation,
LockAnnounce,
LockWait,
LockObtain,
LockRelease,
LockSharedWait,
LockSharedObtain,
LockSharedRelease,
LockMark,
PlotData,
MessageLiteral,
GpuNewContext,
GpuZoneBegin,
GpuZoneBeginCallstack,
GpuZoneEnd,
GpuTime,
MemAlloc,
MemFree,
MemAllocCallstack,
MemFreeCallstack,
CallstackFrame,
StringData,
ThreadName,
CustomStringData,
PlotName,
SourceLocationPayload,
CallstackPayload,
FrameName,
NUM_TYPES
};
#pragma pack( 1 )
struct QueueZoneBegin
{
int64_t time;
uint64_t thread;
uint64_t srcloc; // ptr
uint32_t cpu;
};
struct QueueZoneEnd
{
int64_t time;
uint64_t thread;
uint32_t cpu;
};
struct QueueStringTransfer
{
uint64_t ptr;
};
struct QueueFrameMark
{
int64_t time;
uint64_t name; // ptr
};
struct QueueSourceLocation
{
uint64_t name;
uint64_t function; // ptr
uint64_t file; // ptr
uint32_t line;
uint8_t r;
uint8_t g;
uint8_t b;
};
struct QueueZoneText
{
uint64_t thread;
uint64_t text; // ptr
};
enum class LockType : uint8_t
{
Lockable,
SharedLockable
};
struct QueueLockAnnounce
{
uint32_t id;
uint64_t lckloc; // ptr
LockType type;
};
struct QueueLockWait
{
uint32_t id;
int64_t time;
uint64_t thread;
LockType type;
};
struct QueueLockObtain
{
uint32_t id;
int64_t time;
uint64_t thread;
};
struct QueueLockRelease
{
uint32_t id;
int64_t time;
uint64_t thread;
};
struct QueueLockMark
{
uint32_t id;
uint64_t thread;
uint64_t srcloc; // ptr
};
enum class PlotDataType : uint8_t
{
Float,
Double,
Int
};
struct QueuePlotData
{
uint64_t name; // ptr
int64_t time;
PlotDataType type;
union
{
double d;
float f;
int64_t i;
} data;
};
struct QueueMessage
{
int64_t time;
uint64_t thread;
uint64_t text; // ptr
};
struct QueueGpuNewContext
{
int64_t cpuTime;
int64_t gpuTime;
uint64_t thread;
float period;
uint8_t context;
uint8_t accuracyBits;
};
struct QueueGpuZoneBegin
{
int64_t cpuTime;
uint64_t srcloc;
uint64_t thread;
uint16_t queryId;
uint8_t context;
};
struct QueueGpuZoneEnd
{
int64_t cpuTime;
uint16_t queryId;
uint8_t context;
};
struct QueueGpuTime
{
int64_t gpuTime;
uint16_t queryId;
uint8_t context;
};
struct QueueMemAlloc
{
int64_t time;
uint64_t thread;
uint64_t ptr;
char size[6];
};
struct QueueMemFree
{
int64_t time;
uint64_t thread;
uint64_t ptr;
};
struct QueueCallstackMemory
{
uint64_t ptr;
};
struct QueueCallstack
{
uint64_t ptr;
uint64_t thread;
};
struct QueueCallstackFrame
{
uint64_t ptr;
uint64_t name;
uint64_t file;
uint32_t line;
};
struct QueueCrashReport
{
int64_t time;
uint64_t thread;
uint64_t text; // ptr
};
struct QueueHeader
{
union
{
QueueType type;
uint8_t idx;
};
};
struct QueueItem
{
QueueHeader hdr;
union
{
QueueZoneBegin zoneBegin;
QueueZoneEnd zoneEnd;
QueueStringTransfer stringTransfer;
QueueFrameMark frameMark;
QueueSourceLocation srcloc;
QueueZoneText zoneText;
QueueLockAnnounce lockAnnounce;
QueueLockWait lockWait;
QueueLockObtain lockObtain;
QueueLockRelease lockRelease;
QueueLockMark lockMark;
QueuePlotData plotData;
QueueMessage message;
QueueGpuNewContext gpuNewContext;
QueueGpuZoneBegin gpuZoneBegin;
QueueGpuZoneEnd gpuZoneEnd;
QueueGpuTime gpuTime;
QueueMemAlloc memAlloc;
QueueMemFree memFree;
QueueCallstackMemory callstackMemory;
QueueCallstack callstack;
QueueCallstackFrame callstackFrame;
QueueCrashReport crashReport;
};
};
#pragma pack()
enum { QueueItemSize = sizeof( QueueItem ) };
static const size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueZoneText ),
sizeof( QueueHeader ) + sizeof( QueueZoneText ), // zone name
sizeof( QueueHeader ) + sizeof( QueueMessage ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location
sizeof( QueueHeader ) + sizeof( QueueCallstackMemory ),
sizeof( QueueHeader ) + sizeof( QueueCallstack ),
// above items must be first
sizeof( QueueHeader ), // terminate
sizeof( QueueHeader ), // keep alive
sizeof( QueueHeader ), // crash
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueZoneEnd ),
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start
sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // end
sizeof( QueueHeader ) + sizeof( QueueSourceLocation ),
sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
sizeof( QueueHeader ) + sizeof( QueueLockWait ),
sizeof( QueueHeader ) + sizeof( QueueLockObtain ),
sizeof( QueueHeader ) + sizeof( QueueLockRelease ),
sizeof( QueueHeader ) + sizeof( QueueLockWait ),
sizeof( QueueHeader ) + sizeof( QueueLockObtain ),
sizeof( QueueHeader ) + sizeof( QueueLockRelease ),
sizeof( QueueHeader ) + sizeof( QueueLockMark ),
sizeof( QueueHeader ) + sizeof( QueuePlotData ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
sizeof( QueueHeader ) + sizeof( QueueMemFree ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
// keep all QueueStringTransfer below
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // custom string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame name
};
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType::NUM_TYPES, "QueueDataSize mismatch" );
static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" );
static_assert( sizeof( void* ) == sizeof( uintptr_t ), "Pointer size != uintptr_t" );
};
#endif

335
common/TracySocket.cpp Normal file
View File

@@ -0,0 +1,335 @@
#include <algorithm>
#include <assert.h>
#include <new>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include "TracyAlloc.hpp"
#include "TracySocket.hpp"
#ifdef _MSC_VER
# include <winsock2.h>
# include <ws2tcpip.h>
# pragma warning(disable:4244)
# pragma warning(disable:4267)
#else
# include <sys/socket.h>
# include <netdb.h>
# include <unistd.h>
#endif
#ifndef MSG_NOSIGNAL
# define MSG_NOSIGNAL 0
#endif
namespace tracy
{
#ifdef _MSC_VER
typedef SOCKET socket_t;
#else
typedef int socket_t;
#endif
#ifdef _MSC_VER
struct __wsinit
{
__wsinit()
{
WSADATA wsaData;
if( WSAStartup( MAKEWORD( 2, 2 ), &wsaData ) != 0 )
{
fprintf( stderr, "Cannot init winsock.\n" );
exit( 1 );
}
}
};
void InitWinSock()
{
static __wsinit init;
}
#endif
Socket::Socket()
: m_sock( -1 )
, m_buf( (char*)tracy_malloc( BufSize ) )
, m_bufPtr( nullptr )
, m_bufLeft( 0 )
{
#ifdef _MSC_VER
InitWinSock();
#endif
}
Socket::Socket( int sock )
: m_sock( sock )
, m_buf( (char*)tracy_malloc( BufSize ) )
, m_bufPtr( nullptr )
, m_bufLeft( 0 )
{
}
Socket::~Socket()
{
tracy_free( m_buf );
if( m_sock != -1 )
{
Close();
}
}
bool Socket::Connect( const char* addr, const char* port )
{
assert( m_sock == -1 );
struct addrinfo hints;
struct addrinfo *res, *ptr;
memset( &hints, 0, sizeof( hints ) );
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
if( getaddrinfo( addr, port, &hints, &res ) != 0 ) return false;
int sock = 0;
for( ptr = res; ptr; ptr = ptr->ai_next )
{
if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
#if defined __APPLE__
int val = 1;
setsockopt( m_sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 )
{
#ifdef _MSC_VER
closesocket( sock );
#else
close( sock );
#endif
continue;
}
break;
}
freeaddrinfo( res );
if( !ptr ) return false;
m_sock = sock;
return true;
}
void Socket::Close()
{
assert( m_sock != -1 );
#ifdef _MSC_VER
closesocket( m_sock );
#else
close( m_sock );
#endif
m_sock = -1;
}
int Socket::Send( const void* _buf, int len )
{
auto buf = (const char*)_buf;
assert( m_sock != -1 );
auto start = buf;
while( len > 0 )
{
auto ret = send( m_sock, buf, len, MSG_NOSIGNAL );
if( ret == -1 ) return -1;
len -= ret;
buf += ret;
}
return int( buf - start );
}
int Socket::RecvBuffered( void* buf, int len, const timeval* tv )
{
if( len <= m_bufLeft )
{
memcpy( buf, m_bufPtr, len );
m_bufPtr += len;
m_bufLeft -= len;
return len;
}
if( m_bufLeft > 0 )
{
memcpy( buf, m_bufPtr, m_bufLeft );
const auto ret = m_bufLeft;
m_bufLeft = 0;
return ret;
}
if( len >= BufSize ) return Recv( buf, len, tv );
m_bufLeft = Recv( m_buf, BufSize, tv );
if( m_bufLeft <= 0 ) return m_bufLeft;
const auto sz = std::min( len, m_bufLeft );
memcpy( buf, m_buf, sz );
m_bufPtr = m_buf + sz;
m_bufLeft -= sz;
return sz;
}
int Socket::Recv( void* _buf, int len, const timeval* tv )
{
auto buf = (char*)_buf;
fd_set fds;
FD_ZERO( &fds );
FD_SET( static_cast<socket_t>(m_sock), &fds );
#ifndef _WIN32
timeval _tv = *tv;
select( m_sock+1, &fds, nullptr, nullptr, &_tv );
#else
select( m_sock+1, &fds, nullptr, nullptr, tv );
#endif
if( FD_ISSET( m_sock, &fds ) )
{
return recv( m_sock, buf, len, 0 );
}
else
{
return -1;
}
}
bool Socket::Read( void* _buf, int len, const timeval* tv, std::function<bool()> exitCb )
{
auto buf = (char*)_buf;
while( len > 0 )
{
if( exitCb() ) return false;
const auto sz = RecvBuffered( buf, len, tv );
switch( sz )
{
case 0:
return false;
case -1:
#ifdef _WIN32
{
auto err = WSAGetLastError();
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
}
#endif
break;
default:
len -= sz;
buf += sz;
break;
}
}
return true;
}
bool Socket::HasData()
{
if( m_bufLeft > 0 ) return true;
struct timeval tv;
memset( &tv, 0, sizeof( tv ) );
fd_set fds;
FD_ZERO( &fds );
FD_SET( static_cast<socket_t>(m_sock), &fds );
return select( m_sock+1, &fds, nullptr, nullptr, &tv ) > 0;
}
ListenSocket::ListenSocket()
: m_sock( -1 )
{
#ifdef _MSC_VER
InitWinSock();
#endif
}
ListenSocket::~ListenSocket()
{
}
bool ListenSocket::Listen( const char* port, int backlog )
{
assert( m_sock == -1 );
struct addrinfo* res;
struct addrinfo hints;
memset( &hints, 0, sizeof( hints ) );
hints.ai_family = AF_INET6;
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_PASSIVE;
if( getaddrinfo( nullptr, port, &hints, &res ) != 0 ) return false;
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
#if defined _MSC_VER || defined __CYGWIN__
unsigned long val = 0;
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
#else
int val = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
#endif
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) return false;
if( listen( m_sock, backlog ) == -1 ) return false;
return true;
}
Socket* ListenSocket::Accept()
{
struct sockaddr_storage remote;
socklen_t sz = sizeof( remote );
struct timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 10000;
fd_set fds;
FD_ZERO( &fds );
FD_SET( static_cast<socket_t>(m_sock), &fds );
select( m_sock+1, &fds, nullptr, nullptr, &tv );
if( FD_ISSET( m_sock, &fds ) )
{
int sock = accept( m_sock, (sockaddr*)&remote, &sz);
#if defined __APPLE__
int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
if( sock == -1 )
{
return nullptr;
}
else
{
auto ptr = (Socket*)tracy_malloc( sizeof( Socket ) );
new(ptr) Socket( sock );
return ptr;
}
}
else
{
return nullptr;
}
}
void ListenSocket::Close()
{
assert( m_sock != -1 );
#ifdef _MSC_VER
closesocket( m_sock );
#else
close( m_sock );
#endif
m_sock = -1;
}
}

69
common/TracySocket.hpp Normal file
View File

@@ -0,0 +1,69 @@
#ifndef __TRACYSOCKET_HPP__
#define __TRACYSOCKET_HPP__
#include <functional>
struct timeval;
namespace tracy
{
#ifdef _MSC_VER
void InitWinSock();
#endif
class Socket
{
enum { BufSize = 128 * 1024 };
public:
Socket();
Socket( int sock );
~Socket();
bool Connect( const char* addr, const char* port );
void Close();
int Send( const void* buf, int len );
bool Read( void* buf, int len, const timeval* tv, std::function<bool()> exitCb );
bool HasData();
Socket( const Socket& ) = delete;
Socket( Socket&& ) = delete;
Socket& operator=( const Socket& ) = delete;
Socket& operator=( Socket&& ) = delete;
private:
int RecvBuffered( void* buf, int len, const timeval* tv );
int Recv( void* buf, int len, const timeval* tv );
int m_sock;
char* m_buf;
char* m_bufPtr;
int m_bufLeft;
};
class ListenSocket
{
public:
ListenSocket();
~ListenSocket();
bool Listen( const char* port, int backlog );
Socket* Accept();
void Close();
ListenSocket( const ListenSocket& ) = delete;
ListenSocket( ListenSocket&& ) = delete;
ListenSocket& operator=( const ListenSocket& ) = delete;
ListenSocket& operator=( ListenSocket&& ) = delete;
private:
int m_sock;
};
}
#endif

183
common/TracySystem.cpp Normal file
View File

@@ -0,0 +1,183 @@
#if defined _MSC_VER || defined __CYGWIN__ || defined _WIN32
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
# ifndef NOMINMAX
# define NOMINMAX
# endif
#endif
#ifdef _WIN32
# include <windows.h>
#else
# include <pthread.h>
# include <string.h>
# include <unistd.h>
#endif
#ifdef __linux__
# ifndef __ANDROID__
# include <syscall.h>
# endif
# include <fcntl.h>
#endif
#include <inttypes.h>
#include <stdio.h>
#include "TracySystem.hpp"
#ifdef TRACY_COLLECT_THREAD_NAMES
# include <atomic>
# include "TracyAlloc.hpp"
#endif
namespace tracy
{
#ifdef TRACY_COLLECT_THREAD_NAMES
struct ThreadNameData
{
uint64_t id;
const char* name;
ThreadNameData* next;
};
extern std::atomic<ThreadNameData*>& s_threadNameData;
#endif
void SetThreadName( std::thread& thread, const char* name )
{
SetThreadName( thread.native_handle(), name );
}
void SetThreadName( std::thread::native_handle_type handle, const char* name )
{
#ifdef _WIN32
# if defined NTDDI_WIN10_RS2 && NTDDI_VERSION >= NTDDI_WIN10_RS2
wchar_t buf[256];
mbstowcs( buf, name, 256 );
SetThreadDescription( static_cast<HANDLE>( handle ), buf );
# else
const DWORD MS_VC_EXCEPTION=0x406D1388;
# pragma pack( push, 8 )
struct THREADNAME_INFO
{
DWORD dwType;
LPCSTR szName;
DWORD dwThreadID;
DWORD dwFlags;
};
# pragma pack(pop)
DWORD ThreadId = GetThreadId( static_cast<HANDLE>( handle ) );
THREADNAME_INFO info;
info.dwType = 0x1000;
info.szName = name;
info.dwThreadID = ThreadId;
info.dwFlags = 0;
__try
{
RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
}
__except(EXCEPTION_EXECUTE_HANDLER)
{
}
# endif
#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__
{
const auto sz = strlen( name );
if( sz <= 15 )
{
pthread_setname_np( handle, name );
}
else
{
char buf[16];
memcpy( buf, name, 15 );
buf[15] = '\0';
pthread_setname_np( handle, buf );
}
}
#endif
#ifdef TRACY_COLLECT_THREAD_NAMES
{
rpmalloc_thread_initialize();
const auto sz = strlen( name );
char* buf = (char*)tracy_malloc( sz+1 );
memcpy( buf, name, sz );
buf[sz+1] = '\0';
auto data = (ThreadNameData*)tracy_malloc( sizeof( ThreadNameData ) );
# ifdef _WIN32
data->id = GetThreadId( static_cast<HANDLE>( handle ) );
# elif defined __APPLE__
pthread_threadid_np( handle, &data->id );
# else
data->id = (uint64_t)handle;
# endif
data->name = buf;
data->next = s_threadNameData.load( std::memory_order_relaxed );
while( !s_threadNameData.compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {}
}
#endif
}
const char* GetThreadName( uint64_t id )
{
static char buf[256];
#ifdef TRACY_COLLECT_THREAD_NAMES
auto ptr = s_threadNameData.load( std::memory_order_relaxed );
while( ptr )
{
if( ptr->id == id )
{
return ptr->name;
}
ptr = ptr->next;
}
#else
# ifdef _WIN32
# if defined NTDDI_WIN10_RS2 && NTDDI_VERSION >= NTDDI_WIN10_RS2
auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
if( hnd != 0 )
{
PWSTR tmp;
GetThreadDescription( hnd, &tmp );
auto ret = wcstombs( buf, tmp, 256 );
CloseHandle( hnd );
if( ret != 0 )
{
return buf;
}
}
# endif
# elif defined __GLIBC__ && !defined __ANDROID__ && !defined __EMSCRIPTEN__
if( pthread_getname_np( (pthread_t)id, buf, 256 ) == 0 )
{
return buf;
}
# elif defined __linux__
int cs, fd;
char path[32];
# ifdef __ANDROID__
int tid = gettid();
# else
int tid = (int) syscall( SYS_gettid );
# endif
snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid );
sprintf( buf, "%" PRIu64, id );
pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs );
if ( ( fd = open( path, O_RDONLY ) ) > 0) {
int len = read( fd, buf, 255 );
if ( len > 0 )
buf[len] = 0;
close( fd );
}
pthread_setcancelstate( cs, 0 );
return buf;
# endif
#endif
sprintf( buf, "%" PRIu64, id );
return buf;
}
}

45
common/TracySystem.hpp Normal file
View File

@@ -0,0 +1,45 @@
#ifndef __TRACYSYSTEM_HPP__
#define __TRACYSYSTEM_HPP__
#ifdef TRACY_ENABLE
# if defined __ANDROID__ || defined __CYGWIN__ || defined __APPLE__ || defined _GNU_SOURCE || ( defined _WIN32 && ( !defined NTDDI_WIN10_RS2 || NTDDI_VERSION < NTDDI_WIN10_RS2 ) )
# define TRACY_COLLECT_THREAD_NAMES
# endif
#endif
#ifdef _WIN32
#ifndef _WINDOWS_
extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
#endif
#else
# include <pthread.h>
#endif
#include <stdint.h>
#include <thread>
namespace tracy
{
static inline uint64_t GetThreadHandle()
{
#ifdef _WIN32
static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
return uint64_t( GetCurrentThreadId() );
#elif defined __APPLE__
uint64_t id;
pthread_threadid_np( pthread_self(), &id );
return id;
#else
static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
return uint64_t( pthread_self() );
#endif
}
void SetThreadName( std::thread& thread, const char* name );
void SetThreadName( std::thread::native_handle_type handle, const char* name );
const char* GetThreadName( uint64_t id );
}
#endif

68
common/tracy_benaphore.h Normal file
View File

@@ -0,0 +1,68 @@
// Copyright (c) 2015 Jeff Preshing
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#ifndef __TRACY_CPP11OM_BENAPHORE_H__
#define __TRACY_CPP11OM_BENAPHORE_H__
#include <cassert>
#include <thread>
#include <atomic>
#include "tracy_sema.h"
namespace tracy
{
class NonRecursiveBenaphore
{
private:
std::atomic<int> m_contentionCount;
DefaultSemaphoreType m_sema;
public:
NonRecursiveBenaphore() : m_contentionCount(0) {}
void lock()
{
if (m_contentionCount.fetch_add(1, std::memory_order_acquire) > 0)
{
m_sema.wait();
}
}
bool try_lock()
{
if (m_contentionCount.load(std::memory_order_relaxed) != 0)
return false;
int expected = 0;
return m_contentionCount.compare_exchange_strong(expected, 1, std::memory_order_acquire);
}
void unlock()
{
int oldCount = m_contentionCount.fetch_sub(1, std::memory_order_release);
assert(oldCount > 0);
if (oldCount > 1)
{
m_sema.signal();
}
}
};
}
#endif // __CPP11OM_BENAPHORE_H__

1918
common/tracy_lz4.cpp Normal file

File diff suppressed because it is too large Load Diff

604
common/tracy_lz4.hpp Normal file
View File

@@ -0,0 +1,604 @@
/*
* LZ4 - Fast LZ compression algorithm
* Header File
* Copyright (C) 2011-2017, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 homepage : http://www.lz4.org
- LZ4 source repository : https://github.com/lz4/lz4
*/
#ifndef TRACY_LZ4_H_2983827168210
#define TRACY_LZ4_H_2983827168210
/* --- Dependency --- */
#include <stddef.h> /* size_t */
#include <stdint.h>
namespace tracy
{
/**
Introduction
LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s per core,
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
The LZ4 compression library provides in-memory compression and decompression functions.
Compression can be done in:
- a single step (described as Simple Functions)
- a single step, reusing a context (described in Advanced Functions)
- unbounded multiple steps (described as Streaming compression)
lz4.h provides block compression functions. It gives full buffer control to user.
Decompressing an lz4-compressed block also requires metadata (such as compressed size).
Each application is free to encode such metadata in whichever way it wants.
An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
take care of encoding standard metadata alongside LZ4-compressed blocks.
If your application requires interoperability, it's recommended to use it.
A library is provided to take care of it, see lz4frame.h.
*/
/*^***************************************************************
* Export parameters
*****************************************************************/
/*
* LZ4_DLL_EXPORT :
* Enable exporting of functions when building a Windows DLL
* LZ4LIB_VISIBILITY :
* Control library symbols visibility.
*/
#ifndef LZ4LIB_VISIBILITY
# if defined(__GNUC__) && (__GNUC__ >= 4)
# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
# else
# define LZ4LIB_VISIBILITY
# endif
#endif
#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define LZ4LIB_API LZ4LIB_VISIBILITY
#endif
/*------ Version ------*/
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
#define LZ4_VERSION_MINOR 8 /* for new (non-breaking) interface capabilities */
#define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
#define LZ4_QUOTE(str) #str
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */
LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; unseful to check dll version */
/*-************************************
* Tuning parameter
**************************************/
/*!
* LZ4_MEMORY_USAGE :
* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
* Increasing memory usage improves compression ratio
* Reduced memory usage may improve speed, thanks to cache effect
* Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
*/
#ifndef LZ4_MEMORY_USAGE
# define LZ4_MEMORY_USAGE 12
#endif
/*-************************************
* Simple Functions
**************************************/
/*! LZ4_compress_default() :
Compresses 'srcSize' bytes from buffer 'src'
into already allocated 'dst' buffer of size 'dstCapacity'.
Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
It also runs faster, so it's a recommended setting.
If the function cannot compress 'src' into a more limited 'dst' budget,
compression stops *immediately*, and the function result is zero.
Note : as a consequence, 'dst' content is not valid.
Note 2 : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
dstCapacity : size of buffer 'dst' (which must be already allocated)
return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
or 0 if compression fails */
LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
/*! LZ4_decompress_safe() :
compressedSize : is the exact complete size of the compressed block.
dstCapacity : is the size of destination buffer, which must be already allocated.
return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
If destination buffer is not large enough, decoding will stop and output an error code (negative value).
If the source stream is detected malformed, the function will stop decoding and return a negative result.
This function is protected against malicious data packets.
*/
LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
/*-************************************
* Advanced Functions
**************************************/
#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
/*!
LZ4_compressBound() :
Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
This function is primarily useful for memory allocation purposes (destination buffer size).
Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
inputSize : max supported value is LZ4_MAX_INPUT_SIZE
return : maximum output size in a "worst case" scenario
or 0, if input size is incorrect (too large or negative)
*/
LZ4LIB_API int LZ4_compressBound(int inputSize);
/*!
LZ4_compress_fast() :
Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
An acceleration value of "1" is the same as regular LZ4_compress_default()
Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
*/
LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
/*!
LZ4_compress_fast_extState() :
Same compression function, just using an externally allocated memory space to store compression state.
Use LZ4_sizeofState() to know how much memory must be allocated,
and allocate it on 8-bytes boundaries (using malloc() typically).
Then, provide it as 'void* state' to compression function.
*/
LZ4LIB_API int LZ4_sizeofState(void);
LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
/*!
LZ4_compress_destSize() :
Reverse the logic : compresses as much data as possible from 'src' buffer
into already allocated buffer 'dst' of size 'targetDestSize'.
This function either compresses the entire 'src' content into 'dst' if it's large enough,
or fill 'dst' buffer completely with as much data as possible from 'src'.
*srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'.
New value is necessarily <= old value.
return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
or 0 if compression fails
*/
LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
/*!
LZ4_decompress_fast() : **unsafe!**
This function is a bit faster than LZ4_decompress_safe(),
but it may misbehave on malformed input because it doesn't perform full validation of compressed data.
originalSize : is the uncompressed size to regenerate
Destination buffer must be already allocated, and its size must be >= 'originalSize' bytes.
return : number of bytes read from source buffer (== compressed size).
If the source stream is detected malformed, the function stops decoding and return a negative result.
note : This function is only usable if the originalSize of uncompressed data is known in advance.
The caller should also check that all the compressed input has been consumed properly,
i.e. that the return value matches the size of the buffer with compressed input.
The function never writes past the output buffer. However, since it doesn't know its 'src' size,
it may read past the intended input. Also, because match offsets are not validated during decoding,
reads from 'src' may underflow. Use this function in trusted environment **only**.
*/
LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
/*!
LZ4_decompress_safe_partial() :
This function decompress a compressed block of size 'srcSize' at position 'src'
into destination buffer 'dst' of size 'dstCapacity'.
The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that.
However, it's not accurate, and may write more than 'targetOutputSize' (but always <= dstCapacity).
@return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity)
Note : this number can also be < targetOutputSize, if compressed block contains less data.
Therefore, always control how many bytes were decoded.
If source stream is detected malformed, function returns a negative result.
This function is protected against malicious data packets.
*/
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
/*-*********************************************
* Streaming Compression Functions
***********************************************/
typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
/*! LZ4_createStream() and LZ4_freeStream() :
* LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
* LZ4_freeStream() releases its memory.
*/
LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
/*! LZ4_resetStream() :
* An LZ4_stream_t structure can be allocated once and re-used multiple times.
* Use this function to start compressing a new stream.
*/
LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
/*! LZ4_loadDict() :
* Use this function to load a static dictionary into LZ4_stream_t.
* Any previous data will be forgotten, only 'dictionary' will remain in memory.
* Loading a size of 0 is allowed, and is the same as reset.
* @return : dictionary size, in bytes (necessarily <= 64 KB)
*/
LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
/*! LZ4_compress_fast_continue() :
* Compress 'src' content using data from previously compressed blocks, for better compression ratio.
* 'dst' buffer must be already allocated.
* If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
*
* Important : The previous 64KB of compressed data is assumed to remain present and unmodified in memory!
*
* Special 1 : When input is a double-buffer, they can have any size, including < 64 KB.
* Make sure that buffers are separated by at least one byte.
* This way, each block only depends on previous block.
* Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
*
* @return : size of compressed block
* or 0 if there is an error (typically, cannot fit into 'dst').
* After an error, the stream status is invalid, it can only be reset or freed.
*/
LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
/*! LZ4_saveDict() :
* If last 64KB data cannot be guaranteed to remain available at its current memory location,
* save it into a safer place (char* safeBuffer).
* This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
* but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
* @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
*/
LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
/*-**********************************************
* Streaming Decompression Functions
* Bufferless synchronous API
************************************************/
typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */
/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
* creation / destruction of streaming decompression tracking context.
* A tracking context can be re-used multiple times.
*/
LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
/*! LZ4_setStreamDecode() :
* An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
* Use this function to start decompression of a new stream of blocks.
* A dictionary can optionnally be set. Use NULL or size 0 for a reset order.
* Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
* @return : 1 if OK, 0 if error
*/
LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
/*! LZ4_decoderRingBufferSize() : v1.8.2
* Note : in a ring buffer scenario (optional),
* blocks are presumed decompressed next to each other
* up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
* at which stage it resumes from beginning of ring buffer.
* When setting such a ring buffer for streaming decompression,
* provides the minimum size of this ring buffer
* to be compatible with any source respecting maxBlockSize condition.
* @return : minimum ring buffer size,
* or 0 if there is an error (invalid maxBlockSize).
*/
LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) (65536 + 14 + (mbs)) /* for static allocation; mbs presumed valid */
/*! LZ4_decompress_*_continue() :
* These decoding functions allow decompression of consecutive blocks in "streaming" mode.
* A block is an unsplittable entity, it must be presented entirely to a decompression function.
* Decompression functions only accepts one block at a time.
* The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
* If less than 64KB of data has been decoded, all the data must be present.
*
* Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
* - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
* maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
* In which case, encoding and decoding buffers do not need to be synchronized.
* Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
* - Synchronized mode :
* Decompression buffer size is _exactly_ the same as compression buffer size,
* and follows exactly same update rule (block boundaries at same positions),
* and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
* _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
* - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
* In which case, encoding and decoding buffers do not need to be synchronized,
* and encoding ring buffer can have any size, including small ones ( < 64 KB).
*
* Whenever these conditions are not possible,
* save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
* then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
*/
LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
/*! LZ4_decompress_*_usingDict() :
* These decoding functions work the same as
* a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
* They are stand-alone, and don't need an LZ4_streamDecode_t structure.
* Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
*/
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
/*^**********************************************
* !!!!!! STATIC LINKING ONLY !!!!!!
***********************************************/
/*-************************************
* Unstable declarations
**************************************
* Declarations in this section should be considered unstable.
* Use at your own peril, etc., etc.
* They may be removed in the future.
* Their signatures may change.
**************************************/
#ifdef LZ4_STATIC_LINKING_ONLY
/*! LZ4_resetStream_fast() :
* Use this, like LZ4_resetStream(), to prepare a context for a new chain of
* calls to a streaming API (e.g., LZ4_compress_fast_continue()).
*
* Note:
* Using this in advance of a non- streaming-compression function is redundant,
* and potentially bad for performance, since they all perform their own custom
* reset internally.
*
* Differences from LZ4_resetStream():
* When an LZ4_stream_t is known to be in a internally coherent state,
* it can often be prepared for a new compression with almost no work, only
* sometimes falling back to the full, expensive reset that is always required
* when the stream is in an indeterminate state (i.e., the reset performed by
* LZ4_resetStream()).
*
* LZ4_streams are guaranteed to be in a valid state when:
* - returned from LZ4_createStream()
* - reset by LZ4_resetStream()
* - memset(stream, 0, sizeof(LZ4_stream_t)), though this is discouraged
* - the stream was in a valid state and was reset by LZ4_resetStream_fast()
* - the stream was in a valid state and was then used in any compression call
* that returned success
* - the stream was in an indeterminate state and was used in a compression
* call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
* that returned success
*
* When a stream isn't known to be in a valid state, it is not safe to pass to
* any fastReset or streaming function. It must first be cleansed by the full
* LZ4_resetStream().
*/
LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
/*! LZ4_compress_fast_extState_fastReset() :
* A variant of LZ4_compress_fast_extState().
*
* Using this variant avoids an expensive initialization step. It is only safe
* to call if the state buffer is known to be correctly initialized already
* (see above comment on LZ4_resetStream_fast() for a definition of "correctly
* initialized"). From a high level, the difference is that this function
* initializes the provided state with a call to something like
* LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
* call to LZ4_resetStream().
*/
LZ4LIB_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
/*! LZ4_attach_dictionary() :
* This is an experimental API that allows for the efficient use of a
* static dictionary many times.
*
* Rather than re-loading the dictionary buffer into a working context before
* each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
* working LZ4_stream_t, this function introduces a no-copy setup mechanism,
* in which the working stream references the dictionary stream in-place.
*
* Several assumptions are made about the state of the dictionary stream.
* Currently, only streams which have been prepared by LZ4_loadDict() should
* be expected to work.
*
* Alternatively, the provided dictionary stream pointer may be NULL, in which
* case any existing dictionary stream is unset.
*
* If a dictionary is provided, it replaces any pre-existing stream history.
* The dictionary contents are the only history that can be referenced and
* logically immediately precede the data compressed in the first subsequent
* compression call.
*
* The dictionary will only remain attached to the working stream through the
* first compression call, at the end of which it is cleared. The dictionary
* stream (and source buffer) must remain in-place / accessible / unchanged
* through the completion of the first compression call on the stream.
*/
LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream);
#endif
/*-************************************
* Private definitions
**************************************
* Do not use these definitions.
* They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
* Using these definitions will expose code to API and/or ABI break in future versions of the library.
**************************************/
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
struct LZ4_stream_t_internal {
uint32_t hashTable[LZ4_HASH_SIZE_U32];
uint32_t currentOffset;
uint16_t initCheck;
uint16_t tableType;
const uint8_t* dictionary;
const LZ4_stream_t_internal* dictCtx;
uint32_t dictSize;
};
typedef struct {
const uint8_t* externalDict;
size_t extDictSize;
const uint8_t* prefixEnd;
size_t prefixSize;
} LZ4_streamDecode_t_internal;
#else
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
struct LZ4_stream_t_internal {
unsigned int hashTable[LZ4_HASH_SIZE_U32];
unsigned int currentOffset;
unsigned short initCheck;
unsigned short tableType;
const unsigned char* dictionary;
const LZ4_stream_t_internal* dictCtx;
unsigned int dictSize;
};
typedef struct {
const unsigned char* externalDict;
size_t extDictSize;
const unsigned char* prefixEnd;
size_t prefixSize;
} LZ4_streamDecode_t_internal;
#endif
/*!
* LZ4_stream_t :
* information structure to track an LZ4 stream.
* init this structure before first use.
* note : only use in association with static linking !
* this definition is not API/ABI safe,
* it may change in a future version !
*/
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
union LZ4_stream_u {
unsigned long long table[LZ4_STREAMSIZE_U64];
LZ4_stream_t_internal internal_donotuse;
} ; /* previously typedef'd to LZ4_stream_t */
/*!
* LZ4_streamDecode_t :
* information structure to track an LZ4 stream during decompression.
* init this structure using LZ4_setStreamDecode (or memset()) before first use
* note : only use in association with static linking !
* this definition is not API/ABI safe,
* and may change in a future version !
*/
#define LZ4_STREAMDECODESIZE_U64 4
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
union LZ4_streamDecode_u {
unsigned long long table[LZ4_STREAMDECODESIZE_U64];
LZ4_streamDecode_t_internal internal_donotuse;
} ; /* previously typedef'd to LZ4_streamDecode_t */
/*-************************************
* Obsolete Functions
**************************************/
/*! Deprecation warnings
Should deprecation warnings be a problem,
it is generally possible to disable them,
typically with -Wno-deprecated-declarations for gcc
or _CRT_SECURE_NO_WARNINGS in Visual.
Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
#else
# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define LZ4_DEPRECATED(message) [[deprecated(message)]]
# elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
# elif (LZ4_GCC_VERSION >= 301)
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
# elif defined(_MSC_VER)
# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
# else
# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
# define LZ4_DEPRECATED(message)
# endif
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
/* Obsolete compression functions */
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* source, char* dest, int sourceSize);
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
/* Obsolete decompression functions */
LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
/* Obsolete streaming functions; degraded functionality; do not use!
*
* In order to perform streaming compression, these functions depended on data
* that is no longer tracked in the state. They have been preserved as well as
* possible: using them will still produce a correct output. However, they don't
* actually retain any history between compression calls. The compression ratio
* achieved will therefore be no better than compressing each chunk
* independently.
*/
LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void);
LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer);
LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
/* Obsolete streaming decoding functions */
/*LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead")*/ LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
}
#endif /* LZ4_H_2983827168210 */

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/*
LZ4 HC - High Compression Mode of LZ4
Header File
Copyright (C) 2011-2020, Yann Collet.
Copyright (C) 2011-2017, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
@@ -31,13 +31,15 @@
- LZ4 source repository : https://github.com/lz4/lz4
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#ifndef TRACY_LZ4_HC_H_19834876238432
#define TRACY_LZ4_HC_H_19834876238432
#ifndef LZ4_HC_H_19834876238432
#define LZ4_HC_H_19834876238432
/* --- Dependency --- */
/* note : lz4hc requires lz4.h/lz4.c for compilation */
#include "tracy_lz4.hpp" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
namespace tracy
{
/* --- Useful constants --- */
#define LZ4HC_CLEVEL_MIN 3
@@ -45,14 +47,12 @@
#define LZ4HC_CLEVEL_OPT_MIN 10
#define LZ4HC_CLEVEL_MAX 12
namespace tracy
{
/*-************************************
* Block Compression
**************************************/
/*! LZ4_compress_HC() :
* Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
* Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
* `dst` must be already allocated.
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
@@ -75,21 +75,7 @@ LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dst
* Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly).
*/
LZ4LIB_API int LZ4_sizeofStateHC(void);
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
/*! LZ4_compress_HC_destSize() : v1.9.0+
* Will compress as much data as possible from `src`
* to fit into `targetDstSize` budget.
* Result is provided in 2 parts :
* @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
* or 0 if compression fails.
* `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src`
*/
LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
const char* src, char* dst,
int* srcSizePtr, int targetDstSize,
int compressionLevel);
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
/*-************************************
@@ -101,92 +87,46 @@ LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
* These functions create and release memory for LZ4 HC streaming state.
* Newly created states are automatically initialized.
* A same state can be used multiple times consecutively,
* starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
* Existing states can be re-used several times, using LZ4_resetStreamHC().
* These methods are API and ABI stable, they can be used in combination with a DLL.
*/
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
/*
These functions compress data in successive blocks of any size,
using previous blocks as dictionary, to improve compression ratio.
One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
There is an exception for ring buffers, which can be smaller than 64 KB.
Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
Before starting compression, state must be allocated and properly initialized.
LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
which is automatically the case when state is created using LZ4_createStreamHC().
After reset, a first "fictional block" can be designated as initial dictionary,
using LZ4_loadDictHC() (Optional).
Invoke LZ4_compress_HC_continue() to compress each successive block.
The number of blocks is unlimited.
Previous input blocks, including initial dictionary when present,
must remain accessible and unmodified during compression.
It's allowed to update compression level anytime between blocks,
using LZ4_setCompressionLevel() (experimental).
'dst' buffer should be sized to handle worst case scenarios
(see LZ4_compressBound(), it ensures compression success).
In case of failure, the API does not guarantee recovery,
so the state _must_ be reset.
To ensure compression success
whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
consider using LZ4_compress_HC_continue_destSize().
Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
After completing a streaming compression,
it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
just by resetting it, using LZ4_resetStreamHC_fast().
*/
LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
const char* src, char* dst,
int srcSize, int maxDstSize);
/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
* Similar to LZ4_compress_HC_continue(),
* but will read as much data as possible from `src`
* to fit into `targetDstSize` budget.
* Result is provided into 2 parts :
* @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
* or 0 if compression fails.
* `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`.
* Note that this function may not consume the entire input.
*/
LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
const char* src, char* dst,
int* srcSizePtr, int targetDstSize);
LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
/*
These functions compress data in successive blocks of any size, using previous blocks as dictionary.
One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
There is an exception for ring buffers, which can be smaller than 64 KB.
Ring buffers scenario is automatically detected and handled by LZ4_compress_HC_continue().
Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
Then, use LZ4_compress_HC_continue() to compress each successive block.
Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
'dst' buffer should be sized to handle worst case scenarios (see LZ4_compressBound()), to ensure operation success.
Because in case of failure, the API does not guarantee context recovery, and context will have to be reset.
If `dst` buffer budget cannot be >= LZ4_compressBound(), consider using LZ4_compress_HC_continue_destSize() instead.
If, for any reason, previous data block can't be preserved unmodified in memory for next compression block,
you can save it to a more stable memory space, using LZ4_saveDictHC().
Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
*/
/*^**********************************************
* !!!!!! STATIC LINKING ONLY !!!!!!
***********************************************/
/*-******************************************************************
/*-**************************************************************
* PRIVATE DEFINITIONS :
* Do not use these definitions directly.
* They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
* Declare an `LZ4_streamHC_t` directly, rather than any type below.
* Even then, only do so in the context of static linking, as definitions may change between versions.
********************************************************************/
* Do not use these definitions.
* They are exposed to allow static allocation of `LZ4_streamHC_t`.
* Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
****************************************************************/
#define LZ4HC_DICTIONARY_LOGSIZE 16
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
@@ -196,51 +136,59 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
/* Never ever use these definitions directly !
* Declare or allocate an LZ4_streamHC_t instead.
**/
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#include <stdint.h>
typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
struct LZ4HC_CCtx_internal
{
LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE];
LZ4_u16 chainTable[LZ4HC_MAXD];
const LZ4_byte* end; /* next block here to continue on current prefix */
const LZ4_byte* prefixStart; /* Indexes relative to this position */
const LZ4_byte* dictStart; /* alternate reference for extDict */
LZ4_u32 dictLimit; /* below that point, need extDict */
LZ4_u32 lowLimit; /* below that point, no more dict */
LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */
short compressionLevel;
LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set,
otherwise, favor compression ratio */
LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */
uint32_t hashTable[LZ4HC_HASHTABLESIZE];
uint16_t chainTable[LZ4HC_MAXD];
const uint8_t* end; /* next block here to continue on current prefix */
const uint8_t* base; /* All index relative to this position */
const uint8_t* dictBase; /* alternate base for extDict */
uint32_t dictLimit; /* below that point, need extDict */
uint32_t lowLimit; /* below that point, no more dict */
uint32_t nextToUpdate; /* index from which to continue dictionary update */
short compressionLevel;
short favorDecSpeed;
const LZ4HC_CCtx_internal* dictCtx;
};
#define LZ4_STREAMHC_MINSIZE 262200 /* static size, for inter-version compatibility */
#else
typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
struct LZ4HC_CCtx_internal
{
unsigned int hashTable[LZ4HC_HASHTABLESIZE];
unsigned short chainTable[LZ4HC_MAXD];
const unsigned char* end; /* next block here to continue on current prefix */
const unsigned char* base; /* All index relative to this position */
const unsigned char* dictBase; /* alternate base for extDict */
unsigned int dictLimit; /* below that point, need extDict */
unsigned int lowLimit; /* below that point, no more dict */
unsigned int nextToUpdate; /* index from which to continue dictionary update */
short compressionLevel;
short favorDecSpeed;
const LZ4HC_CCtx_internal* dictCtx;
};
#endif
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 262200 */
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
union LZ4_streamHC_u {
char minStateSize[LZ4_STREAMHC_MINSIZE];
size_t table[LZ4_STREAMHCSIZE_SIZET];
LZ4HC_CCtx_internal internal_donotuse;
}; /* previously typedef'd to LZ4_streamHC_t */
}; /* previously typedef'd to LZ4_streamHC_t */
/*
LZ4_streamHC_t :
This structure allows static allocation of LZ4 HC streaming state.
State must be initialized using LZ4_resetStreamHC() before first use.
/* LZ4_streamHC_t :
* This structure allows static allocation of LZ4 HC streaming state.
* This can be used to allocate statically on stack, or as part of a larger structure.
*
* Such state **must** be initialized using LZ4_initStreamHC() before first use.
*
* Note that invoking LZ4_initStreamHC() is not required when
* the state was created using LZ4_createStreamHC() (which is recommended).
* Using the normal builder, a newly created state is automatically initialized.
*
* Static allocation shall only be used in combination with static linking.
*/
/* LZ4_initStreamHC() : v1.9.0+
* Required before first use of a statically allocated LZ4_streamHC_t.
* Before v1.9.0 : use LZ4_resetStreamHC() instead
*/
LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC(void* buffer, size_t size);
Static allocation shall only be used in combination with static linking.
When invoking LZ4 from a DLL, use create/free functions instead, which are API and ABI stable.
*/
/*-************************************
@@ -251,11 +199,11 @@ LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC(void* buffer, size_t size);
/* deprecated compression functions */
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC (const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
@@ -268,26 +216,13 @@ LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_comp
* LZ4_slideInputBufferHC() will truncate the history of the stream, rather
* than preserve a window-sized chunk of history.
*/
#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data);
#endif
LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int LZ4_sizeofStreamStateHC(void);
LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC().
* The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
* which is now the recommended function to start a new stream of blocks,
* but cannot be used to initialize a memory segment containing arbitrary garbage data.
*
* It is recommended to switch to LZ4_initStreamHC().
* LZ4_resetStreamHC() will generate deprecation warnings in a future version.
*/
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
}
@@ -300,34 +235,53 @@ LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionL
* They should not be linked from DLL,
* as there is no guarantee of API stability yet.
* Prototypes will be promoted to "stable" status
* after successful usage in real-life scenarios.
* after successfull usage in real-life scenarios.
***************************************************/
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
#ifndef TRACY_LZ4_HC_SLO_098092834
#define TRACY_LZ4_HC_SLO_098092834
#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
#include "tracy_lz4.hpp"
#ifndef LZ4_HC_SLO_098092834
#define LZ4_HC_SLO_098092834
namespace tracy
{
/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
* It's possible to change compression level
* between successive invocations of LZ4_compress_HC_continue*()
* for dynamic adaptation.
/*! LZ4_compress_HC_destSize() : v1.8.0 (experimental)
* Will try to compress as much data from `src` as possible
* that can fit into `targetDstSize` budget.
* Result is provided in 2 parts :
* @return : the number of bytes written into 'dst'
* or 0 if compression fails.
* `srcSizePtr` : value will be updated to indicate how much bytes were read from `src`
*/
LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
int LZ4_compress_HC_destSize(void* LZ4HC_Data,
const char* src, char* dst,
int* srcSizePtr, int targetDstSize,
int compressionLevel);
/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
* Opt. Parser will favor decompression speed over compression ratio.
* Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
/*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental)
* Similar as LZ4_compress_HC_continue(),
* but will read a variable nb of bytes from `src`
* to fit into `targetDstSize` budget.
* Result is provided in 2 parts :
* @return : the number of bytes written into 'dst'
* or 0 if compression fails.
* `srcSizePtr` : value will be updated to indicate how much bytes were read from `src`.
*/
LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
const char* src, char* dst,
int* srcSizePtr, int targetDstSize);
/*! LZ4_resetStreamHC_fast() : v1.9.0+
/*! LZ4_setCompressionLevel() : v1.8.0 (experimental)
* It's possible to change compression level between 2 invocations of LZ4_compress_HC_continue*()
*/
void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
/*! LZ4_favorDecompressionSpeed() : v1.8.2 (experimental)
* Parser will select decisions favoring decompression over compression ratio.
* Only work at highest compression settings (level >= LZ4HC_CLEVEL_OPT_MIN)
*/
void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
/*! LZ4_resetStreamHC_fast() :
* When an LZ4_streamHC_t is known to be in a internally coherent state,
* it can often be prepared for a new compression with almost no work, only
* sometimes falling back to the full, expensive reset that is always required
@@ -344,14 +298,8 @@ LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
* - the stream was in an indeterminate state and was used in a compression
* call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
* returned success
*
* Note:
* A stream that was last used in a compression call that returned an error
* may be passed to this function. However, it will be fully reset, which will
* clear any existing history and settings from the context.
*/
LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
void LZ4_resetStreamHC_fast(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
/*! LZ4_compress_HC_extStateHC_fastReset() :
* A variant of LZ4_compress_HC_extStateHC().
@@ -364,11 +312,7 @@ LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
* LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
* call to LZ4_resetStreamHC().
*/
LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
void* state,
const char* src, char* dst,
int srcSize, int dstCapacity,
int compressionLevel);
int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
/*! LZ4_attach_HC_dictionary() :
* This is an experimental API that allows for the efficient use of a
@@ -395,9 +339,7 @@ LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
* stream (and source buffer) must remain in-place / accessible / unchanged
* through the lifetime of the stream session.
*/
LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary(
LZ4_streamHC_t *working_stream,
const LZ4_streamHC_t *dictionary_stream);
LZ4LIB_API void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream);
}

255
common/tracy_sema.h Normal file
View File

@@ -0,0 +1,255 @@
// Copyright (c) 2015 Jeff Preshing
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#ifndef __TRACY_CPP11OM_SEMAPHORE_H__
#define __TRACY_CPP11OM_SEMAPHORE_H__
#include <atomic>
#include <cassert>
#if defined(__MACH__)
#include <mach/mach.h>
#elif defined(__unix__)
#include <semaphore.h>
#endif
namespace tracy
{
#if defined(_WIN32)
//---------------------------------------------------------
// Semaphore (Windows)
//---------------------------------------------------------
#ifndef MAXLONG
enum { MAXLONG = 0x7fffffff };
#endif
#ifndef INFINITE
enum { INFINITE = 0xFFFFFFFF };
#endif
#ifndef _WINDOWS_
typedef void* HANDLE;
extern "C" __declspec(dllimport) HANDLE __stdcall CreateSemaphoreA( void*, long, long, const char* );
extern "C" __declspec(dllimport) int __stdcall CloseHandle( HANDLE );
extern "C" __declspec(dllimport) unsigned long __stdcall WaitForSingleObject( HANDLE, unsigned long );
extern "C" __declspec(dllimport) int __stdcall ReleaseSemaphore( HANDLE, long, long* );
#endif
class Semaphore
{
private:
HANDLE m_hSema;
Semaphore(const Semaphore& other) = delete;
Semaphore& operator=(const Semaphore& other) = delete;
public:
Semaphore(int initialCount = 0)
{
assert(initialCount >= 0);
m_hSema = CreateSemaphoreA(NULL, initialCount, MAXLONG, NULL);
}
~Semaphore()
{
CloseHandle(m_hSema);
}
void wait()
{
WaitForSingleObject(m_hSema, INFINITE);
}
void signal(int count = 1)
{
ReleaseSemaphore(m_hSema, count, NULL);
}
};
#elif defined(__MACH__)
//---------------------------------------------------------
// Semaphore (Apple iOS and OSX)
// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
//---------------------------------------------------------
class Semaphore
{
private:
semaphore_t m_sema;
Semaphore(const Semaphore& other) = delete;
Semaphore& operator=(const Semaphore& other) = delete;
public:
Semaphore(int initialCount = 0)
{
assert(initialCount >= 0);
semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
}
~Semaphore()
{
semaphore_destroy(mach_task_self(), m_sema);
}
void wait()
{
semaphore_wait(m_sema);
}
void signal()
{
semaphore_signal(m_sema);
}
void signal(int count)
{
while (count-- > 0)
{
semaphore_signal(m_sema);
}
}
};
#elif defined(__unix__)
//---------------------------------------------------------
// Semaphore (POSIX, Linux)
//---------------------------------------------------------
class Semaphore
{
private:
sem_t m_sema;
Semaphore(const Semaphore& other) = delete;
Semaphore& operator=(const Semaphore& other) = delete;
public:
Semaphore(int initialCount = 0)
{
assert(initialCount >= 0);
sem_init(&m_sema, 0, initialCount);
}
~Semaphore()
{
sem_destroy(&m_sema);
}
void wait()
{
// http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
int rc;
do
{
rc = sem_wait(&m_sema);
}
while (rc == -1 && errno == EINTR);
}
void signal()
{
sem_post(&m_sema);
}
void signal(int count)
{
while (count-- > 0)
{
sem_post(&m_sema);
}
}
};
#else
#error Unsupported platform!
#endif
//---------------------------------------------------------
// LightweightSemaphore
//---------------------------------------------------------
class LightweightSemaphore
{
private:
std::atomic<int> m_count;
Semaphore m_sema;
void waitWithPartialSpinning()
{
int oldCount;
// Is there a better way to set the initial spin count?
// If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
// as threads start hitting the kernel semaphore.
int spin = 10000;
while (spin--)
{
oldCount = m_count.load(std::memory_order_relaxed);
if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire))
return;
std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop.
}
oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
if (oldCount <= 0)
{
m_sema.wait();
}
}
public:
LightweightSemaphore(int initialCount = 0) : m_count(initialCount)
{
assert(initialCount >= 0);
}
bool tryWait()
{
int oldCount = m_count.load(std::memory_order_relaxed);
return (oldCount > 0 && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire));
}
void wait()
{
if (!tryWait())
waitWithPartialSpinning();
}
void signal(int count = 1)
{
int oldCount = m_count.fetch_add(count, std::memory_order_release);
int toRelease = -oldCount < count ? -oldCount : count;
if (toRelease > 0)
{
m_sema.signal(toRelease);
}
}
};
typedef LightweightSemaphore DefaultSemaphoreType;
}
#endif // __CPP11OM_SEMAPHORE_H__

View File

@@ -1,28 +0,0 @@
cmake_minimum_required(VERSION 3.16)
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_PARALLEL_STL "Disable parallel STL" OFF)
set(NO_STATISTICS OFF)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
set(CMAKE_CXX_STANDARD 20)
project(
tracy-csvexport
LANGUAGES C CXX
VERSION ${TRACY_VERSION_STRING}
)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)
set(PROGRAM_FILES
src/csvexport.cpp
)
add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})

View File

@@ -1,353 +0,0 @@
#ifdef _WIN32
# include <windows.h>
#endif
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include "../../server/TracyFileRead.hpp"
#include "../../server/TracyWorker.hpp"
#include "../../getopt/getopt.h"
void print_usage_exit(int e)
{
fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
fprintf(stderr, "Usage:\n");
fprintf(stderr, " extract [OPTION...] <trace file>\n");
fprintf(stderr, "\n");
fprintf(stderr, " -h, --help Print usage\n");
fprintf(stderr, " -f, --filter arg Filter zone names (default: "")\n");
fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
fprintf(stderr, " -c, --case Case sensitive filtering\n");
fprintf(stderr, " -e, --self Get self times\n");
fprintf(stderr, " -u, --unwrap Report each zone event\n");
fprintf(stderr, " -m, --messages Report only messages\n");
exit(e);
}
struct Args {
const char* filter;
const char* separator;
const char* trace_file;
bool case_sensitive;
bool self_time;
bool unwrap;
bool unwrapMessages;
};
Args parse_args(int argc, char** argv)
{
if (argc == 1)
{
print_usage_exit(1);
}
Args args = { "", ",", "", false, false, false, false };
struct option long_opts[] = {
{ "help", no_argument, NULL, 'h' },
{ "filter", optional_argument, NULL, 'f' },
{ "sep", optional_argument, NULL, 's' },
{ "case", no_argument, NULL, 'c' },
{ "self", no_argument, NULL, 'e' },
{ "unwrap", no_argument, NULL, 'u' },
{ "messages", no_argument, NULL, 'm' },
{ NULL, 0, NULL, 0 }
};
int c;
while ((c = getopt_long(argc, argv, "hf:s:ceum", long_opts, NULL)) != -1)
{
switch (c)
{
case 'h':
print_usage_exit(0);
break;
case 'f':
args.filter = optarg;
break;
case 's':
args.separator = optarg;
break;
case 'c':
args.case_sensitive = true;
break;
case 'e':
args.self_time = true;
break;
case 'u':
args.unwrap = true;
break;
case 'm':
args.unwrapMessages = true;
break;
default:
print_usage_exit(1);
break;
}
}
if (argc != optind + 1)
{
print_usage_exit(1);
}
args.trace_file = argv[optind];
return args;
}
bool is_substring(
const char* term,
const char* s,
bool case_sensitive = false
){
auto new_term = std::string(term);
auto new_s = std::string(s);
if (!case_sensitive) {
std::transform(
new_term.begin(),
new_term.end(),
new_term.begin(),
[](unsigned char c){ return std::tolower(c); }
);
std::transform(
new_s.begin(),
new_s.end(),
new_s.begin(),
[](unsigned char c){ return std::tolower(c); }
);
}
return new_s.find(new_term) != std::string::npos;
}
const char* get_name(int32_t id, const tracy::Worker& worker)
{
auto& srcloc = worker.GetSourceLocation(id);
return worker.GetString(srcloc.name.active ? srcloc.name : srcloc.function);
}
template <typename T>
std::string join(const T& v, const char* sep) {
std::ostringstream s;
for (const auto& i : v) {
if (&i != &v[0]) {
s << sep;
}
s << i;
}
return s.str();
}
// From TracyView.cpp
int64_t GetZoneChildTimeFast(
const tracy::Worker& worker,
const tracy::ZoneEvent& zone
){
int64_t time = 0;
if( zone.HasChildren() )
{
auto& children = worker.GetZoneChildren( zone.Child() );
if( children.is_magic() )
{
auto& vec = *(tracy::Vector<tracy::ZoneEvent>*)&children;
for( auto& v : vec )
{
assert( v.IsEndValid() );
time += v.End() - v.Start();
}
}
else
{
for( auto& v : children )
{
assert( v->IsEndValid() );
time += v->End() - v->Start();
}
}
}
return time;
}
int main(int argc, char** argv)
{
#ifdef _WIN32
if (!AttachConsole(ATTACH_PARENT_PROCESS))
{
AllocConsole();
SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
}
#endif
Args args = parse_args(argc, argv);
auto f = std::unique_ptr<tracy::FileRead>(
tracy::FileRead::Open(args.trace_file)
);
if (!f)
{
fprintf(stderr, "Could not open file %s\n", args.trace_file);
return 1;
}
auto worker = tracy::Worker(*f);
if (args.unwrapMessages)
{
const auto& msgs = worker.GetMessages();
if (msgs.size() > 0)
{
std::vector<const char*> columnsForMessages;
columnsForMessages = {
"MessageName", "total_ns"
};
std::string headerForMessages = join(columnsForMessages, args.separator);
printf("%s\n", headerForMessages.data());
for(auto& it : msgs)
{
std::vector<std::string> values(columnsForMessages.size());
values[0] = worker.GetString(it->ref);
values[1] = std::to_string(it->time);
std::string row = join(values, args.separator);
printf("%s\n", row.data());
}
}
else
{
printf("There are currently no messages!\n");
}
return 0;
}
while (!worker.AreSourceLocationZonesReady())
{
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
auto& slz = worker.GetSourceLocationZones();
tracy::Vector<decltype(slz.begin())> slz_selected;
slz_selected.reserve(slz.size());
uint32_t total_cnt = 0;
for(auto it = slz.begin(); it != slz.end(); ++it)
{
if(it->second.total != 0)
{
++total_cnt;
if(args.filter[0] == '\0')
{
slz_selected.push_back_no_space_check(it);
}
else
{
auto name = get_name(it->first, worker);
if(is_substring(args.filter, name, args.case_sensitive))
{
slz_selected.push_back_no_space_check(it);
}
}
}
}
std::vector<const char*> columns;
if (args.unwrap)
{
columns = {
"name", "src_file", "src_line", "ns_since_start", "exec_time_ns", "thread"
};
}
else
{
columns = {
"name", "src_file", "src_line", "total_ns", "total_perc",
"counts", "mean_ns", "min_ns", "max_ns", "std_ns"
};
}
std::string header = join(columns, args.separator);
printf("%s\n", header.data());
const auto last_time = worker.GetLastTime();
for(auto& it : slz_selected)
{
std::vector<std::string> values(columns.size());
values[0] = get_name(it->first, worker);
const auto& srcloc = worker.GetSourceLocation(it->first);
values[1] = worker.GetString(srcloc.file);
values[2] = std::to_string(srcloc.line);
const auto& zone_data = it->second;
if (args.unwrap)
{
int i = 0;
for (const auto& zone_thread_data : zone_data.zones) {
const auto zone_event = zone_thread_data.Zone();
const auto tId = zone_thread_data.Thread();
const auto start = zone_event->Start();
const auto end = zone_event->End();
values[3] = std::to_string(start);
auto timespan = end - start;
if (args.self_time) {
timespan -= GetZoneChildTimeFast(worker, *zone_event);
}
values[4] = std::to_string(timespan);
values[5] = std::to_string(tId);
std::string row = join(values, args.separator);
printf("%s\n", row.data());
}
}
else
{
const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
values[3] = std::to_string(time);
values[4] = std::to_string(100. * time / last_time);
values[5] = std::to_string(zone_data.zones.size());
const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
/ zone_data.zones.size();
values[6] = std::to_string(avg);
const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
values[7] = std::to_string(tmin);
values[8] = std::to_string(tmax);
const auto sz = zone_data.zones.size();
const auto ss = zone_data.sumSq
- 2. * zone_data.total * avg
+ avg * avg * sz;
double std = 0;
if( sz > 1 )
std = sqrt(ss / (sz - 1));
values[9] = std::to_string(std);
std::string row = join(values, args.separator);
printf("%s\n", row.data());
}
}
return 0;
}

BIN
doc/cost.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

370
doc/design.svg Normal file
View File

@@ -0,0 +1,370 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
id="svg8"
version="1.1"
viewBox="0 0 139.17125 37.041668"
height="140"
width="526.00159">
<defs
id="defs2">
<marker
style="overflow:visible"
id="marker6660"
refX="0"
refY="0"
orient="auto">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path6658" />
</marker>
<marker
style="overflow:visible"
id="marker6158"
refX="0"
refY="0"
orient="auto">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path6156" />
</marker>
<marker
style="overflow:visible"
id="Arrow1Send"
refX="0"
refY="0"
orient="auto">
<path
transform="matrix(-0.2,0,0,-0.2,-1.2,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path4694" />
</marker>
<marker
style="overflow:visible"
id="marker5984"
refX="0"
refY="0"
orient="auto">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path5982" />
</marker>
<marker
orient="auto"
refY="0"
refX="0"
id="marker5482"
style="overflow:visible">
<path
id="path5480"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
transform="matrix(-0.4,0,0,-0.4,-4,0)" />
</marker>
<marker
orient="auto"
refY="0"
refX="0"
id="marker5472"
style="overflow:visible">
<path
id="path5470"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
transform="matrix(-0.4,0,0,-0.4,-4,0)" />
</marker>
<marker
style="overflow:visible"
id="marker5378"
refX="0"
refY="0"
orient="auto">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path5376" />
</marker>
<marker
style="overflow:visible"
id="marker5308"
refX="0"
refY="0"
orient="auto">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path5306" />
</marker>
<marker
style="overflow:visible"
id="Arrow1Mend"
refX="0"
refY="0"
orient="auto">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path4688" />
</marker>
<marker
orient="auto"
refY="0"
refX="0"
id="marker5170"
style="overflow:visible">
<path
id="path5168"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
transform="matrix(-0.8,0,0,-0.8,-10,0)" />
</marker>
<marker
orient="auto"
refY="0"
refX="0"
id="marker4963"
style="overflow:visible">
<path
id="path4961"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
transform="matrix(-0.8,0,0,-0.8,-10,0)" />
</marker>
<marker
style="overflow:visible"
id="marker6158-2"
refX="0"
refY="0"
orient="auto">
<path
transform="matrix(-0.4,0,0,-0.4,-4,0)"
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path6156-2" />
</marker>
</defs>
<metadata
id="metadata5">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
transform="translate(-18.388332,-17.864582)"
id="layer1">
<g
id="g4666">
<rect
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect4607"
width="17.197916"
height="6.614583"
x="18.520834"
y="20.510416" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
x="20.968229"
y="24.869841"
id="text4611"><tspan
id="tspan4609"
x="20.968229"
y="24.869841"
style="stroke-width:0.26458332px">Thread 1</tspan></text>
</g>
<g
id="g4661">
<rect
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect4607-4"
width="17.197916"
height="6.6145835"
x="18.520834"
y="32.416668" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458349px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
x="20.979254"
y="36.776093"
id="text4611-8"><tspan
id="tspan4609-9"
x="20.979254"
y="36.776093"
style="stroke-width:0.26458332px">Thread 2</tspan></text>
</g>
<g
id="g4671">
<rect
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="rect4607-8"
width="17.197916"
height="6.6145835"
x="18.520832"
y="44.322918" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458349px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
x="20.951002"
y="48.682343"
id="text4611-89"><tspan
id="tspan4609-6"
x="20.951002"
y="48.682343"
style="stroke-width:0.26458332px">Thread 3</tspan></text>
</g>
<g
id="g5096">
<ellipse
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="path4644"
cx="67.775978"
cy="36.3787"
rx="10.583333"
ry="4.6302085" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:2.82222223px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
x="67.733261"
y="35.623535"
id="text4648"><tspan
id="tspan4646"
x="67.733261"
y="35.623535"
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px">Tracy</tspan><tspan
x="67.733261"
y="39.151314"
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px"
id="tspan4650">client</tspan></text>
</g>
<path
id="path4673"
d="m 37.041666,24.479166 19.84375,7.937502"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5472)" />
<path
id="path4675"
d="m 37.041666,46.968751 19.84375,-6.614584"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5482)" />
<path
id="path4677"
d="M 37.041667,36.385417 H 55.5625"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5378)" />
<path
id="path5059"
d="M 84.666667,17.864582 V 54.90625"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:1.05833327, 2.11666654;stroke-dashoffset:0;stroke-opacity:1" />
<g
id="g5106">
<ellipse
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
id="path4644-1"
cx="101.98283"
cy="36.56768"
rx="10.583333"
ry="4.6302085" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:2.82222223px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
x="101.90772"
y="35.812515"
id="text4648-2"><tspan
id="tspan4646-5"
x="101.90772"
y="35.812515"
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px">Tracy</tspan><tspan
x="101.90772"
y="39.340294"
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px"
id="tspan4650-1">server</tspan></text>
</g>
<path
id="path5108"
d="M 79.375,37.708333 H 89.958333"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6660)" />
<path
id="path5110"
d="M 89.958333,35.0625 H 79.375"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker5308)" />
<g
transform="translate(-2.64619,-1.3704153)"
id="g6152">
<ellipse
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path6114"
cx="128.98439"
cy="33.692333"
rx="4.6302085"
ry="1.2756696" />
<path
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
d="m 124.36251,41.677042 c -0.004,0.01582 -0.007,0.03168 -0.008,0.04754 5.3e-4,0.704384 2.07327,1.275328 4.62995,1.275373 2.55689,3.5e-5 4.62988,-0.570931 4.63048,-1.275373 -10e-4,-0.01585 -0.003,-0.03171 -0.006,-0.04754"
id="path6114-1" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 124.35417,33.739583 v 8.021022"
id="path6138" />
<path
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 133.61458,33.739582 v 8.051744"
id="path6140" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
x="126.89217"
y="39.409225"
id="text6144"><tspan
id="tspan6142"
x="126.89217"
y="39.409225"
style="stroke-width:0.26458332px">DB</tspan></text>
</g>
<path
id="path6154"
d="m 113.77082,36.385418 h 6.61459"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6158)" />
<g
transform="translate(2.6458333,1.2715659e-6)"
id="g6241">
<rect
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect6232"
width="17.197916"
height="5.291667"
x="137.58333"
y="33.739582" />
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
x="140.8851"
y="37.164005"
id="text6236"><tspan
id="tspan6234"
x="140.8851"
y="37.164005"
style="stroke-width:0.26458332px">Display</tspan></text>
</g>
<path
id="path6154-3"
d="m 132.29166,36.385417 h 6.61459"
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6158-2)" />
</g>
</svg>

After

Width:  |  Height:  |  Size: 15 KiB

BIN
doc/locks.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.3 KiB

BIN
doc/messages.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 KiB

BIN
doc/plot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 213 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 250 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 234 KiB

View File

@@ -1,706 +0,0 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_DIFF_H
#define DTL_DIFF_H
namespace dtl {
/**
* diff class template
* sequence must support random_access_iterator.
*/
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
class Diff
{
private :
dtl_typedefs(elem, sequence)
sequence A;
sequence B;
size_t M;
size_t N;
size_t delta;
size_t offset;
long long *fp;
long long editDistance;
Lcs< elem > lcs;
Ses< elem > ses;
editPath path;
editPathCordinates pathCordinates;
bool swapped;
bool huge;
bool trivial;
bool editDistanceOnly;
uniHunkVec uniHunks;
comparator cmp;
long long ox;
long long oy;
public :
Diff () {}
Diff (const sequence& a,
const sequence& b) : A(a), B(b), ses(false) {
init();
}
Diff (const sequence& a,
const sequence& b,
bool deletesFirst) : A(a), B(b), ses(deletesFirst) {
init();
}
Diff (const sequence& a,
const sequence& b,
const comparator& comp) : A(a), B(b), ses(false), cmp(comp) {
init();
}
Diff (const sequence& a,
const sequence& b,
bool deleteFirst,
const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) {
init();
}
~Diff() {}
long long getEditDistance () const {
return editDistance;
}
Lcs< elem > getLcs () const {
return lcs;
}
elemVec getLcsVec () const {
return lcs.getSequence();
}
Ses< elem > getSes () const {
return ses;
}
uniHunkVec getUniHunks () const {
return uniHunks;
}
/* These should be deprecated */
bool isHuge () const {
return huge;
}
void onHuge () {
this->huge = true;
}
void offHuge () {
this->huge = false;
}
bool isUnserious () const {
return trivial;
}
void onUnserious () {
this->trivial = true;
}
void offUnserious () {
this->trivial = false;
}
void onOnlyEditDistance () {
this->editDistanceOnly = true;
}
/* These are the replacements for the above */
bool hugeEnabled () const {
return huge;
}
void enableHuge () {
this->huge = true;
}
void disableHuge () {
this->huge = false;
}
bool trivialEnabled () const {
return trivial;
}
void enableTrivial () {
this->trivial = true;
}
void disableTrivial () {
this->trivial = false;
}
void editDistanceOnlyEnabled () {
this->editDistanceOnly = true;
}
/**
* patching with Unified Format Hunks
*/
sequence uniPatch (const sequence& seq) {
elemList seqLst(seq.begin(), seq.end());
sesElemVec shunk;
sesElemVec_iter vsesIt;
elemList_iter lstIt = seqLst.begin();
long long inc_dec_total = 0;
long long gap = 1;
for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
joinSesVec(shunk, it->common[0]);
joinSesVec(shunk, it->change);
joinSesVec(shunk, it->common[1]);
it->a += inc_dec_total;
inc_dec_total += it->inc_dec_count;
for (long long i=0;i<it->a - gap;++i) {
++lstIt;
}
gap = it->a + it->b + it->inc_dec_count;
vsesIt = shunk.begin();
while (vsesIt!=shunk.end()) {
switch (vsesIt->second.type) {
case SES_ADD :
seqLst.insert(lstIt, vsesIt->first);
break;
case SES_DELETE :
if (lstIt != seqLst.end()) {
lstIt = seqLst.erase(lstIt);
}
break;
case SES_COMMON :
if (lstIt != seqLst.end()) {
++lstIt;
}
break;
default :
// no fall-through
break;
}
++vsesIt;
}
shunk.clear();
}
sequence patchedSeq(seqLst.begin(), seqLst.end());
return patchedSeq;
}
/**
* patching with Shortest Edit Script (SES)
*/
sequence patch (const sequence& seq) const {
sesElemVec sesSeq = ses.getSequence();
elemList seqLst(seq.begin(), seq.end());
elemList_iter lstIt = seqLst.begin();
for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) {
switch (sesIt->second.type) {
case SES_ADD :
seqLst.insert(lstIt, sesIt->first);
break;
case SES_DELETE :
lstIt = seqLst.erase(lstIt);
break;
case SES_COMMON :
++lstIt;
break;
default :
// no through
break;
}
}
sequence patchedSeq(seqLst.begin(), seqLst.end());
return patchedSeq;
}
/**
* compose Longest Common Subsequence and Shortest Edit Script.
* The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
* described by Sun Wu, Udi Manber and Gene Myers
*/
void compose() {
if (isHuge()) {
pathCordinates.reserve(MAX_CORDINATES_SIZE);
}
ox = 0;
oy = 0;
long long p = -1;
fp = new long long[M + N + 3];
fill(&fp[0], &fp[M + N + 3], -1);
path = editPath(M + N + 3);
fill(path.begin(), path.end(), -1);
ONP:
do {
++p;
for (long long k=-p;k<=static_cast<long long>(delta)-1;++k) {
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
}
for (long long k=static_cast<long long>(delta)+p;k>=static_cast<long long>(delta)+1;--k) {
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
}
fp[delta+offset] = snake(static_cast<long long>(delta), fp[delta-1+offset]+1, fp[delta+1+offset]);
} while (fp[delta+offset] != static_cast<long long>(N) && pathCordinates.size() < MAX_CORDINATES_SIZE);
editDistance += static_cast<long long>(delta) + 2 * p;
long long r = path[delta+offset];
P cordinate;
editPathCordinates epc(0);
// recording edit distance only
if (editDistanceOnly) {
delete[] this->fp;
return;
}
while(r != -1) {
cordinate.x = pathCordinates[(size_t)r].x;
cordinate.y = pathCordinates[(size_t)r].y;
epc.push_back(cordinate);
r = pathCordinates[(size_t)r].k;
}
// record Longest Common Subsequence & Shortest Edit Script
if (!recordSequence(epc)) {
pathCordinates.resize(0);
epc.resize(0);
p = -1;
goto ONP;
}
delete[] this->fp;
}
/**
* print difference between A and B as an SES
*/
template < typename stream >
void printSES (stream& out) const {
sesElemVec ses_v = ses.getSequence();
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
}
void printSES (ostream& out = cout) const {
printSES< ostream >(out);
}
/**
* print differences given an SES
*/
template < typename stream >
static void printSES (const Ses< elem >& s, stream& out) {
sesElemVec ses_v = s.getSequence();
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
}
static void printSES (const Ses< elem >& s, ostream& out = cout) {
printSES< ostream >(s, out);
}
/**
* print difference between A and B as an SES with custom printer
*/
template < typename stream, template < typename SEET, typename STRT > class PT >
void printSES (stream& out) const {
sesElemVec ses_v = ses.getSequence ();
for_each (ses_v.begin (), ses_v.end(), PT < sesElem, stream > (out));
}
/**
* store difference between A and B as an SES with custom storage
*/
template < typename storedData, template < typename SEET, typename STRT > class ST >
void storeSES(storedData& sd) const {
sesElemVec ses_v = ses.getSequence();
for_each(ses_v.begin(), ses_v.end(), ST < sesElem, storedData >(sd));
}
/**
* print difference between A and B in the Unified Format
*/
template < typename stream >
void printUnifiedFormat (stream& out) const {
for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out));
}
void printUnifiedFormat (ostream& out = cout) const {
printUnifiedFormat< ostream >(out);
}
/**
* print unified format difference with given unified format hunks
*/
template < typename stream >
static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) {
for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out));
}
static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) {
printUnifiedFormat< ostream >(hunks, out);
}
/**
* compose Unified Format Hunks from Shortest Edit Script
*/
void composeUnifiedHunks () {
sesElemVec common[2];
sesElemVec change;
sesElemVec ses_v = ses.getSequence();
long long l_cnt = 1;
long long length = distance(ses_v.begin(), ses_v.end());
long long middle = 0;
bool isMiddle, isAfter;
elemInfo einfo;
long long a, b, c, d; // @@ -a,b +c,d @@
long long inc_dec_count = 0;
uniHunk< sesElem > hunk;
sesElemVec adds;
sesElemVec deletes;
isMiddle = isAfter = false;
a = b = c = d = 0;
for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) {
einfo = it->second;
switch (einfo.type) {
case SES_ADD :
middle = 0;
++inc_dec_count;
adds.push_back(*it);
if (!isMiddle) isMiddle = true;
if (isMiddle) ++d;
if (l_cnt >= length) {
joinSesVec(change, deletes);
joinSesVec(change, adds);
isAfter = true;
}
break;
case SES_DELETE :
middle = 0;
--inc_dec_count;
deletes.push_back(*it);
if (!isMiddle) isMiddle = true;
if (isMiddle) ++b;
if (l_cnt >= length) {
joinSesVec(change, deletes);
joinSesVec(change, adds);
isAfter = true;
}
break;
case SES_COMMON :
++b;++d;
if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) {
if (static_cast<long long>(common[0].size()) < DTL_CONTEXT_SIZE) {
if (a == 0 && c == 0) {
if (!wasSwapped()) {
a = einfo.beforeIdx;
c = einfo.afterIdx;
} else {
a = einfo.afterIdx;
c = einfo.beforeIdx;
}
}
common[0].push_back(*it);
} else {
rotate(common[0].begin(), common[0].begin() + 1, common[0].end());
common[0].pop_back();
common[0].push_back(*it);
++a;++c;
--b;--d;
}
}
if (isMiddle && !isAfter) {
++middle;
joinSesVec(change, deletes);
joinSesVec(change, adds);
change.push_back(*it);
if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) {
isAfter = true;
}
adds.clear();
deletes.clear();
}
break;
default :
// no through
break;
}
// compose unified format hunk
if (isAfter && !change.empty()) {
sesElemVec_iter cit = it;
long long cnt = 0;
for (long long i=0;i<DTL_SEPARATE_SIZE && (cit != ses_v.end());++i, ++cit) {
if (cit->second.type == SES_COMMON) {
++cnt;
}
}
if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) {
middle = 0;
isAfter = false;
continue;
}
if (static_cast<long long>(common[0].size()) >= DTL_SEPARATE_SIZE) {
long long c0size = static_cast<long long>(common[0].size());
rotate(common[0].begin(),
common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE,
common[0].end());
for (long long i=0;i<c0size - DTL_SEPARATE_SIZE;++i) {
common[0].pop_back();
}
a += c0size - DTL_SEPARATE_SIZE;
c += c0size - DTL_SEPARATE_SIZE;
}
if (a == 0) ++a;
if (c == 0) ++c;
if (wasSwapped()) swap(a, c);
hunk.a = a;
hunk.b = b;
hunk.c = c;
hunk.d = d;
hunk.common[0] = common[0];
hunk.change = change;
hunk.common[1] = common[1];
hunk.inc_dec_count = inc_dec_count;
uniHunks.push_back(hunk);
isMiddle = false;
isAfter = false;
common[0].clear();
common[1].clear();
adds.clear();
deletes.clear();
change.clear();
a = b = c = d = middle = inc_dec_count = 0;
}
}
}
/**
* compose ses from stream
*/
template <typename stream>
static Ses< elem > composeSesFromStream (stream& st)
{
elem line;
Ses< elem > ret;
long long x_idx, y_idx;
x_idx = y_idx = 1;
while (getline(st, line)) {
elem mark(line.begin(), line.begin() + 1);
elem e(line.begin() + 1, line.end());
if (mark == SES_MARK_DELETE) {
ret.addSequence(e, x_idx, 0, SES_DELETE);
++x_idx;
} else if (mark == SES_MARK_ADD) {
ret.addSequence(e, y_idx, 0, SES_ADD);
++y_idx;
} else if (mark == SES_MARK_COMMON) {
ret.addSequence(e, x_idx, y_idx, SES_COMMON);
++x_idx;
++y_idx;
}
}
return ret;
}
private :
/**
* initialize
*/
void init () {
M = distance(A.begin(), A.end());
N = distance(B.begin(), B.end());
if (M < N) {
swapped = false;
} else {
swap(A, B);
swap(M, N);
swapped = true;
}
editDistance = 0;
delta = N - M;
offset = M + 1;
huge = false;
trivial = false;
editDistanceOnly = false;
fp = NULL;
}
/**
* search shortest path and record the path
*/
long long snake(const long long& k, const long long& above, const long long& below) {
long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset];
long long y = max(above, below);
long long x = y - k;
while ((size_t)x < M && (size_t)y < N && (swapped ? cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) {
++x;++y;
}
path[(size_t)k+offset] = static_cast<long long>(pathCordinates.size());
if (!editDistanceOnly) {
P p;
p.x = x;p.y = y;p.k = r;
pathCordinates.push_back(p);
}
return y;
}
/**
* record SES and LCS
*/
bool recordSequence (const editPathCordinates& v) {
sequence_const_iter x(A.begin());
sequence_const_iter y(B.begin());
long long x_idx, y_idx; // line number for Unified Format
long long px_idx, py_idx; // cordinates
bool complete = false;
x_idx = y_idx = 1;
px_idx = py_idx = 0;
for (size_t i=v.size()-1;!complete;--i) {
while(px_idx < v[i].x || py_idx < v[i].y) {
if (v[i].y - v[i].x > py_idx - px_idx) {
if (!wasSwapped()) {
ses.addSequence(*y, 0, y_idx + oy, SES_ADD);
} else {
ses.addSequence(*y, y_idx + oy, 0, SES_DELETE);
}
++y;
++y_idx;
++py_idx;
} else if (v[i].y - v[i].x < py_idx - px_idx) {
if (!wasSwapped()) {
ses.addSequence(*x, x_idx + ox, 0, SES_DELETE);
} else {
ses.addSequence(*x, 0, x_idx + ox, SES_ADD);
}
++x;
++x_idx;
++px_idx;
} else {
if (!wasSwapped()) {
lcs.addSequence(*x);
ses.addSequence(*x, x_idx + ox, y_idx + oy, SES_COMMON);
} else {
lcs.addSequence(*y);
ses.addSequence(*y, y_idx + oy, x_idx + ox, SES_COMMON);
}
++x;
++y;
++x_idx;
++y_idx;
++px_idx;
++py_idx;
}
}
if (i == 0) complete = true;
}
if (x_idx > static_cast<long long>(M) && y_idx > static_cast<long long>(N)) {
// all recording succeeded
} else {
// trivial difference
if (trivialEnabled()) {
if (!wasSwapped()) {
recordOddSequence(x_idx, M, x, SES_DELETE);
recordOddSequence(y_idx, N, y, SES_ADD);
} else {
recordOddSequence(x_idx, M, x, SES_ADD);
recordOddSequence(y_idx, N, y, SES_DELETE);
}
return true;
}
// nontrivial difference
sequence A_(A.begin() + (size_t)x_idx - 1, A.end());
sequence B_(B.begin() + (size_t)y_idx - 1, B.end());
A = A_;
B = B_;
M = distance(A.begin(), A.end());
N = distance(B.begin(), B.end());
delta = N - M;
offset = M + 1;
delete[] fp;
fp = new long long[M + N + 3];
fill(&fp[0], &fp[M + N + 3], -1);
fill(path.begin(), path.end(), -1);
ox = x_idx - 1;
oy = y_idx - 1;
return false;
}
return true;
}
/**
* record odd sequence in SES
*/
void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) {
while(idx < length){
ses.addSequence(*it, idx, 0, et);
++it;
++idx;
++editDistance;
}
ses.addSequence(*it, idx, 0, et);
++editDistance;
}
/**
* join SES vectors
*/
void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const {
if (!s2.empty()) {
for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
s1.push_back(*vit);
}
}
}
/**
* check if the sequences have been swapped
*/
bool inline wasSwapped () const {
return swapped;
}
};
}
#endif // DTL_DIFF_H

View File

@@ -1,245 +0,0 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_DIFF3_H
#define DTL_DIFF3_H
namespace dtl {
/**
* diff3 class template
* sequence must support random_access_iterator.
*/
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
class Diff3
{
private:
dtl_typedefs(elem, sequence)
sequence A;
sequence B;
sequence C;
sequence S;
Diff< elem, sequence, comparator > diff_ba;
Diff< elem, sequence, comparator > diff_bc;
bool conflict;
elem csepabegin;
elem csepa;
elem csepaend;
public :
Diff3 () {}
Diff3 (const sequence& a,
const sequence& b,
const sequence& c) : A(a), B(b), C(c),
diff_ba(b, a), diff_bc(b, c),
conflict(false) {}
~Diff3 () {}
bool isConflict () const {
return conflict;
}
sequence getMergedSequence () const {
return S;
}
/**
* merge changes B and C into A
*/
bool merge () {
if (diff_ba.getEditDistance() == 0) { // A == B
if (diff_bc.getEditDistance() == 0) { // A == B == C
S = B;
return true;
}
S = C;
return true;
} else { // A != B
if (diff_bc.getEditDistance() == 0) { // A != B == C
S = A;
return true;
} else { // A != B != C
S = merge_();
if (isConflict()) { // conflict occured
return false;
}
}
}
return true;
}
/**
* compose differences
*/
void compose () {
diff_ba.compose();
diff_bc.compose();
}
private :
/**
* merge implementation
*/
sequence merge_ () {
elemVec seq;
Ses< elem > ses_ba = diff_ba.getSes();
Ses< elem > ses_bc = diff_bc.getSes();
sesElemVec ses_ba_v = ses_ba.getSequence();
sesElemVec ses_bc_v = ses_bc.getSequence();
sesElemVec_iter ba_it = ses_ba_v.begin();
sesElemVec_iter bc_it = ses_bc_v.begin();
sesElemVec_iter ba_end = ses_ba_v.end();
sesElemVec_iter bc_end = ses_bc_v.end();
while (!isEnd(ba_end, ba_it) || !isEnd(bc_end, bc_it)) {
while (true) {
if (!isEnd(ba_end, ba_it) &&
!isEnd(bc_end, bc_it) &&
ba_it->first == bc_it->first &&
ba_it->second.type == SES_COMMON &&
bc_it->second.type == SES_COMMON) {
// do nothing
} else {
break;
}
if (!isEnd(ba_end, ba_it)) seq.push_back(ba_it->first);
else if (!isEnd(bc_end, bc_it)) seq.push_back(bc_it->first);
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
}
if (isEnd(ba_end, ba_it) || isEnd(bc_end, bc_it)) break;
if ( ba_it->second.type == SES_COMMON
&& bc_it->second.type == SES_DELETE) {
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_COMMON &&
bc_it->second.type == SES_ADD) {
seq.push_back(bc_it->first);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_COMMON) {
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_DELETE) {
if (ba_it->first == bc_it->first) {
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else {
// conflict
conflict = true;
return B;
}
} else if (ba_it->second.type == SES_DELETE &&
bc_it->second.type == SES_ADD) {
// conflict
conflict = true;
return B;
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_COMMON) {
seq.push_back(ba_it->first);
forwardUntilEnd(ba_end, ba_it);
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_DELETE) {
// conflict
conflict = true;
return B;
} else if (ba_it->second.type == SES_ADD &&
bc_it->second.type == SES_ADD) {
if (ba_it->first == bc_it->first) {
seq.push_back(ba_it->first);
forwardUntilEnd(ba_end, ba_it);
forwardUntilEnd(bc_end, bc_it);
} else {
// conflict
conflict = true;
return B;
}
}
}
if (isEnd(ba_end, ba_it)) {
addDecentSequence(bc_end, bc_it, seq);
} else if (isEnd(bc_end, bc_it)) {
addDecentSequence(ba_end, ba_it, seq);
}
sequence mergedSeq(seq.begin(), seq.end());
return mergedSeq;
}
/**
* join elem vectors
*/
void inline joinElemVec (elemVec& s1, elemVec& s2) const {
if (!s2.empty()) {
for (elemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
s1.push_back(*vit);
}
}
}
/**
* check if sequence is at end
*/
template <typename T_iter>
bool inline isEnd (const T_iter& end, const T_iter& it) const {
return it == end ? true : false;
}
/**
* increment iterator until iterator is at end
*/
template <typename T_iter>
void inline forwardUntilEnd (const T_iter& end, T_iter& it) const {
if (!isEnd(end, it)) ++it;
}
/**
* add elements whose SES's type is ADD
*/
void inline addDecentSequence (const sesElemVec_iter& end, sesElemVec_iter& it, elemVec& seq) const {
while (!isEnd(end, it)) {
if (it->second.type == SES_ADD) seq.push_back(it->first);
++it;
}
}
};
}
#endif // DTL_DIFF3_H

View File

@@ -1,55 +0,0 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_LCS_H
#define DTL_LCS_H
namespace dtl {
/**
* Longest Common Subsequence template class
*/
template <typename elem>
class Lcs : public Sequence< elem >
{
public :
Lcs () {}
~Lcs () {}
};
}
#endif // DTL_LCS_H

View File

@@ -1,65 +0,0 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_SEQUENCE_H
#define DTL_SEQUENCE_H
namespace dtl {
/**
* sequence class template
*/
template <typename elem>
class Sequence
{
public :
typedef vector< elem > elemVec;
Sequence () {}
virtual ~Sequence () {}
elemVec getSequence () const {
return sequence;
}
void addSequence (elem e) {
sequence.push_back(e);
}
protected :
elemVec sequence;
};
}
#endif // DTL_SEQUENCE_H

View File

@@ -1,132 +0,0 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_SES_H
#define DTL_SES_H
namespace dtl {
/**
* Shortest Edit Script template class
*/
template <typename elem>
class Ses : public Sequence< elem >
{
private :
typedef pair< elem, elemInfo > sesElem;
typedef vector< sesElem > sesElemVec;
public :
Ses () : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(false) {
nextDeleteIdx = 0;
}
Ses (bool moveDel) : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(moveDel) {
nextDeleteIdx = 0;
}
~Ses () {}
bool isOnlyAdd () const {
return onlyAdd;
}
bool isOnlyDelete () const {
return onlyDelete;
}
bool isOnlyCopy () const {
return onlyCopy;
}
bool isOnlyOneOperation () const {
return isOnlyAdd() || isOnlyDelete() || isOnlyCopy();
}
bool isChange () const {
return !onlyCopy;
}
using Sequence< elem >::addSequence;
void addSequence (elem e, long long beforeIdx, long long afterIdx, const edit_t type) {
elemInfo info;
info.beforeIdx = beforeIdx;
info.afterIdx = afterIdx;
info.type = type;
sesElem pe(e, info);
if (!deletesFirst) {
sequence.push_back(pe);
}
switch (type) {
case SES_DELETE:
onlyCopy = false;
onlyAdd = false;
if (deletesFirst) {
sequence.insert(sequence.begin() + nextDeleteIdx, pe);
nextDeleteIdx++;
}
break;
case SES_COMMON:
onlyAdd = false;
onlyDelete = false;
if (deletesFirst) {
sequence.push_back(pe);
nextDeleteIdx = sequence.size();
}
break;
case SES_ADD:
onlyDelete = false;
onlyCopy = false;
if (deletesFirst) {
sequence.push_back(pe);
}
break;
}
}
sesElemVec getSequence () const {
return sequence;
}
private :
sesElemVec sequence;
bool onlyAdd;
bool onlyDelete;
bool onlyCopy;
bool deletesFirst;
size_t nextDeleteIdx;
};
}
#endif // DTL_SES_H

View File

@@ -1,47 +0,0 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DTL_H
#define DTL_H
#include "variables.hpp"
#include "functors.hpp"
#include "Sequence.hpp"
#include "Lcs.hpp"
#include "Ses.hpp"
#include "Diff.hpp"
#include "Diff3.hpp"
#endif // DTL_H

View File

@@ -1,151 +0,0 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_FUNCTORS_H
#define DTL_FUNCTORS_H
namespace dtl {
/**
* printer class template
*/
template <typename sesElem, typename stream = ostream >
class Printer
{
public :
Printer () : out_(cout) {}
Printer (stream& out) : out_(out) {}
virtual ~Printer () {}
virtual void operator() (const sesElem& se) const = 0;
protected :
stream& out_;
};
/**
* common element printer class template
*/
template <typename sesElem, typename stream = ostream >
class CommonPrinter : public Printer < sesElem, stream >
{
public :
CommonPrinter () : Printer < sesElem, stream > () {}
CommonPrinter (stream& out) : Printer < sesElem, stream > (out) {}
~CommonPrinter () {}
void operator() (const sesElem& se) const {
this->out_ << SES_MARK_COMMON << se.first << endl;
}
};
/**
* ses element printer class template
*/
template <typename sesElem, typename stream = ostream >
class ChangePrinter : public Printer < sesElem, stream >
{
public :
ChangePrinter () : Printer < sesElem, stream > () {}
ChangePrinter (stream& out) : Printer < sesElem, stream > (out) {}
~ChangePrinter () {}
void operator() (const sesElem& se) const {
switch (se.second.type) {
case SES_ADD:
this->out_ << SES_MARK_ADD << se.first << endl;
break;
case SES_DELETE:
this->out_ << SES_MARK_DELETE << se.first << endl;
break;
case SES_COMMON:
this->out_ << SES_MARK_COMMON << se.first << endl;
break;
}
}
};
/**
* unified format element printer class template
*/
template <typename sesElem, typename stream = ostream >
class UniHunkPrinter
{
public :
UniHunkPrinter () : out_(cout) {}
UniHunkPrinter (stream& out) : out_(out) {}
~UniHunkPrinter () {}
void operator() (const uniHunk< sesElem >& hunk) const {
out_ << "@@"
<< " -" << hunk.a << "," << hunk.b
<< " +" << hunk.c << "," << hunk.d
<< " @@" << endl;
for_each(hunk.common[0].begin(), hunk.common[0].end(), CommonPrinter< sesElem, stream >(out_));
for_each(hunk.change.begin(), hunk.change.end(), ChangePrinter< sesElem, stream >(out_));
for_each(hunk.common[1].begin(), hunk.common[1].end(), CommonPrinter< sesElem, stream >(out_));
}
private :
stream& out_;
};
/**
* storage class template
*/
template <typename sesElem, typename storedData >
class Storage
{
public:
Storage(storedData& sd) : storedData_(sd) {}
virtual ~Storage() {}
virtual void operator() (const sesElem& se) const = 0;
protected:
storedData& storedData_;
};
/**
* compare class template
*/
template <typename elem>
class Compare
{
public :
Compare () {}
virtual ~Compare () {}
virtual inline bool impl (const elem& e1, const elem& e2) const {
return e1 == e2;
}
};
}
#endif // DTL_FUNCTORS_H

View File

@@ -1,142 +0,0 @@
/**
dtl -- Diff Template Library
In short, Diff Template Library is distributed under so called "BSD license",
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the authors nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* If you use this library, you must include dtl.hpp only. */
#ifndef DTL_VARIABLES_H
#define DTL_VARIABLES_H
#include <vector>
#include <list>
#include <string>
#include <algorithm>
#include <iostream>
namespace dtl {
using std::vector;
using std::string;
using std::pair;
using std::ostream;
using std::list;
using std::for_each;
using std::distance;
using std::fill;
using std::cout;
using std::endl;
using std::rotate;
using std::swap;
using std::max;
/**
* version string
*/
const string version = "1.20";
/**
* type of edit for SES
*/
typedef int edit_t;
const edit_t SES_DELETE = -1;
const edit_t SES_COMMON = 0;
const edit_t SES_ADD = 1;
/**
* mark of SES
*/
#define SES_MARK_DELETE "-"
#define SES_MARK_COMMON " "
#define SES_MARK_ADD "+"
/**
* info for Unified Format
*/
typedef struct eleminfo {
long long beforeIdx; // index of prev sequence
long long afterIdx; // index of after sequence
edit_t type; // type of edit(Add, Delete, Common)
bool operator==(const eleminfo& other) const{
return (this->beforeIdx == other.beforeIdx && this->afterIdx == other.afterIdx && this->type == other.type);
}
} elemInfo;
const long long DTL_SEPARATE_SIZE = 3;
const long long DTL_CONTEXT_SIZE = 3;
/**
* cordinate for registering route
*/
typedef struct Point {
long long x; // x cordinate
long long y; // y cordinate
long long k; // vertex
} P;
/**
* limit of cordinate size
*/
const unsigned long long MAX_CORDINATES_SIZE = 2000000;
typedef vector< long long > editPath;
typedef vector< P > editPathCordinates;
/**
* Structure of Unified Format Hunk
*/
template <typename sesElem>
struct uniHunk {
long long a, b, c, d; // @@ -a,b +c,d @@
vector< sesElem > common[2]; // anteroposterior commons on changes
vector< sesElem > change; // changes
long long inc_dec_count; // count of increace and decrease
};
#define dtl_typedefs(elem, sequence) \
typedef pair< elem, elemInfo > sesElem; \
typedef vector< sesElem > sesElemVec; \
typedef vector< uniHunk< sesElem > > uniHunkVec; \
typedef list< elem > elemList; \
typedef vector< elem > elemVec; \
typedef typename uniHunkVec::iterator uniHunkVec_iter; \
typedef typename sesElemVec::iterator sesElemVec_iter; \
typedef typename elemList::iterator elemList_iter; \
typedef typename sequence::iterator sequence_iter; \
typedef typename sequence::const_iterator sequence_const_iter; \
typedef typename elemVec::iterator elemVec_iter;
}
#endif // DTL_VARIABLES_H

View File

@@ -1,17 +0,0 @@
cmake_minimum_required(VERSION 3.10)
project(OpenCLVectorAdd)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
find_package(OpenCL REQUIRED)
add_executable(OpenCLVectorAdd OpenCLVectorAdd.cpp)
add_library(TracyClient STATIC ../../public/TracyClient.cpp
../../public/tracy/TracyOpenCL.hpp)
target_include_directories(TracyClient PUBLIC ../../public/tracy)
target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE=1)
target_link_libraries(OpenCLVectorAdd PUBLIC OpenCL::OpenCL TracyClient ${CMAKE_DL_LIBS} Threads::Threads)

View File

@@ -1,220 +0,0 @@
#include <algorithm>
#include <iostream>
#include <cassert>
#include <string>
#include <vector>
#include <numeric>
#include <CL/cl.h>
#include <Tracy.hpp>
#include <TracyOpenCL.hpp>
#define CL_ASSERT(err) \
if((err) != CL_SUCCESS) \
{ \
std::cerr << "OpenCL Call Returned " << err << std::endl; \
assert(false); \
}
const char kernelSource[] =
" void __kernel vectorAdd(global float* C, global float* A, global float* B, int N) "
" { "
" int i = get_global_id(0); "
" if (i < N) { "
" C[i] = A[i] + B[i]; "
" } "
" } ";
int main()
{
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue commandQueue;
cl_kernel vectorAddKernel;
cl_program program;
cl_int err;
cl_mem bufferA, bufferB, bufferC;
TracyCLCtx tracyCLCtx;
{
ZoneScopedN("OpenCL Init");
cl_uint numPlatforms = 0;
CL_ASSERT(clGetPlatformIDs(0, nullptr, &numPlatforms));
if (numPlatforms == 0)
{
std::cerr << "Cannot find OpenCL platform to run this application" << std::endl;
return 1;
}
CL_ASSERT(clGetPlatformIDs(1, &platform, nullptr));
size_t platformNameBufferSize = 0;
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, nullptr, &platformNameBufferSize));
std::string platformName(platformNameBufferSize, '\0');
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformNameBufferSize, &platformName[0], nullptr));
std::cout << "OpenCL Platform: " << platformName << std::endl;
CL_ASSERT(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr));
size_t deviceNameBufferSize = 0;
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &deviceNameBufferSize));
std::string deviceName(deviceNameBufferSize, '\0');
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, deviceNameBufferSize, &deviceName[0], nullptr));
std::cout << "OpenCL Device: " << deviceName << std::endl;
err = CL_SUCCESS;
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
CL_ASSERT(err);
size_t kernelSourceLength = sizeof(kernelSource);
const char* kernelSourceArray = { kernelSource };
program = clCreateProgramWithSource(context, 1, &kernelSourceArray, &kernelSourceLength, &err);
CL_ASSERT(err);
if (clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr) != CL_SUCCESS)
{
size_t programBuildLogBufferSize = 0;
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &programBuildLogBufferSize));
std::string programBuildLog(programBuildLogBufferSize, '\0');
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, programBuildLogBufferSize, &programBuildLog[0], nullptr));
std::clog << programBuildLog << std::endl;
return 1;
}
vectorAddKernel = clCreateKernel(program, "vectorAdd", &err);
CL_ASSERT(err);
commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
CL_ASSERT(err);
}
tracyCLCtx = TracyCLContext(context, device);
size_t N = 10 * 1024 * 1024 / sizeof(float); // 10MB of floats
std::vector<float> hostA, hostB, hostC;
{
ZoneScopedN("Host Data Init");
hostA.resize(N);
hostB.resize(N);
hostC.resize(N);
std::iota(std::begin(hostA), std::end(hostA), 0.0f);
std::iota(std::begin(hostB), std::end(hostB), 0.0f);
}
{
ZoneScopedN("Host to Device Memory Copy");
bufferA = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
CL_ASSERT(err);
bufferB = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
CL_ASSERT(err);
bufferC = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
CL_ASSERT(err);
cl_event writeBufferAEvent, writeBufferBEvent;
{
ZoneScopedN("Write Buffer A");
TracyCLZoneS(tracyCLCtx, "Write BufferA", 5);
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_FALSE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent));
TracyCLZoneSetEvent(writeBufferAEvent);
}
{
ZoneScopedN("Write Buffer B");
TracyCLZone(tracyCLCtx, "Write BufferB");
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_FALSE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent));
TracyCLZoneSetEvent(writeBufferBEvent);
}
}
cl_int clN = static_cast<cl_int>(N);
const int numFrames = 10;
const int launchsPerFrame = 10;
constexpr int numLaunchs = numFrames * launchsPerFrame;
std::vector<cl_event> kernelLaunchEvts;
kernelLaunchEvts.reserve(numLaunchs);
for (int i = 0; i < numFrames; ++i)
{
FrameMark;
for (int j = 0; j < launchsPerFrame; ++j) {
ZoneScopedN("VectorAdd Kernel Launch");
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(cl_int), &clN));
cl_event vectorAddKernelEvent;
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
TracyCLZoneSetEvent(vectorAddKernelEvent);
CL_ASSERT(clRetainEvent(vectorAddKernelEvent));
kernelLaunchEvts.push_back(vectorAddKernelEvent);
std::cout << "VectorAdd Kernel Enqueued" << std::endl;
}
{
// Wait frame events to be finished
ZoneScopedN("clFinish");
CL_ASSERT(clFinish(commandQueue));
}
// You should collect on each 'frame' ends, so that streaming can be achieved.
TracyCLCollect(tracyCLCtx);
}
{
ZoneScopedN("Device to Host Memory Copy");
TracyCLZone(tracyCLCtx, "Read Buffer C");
cl_event readbufferCEvent;
CL_ASSERT(clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0, N * sizeof(float), hostC.data(), 0, nullptr, &readbufferCEvent));
TracyCLZoneSetEvent(readbufferCEvent);
}
CL_ASSERT(clFinish(commandQueue));
std::vector<float> durations(kernelLaunchEvts.size());
for (int i=0; i<kernelLaunchEvts.size(); i++) {
cl_event evt = kernelLaunchEvts[i];
cl_ulong start;
cl_ulong end;
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr));
CL_ASSERT(clReleaseEvent(evt));
durations[i] = (end - start) * 0.001f;
std::cout << "VectorAdd Kernel " << i << " tooks " << static_cast<int>(durations[i]) << "us" << std::endl;
};
float avg = std::accumulate(durations.cbegin(), durations.cend(), 0.0f) / durations.size();
float stddev2 = std::accumulate(durations.cbegin(), durations.cend(), 0.0f, [avg](const float& acc, const float& v) {
auto d = v - avg;
return acc + d*d;
}) / (durations.size() - 1.0f);
std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl;
// User should ensure all events are finished, in this case, collect after the clFinish will do the trick.
TracyCLCollect(tracyCLCtx);
{
ZoneScopedN("Checking results");
for (int i = 0; i < N; ++i)
{
assert(hostC[i] == hostA[i] + hostB[i]);
}
}
std::cout << "Results are correct!" << std::endl;
TracyCLDestroy(tracyCLCtx);
return 0;
}

View File

@@ -1 +0,0 @@
Windows/Compiled*Shader.h

View File

@@ -1,4 +0,0 @@
https://github.com/aras-p/ToyPathTracer
Modified to render only 10 frames. Client part requires 12 GB, server part
requires 6.4 GB.

View File

@@ -1,33 +0,0 @@
#if defined(__APPLE__) && !defined(__METAL_VERSION__)
#include <TargetConditionals.h>
#endif
#define kBackbufferWidth 1280
#define kBackbufferHeight 720
#if defined(__EMSCRIPTEN__)
#define CPU_CAN_DO_SIMD 0
#define CPU_CAN_DO_THREADS 0
#else
#define CPU_CAN_DO_SIMD 1
#define CPU_CAN_DO_THREADS 1
#endif
#define DO_SAMPLES_PER_PIXEL 4
#define DO_ANIMATE_SMOOTHING 0.9f
#define DO_LIGHT_SAMPLING 1
#define DO_MITSUBA_COMPARE 0
// Should path tracing be done on the GPU with a compute shader?
#define DO_COMPUTE_GPU 0
#define kCSGroupSizeX 8
#define kCSGroupSizeY 8
#define kCSMaxObjects 64
// Should float3 struct use SSE/NEON?
#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && CPU_CAN_DO_SIMD && 1)
// Should HitSpheres function use SSE/NEON?
#define DO_HIT_SPHERES_SIMD (CPU_CAN_DO_SIMD && 1)

View File

@@ -1,192 +0,0 @@
#pragma once
#if defined(_MSC_VER)
#define VM_INLINE __forceinline
#else
#define VM_INLINE __attribute__((unused, always_inline, nodebug)) inline
#endif
#define kSimdWidth 4
#if !defined(__arm__) && !defined(__arm64__) && !defined(__EMSCRIPTEN__)
// ---- SSE implementation
#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#define SHUFFLE4(V, X,Y,Z,W) float4(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X)))
struct float4
{
VM_INLINE float4() {}
VM_INLINE explicit float4(const float *p) { m = _mm_loadu_ps(p); }
VM_INLINE explicit float4(float x, float y, float z, float w) { m = _mm_set_ps(w, z, y, x); }
VM_INLINE explicit float4(float v) { m = _mm_set_ps1(v); }
VM_INLINE explicit float4(__m128 v) { m = v; }
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
VM_INLINE float getW() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3))); }
__m128 m;
};
typedef float4 bool4;
VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = _mm_add_ps(a.m, b.m); return a; }
VM_INLINE float4 operator- (float4 a, float4 b) { a.m = _mm_sub_ps(a.m, b.m); return a; }
VM_INLINE float4 operator* (float4 a, float4 b) { a.m = _mm_mul_ps(a.m, b.m); return a; }
VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = _mm_cmpeq_ps(a.m, b.m); return a; }
VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = _mm_cmpneq_ps(a.m, b.m); return a; }
VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = _mm_cmplt_ps(a.m, b.m); return a; }
VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = _mm_cmpgt_ps(a.m, b.m); return a; }
VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = _mm_cmple_ps(a.m, b.m); return a; }
VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = _mm_cmpge_ps(a.m, b.m); return a; }
VM_INLINE bool4 operator&(bool4 a, bool4 b) { a.m = _mm_and_ps(a.m, b.m); return a; }
VM_INLINE bool4 operator|(bool4 a, bool4 b) { a.m = _mm_or_ps(a.m, b.m); return a; }
VM_INLINE float4 operator- (float4 a) { a.m = _mm_xor_ps(a.m, _mm_set1_ps(-0.0f)); return a; }
VM_INLINE float4 min(float4 a, float4 b) { a.m = _mm_min_ps(a.m, b.m); return a; }
VM_INLINE float4 max(float4 a, float4 b) { a.m = _mm_max_ps(a.m, b.m); return a; }
VM_INLINE float hmin(float4 v)
{
v = min(v, SHUFFLE4(v, 2, 3, 0, 0));
v = min(v, SHUFFLE4(v, 1, 0, 0, 0));
return v.getX();
}
// Returns a 4-bit code where bit0..bit3 is X..W
VM_INLINE unsigned mask(float4 v) { return _mm_movemask_ps(v.m); }
// Once we have a comparison, we can branch based on its results:
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
// "select", i.e. hibit(cond) ? b : a
// on SSE4.1 and up this can be done easily via "blend" instruction;
// on older SSEs has to do a bunch of hoops, see
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
{
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
a.m = _mm_blendv_ps(a.m, b.m, cond.m);
#else
__m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31));
a.m = _mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m));
#endif
return a;
}
VM_INLINE __m128i select(__m128i a, __m128i b, bool4 cond)
{
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
return _mm_blendv_epi8(a, b, _mm_castps_si128(cond.m));
#else
__m128i d = _mm_srai_epi32(_mm_castps_si128(cond.m), 31);
return _mm_or_si128(_mm_and_si128(d, b), _mm_andnot_si128(d, a));
#endif
}
VM_INLINE float4 sqrtf(float4 v) { return float4(_mm_sqrt_ps(v.m)); }
#elif !defined(__EMSCRIPTEN__)
// ---- NEON implementation
#define USE_NEON 1
#include <arm_neon.h>
struct float4
{
VM_INLINE float4() {}
VM_INLINE explicit float4(const float *p) { m = vld1q_f32(p); }
VM_INLINE explicit float4(float x, float y, float z, float w) { float v[4] = {x, y, z, w}; m = vld1q_f32(v); }
VM_INLINE explicit float4(float v) { m = vdupq_n_f32(v); }
VM_INLINE explicit float4(float32x4_t v) { m = v; }
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
VM_INLINE float getW() const { return vgetq_lane_f32(m, 3); }
float32x4_t m;
};
typedef float4 bool4;
VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = vaddq_f32(a.m, b.m); return a; }
VM_INLINE float4 operator- (float4 a, float4 b) { a.m = vsubq_f32(a.m, b.m); return a; }
VM_INLINE float4 operator* (float4 a, float4 b) { a.m = vmulq_f32(a.m, b.m); return a; }
VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = vceqq_f32(a.m, b.m); return a; }
VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = a.m = vmvnq_u32(vceqq_f32(a.m, b.m)); return a; }
VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = vcltq_f32(a.m, b.m); return a; }
VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = vcgtq_f32(a.m, b.m); return a; }
VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = vcleq_f32(a.m, b.m); return a; }
VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = vcgeq_f32(a.m, b.m); return a; }
VM_INLINE bool4 operator&(bool4 a, bool4 b) { a.m = vandq_u32(a.m, b.m); return a; }
VM_INLINE bool4 operator|(bool4 a, bool4 b) { a.m = vorrq_u32(a.m, b.m); return a; }
VM_INLINE float4 operator- (float4 a) { a.m = vnegq_f32(a.m); return a; }
VM_INLINE float4 min(float4 a, float4 b) { a.m = vminq_f32(a.m, b.m); return a; }
VM_INLINE float4 max(float4 a, float4 b) { a.m = vmaxq_f32(a.m, b.m); return a; }
VM_INLINE float hmin(float4 v)
{
float32x2_t minOfHalfs = vpmin_f32(vget_low_f32(v.m), vget_high_f32(v.m));
float32x2_t minOfMinOfHalfs = vpmin_f32(minOfHalfs, minOfHalfs);
return vget_lane_f32(minOfMinOfHalfs, 0);
}
// Returns a 4-bit code where bit0..bit3 is X..W
VM_INLINE unsigned mask(float4 v)
{
static const uint32x4_t movemask = { 1, 2, 4, 8 };
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
uint32x4_t t1 = vtstq_u32(t0, highbit);
uint32x4_t t2 = vandq_u32(t1, movemask);
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
}
// Once we have a comparison, we can branch based on its results:
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
// "select", i.e. hibit(cond) ? b : a
// on SSE4.1 and up this can be done easily via "blend" instruction;
// on older SSEs has to do a bunch of hoops, see
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
{
a.m = vbslq_f32(cond.m, b.m, a.m);
return a;
}
VM_INLINE int32x4_t select(int32x4_t a, int32x4_t b, bool4 cond)
{
return vbslq_f32(cond.m, b, a);
}
VM_INLINE float4 sqrtf(float4 v)
{
float32x4_t V = v.m;
float32x4_t S0 = vrsqrteq_f32(V);
float32x4_t P0 = vmulq_f32( V, S0 );
float32x4_t R0 = vrsqrtsq_f32( P0, S0 );
float32x4_t S1 = vmulq_f32( S0, R0 );
float32x4_t P1 = vmulq_f32( V, S1 );
float32x4_t R1 = vrsqrtsq_f32( P1, S1 );
float32x4_t S2 = vmulq_f32( S1, R1 );
float32x4_t P2 = vmulq_f32( V, S2 );
float32x4_t R2 = vrsqrtsq_f32( P2, S2 );
float32x4_t S3 = vmulq_f32( S2, R2 );
return float4(vmulq_f32(V, S3));
}
VM_INLINE float4 splatX(float32x4_t v) { return float4(vdupq_lane_f32(vget_low_f32(v), 0)); }
VM_INLINE float4 splatY(float32x4_t v) { return float4(vdupq_lane_f32(vget_low_f32(v), 1)); }
VM_INLINE float4 splatZ(float32x4_t v) { return float4(vdupq_lane_f32(vget_high_f32(v), 0)); }
VM_INLINE float4 splatW(float32x4_t v) { return float4(vdupq_lane_f32(vget_high_f32(v), 1)); }
#endif

View File

@@ -1,203 +0,0 @@
#include "Maths.h"
#include <stdlib.h>
#include <stdint.h>
static uint32_t XorShift32(uint32_t& state)
{
uint32_t x = state;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 15;
state = x;
return x;
}
float RandomFloat01(uint32_t& state)
{
return (XorShift32(state) & 0xFFFFFF) / 16777216.0f;
}
float3 RandomInUnitDisk(uint32_t& state)
{
float3 p;
do
{
p = 2.0 * float3(RandomFloat01(state),RandomFloat01(state),0) - float3(1,1,0);
} while (dot(p,p) >= 1.0);
return p;
}
float3 RandomInUnitSphere(uint32_t& state)
{
float3 p;
do {
p = 2.0*float3(RandomFloat01(state),RandomFloat01(state),RandomFloat01(state)) - float3(1,1,1);
} while (sqLength(p) >= 1.0);
return p;
}
float3 RandomUnitVector(uint32_t& state)
{
float z = RandomFloat01(state) * 2.0f - 1.0f;
float a = RandomFloat01(state) * 2.0f * kPI;
float r = sqrtf(1.0f - z * z);
float x = r * cosf(a);
float y = r * sinf(a);
return float3(x, y, z);
}
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit)
{
#if DO_HIT_SPHERES_SIMD
float4 hitT = float4(tMax);
#if USE_NEON
int32x4_t id = vdupq_n_s32(-1);
#else
__m128i id = _mm_set1_epi32(-1);
#endif
#if DO_FLOAT3_WITH_SIMD && !USE_NEON
float4 rOrigX = SHUFFLE4(r.orig, 0, 0, 0, 0);
float4 rOrigY = SHUFFLE4(r.orig, 1, 1, 1, 1);
float4 rOrigZ = SHUFFLE4(r.orig, 2, 2, 2, 2);
float4 rDirX = SHUFFLE4(r.dir, 0, 0, 0, 0);
float4 rDirY = SHUFFLE4(r.dir, 1, 1, 1, 1);
float4 rDirZ = SHUFFLE4(r.dir, 2, 2, 2, 2);
#elif DO_FLOAT3_WITH_SIMD
float4 rOrigX = splatX(r.orig.m);
float4 rOrigY = splatY(r.orig.m);
float4 rOrigZ = splatZ(r.orig.m);
float4 rDirX = splatX(r.dir.m);
float4 rDirY = splatY(r.dir.m);
float4 rDirZ = splatZ(r.dir.m);
#else
float4 rOrigX = float4(r.orig.x);
float4 rOrigY = float4(r.orig.y);
float4 rOrigZ = float4(r.orig.z);
float4 rDirX = float4(r.dir.x);
float4 rDirY = float4(r.dir.y);
float4 rDirZ = float4(r.dir.z);
#endif
float4 tMin4 = float4(tMin);
#if USE_NEON
int32x4_t curId = vcombine_u32(vcreate_u32(0ULL | (1ULL<<32)), vcreate_u32(2ULL | (3ULL<<32)));
#else
__m128i curId = _mm_set_epi32(3, 2, 1, 0);
#endif
// process 4 spheres at once
for (int i = 0; i < spheres.simdCount; i += kSimdWidth)
{
// load data for 4 spheres
float4 sCenterX = float4(spheres.centerX + i);
float4 sCenterY = float4(spheres.centerY + i);
float4 sCenterZ = float4(spheres.centerZ + i);
float4 sSqRadius = float4(spheres.sqRadius + i);
// note: we flip this vector and calculate -b (nb) since that happens to be slightly preferable computationally
float4 coX = sCenterX - rOrigX;
float4 coY = sCenterY - rOrigY;
float4 coZ = sCenterZ - rOrigZ;
float4 nb = coX * rDirX + coY * rDirY + coZ * rDirZ;
float4 c = coX * coX + coY * coY + coZ * coZ - sSqRadius;
float4 discr = nb * nb - c;
bool4 discrPos = discr > float4(0.0f);
// if ray hits any of the 4 spheres
if (any(discrPos))
{
float4 discrSq = sqrtf(discr);
// ray could hit spheres at t0 & t1
float4 t0 = nb - discrSq;
float4 t1 = nb + discrSq;
float4 t = select(t1, t0, t0 > tMin4); // if t0 is above min, take it (since it's the earlier hit); else try t1.
bool4 msk = discrPos & (t > tMin4) & (t < hitT);
// if hit, take it
id = select(id, curId, msk);
hitT = select(hitT, t, msk);
}
#if USE_NEON
curId = vaddq_s32(curId, vdupq_n_s32(kSimdWidth));
#else
curId = _mm_add_epi32(curId, _mm_set1_epi32(kSimdWidth));
#endif
}
// now we have up to 4 hits, find and return closest one
float minT = hmin(hitT);
if (minT < tMax) // any actual hits?
{
int minMask = mask(hitT == float4(minT));
if (minMask != 0)
{
int id_scalar[4];
float hitT_scalar[4];
#if USE_NEON
vst1q_s32(id_scalar, id);
vst1q_f32(hitT_scalar, hitT.m);
#else
_mm_storeu_si128((__m128i *)id_scalar, id);
_mm_storeu_ps(hitT_scalar, hitT.m);
#endif
// In general, you would do this with a bit scan (first set/trailing zero count).
// But who cares, it's only 16 options.
static const int laneId[16] =
{
0, 0, 1, 0, // 00xx
2, 0, 1, 0, // 01xx
3, 0, 1, 0, // 10xx
2, 0, 1, 0, // 11xx
};
int lane = laneId[minMask];
int hitId = id_scalar[lane];
float finalHitT = hitT_scalar[lane];
outHit.pos = r.pointAt(finalHitT);
outHit.normal = (outHit.pos - float3(spheres.centerX[hitId], spheres.centerY[hitId], spheres.centerZ[hitId])) * spheres.invRadius[hitId];
outHit.t = finalHitT;
return hitId;
}
}
return -1;
#else // #if DO_HIT_SPHERES_SIMD
float hitT = tMax;
int id = -1;
for (int i = 0; i < spheres.count; ++i)
{
float coX = spheres.centerX[i] - r.orig.getX();
float coY = spheres.centerY[i] - r.orig.getY();
float coZ = spheres.centerZ[i] - r.orig.getZ();
float nb = coX * r.dir.getX() + coY * r.dir.getY() + coZ * r.dir.getZ();
float c = coX * coX + coY * coY + coZ * coZ - spheres.sqRadius[i];
float discr = nb * nb - c;
if (discr > 0)
{
float discrSq = sqrtf(discr);
// Try earlier t
float t = nb - discrSq;
if (t <= tMin) // before min, try later t!
t = nb + discrSq;
if (t > tMin && t < hitT)
{
id = i;
hitT = t;
}
}
}
if (id != -1)
{
outHit.pos = r.pointAt(hitT);
outHit.normal = (outHit.pos - float3(spheres.centerX[id], spheres.centerY[id], spheres.centerZ[id])) * spheres.invRadius[id];
outHit.t = hitT;
return id;
}
else
return -1;
#endif // #else of #if DO_HIT_SPHERES_SIMD
}

View File

@@ -1,436 +0,0 @@
#pragma once
#include <math.h>
#include <assert.h>
#include <stdint.h>
#include "Config.h"
#include "MathSimd.h"
#define kPI 3.1415926f
// SSE/SIMD vector largely based on http://www.codersnotes.com/notes/maths-lib-2016/
#if DO_FLOAT3_WITH_SIMD
#if !defined(__arm__) && !defined(__arm64__)
// ---- SSE implementation
// SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz).
// SHUFFLE3(v, 0,0,0) splats the X (v.xxx).
#define SHUFFLE3(V, X,Y,Z) float3(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(Z,Z,Y,X)))
struct float3
{
VM_INLINE float3() {}
VM_INLINE explicit float3(const float *p) { m = _mm_set_ps(p[2], p[2], p[1], p[0]); }
VM_INLINE explicit float3(float x, float y, float z) { m = _mm_set_ps(z, z, y, x); }
VM_INLINE explicit float3(float v) { m = _mm_set1_ps(v); }
VM_INLINE explicit float3(__m128 v) { m = v; }
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
VM_INLINE float3 yzx() const { return SHUFFLE3(*this, 1, 2, 0); }
VM_INLINE float3 zxy() const { return SHUFFLE3(*this, 2, 0, 1); }
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
void setX(float x)
{
m = _mm_move_ss(m, _mm_set_ss(x));
}
void setY(float y)
{
__m128 t = _mm_move_ss(m, _mm_set_ss(y));
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 2, 0, 0));
m = _mm_move_ss(t, m);
}
void setZ(float z)
{
__m128 t = _mm_move_ss(m, _mm_set_ss(z));
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 0, 1, 0));
m = _mm_move_ss(t, m);
}
__m128 m;
};
typedef float3 bool3;
VM_INLINE float3 operator+ (float3 a, float3 b) { a.m = _mm_add_ps(a.m, b.m); return a; }
VM_INLINE float3 operator- (float3 a, float3 b) { a.m = _mm_sub_ps(a.m, b.m); return a; }
VM_INLINE float3 operator* (float3 a, float3 b) { a.m = _mm_mul_ps(a.m, b.m); return a; }
VM_INLINE float3 operator/ (float3 a, float3 b) { a.m = _mm_div_ps(a.m, b.m); return a; }
VM_INLINE float3 operator* (float3 a, float b) { a.m = _mm_mul_ps(a.m, _mm_set1_ps(b)); return a; }
VM_INLINE float3 operator/ (float3 a, float b) { a.m = _mm_div_ps(a.m, _mm_set1_ps(b)); return a; }
VM_INLINE float3 operator* (float a, float3 b) { b.m = _mm_mul_ps(_mm_set1_ps(a), b.m); return b; }
VM_INLINE float3 operator/ (float a, float3 b) { b.m = _mm_div_ps(_mm_set1_ps(a), b.m); return b; }
VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; }
VM_INLINE bool3 operator==(float3 a, float3 b) { a.m = _mm_cmpeq_ps(a.m, b.m); return a; }
VM_INLINE bool3 operator!=(float3 a, float3 b) { a.m = _mm_cmpneq_ps(a.m, b.m); return a; }
VM_INLINE bool3 operator< (float3 a, float3 b) { a.m = _mm_cmplt_ps(a.m, b.m); return a; }
VM_INLINE bool3 operator> (float3 a, float3 b) { a.m = _mm_cmpgt_ps(a.m, b.m); return a; }
VM_INLINE bool3 operator<=(float3 a, float3 b) { a.m = _mm_cmple_ps(a.m, b.m); return a; }
VM_INLINE bool3 operator>=(float3 a, float3 b) { a.m = _mm_cmpge_ps(a.m, b.m); return a; }
VM_INLINE float3 min(float3 a, float3 b) { a.m = _mm_min_ps(a.m, b.m); return a; }
VM_INLINE float3 max(float3 a, float3 b) { a.m = _mm_max_ps(a.m, b.m); return a; }
VM_INLINE float3 operator- (float3 a) { return float3(_mm_setzero_ps()) - a; }
VM_INLINE float hmin(float3 v)
{
v = min(v, SHUFFLE3(v, 1, 0, 2));
return min(v, SHUFFLE3(v, 2, 0, 1)).getX();
}
VM_INLINE float hmax(float3 v)
{
v = max(v, SHUFFLE3(v, 1, 0, 2));
return max(v, SHUFFLE3(v, 2, 0, 1)).getX();
}
VM_INLINE float3 cross(float3 a, float3 b)
{
// x <- a.y*b.z - a.z*b.y
// y <- a.z*b.x - a.x*b.z
// z <- a.x*b.y - a.y*b.x
// We can save a shuffle by grouping it in this wacky order:
return (a.zxy()*b - a*b.zxy()).zxy();
}
// Returns a 3-bit code where bit0..bit2 is X..Z
VM_INLINE unsigned mask(float3 v) { return _mm_movemask_ps(v.m) & 7; }
// Once we have a comparison, we can branch based on its results:
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); }
VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); }
VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); }
#else // #if !defined(__arm__) && !defined(__arm64__)
// ---- NEON implementation
#include <arm_neon.h>
struct float3
{
VM_INLINE float3() {}
VM_INLINE explicit float3(const float *p) { float v[4] = {p[0], p[1], p[2], 0}; m = vld1q_f32(v); }
VM_INLINE explicit float3(float x, float y, float z) { float v[4] = {x, y, z, 0}; m = vld1q_f32(v); }
VM_INLINE explicit float3(float v) { m = vdupq_n_f32(v); }
VM_INLINE explicit float3(float32x4_t v) { m = v; }
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
VM_INLINE float3 yzx() const
{
float32x2_t low = vget_low_f32(m);
float32x4_t yzx = vcombine_f32(vext_f32(low, vget_high_f32(m), 1), low);
return float3(yzx);
}
VM_INLINE float3 zxy() const
{
float32x4_t p = m;
p = vuzpq_f32(vreinterpretq_f32_s32(vextq_s32(vreinterpretq_s32_f32(p), vreinterpretq_s32_f32(p), 1)), p).val[1];
return float3(p);
}
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
void setX(float x)
{
m = vsetq_lane_f32(x, m, 0);
}
void setY(float y)
{
m = vsetq_lane_f32(y, m, 1);
}
void setZ(float z)
{
m = vsetq_lane_f32(z, m, 2);
}
float32x4_t m;
};
typedef float3 bool3;
VM_INLINE float32x4_t rcp_2(float32x4_t v)
{
float32x4_t e = vrecpeq_f32(v);
e = vmulq_f32(vrecpsq_f32(e, v), e);
e = vmulq_f32(vrecpsq_f32(e, v), e);
return e;
}
VM_INLINE float3 operator+ (float3 a, float3 b) { a.m = vaddq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator- (float3 a, float3 b) { a.m = vsubq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator* (float3 a, float3 b) { a.m = vmulq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator/ (float3 a, float3 b) { float32x4_t recip = rcp_2(b.m); a.m = vmulq_f32(a.m, recip); return a; }
VM_INLINE float3 operator* (float3 a, float b) { a.m = vmulq_f32(a.m, vdupq_n_f32(b)); return a; }
VM_INLINE float3 operator/ (float3 a, float b) { float32x4_t recip = rcp_2(vdupq_n_f32(b)); a.m = vmulq_f32(a.m, recip); return a; }
VM_INLINE float3 operator* (float a, float3 b) { b.m = vmulq_f32(vdupq_n_f32(a), b.m); return b; }
VM_INLINE float3 operator/ (float a, float3 b) { float32x4_t recip = rcp_2(b.m); b.m = vmulq_f32(vdupq_n_f32(a), recip); return b; }
VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; }
VM_INLINE bool3 operator==(float3 a, float3 b) { a.m = vceqq_f32(a.m, b.m); return a; }
VM_INLINE bool3 operator!=(float3 a, float3 b) { a.m = vmvnq_u32(vceqq_f32(a.m, b.m)); return a; }
VM_INLINE bool3 operator< (float3 a, float3 b) { a.m = vcltq_f32(a.m, b.m); return a; }
VM_INLINE bool3 operator> (float3 a, float3 b) { a.m = vcgtq_f32(a.m, b.m); return a; }
VM_INLINE bool3 operator<=(float3 a, float3 b) { a.m = vcleq_f32(a.m, b.m); return a; }
VM_INLINE bool3 operator>=(float3 a, float3 b) { a.m = vcgeq_f32(a.m, b.m); return a; }
VM_INLINE float3 min(float3 a, float3 b) { a.m = vminq_f32(a.m, b.m); return a; }
VM_INLINE float3 max(float3 a, float3 b) { a.m = vmaxq_f32(a.m, b.m); return a; }
VM_INLINE float3 operator- (float3 a) { a.m = vnegq_f32(a.m); return a; }
VM_INLINE float hmin(float3 v)
{
float32x2_t minOfHalfs = vpmin_f32(vget_low_f32(v.m), vget_high_f32(v.m));
float32x2_t minOfMinOfHalfs = vpmin_f32(minOfHalfs, minOfHalfs);
return vget_lane_f32(minOfMinOfHalfs, 0);
}
VM_INLINE float hmax(float3 v)
{
float32x2_t maxOfHalfs = vpmax_f32(vget_low_f32(v.m), vget_high_f32(v.m));
float32x2_t maxOfMaxOfHalfs = vpmax_f32(maxOfHalfs, maxOfHalfs);
return vget_lane_f32(maxOfMaxOfHalfs, 0);
}
VM_INLINE float3 cross(float3 a, float3 b)
{
// x <- a.y*b.z - a.z*b.y
// y <- a.z*b.x - a.x*b.z
// z <- a.x*b.y - a.y*b.x
// We can save a shuffle by grouping it in this wacky order:
return (a.zxy()*b - a*b.zxy()).zxy();
}
// Returns a 3-bit code where bit0..bit2 is X..Z
VM_INLINE unsigned mask(float3 v)
{
static const uint32x4_t movemask = { 1, 2, 4, 8 };
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
uint32x4_t t1 = vtstq_u32(t0, highbit);
uint32x4_t t2 = vandq_u32(t1, movemask);
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
}
// Once we have a comparison, we can branch based on its results:
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); }
VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); }
VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); }
#endif // #else of #if !defined(__arm__) && !defined(__arm64__)
#else // #if DO_FLOAT3_WITH_SIMD
// ---- Simple scalar C implementation
struct float3
{
float3() : x(0), y(0), z(0) {}
float3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
float3 operator-() const { return float3(-x, -y, -z); }
float3& operator+=(const float3& o) { x+=o.x; y+=o.y; z+=o.z; return *this; }
float3& operator-=(const float3& o) { x-=o.x; y-=o.y; z-=o.z; return *this; }
float3& operator*=(const float3& o) { x*=o.x; y*=o.y; z*=o.z; return *this; }
float3& operator*=(float o) { x*=o; y*=o; z*=o; return *this; }
VM_INLINE float getX() const { return x; }
VM_INLINE float getY() const { return y; }
VM_INLINE float getZ() const { return z; }
VM_INLINE void setX(float x_) { x = x_; }
VM_INLINE void setY(float y_) { y = y_; }
VM_INLINE void setZ(float z_) { z = z_; }
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
float x, y, z;
};
VM_INLINE float3 operator+(const float3& a, const float3& b) { return float3(a.x+b.x,a.y+b.y,a.z+b.z); }
VM_INLINE float3 operator-(const float3& a, const float3& b) { return float3(a.x-b.x,a.y-b.y,a.z-b.z); }
VM_INLINE float3 operator*(const float3& a, const float3& b) { return float3(a.x*b.x,a.y*b.y,a.z*b.z); }
VM_INLINE float3 operator*(const float3& a, float b) { return float3(a.x*b,a.y*b,a.z*b); }
VM_INLINE float3 operator*(float a, const float3& b) { return float3(a*b.x,a*b.y,a*b.z); }
VM_INLINE float dot(const float3& a, const float3& b) { return a.x*b.x+a.y*b.y+a.z*b.z; }
VM_INLINE float3 cross(const float3& a, const float3& b)
{
return float3(
a.y*b.z - a.z*b.y,
-(a.x*b.z - a.z*b.x),
a.x*b.y - a.y*b.x
);
}
#endif // #else of #if DO_FLOAT3_WITH_SIMD
VM_INLINE float length(float3 v) { return sqrtf(dot(v, v)); }
VM_INLINE float sqLength(float3 v) { return dot(v, v); }
VM_INLINE float3 normalize(float3 v) { return v * (1.0f / length(v)); }
VM_INLINE float3 lerp(float3 a, float3 b, float t) { return a + (b-a)*t; }
inline void AssertUnit(float3 v)
{
assert(fabsf(sqLength(v) - 1.0f) < 0.01f);
}
inline float3 reflect(float3 v, float3 n)
{
return v - 2*dot(v,n)*n;
}
inline bool refract(float3 v, float3 n, float nint, float3& outRefracted)
{
AssertUnit(v);
float dt = dot(v, n);
float discr = 1.0f - nint*nint*(1-dt*dt);
if (discr > 0)
{
outRefracted = nint * (v - n*dt) - n*sqrtf(discr);
return true;
}
return false;
}
inline float schlick(float cosine, float ri)
{
float r0 = (1-ri) / (1+ri);
r0 = r0*r0;
return r0 + (1-r0)*powf(1-cosine, 5);
}
struct Ray
{
Ray() {}
Ray(float3 orig_, float3 dir_) : orig(orig_), dir(dir_) { AssertUnit(dir); }
float3 pointAt(float t) const { return orig + dir * t; }
float3 orig;
float3 dir;
};
struct Hit
{
float3 pos;
float3 normal;
float t;
};
struct Sphere
{
Sphere() : radius(1.0f), invRadius(0.0f) {}
Sphere(float3 center_, float radius_) : center(center_), radius(radius_), invRadius(0.0f) {}
void UpdateDerivedData() { invRadius = 1.0f/radius; }
float3 center;
float radius;
float invRadius;
};
// data for all spheres in a "structure of arrays" layout
struct SpheresSoA
{
SpheresSoA(int c)
{
count = c;
// we'll be processing spheres in kSimdWidth chunks, so make sure to allocate
// enough space
simdCount = (c + (kSimdWidth - 1)) / kSimdWidth * kSimdWidth;
centerX = new float[simdCount];
centerY = new float[simdCount];
centerZ = new float[simdCount];
sqRadius = new float[simdCount];
invRadius = new float[simdCount];
// set all data to "impossible sphere" state
for (int i = count; i < simdCount; ++i)
{
centerX[i] = centerY[i] = centerZ[i] = 10000.0f;
sqRadius[i] = 0.0f;
invRadius[i] = 0.0f;
}
}
~SpheresSoA()
{
delete[] centerX;
delete[] centerY;
delete[] centerZ;
delete[] sqRadius;
delete[] invRadius;
}
float* centerX;
float* centerY;
float* centerZ;
float* sqRadius;
float* invRadius;
int simdCount;
int count;
};
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit);
float RandomFloat01(uint32_t& state);
float3 RandomInUnitDisk(uint32_t& state);
float3 RandomInUnitSphere(uint32_t& state);
float3 RandomUnitVector(uint32_t& state);
struct Camera
{
Camera() {}
// vfov is top to bottom in degrees
Camera(const float3& lookFrom, const float3& lookAt, const float3& vup, float vfov, float aspect, float aperture, float focusDist)
{
lensRadius = aperture / 2;
float theta = vfov*kPI/180;
float halfHeight = tanf(theta/2);
float halfWidth = aspect * halfHeight;
origin = lookFrom;
w = normalize(lookFrom - lookAt);
u = normalize(cross(vup, w));
v = cross(w, u);
lowerLeftCorner = origin - halfWidth*focusDist*u - halfHeight*focusDist*v - focusDist*w;
horizontal = 2*halfWidth*focusDist*u;
vertical = 2*halfHeight*focusDist*v;
}
Ray GetRay(float s, float t, uint32_t& state) const
{
float3 rd = lensRadius * RandomInUnitDisk(state);
float3 offset = u * rd.getX() + v * rd.getY();
return Ray(origin + offset, normalize(lowerLeftCorner + s*horizontal + t*vertical - origin - offset));
}
float3 origin;
float3 lowerLeftCorner;
float3 horizontal;
float3 vertical;
float3 u, v, w;
float lensRadius;
};

View File

@@ -1,392 +0,0 @@
#include "Config.h"
#include "Test.h"
#include "Maths.h"
#include <algorithm>
#if CPU_CAN_DO_THREADS
#include "enkiTS/TaskScheduler_c.h"
#include <thread>
#endif
#include <atomic>
#include "../../../public/tracy/Tracy.hpp"
// 46 spheres (2 emissive) when enabled; 9 spheres (1 emissive) when disabled
#define DO_BIG_SCENE 1
static Sphere s_Spheres[] =
{
{float3(0,-100.5,-1), 100},
{float3(2,0,-1), 0.5f},
{float3(0,0,-1), 0.5f},
{float3(-2,0,-1), 0.5f},
{float3(2,0,1), 0.5f},
{float3(0,0,1), 0.5f},
{float3(-2,0,1), 0.5f},
{float3(0.5f,1,0.5f), 0.5f},
{float3(-1.5f,1.5f,0.f), 0.3f},
#if DO_BIG_SCENE
{float3(4,0,-3), 0.5f}, {float3(3,0,-3), 0.5f}, {float3(2,0,-3), 0.5f}, {float3(1,0,-3), 0.5f}, {float3(0,0,-3), 0.5f}, {float3(-1,0,-3), 0.5f}, {float3(-2,0,-3), 0.5f}, {float3(-3,0,-3), 0.5f}, {float3(-4,0,-3), 0.5f},
{float3(4,0,-4), 0.5f}, {float3(3,0,-4), 0.5f}, {float3(2,0,-4), 0.5f}, {float3(1,0,-4), 0.5f}, {float3(0,0,-4), 0.5f}, {float3(-1,0,-4), 0.5f}, {float3(-2,0,-4), 0.5f}, {float3(-3,0,-4), 0.5f}, {float3(-4,0,-4), 0.5f},
{float3(4,0,-5), 0.5f}, {float3(3,0,-5), 0.5f}, {float3(2,0,-5), 0.5f}, {float3(1,0,-5), 0.5f}, {float3(0,0,-5), 0.5f}, {float3(-1,0,-5), 0.5f}, {float3(-2,0,-5), 0.5f}, {float3(-3,0,-5), 0.5f}, {float3(-4,0,-5), 0.5f},
{float3(4,0,-6), 0.5f}, {float3(3,0,-6), 0.5f}, {float3(2,0,-6), 0.5f}, {float3(1,0,-6), 0.5f}, {float3(0,0,-6), 0.5f}, {float3(-1,0,-6), 0.5f}, {float3(-2,0,-6), 0.5f}, {float3(-3,0,-6), 0.5f}, {float3(-4,0,-6), 0.5f},
{float3(1.5f,1.5f,-2), 0.3f},
#endif // #if DO_BIG_SCENE
};
const int kSphereCount = sizeof(s_Spheres) / sizeof(s_Spheres[0]);
static SpheresSoA s_SpheresSoA(kSphereCount);
struct Material
{
enum Type { Lambert, Metal, Dielectric };
Type type;
float3 albedo;
float3 emissive;
float roughness;
float ri;
};
static Material s_SphereMats[kSphereCount] =
{
{ Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, },
{ Material::Lambert, float3(0.8f, 0.4f, 0.4f), float3(0,0,0), 0, 0, },
{ Material::Lambert, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0, },
{ Material::Metal, float3(0.4f, 0.4f, 0.8f), float3(0,0,0), 0, 0 },
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0, 0 },
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.2f, 0 },
{ Material::Metal, float3(0.4f, 0.8f, 0.4f), float3(0,0,0), 0.6f, 0 },
{ Material::Dielectric, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 1.5f },
{ Material::Lambert, float3(0.8f, 0.6f, 0.2f), float3(30,25,15), 0, 0 },
#if DO_BIG_SCENE
{ Material::Lambert, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
{ Material::Metal, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0, },
{ Material::Metal, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
{ Material::Lambert, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Lambert, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0, }, { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0, },
{ Material::Lambert, float3(0.1f, 0.2f, 0.5f), float3(3,10,20), 0, 0 },
#endif
};
static int s_EmissiveSpheres[kSphereCount];
static int s_EmissiveSphereCount;
static Camera s_Cam;
const float kMinT = 0.001f;
const float kMaxT = 1.0e7f;
const int kMaxDepth = 10;
bool HitWorld(const Ray& r, float tMin, float tMax, Hit& outHit, int& outID)
{
outID = HitSpheres(r, s_SpheresSoA, tMin, tMax, outHit);
return outID != -1;
}
static bool Scatter(const Material& mat, const Ray& r_in, const Hit& rec, float3& attenuation, Ray& scattered, float3& outLightE, int& inoutRayCount, uint32_t& state)
{
ZoneScoped;
outLightE = float3(0,0,0);
if (mat.type == Material::Lambert)
{
// random point on unit sphere that is tangent to the hit point
float3 target = rec.pos + rec.normal + RandomUnitVector(state);
scattered = Ray(rec.pos, normalize(target - rec.pos));
attenuation = mat.albedo;
// sample lights
#if DO_LIGHT_SAMPLING
for (int j = 0; j < s_EmissiveSphereCount; ++j)
{
int i = s_EmissiveSpheres[j];
const Material& smat = s_SphereMats[i];
if (&mat == &smat)
continue; // skip self
const Sphere& s = s_Spheres[i];
// create a random direction towards sphere
// coord system for sampling: sw, su, sv
float3 sw = normalize(s.center - rec.pos);
float3 su = normalize(cross(fabs(sw.getX())>0.01f ? float3(0,1,0):float3(1,0,0), sw));
float3 sv = cross(sw, su);
// sample sphere by solid angle
float cosAMax = sqrtf(1.0f - s.radius*s.radius / sqLength(rec.pos-s.center));
float eps1 = RandomFloat01(state), eps2 = RandomFloat01(state);
float cosA = 1.0f - eps1 + eps1 * cosAMax;
float sinA = sqrtf(1.0f - cosA*cosA);
float phi = 2 * kPI * eps2;
float3 l = su * (cosf(phi) * sinA) + sv * (sinf(phi) * sinA) + sw * cosA;
//l = normalize(l); // NOTE(fg): This is already normalized, by construction.
// shoot shadow ray
Hit lightHit;
int hitID;
++inoutRayCount;
if (HitWorld(Ray(rec.pos, l), kMinT, kMaxT, lightHit, hitID) && hitID == i)
{
float omega = 2 * kPI * (1-cosAMax);
float3 rdir = r_in.dir;
AssertUnit(rdir);
float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
outLightE += (mat.albedo * smat.emissive) * (std::max(0.0f, dot(l, nl)) * omega / kPI);
}
}
#endif
return true;
}
else if (mat.type == Material::Metal)
{
AssertUnit(r_in.dir); AssertUnit(rec.normal);
float3 refl = reflect(r_in.dir, rec.normal);
// reflected ray, and random inside of sphere based on roughness
float roughness = mat.roughness;
#if DO_MITSUBA_COMPARE
roughness = 0; // until we get better BRDF for metals
#endif
scattered = Ray(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
attenuation = mat.albedo;
return dot(scattered.dir, rec.normal) > 0;
}
else if (mat.type == Material::Dielectric)
{
AssertUnit(r_in.dir); AssertUnit(rec.normal);
float3 outwardN;
float3 rdir = r_in.dir;
float3 refl = reflect(rdir, rec.normal);
float nint;
attenuation = float3(1,1,1);
float3 refr;
float reflProb;
float cosine;
if (dot(rdir, rec.normal) > 0)
{
outwardN = -rec.normal;
nint = mat.ri;
cosine = mat.ri * dot(rdir, rec.normal);
}
else
{
outwardN = rec.normal;
nint = 1.0f / mat.ri;
cosine = -dot(rdir, rec.normal);
}
if (refract(rdir, outwardN, nint, refr))
{
reflProb = schlick(cosine, mat.ri);
}
else
{
reflProb = 1;
}
if (RandomFloat01(state) < reflProb)
scattered = Ray(rec.pos, normalize(refl));
else
scattered = Ray(rec.pos, normalize(refr));
}
else
{
attenuation = float3(1,0,1);
return false;
}
return true;
}
static float3 Trace(const Ray& r, int depth, int& inoutRayCount, uint32_t& state, bool doMaterialE = true)
{
ZoneScoped;
Hit rec;
int id = 0;
++inoutRayCount;
if (HitWorld(r, kMinT, kMaxT, rec, id))
{
Ray scattered;
float3 attenuation;
float3 lightE;
const Material& mat = s_SphereMats[id];
float3 matE = mat.emissive;
if (depth < kMaxDepth && Scatter(mat, r, rec, attenuation, scattered, lightE, inoutRayCount, state))
{
#if DO_LIGHT_SAMPLING
if (!doMaterialE) matE = float3(0,0,0); // don't add material emission if told so
// dor Lambert materials, we just did explicit light (emissive) sampling and already
// for their contribution, so if next ray bounce hits the light again, don't add
// emission
doMaterialE = (mat.type != Material::Lambert);
#endif
return matE + lightE + attenuation * Trace(scattered, depth+1, inoutRayCount, state, doMaterialE);
}
else
{
return matE;
}
}
else
{
// sky
#if DO_MITSUBA_COMPARE
return float3(0.15f,0.21f,0.3f); // easier compare with Mitsuba's constant environment light
#else
float3 unitDir = r.dir;
float t = 0.5f*(unitDir.getY() + 1.0f);
return ((1.0f-t)*float3(1.0f, 1.0f, 1.0f) + t*float3(0.5f, 0.7f, 1.0f)) * 0.3f;
#endif
}
}
#if CPU_CAN_DO_THREADS
static enkiTaskScheduler* g_TS;
#endif
void InitializeTest()
{
ZoneScoped;
#if CPU_CAN_DO_THREADS
g_TS = enkiNewTaskScheduler();
enkiInitTaskSchedulerNumThreads(g_TS, std::max<int>( 2, std::thread::hardware_concurrency() - 2));
#endif
}
void ShutdownTest()
{
ZoneScoped;
#if CPU_CAN_DO_THREADS
enkiDeleteTaskScheduler(g_TS);
#endif
}
struct JobData
{
float time;
int frameCount;
int screenWidth, screenHeight;
float* backbuffer;
Camera* cam;
std::atomic<int> rayCount;
unsigned testFlags;
};
static void TraceRowJob(uint32_t start, uint32_t end, uint32_t threadnum, void* data_)
{
ZoneScoped;
JobData& data = *(JobData*)data_;
float* backbuffer = data.backbuffer + start * data.screenWidth * 4;
float invWidth = 1.0f / data.screenWidth;
float invHeight = 1.0f / data.screenHeight;
float lerpFac = float(data.frameCount) / float(data.frameCount+1);
if (data.testFlags & kFlagAnimate)
lerpFac *= DO_ANIMATE_SMOOTHING;
if (!(data.testFlags & kFlagProgressive))
lerpFac = 0;
int rayCount = 0;
for (uint32_t y = start; y < end; ++y)
{
uint32_t state = (y * 9781 + data.frameCount * 6271) | 1;
for (int x = 0; x < data.screenWidth; ++x)
{
float3 col(0, 0, 0);
for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
{
float u = float(x + RandomFloat01(state)) * invWidth;
float v = float(y + RandomFloat01(state)) * invHeight;
Ray r = data.cam->GetRay(u, v, state);
col += Trace(r, 0, rayCount, state);
}
col *= 1.0f / float(DO_SAMPLES_PER_PIXEL);
float3 prev(backbuffer[0], backbuffer[1], backbuffer[2]);
col = prev * lerpFac + col * (1-lerpFac);
col.store(backbuffer);
backbuffer += 4;
}
}
data.rayCount += rayCount;
}
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags)
{
ZoneScoped;
if (testFlags & kFlagAnimate)
{
s_Spheres[1].center.setY(cosf(time) + 1.0f);
s_Spheres[8].center.setZ(sinf(time)*0.3f);
}
float3 lookfrom(0, 2, 3);
float3 lookat(0, 0, 0);
float distToFocus = 3;
#if DO_MITSUBA_COMPARE
float aperture = 0.0f;
#else
float aperture = 0.1f;
#endif
#if DO_BIG_SCENE
aperture *= 0.2f;
#endif
s_EmissiveSphereCount = 0;
for (int i = 0; i < kSphereCount; ++i)
{
Sphere& s = s_Spheres[i];
s.UpdateDerivedData();
s_SpheresSoA.centerX[i] = s.center.getX();
s_SpheresSoA.centerY[i] = s.center.getY();
s_SpheresSoA.centerZ[i] = s.center.getZ();
s_SpheresSoA.sqRadius[i] = s.radius * s.radius;
s_SpheresSoA.invRadius[i] = s.invRadius;
// Remember IDs of emissive spheres (light sources)
const Material& smat = s_SphereMats[i];
if (smat.emissive.getX() > 0 || smat.emissive.getY() > 0 || smat.emissive.getZ() > 0)
{
s_EmissiveSpheres[s_EmissiveSphereCount] = i;
s_EmissiveSphereCount++;
}
}
s_Cam = Camera(lookfrom, lookat, float3(0, 1, 0), 60, float(screenWidth) / float(screenHeight), aperture, distToFocus);
}
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags)
{
ZoneScoped;
JobData args;
args.time = time;
args.frameCount = frameCount;
args.screenWidth = screenWidth;
args.screenHeight = screenHeight;
args.backbuffer = backbuffer;
args.cam = &s_Cam;
args.testFlags = testFlags;
args.rayCount = 0;
#if CPU_CAN_DO_THREADS
enkiTaskSet* task = enkiCreateTaskSet(g_TS, TraceRowJob);
bool threaded = true;
enkiAddTaskSetToPipeMinRange(g_TS, task, &args, screenHeight, threaded ? 4 : screenHeight);
enkiWaitForTaskSet(g_TS, task);
enkiDeleteTaskSet(task);
#else
TraceRowJob(0, screenHeight, 0, &args);
#endif
outRayCount = args.rayCount;
}
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize)
{
ZoneScoped;
outCount = kSphereCount;
outObjectSize = sizeof(Sphere);
outMaterialSize = sizeof(Material);
outCamSize = sizeof(Camera);
}
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount)
{
ZoneScoped;
memcpy(outObjects, s_Spheres, kSphereCount * sizeof(s_Spheres[0]));
memcpy(outMaterials, s_SphereMats, kSphereCount * sizeof(s_SphereMats[0]));
memcpy(outCam, &s_Cam, sizeof(s_Cam));
memcpy(outEmissives, s_EmissiveSpheres, s_EmissiveSphereCount * sizeof(s_EmissiveSpheres[0]));
*outEmissiveCount = s_EmissiveSphereCount;
}

View File

@@ -1,17 +0,0 @@
#pragma once
#include <stdint.h>
enum TestFlags
{
kFlagAnimate = (1 << 0),
kFlagProgressive = (1 << 1),
};
void InitializeTest();
void ShutdownTest();
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags);
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags);
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize);
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount);

View File

@@ -1,79 +0,0 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#pragma once
#include <stdint.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#undef GetObject
#include <intrin.h>
extern "C" void _ReadWriteBarrier();
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_InterlockedCompareExchange)
#pragma intrinsic(_InterlockedExchangeAdd)
// Memory Barriers to prevent CPU and Compiler re-ordering
#define BASE_MEMORYBARRIER_ACQUIRE() _ReadWriteBarrier()
#define BASE_MEMORYBARRIER_RELEASE() _ReadWriteBarrier()
#define BASE_ALIGN(x) __declspec( align( x ) )
#else
#define BASE_MEMORYBARRIER_ACQUIRE() __asm__ __volatile__("": : :"memory")
#define BASE_MEMORYBARRIER_RELEASE() __asm__ __volatile__("": : :"memory")
#define BASE_ALIGN(x) __attribute__ ((aligned( x )))
#endif
namespace enki
{
// Atomically performs: if( *pDest == compareWith ) { *pDest = swapTo; }
// returns old *pDest (so if successfull, returns compareWith)
inline uint32_t AtomicCompareAndSwap( volatile uint32_t* pDest, uint32_t swapTo, uint32_t compareWith )
{
#ifdef _WIN32
// assumes two's complement - unsigned / signed conversion leads to same bit pattern
return _InterlockedCompareExchange( (volatile long*)pDest,swapTo, compareWith );
#else
return __sync_val_compare_and_swap( pDest, compareWith, swapTo );
#endif
}
inline uint64_t AtomicCompareAndSwap( volatile uint64_t* pDest, uint64_t swapTo, uint64_t compareWith )
{
#ifdef _WIN32
// assumes two's complement - unsigned / signed conversion leads to same bit pattern
return _InterlockedCompareExchange64( (__int64 volatile*)pDest, swapTo, compareWith );
#else
return __sync_val_compare_and_swap( pDest, compareWith, swapTo );
#endif
}
// Atomically performs: tmp = *pDest; *pDest += value; return tmp;
inline int32_t AtomicAdd( volatile int32_t* pDest, int32_t value )
{
#ifdef _WIN32
return _InterlockedExchangeAdd( (long*)pDest, value );
#else
return __sync_fetch_and_add( pDest, value );
#endif
}
}

View File

@@ -1,240 +0,0 @@
// Copyright (c) 2013 Doug Binks
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#pragma once
#include <stdint.h>
#include <assert.h>
#include "Atomics.h"
#include <string.h>
namespace enki
{
// LockLessMultiReadPipe - Single writer, multiple reader thread safe pipe using (semi) lockless programming
// Readers can only read from the back of the pipe
// The single writer can write to the front of the pipe, and read from both ends (a writer can be a reader)
// for many of the principles used here, see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418650(v=vs.85).aspx
// Note: using log2 sizes so we do not need to clamp (multi-operation)
// T is the contained type
// Note this is not true lockless as the use of flags as a form of lock state.
template<uint8_t cSizeLog2, typename T> class LockLessMultiReadPipe
{
public:
LockLessMultiReadPipe();
~LockLessMultiReadPipe() {}
// ReaderTryReadBack returns false if we were unable to read
// This is thread safe for both multiple readers and the writer
bool ReaderTryReadBack( T* pOut );
// WriterTryReadFront returns false if we were unable to read
// This is thread safe for the single writer, but should not be called by readers
bool WriterTryReadFront( T* pOut );
// WriterTryWriteFront returns false if we were unable to write
// This is thread safe for the single writer, but should not be called by readers
bool WriterTryWriteFront( const T& in );
// IsPipeEmpty() is a utility function, not intended for general use
// Should only be used very prudently.
bool IsPipeEmpty() const
{
return 0 == m_WriteIndex - m_ReadCount;
}
void Clear()
{
m_WriteIndex = 0;
m_ReadIndex = 0;
m_ReadCount = 0;
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
}
private:
const static uint32_t ms_cSize = ( 1 << cSizeLog2 );
const static uint32_t ms_cIndexMask = ms_cSize - 1;
const static uint32_t FLAG_INVALID = 0xFFFFFFFF; // 32bit for CAS
const static uint32_t FLAG_CAN_WRITE = 0x00000000; // 32bit for CAS
const static uint32_t FLAG_CAN_READ = 0x11111111; // 32bit for CAS
T m_Buffer[ ms_cSize ];
// read and write indexes allow fast access to the pipe, but actual access
// controlled by the access flags.
volatile uint32_t BASE_ALIGN(4) m_WriteIndex;
volatile uint32_t BASE_ALIGN(4) m_ReadCount;
volatile uint32_t m_Flags[ ms_cSize ];
volatile uint32_t BASE_ALIGN(4) m_ReadIndex;
};
template<uint8_t cSizeLog2, typename T> inline
LockLessMultiReadPipe<cSizeLog2,T>::LockLessMultiReadPipe()
: m_WriteIndex(0)
, m_ReadIndex(0)
, m_ReadCount(0)
{
assert( cSizeLog2 < 32 );
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
}
template<uint8_t cSizeLog2, typename T> inline
bool LockLessMultiReadPipe<cSizeLog2,T>::ReaderTryReadBack( T* pOut )
{
uint32_t actualReadIndex;
uint32_t readCount = m_ReadCount;
// We get hold of read index for consistency,
// and do first pass starting at read count
uint32_t readIndexToUse = readCount;
while(true)
{
uint32_t writeIndex = m_WriteIndex;
// power of two sizes ensures we can use a simple calc without modulus
uint32_t numInPipe = writeIndex - readCount;
if( 0 == numInPipe )
{
return false;
}
if( readIndexToUse >= writeIndex )
{
// move back to start
readIndexToUse = m_ReadIndex;
}
// power of two sizes ensures we can perform AND for a modulus
actualReadIndex = readIndexToUse & ms_cIndexMask;
// Multiple potential readers mean we should check if the data is valid,
// using an atomic compare exchange
uint32_t previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
if( FLAG_CAN_READ == previous )
{
break;
}
++readIndexToUse;
//update known readcount
readCount = m_ReadCount;
}
// we update the read index using an atomic add, as we've only read one piece of data.
// this ensure consistency of the read index, and the above loop ensures readers
// only read from unread data
AtomicAdd( (volatile int32_t*)&m_ReadCount, 1 );
BASE_MEMORYBARRIER_ACQUIRE();
// now read data, ensuring we do so after above reads & CAS
*pOut = m_Buffer[ actualReadIndex ];
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
return true;
}
template<uint8_t cSizeLog2, typename T> inline
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryReadFront( T* pOut )
{
uint32_t writeIndex = m_WriteIndex;
uint32_t frontReadIndex = writeIndex;
// Multiple potential readers mean we should check if the data is valid,
// using an atomic compare exchange - which acts as a form of lock (so not quite lockless really).
uint32_t previous = FLAG_INVALID;
uint32_t actualReadIndex = 0;
while( true )
{
// power of two sizes ensures we can use a simple calc without modulus
uint32_t readCount = m_ReadCount;
uint32_t numInPipe = writeIndex - readCount;
if( 0 == numInPipe || 0 == frontReadIndex )
{
// frontReadIndex can get to 0 here if that item was just being read by another thread.
m_ReadIndex = readCount;
return false;
}
--frontReadIndex;
actualReadIndex = frontReadIndex & ms_cIndexMask;
previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
if( FLAG_CAN_READ == previous )
{
break;
}
else if( m_ReadIndex >= frontReadIndex )
{
return false;
}
}
// now read data, ensuring we do so after above reads & CAS
*pOut = m_Buffer[ actualReadIndex ];
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
BASE_MEMORYBARRIER_RELEASE();
// 32-bit aligned stores are atomic, and writer owns the write index
// we only move one back as this is as many as we have read, not where we have read from.
--m_WriteIndex;
return true;
}
template<uint8_t cSizeLog2, typename T> inline
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryWriteFront( const T& in )
{
// The writer 'owns' the write index, and readers can only reduce
// the amount of data in the pipe.
// We get hold of both values for consistency and to reduce false sharing
// impacting more than one access
uint32_t writeIndex = m_WriteIndex;
// power of two sizes ensures we can perform AND for a modulus
uint32_t actualWriteIndex = writeIndex & ms_cIndexMask;
// a reader may still be reading this item, as there are multiple readers
if( m_Flags[ actualWriteIndex ] != FLAG_CAN_WRITE )
{
return false; // still being read, so have caught up with tail.
}
// as we are the only writer we can update the data without atomics
// whilst the write index has not been updated
m_Buffer[ actualWriteIndex ] = in;
m_Flags[ actualWriteIndex ] = FLAG_CAN_READ;
// We need to ensure the above writes occur prior to updating the write index,
// otherwise another thread might read before it's finished
BASE_MEMORYBARRIER_RELEASE();
// 32-bit aligned stores are atomic, and the writer controls the write index
++writeIndex;
m_WriteIndex = writeIndex;
return true;
}
}

Some files were not shown because too many files have changed in this diff Show More