Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5ea4a326f3 | ||
|
|
62957310a0 |
@@ -1,18 +0,0 @@
|
||||
# Empirical format config, based on observed style guide
|
||||
# Use this only as a help to fit the surrounding code style - don't reformat whole files at once
|
||||
---
|
||||
BasedOnStyle: LLVM
|
||||
AllowShortIfStatementsOnASingleLine: WithoutElse
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AlwaysBreakTemplateDeclarations: Yes
|
||||
BreakBeforeBraces: Allman
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakStringLiterals: false
|
||||
ColumnLimit: 120
|
||||
FixNamespaceComments: false
|
||||
IndentPPDirectives: AfterHash
|
||||
IndentWidth: 4
|
||||
PointerAlignment: Left
|
||||
SpaceBeforeParens: Never
|
||||
SpacesInParentheses: true
|
||||
TabWidth: 4
|
||||
61
.clang-tidy
@@ -1,61 +0,0 @@
|
||||
---
|
||||
Checks:
|
||||
'
|
||||
clang-diagnostic-*,
|
||||
clang-analyzer-*,
|
||||
bugprone-*,
|
||||
google-*,
|
||||
misc-*,
|
||||
modernize-*,
|
||||
performance-*,
|
||||
readability-*,
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-bugprone-implicit-widening-of-multiplication-result,
|
||||
-bugprone-narrowing-conversions,
|
||||
-bugprone-reserved-identifier,
|
||||
-google-readability-braces-around-statements,
|
||||
-google-readability-casting,
|
||||
-google-readability-function-size,
|
||||
-google-readability-todo,
|
||||
-google-readability-namespace-comments,
|
||||
-misc-confusable-identifiers,
|
||||
-misc-no-recursion,
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-deprecated-headers,
|
||||
-modernize-use-default-member-init,
|
||||
-modernize-use-trailing-return-type,
|
||||
-performance-no-int-to-ptr,
|
||||
-readability-braces-around-statements,
|
||||
-readability-else-after-return,
|
||||
-readability-function-cognitive-complexity,
|
||||
-readability-identifier-length,
|
||||
-readability-implicit-bool-conversion,
|
||||
-readability-isolate-declaration,
|
||||
-readability-magic-numbers,
|
||||
-readability-qualified-auto,
|
||||
-readability-uppercase-literal-suffix
|
||||
'
|
||||
WarningsAsErrors: ''
|
||||
HeaderFilterRegex: ''
|
||||
FormatStyle: none
|
||||
CheckOptions:
|
||||
llvm-else-after-return.WarnOnConditionVariables: 'false'
|
||||
modernize-loop-convert.MinConfidence: reasonable
|
||||
modernize-replace-auto-ptr.IncludeStyle: llvm
|
||||
modernize-pass-by-value.IncludeStyle: llvm
|
||||
google-readability-namespace-comments.ShortNamespaceLines: '10'
|
||||
google-readability-namespace-comments.SpacesBeforeComments: '2'
|
||||
cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: 'true'
|
||||
google-readability-braces-around-statements.ShortStatementLines: '1'
|
||||
cert-err33-c.CheckedFunctions: '::aligned_alloc;::asctime_s;::at_quick_exit;::atexit;::bsearch;::bsearch_s;::btowc;::c16rtomb;::c32rtomb;::calloc;::clock;::cnd_broadcast;::cnd_init;::cnd_signal;::cnd_timedwait;::cnd_wait;::ctime_s;::fclose;::fflush;::fgetc;::fgetpos;::fgets;::fgetwc;::fopen;::fopen_s;::fprintf;::fprintf_s;::fputc;::fputs;::fputwc;::fputws;::fread;::freopen;::freopen_s;::fscanf;::fscanf_s;::fseek;::fsetpos;::ftell;::fwprintf;::fwprintf_s;::fwrite;::fwscanf;::fwscanf_s;::getc;::getchar;::getenv;::getenv_s;::gets_s;::getwc;::getwchar;::gmtime;::gmtime_s;::localtime;::localtime_s;::malloc;::mbrtoc16;::mbrtoc32;::mbsrtowcs;::mbsrtowcs_s;::mbstowcs;::mbstowcs_s;::memchr;::mktime;::mtx_init;::mtx_lock;::mtx_timedlock;::mtx_trylock;::mtx_unlock;::printf_s;::putc;::putwc;::raise;::realloc;::remove;::rename;::scanf;::scanf_s;::setlocale;::setvbuf;::signal;::snprintf;::snprintf_s;::sprintf;::sprintf_s;::sscanf;::sscanf_s;::strchr;::strerror_s;::strftime;::strpbrk;::strrchr;::strstr;::strtod;::strtof;::strtoimax;::strtok;::strtok_s;::strtol;::strtold;::strtoll;::strtoul;::strtoull;::strtoumax;::strxfrm;::swprintf;::swprintf_s;::swscanf;::swscanf_s;::thrd_create;::thrd_detach;::thrd_join;::thrd_sleep;::time;::timespec_get;::tmpfile;::tmpfile_s;::tmpnam;::tmpnam_s;::tss_create;::tss_get;::tss_set;::ungetc;::ungetwc;::vfprintf;::vfprintf_s;::vfscanf;::vfscanf_s;::vfwprintf;::vfwprintf_s;::vfwscanf;::vfwscanf_s;::vprintf_s;::vscanf;::vscanf_s;::vsnprintf;::vsnprintf_s;::vsprintf;::vsprintf_s;::vsscanf;::vsscanf_s;::vswprintf;::vswprintf_s;::vswscanf;::vswscanf_s;::vwprintf_s;::vwscanf;::vwscanf_s;::wcrtomb;::wcschr;::wcsftime;::wcspbrk;::wcsrchr;::wcsrtombs;::wcsrtombs_s;::wcsstr;::wcstod;::wcstof;::wcstoimax;::wcstok;::wcstok_s;::wcstol;::wcstold;::wcstoll;::wcstombs;::wcstombs_s;::wcstoul;::wcstoull;::wcstoumax;::wcsxfrm;::wctob;::wctrans;::wctype;::wmemchr;::wprintf_s;::wscanf;::wscanf_s;'
|
||||
modernize-loop-convert.MaxCopySize: '16'
|
||||
cert-dcl16-c.NewSuffixes: 'L;LL;LU;LLU'
|
||||
cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField: 'false'
|
||||
cert-str34-c.DiagnoseSignedUnsignedCharComparisons: 'false'
|
||||
modernize-use-nullptr.NullMacros: 'NULL'
|
||||
llvm-qualified-auto.AddConstToQualified: 'false'
|
||||
modernize-loop-convert.NamingStyle: CamelCase
|
||||
llvm-else-after-return.WarnOnUnfixable: 'false'
|
||||
google-readability-function-size.StatementThreshold: '800'
|
||||
...
|
||||
|
||||
1
.github/FUNDING.yml
vendored
@@ -1 +0,0 @@
|
||||
github: wolfpld
|
||||
BIN
.github/sponsor.png
vendored
|
Before Width: | Height: | Size: 1.0 KiB |
83
.github/workflows/build.yml
vendored
@@ -1,83 +0,0 @@
|
||||
name: build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
env:
|
||||
CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm-cache
|
||||
|
||||
jobs:
|
||||
build:
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ windows-latest, macos-15 ]
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- if: startsWith(matrix.os, 'windows')
|
||||
uses: microsoft/setup-msbuild@v2
|
||||
- if: startsWith(matrix.os, 'windows')
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.x'
|
||||
- if: startsWith(matrix.os, 'windows')
|
||||
run: pip install meson ninja
|
||||
- if: startsWith(matrix.os, 'macos')
|
||||
name: Install macos dependencies
|
||||
run: brew install pkg-config glfw meson
|
||||
- name: Trust git repo
|
||||
run: git config --global --add safe.directory '*'
|
||||
- name: Profiler GUI
|
||||
run: |
|
||||
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
||||
cmake --build profiler/build --parallel --config Release
|
||||
- name: Update utility
|
||||
run: |
|
||||
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build update/build --parallel --config Release
|
||||
- name: Capture utility
|
||||
run: |
|
||||
cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build capture/build --parallel --config Release
|
||||
- name: Csvexport utility
|
||||
run: |
|
||||
cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build csvexport/build --parallel --config Release
|
||||
- name: Import utilities
|
||||
run: |
|
||||
cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build import/build --parallel --config Release
|
||||
- if: ${{ !startsWith(matrix.os, 'windows') }}
|
||||
name: Library
|
||||
run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
|
||||
- if: ${{ !startsWith(matrix.os, 'windows') }}
|
||||
name: Find Artifacts
|
||||
id: find_artifacts
|
||||
run: |
|
||||
mkdir -p bin
|
||||
cp profiler/build/tracy-profiler bin
|
||||
cp update/build/tracy-update bin
|
||||
cp capture/build/tracy-capture bin
|
||||
cp csvexport/build/tracy-csvexport bin
|
||||
cp import/build/tracy-import-chrome bin
|
||||
cp import/build/tracy-import-fuchsia bin
|
||||
- if: startsWith(matrix.os, 'windows')
|
||||
name: Find Artifacts
|
||||
id: find_artifacts_windows
|
||||
run: |
|
||||
mkdir bin
|
||||
copy profiler\build\Release\tracy-profiler.exe bin
|
||||
copy update\build\Release\tracy-update.exe bin
|
||||
copy capture\build\Release\tracy-capture.exe bin
|
||||
copy csvexport\build\Release\tracy-csvexport.exe bin
|
||||
copy import\build\Release\tracy-import-chrome.exe bin
|
||||
copy import\build\Release\tracy-import-fuchsia.exe bin
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: ${{ matrix.os }}
|
||||
path: bin
|
||||
63
.github/workflows/emscripten.yml
vendored
@@ -1,63 +0,0 @@
|
||||
name: emscripten
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
env:
|
||||
CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm-cache
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
container: archlinux:base-devel
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed cmake git unzip python ninja zstd
|
||||
- name: Setup emscripten
|
||||
uses: mymindstorm/setup-emsdk@v14
|
||||
with:
|
||||
version: 3.1.67
|
||||
- name: Trust git repo
|
||||
run: git config --global --add safe.directory '*'
|
||||
- uses: actions/checkout@v4
|
||||
- name: Profiler GUI
|
||||
run: |
|
||||
cmake -G Ninja -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=MinSizeRel -DGIT_REV=${{ github.sha }} -DCMAKE_TOOLCHAIN_FILE=${{env.EMSDK}}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake
|
||||
cmake --build profiler/build --parallel
|
||||
- name: Compress artifacts
|
||||
run: |
|
||||
zstd -18 profiler/build/tracy-profiler.js profiler/build/tracy-profiler.wasm
|
||||
gzip -9 profiler/build/tracy-profiler.js profiler/build/tracy-profiler.wasm
|
||||
- name: Find Artifacts
|
||||
id: find_artifacts
|
||||
run: |
|
||||
mkdir -p bin
|
||||
cp profiler/build/index.html bin
|
||||
cp profiler/build/favicon.svg bin
|
||||
cp profiler/build/tracy-profiler.data bin
|
||||
cp profiler/build/tracy-profiler.js.gz bin
|
||||
cp profiler/build/tracy-profiler.js.zst bin
|
||||
cp profiler/build/tracy-profiler.wasm.gz bin
|
||||
cp profiler/build/tracy-profiler.wasm.zst bin
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: emscripten
|
||||
path: bin
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
if: github.ref == 'refs/heads/master'
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
- uses: wlixcc/SFTP-Deploy-Action@v1.2.4
|
||||
with:
|
||||
username: ${{ secrets.USERNAME }}
|
||||
server: ${{ secrets.SERVER }}
|
||||
port: ${{ secrets.PORT }}
|
||||
ssh_private_key: ${{ secrets.PRIVATE_KEY }}
|
||||
local_path: './emscripten/*'
|
||||
remote_path: ${{ secrets.REMOTE_PATH }}
|
||||
sftp_only: true
|
||||
24
.github/workflows/latex.yml
vendored
@@ -1,24 +0,0 @@
|
||||
name: Manual
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Compile LaTeX
|
||||
uses: xu-cheng/latex-action@v3
|
||||
with:
|
||||
working_directory: manual
|
||||
root_file: tracy.tex
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: manual
|
||||
path: manual/tracy.pdf
|
||||
80
.github/workflows/linux.yml
vendored
@@ -1,80 +0,0 @@
|
||||
name: linux
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
env:
|
||||
CPM_SOURCE_CACHE: ${{ github.workspace }}/cpm-cache
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
container: archlinux:base-devel
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: pacman -Syu --noconfirm && pacman -S --noconfirm --needed freetype2 debuginfod wayland dbus libxkbcommon libglvnd meson cmake git wayland-protocols nodejs
|
||||
- name: Trust git repo
|
||||
run: git config --global --add safe.directory '*'
|
||||
- uses: actions/checkout@v4
|
||||
- name: Profiler GUI
|
||||
run: |
|
||||
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -DGIT_REV=${{ github.sha }}
|
||||
cmake --build profiler/build --parallel
|
||||
- name: Update utility
|
||||
run: |
|
||||
cmake -B update/build -S update -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build update/build --parallel
|
||||
- name: Capture utility
|
||||
run: |
|
||||
cmake -B capture/build -S capture -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build capture/build --parallel
|
||||
- name: Csvexport utility
|
||||
run: |
|
||||
cmake -B csvexport/build -S csvexport -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build csvexport/build --parallel
|
||||
- name: Import utilities
|
||||
run: |
|
||||
cmake -B import/build -S import -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build import/build --parallel
|
||||
- name: Library
|
||||
run: meson setup -Dprefix=$GITHUB_WORKSPACE/bin/lib build && meson compile -C build && meson install -C build
|
||||
- name: Test application
|
||||
run: |
|
||||
# test compilation with different flags
|
||||
# we clean the build folder to reset cached variables between runs
|
||||
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build test/build --parallel
|
||||
rm -rf test/build
|
||||
|
||||
# same with TRACY_ON_DEMAND
|
||||
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_ON_DEMAND=ON .
|
||||
cmake --build test/build --parallel
|
||||
rm -rf test/build
|
||||
|
||||
# same with TRACY_DELAYED_INIT TRACY_MANUAL_LIFETIME
|
||||
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DELAYED_INIT=ON -DTRACY_MANUAL_LIFETIME=ON .
|
||||
cmake --build test/build --parallel
|
||||
rm -rf test/build
|
||||
|
||||
# same with TRACY_DEMANGLE
|
||||
cmake -B test/build -S test -DCMAKE_BUILD_TYPE=Release -DTRACY_DEMANGLE=ON .
|
||||
cmake --build test/build --parallel
|
||||
rm -rf test/build
|
||||
- name: Find Artifacts
|
||||
id: find_artifacts
|
||||
run: |
|
||||
mkdir -p bin
|
||||
cp profiler/build/tracy-profiler bin
|
||||
cp update/build/tracy-update bin
|
||||
cp capture/build/tracy-capture bin
|
||||
cp csvexport/build/tracy-csvexport bin
|
||||
cp import/build/tracy-import-chrome bin
|
||||
cp import/build/tracy-import-fuchsia bin
|
||||
strip bin/tracy-*
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: arch-linux
|
||||
path: bin
|
||||
35
.gitignore
vendored
@@ -1,36 +1,13 @@
|
||||
.vs
|
||||
_build
|
||||
_compiler
|
||||
tools/*
|
||||
*.opendb
|
||||
*.db
|
||||
*.vcxproj.user
|
||||
x64
|
||||
Release
|
||||
Debug
|
||||
*.d
|
||||
*.o
|
||||
*.so
|
||||
*.swp
|
||||
*.obj
|
||||
imgui.ini
|
||||
test/tracy_test
|
||||
test/tracy_test.exe
|
||||
*/build/unix/*-*
|
||||
manual/t*.aux
|
||||
manual/t*.log
|
||||
manual/t*.out
|
||||
manual/t*.pdf
|
||||
manual/t*.synctex.gz
|
||||
manual/t*.toc
|
||||
manual/t*.bbl
|
||||
manual/t*.blg
|
||||
manual/t*.fdb_latexmk
|
||||
manual/t*.fls
|
||||
profiler/build/win32/packages
|
||||
profiler/build/win32/Tracy.aps
|
||||
.deps/
|
||||
.dirstamp
|
||||
/_*/**
|
||||
/**/__pycache__/**
|
||||
extra/vswhere.exe
|
||||
extra/tracy-build
|
||||
.cache
|
||||
compile_commands.json
|
||||
profiler/build/wasm/Tracy-release.*
|
||||
profiler/build/wasm/Tracy-debug.*
|
||||
profiler/build/wasm/embed.tracy
|
||||
|
||||
2
.mailmap
@@ -1,2 +0,0 @@
|
||||
<wolf@nereid.pl> <wolf.pld@gmail.com>
|
||||
<wolf@nereid.pl> <bartosz.taudul@game-lion.com>
|
||||
7
.vscode/extensions.json
vendored
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"llvm-vs-code-extensions.vscode-clangd",
|
||||
"vadimcn.vscode-lldb",
|
||||
"ms-vscode.cmake-tools"
|
||||
]
|
||||
}
|
||||
13
.vscode/launch.json
vendored
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Launch",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${command:cmake.launchTargetPath}",
|
||||
"args": [],
|
||||
"cwd": "${workspaceFolder}"
|
||||
}
|
||||
]
|
||||
}
|
||||
20
.vscode/settings.json
vendored
@@ -1,20 +0,0 @@
|
||||
{
|
||||
"cmake.configureOnOpen": true,
|
||||
"cmake.sourceDirectory": [
|
||||
"${workspaceFolder}/profiler",
|
||||
"${workspaceFolder}/capture",
|
||||
"${workspaceFolder}/csvexport",
|
||||
"${workspaceFolder}/import",
|
||||
"${workspaceFolder}/update",
|
||||
"${workspaceFolder}/test",
|
||||
"${workspaceFolder}",
|
||||
],
|
||||
"cmake.buildDirectory": "${sourceDirectory}/build",
|
||||
"cmake.autoSelectActiveFolder": false,
|
||||
"cmake.options.advanced": {
|
||||
"folder": { "statusBarVisibility": "visible" },
|
||||
"variant": { "statusBarVisibility": "compact" }
|
||||
},
|
||||
"cmake.copyCompileCommands": "${workspaceFolder}/compile_commands.json",
|
||||
"lldb.launch.initCommands": ["command script import ${workspaceRoot}/extra/natvis.py"],
|
||||
}
|
||||
3
AUTHORS
Normal file
@@ -0,0 +1,3 @@
|
||||
Bartosz Taudul <wolf.pld@gmail.com>
|
||||
Kamil Klimek <kamil.klimek@sharkbits.com>
|
||||
Bartosz Szreder <zgredder@gmail.com>
|
||||
273
CMakeLists.txt
@@ -1,273 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
# Run version helper script
|
||||
include(cmake/version.cmake)
|
||||
|
||||
project(Tracy LANGUAGES CXX VERSION ${TRACY_VERSION_STRING})
|
||||
|
||||
file(GENERATE OUTPUT .gitignore CONTENT "*")
|
||||
|
||||
if(${BUILD_SHARED_LIBS})
|
||||
set(DEFAULT_STATIC OFF)
|
||||
else()
|
||||
set(DEFAULT_STATIC ON)
|
||||
endif()
|
||||
|
||||
option(TRACY_STATIC "Whether to build Tracy as a static library" ${DEFAULT_STATIC})
|
||||
option(TRACY_Fortran "Build Fortran bindings" OFF)
|
||||
option(TRACY_LTO "Enable Link-Time optimization" OFF)
|
||||
|
||||
if(TRACY_Fortran)
|
||||
enable_language(Fortran)
|
||||
set(CMAKE_Fortran_VERSION 2003)
|
||||
endif()
|
||||
|
||||
if(TRACY_LTO OR CMAKE_INTERPROCEDURAL_OPTIMIZATION)
|
||||
include(CheckIPOSupported)
|
||||
check_ipo_supported(RESULT LTO_SUPPORTED)
|
||||
if(NOT LTO_SUPPORTED)
|
||||
message(WARNING "LTO is not supported!")
|
||||
endif()
|
||||
else()
|
||||
set(LTO_SUPPORTED OFF)
|
||||
endif()
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
set(TRACY_PUBLIC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/public)
|
||||
|
||||
if(LTO_SUPPORTED)
|
||||
set(TRACY_VISIBILITY "OBJECT")
|
||||
elseif(TRACY_STATIC)
|
||||
set(TRACY_VISIBILITY "STATIC")
|
||||
else()
|
||||
set(TRACY_VISIBILITY "SHARED")
|
||||
endif()
|
||||
|
||||
add_library(TracyClient ${TRACY_VISIBILITY} "${TRACY_PUBLIC_DIR}/TracyClient.cpp")
|
||||
target_compile_features(TracyClient PUBLIC cxx_std_11)
|
||||
set_target_properties(TracyClient PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${LTO_SUPPORTED})
|
||||
target_include_directories(TracyClient SYSTEM PUBLIC
|
||||
$<BUILD_INTERFACE:${TRACY_PUBLIC_DIR}>
|
||||
$<INSTALL_INTERFACE:include/tracy>)
|
||||
target_link_libraries(
|
||||
TracyClient
|
||||
PUBLIC
|
||||
Threads::Threads
|
||||
${CMAKE_DL_LIBS}
|
||||
)
|
||||
|
||||
if(TRACY_Fortran)
|
||||
add_library(TracyClientF90 ${TRACY_VISIBILITY} "${TRACY_PUBLIC_DIR}/TracyClient.F90")
|
||||
target_include_directories(TracyClientF90 PUBLIC
|
||||
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
|
||||
$<INSTALL_INTERFACE:include/tracy>)
|
||||
target_link_libraries(
|
||||
TracyClientF90
|
||||
PUBLIC
|
||||
TracyClient
|
||||
)
|
||||
set_target_properties(TracyClientF90 PROPERTIES Fortran_MODULE_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
INTERPROCEDURAL_OPTIMIZATION ${LTO_SUPPORTED})
|
||||
endif()
|
||||
|
||||
# Public dependency on some libraries required when using Mingw
|
||||
if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} MATCHES "GNU|Clang")
|
||||
target_link_libraries(TracyClient PUBLIC ws2_32 dbghelp)
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
|
||||
find_library(EXECINFO_LIBRARY NAMES execinfo REQUIRED)
|
||||
target_link_libraries(TracyClient PUBLIC ${EXECINFO_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(TRACY_LIBUNWIND_BACKTRACE)
|
||||
include(FindPkgConfig)
|
||||
pkg_check_modules(unwind REQUIRED libunwind)
|
||||
target_include_directories(TracyClient INTERFACE ${unwind_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyClient INTERFACE ${unwind_LINK_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(TRACY_DEBUGINFOD)
|
||||
include(FindPkgConfig)
|
||||
pkg_check_modules(debuginfod REQUIRED libdebuginfod)
|
||||
target_include_directories(TracyClient INTERFACE ${debuginfod_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyClient INTERFACE ${debuginfod_LINK_LIBRARIES})
|
||||
endif()
|
||||
|
||||
add_library(Tracy::TracyClient ALIAS TracyClient)
|
||||
if(TRACY_Fortran)
|
||||
add_library(Tracy::TracyClient_Fortran ALIAS TracyClientF90)
|
||||
endif()
|
||||
|
||||
macro(set_option option help value)
|
||||
option(${option} ${help} ${value})
|
||||
if(${option})
|
||||
message(STATUS "${option}: ON")
|
||||
target_compile_definitions(TracyClient PUBLIC ${option})
|
||||
else()
|
||||
message(STATUS "${option}: OFF")
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
set_option(TRACY_ENABLE "Enable profiling" ON)
|
||||
set_option(TRACY_ON_DEMAND "On-demand profiling" OFF)
|
||||
set_option(TRACY_CALLSTACK "Enforce callstack collection for tracy regions" OFF)
|
||||
set_option(TRACY_NO_CALLSTACK "Disable all callstack related functionality" OFF)
|
||||
set_option(TRACY_NO_CALLSTACK_INLINES "Disables the inline functions in callstacks" OFF)
|
||||
set_option(TRACY_ONLY_LOCALHOST "Only listen on the localhost interface" OFF)
|
||||
set_option(TRACY_NO_BROADCAST "Disable client discovery by broadcast to local network" OFF)
|
||||
set_option(TRACY_ONLY_IPV4 "Tracy will only accept connections on IPv4 addresses (disable IPv6)" OFF)
|
||||
set_option(TRACY_NO_CODE_TRANSFER "Disable collection of source code" OFF)
|
||||
set_option(TRACY_NO_CONTEXT_SWITCH "Disable capture of context switches" OFF)
|
||||
set_option(TRACY_NO_EXIT "Client executable does not exit until all profile data is sent to server" OFF)
|
||||
set_option(TRACY_NO_SAMPLING "Disable call stack sampling" OFF)
|
||||
set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
|
||||
set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
|
||||
set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF)
|
||||
set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF)
|
||||
set_option(TRACY_PATCHABLE_NOPSLEDS "Enable nopsleds for efficient patching by system-level tools (e.g. rr)" OFF)
|
||||
set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
|
||||
set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
|
||||
set_option(TRACY_FIBERS "Enable fibers support" OFF)
|
||||
set_option(TRACY_NO_CRASH_HANDLER "Disable crash handling" OFF)
|
||||
set_option(TRACY_TIMER_FALLBACK "Use lower resolution timers" OFF)
|
||||
set_option(TRACY_LIBUNWIND_BACKTRACE "Use libunwind backtracing where supported" OFF)
|
||||
set_option(TRACY_SYMBOL_OFFLINE_RESOLVE "Instead of full runtime symbol resolution, only resolve the image path and offset to enable offline symbol resolution" OFF)
|
||||
set_option(TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT "Enable libbacktrace to support dynamically loaded elfs in symbol resolution resolution after the first symbol resolve operation" OFF)
|
||||
set_option(TRACY_DEBUGINFOD "Enable debuginfod support" OFF)
|
||||
|
||||
# advanced
|
||||
set_option(TRACY_VERBOSE "[advanced] Verbose output from the profiler" OFF)
|
||||
mark_as_advanced(TRACY_VERBOSE)
|
||||
set_option(TRACY_DEMANGLE "[advanced] Don't use default demangling function - You'll need to provide your own" OFF)
|
||||
mark_as_advanced(TRACY_DEMANGLE)
|
||||
|
||||
# handle incompatible combinations
|
||||
if(TRACY_MANUAL_LIFETIME AND NOT TRACY_DELAYED_INIT)
|
||||
message(FATAL_ERROR "TRACY_MANUAL_LIFETIME can not be activated with disabled TRACY_DELAYED_INIT")
|
||||
endif()
|
||||
|
||||
if(NOT TRACY_STATIC)
|
||||
target_compile_definitions(TracyClient PRIVATE TRACY_EXPORTS)
|
||||
target_compile_definitions(TracyClient PUBLIC TRACY_IMPORTS)
|
||||
endif()
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
set_target_properties(TracyClient PROPERTIES VERSION ${PROJECT_VERSION})
|
||||
if(TRACY_Fortran)
|
||||
set_target_properties(TracyClientF90 PROPERTIES VERSION ${PROJECT_VERSION})
|
||||
endif()
|
||||
|
||||
set(tracy_includes
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyC.h
|
||||
${TRACY_PUBLIC_DIR}/tracy/Tracy.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyCUDA.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyD3D11.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyD3D12.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyLua.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyMetal.hmm
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyOpenCL.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyOpenGL.hpp
|
||||
${TRACY_PUBLIC_DIR}/tracy/TracyVulkan.hpp)
|
||||
|
||||
set(client_includes
|
||||
${TRACY_PUBLIC_DIR}/client/tracy_concurrentqueue.h
|
||||
${TRACY_PUBLIC_DIR}/client/tracy_rpmalloc.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/tracy_SPSCQueue.h
|
||||
${TRACY_PUBLIC_DIR}/client/TracyKCore.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyArmCpuTable.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyCallstack.h
|
||||
${TRACY_PUBLIC_DIR}/client/TracyCallstack.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyCpuid.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyDebug.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyDxt1.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyFastVector.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyLock.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyProfiler.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyRingBuffer.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyScoped.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyStringHelpers.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracySysPower.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracySysTime.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracySysTrace.hpp
|
||||
${TRACY_PUBLIC_DIR}/client/TracyThread.hpp)
|
||||
|
||||
set(common_includes
|
||||
${TRACY_PUBLIC_DIR}/common/tracy_lz4.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/tracy_lz4hc.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyAlign.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyAlloc.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyApi.h
|
||||
${TRACY_PUBLIC_DIR}/common/TracyColor.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyForceInline.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyMutex.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyProtocol.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyQueue.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracySocket.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyStackFrames.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracySystem.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyUwp.hpp
|
||||
${TRACY_PUBLIC_DIR}/common/TracyYield.hpp)
|
||||
|
||||
install(TARGETS TracyClient
|
||||
EXPORT TracyConfig
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT lib)
|
||||
if(TRACY_Fortran)
|
||||
install(TARGETS TracyClientF90
|
||||
EXPORT TracyConfig
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT lib)
|
||||
endif()
|
||||
# Export targets to build tree root
|
||||
export(TARGETS TracyClient
|
||||
NAMESPACE Tracy::
|
||||
FILE ${CMAKE_BINARY_DIR}/TracyTargets.cmake)
|
||||
if(TRACY_Fortran)
|
||||
export(TARGETS TracyClientF90
|
||||
NAMESPACE Tracy::
|
||||
APPEND
|
||||
FILE ${CMAKE_BINARY_DIR}/TracyTargets.cmake)
|
||||
endif()
|
||||
install(FILES ${tracy_includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tracy/tracy)
|
||||
install(FILES ${client_includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tracy/client)
|
||||
install(FILES ${common_includes}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tracy/common)
|
||||
if(TRACY_Fortran)
|
||||
if(${CMAKE_Fortran_COMPILER_ID} MATCHES "Cray")
|
||||
install(FILES ${PROJECT_BINARY_DIR}/TRACY.mod
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tracy)
|
||||
else()
|
||||
install(FILES ${PROJECT_BINARY_DIR}/tracy.mod
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tracy)
|
||||
endif()
|
||||
endif()
|
||||
install(EXPORT TracyConfig
|
||||
NAMESPACE Tracy::
|
||||
FILE TracyTargets.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME})
|
||||
include(CMakePackageConfigHelpers)
|
||||
configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/TracyConfig.cmake"
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME})
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TracyConfig.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME})
|
||||
|
||||
option(TRACY_CLIENT_PYTHON "Whether to build Tracy python client library" OFF)
|
||||
|
||||
if(TRACY_CLIENT_PYTHON)
|
||||
if(TRACY_STATIC)
|
||||
message(FATAL_ERROR "Python-bindings require a shared client library")
|
||||
endif()
|
||||
|
||||
add_subdirectory(python)
|
||||
endif()
|
||||
@@ -1,6 +0,0 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include(CMakeFindDependencyMacro)
|
||||
find_dependency(Threads REQUIRED)
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/TracyTargets.cmake")
|
||||
6
LICENSE
@@ -1,7 +1,7 @@
|
||||
Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the
|
||||
3-clause BSD license.
|
||||
tl;dr: Tracy is licensed under BSD 3-clause license.
|
||||
|
||||
Copyright (c) 2017-2025, Bartosz Taudul <wolf@nereid.pl>
|
||||
|
||||
Copyright (c) 2017, Bartosz Taudul <wolf.pld@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
||||
138
README.md
@@ -1,28 +1,126 @@
|
||||
# Tracy Profiler
|
||||
|
||||
[](https://github.com/sponsors/wolfpld/)
|
||||
|
||||
### A real time, nanosecond resolution, remote telemetry, hybrid frame and sampling profiler for games and other applications.
|
||||
|
||||
Tracy supports profiling CPU (Direct support is provided for C, C++, Lua, Python and Fortran integration. At the same time, third-party bindings to many other languages exist on the internet, such as [Rust](https://github.com/nagisa/rust_tracy_client), [Zig](https://github.com/tealsnow/zig-tracy), [C#](https://github.com/clibequilibrium/Tracy-CSharp), [OCaml](https://github.com/imandra-ai/ocaml-tracy), [Odin](https://github.com/oskarnp/odin-tracy), etc.), GPU (All major graphic APIs: OpenGL, Vulkan, Direct3D 11/12, Metal, OpenCL, CUDA.), memory allocations, locks, context switches, automatically attribute screenshots to captured frames, and much more.
|
||||
|
||||
- [Documentation](https://github.com/wolfpld/tracy/releases/latest/download/tracy.pdf) for usage and build process instructions
|
||||
- [Releases](https://github.com/wolfpld/tracy/releases) containing the documentation (`tracy.pdf`) and compiled Windows x64 binaries (`Tracy-<version>.7z`) as assets
|
||||
- [Changelog](NEWS)
|
||||
- [Interactive demo](https://tracy.nereid.pl/)
|
||||
Tracy is a real time, nanosecond resolution frame profiler that can be used for remote or embedded telemetry of your application. It can profile both CPU (C++, Lua) and GPU (OpenGL). It also can display locks held by threads and their interactions with each other.
|
||||
|
||||

|
||||
|
||||

|
||||
Tracy requires compiler support for C++11, Thread Local Storage and a way to work around the static initialization order fiasco. There are no other requirements. The following platforms are confirmed to be working (this is not a complete list):
|
||||
|
||||

|
||||
- Windows (x86, x64)
|
||||
- Linux (x86, x64, ARM, ARM64)
|
||||
- Android (ARM, x86)
|
||||
- FreeBSD (x64)
|
||||
- Cygwin (x64)
|
||||
- WSL (x64)
|
||||
- OSX (x64)
|
||||
|
||||
[An Introduction to Tracy Profiler in C++ - Marcos Slomp - CppCon 2023](https://youtu.be/ghXk3Bk5F2U?t=37)
|
||||
The following compilers are supported:
|
||||
|
||||
[Introduction to Tracy Profiler v0.2](https://www.youtube.com/watch?v=fB5B46lbapc)
|
||||
[New features in Tracy Profiler v0.3](https://www.youtube.com/watch?v=3SXpDpDh2Uo)
|
||||
[New features in Tracy Profiler v0.4](https://www.youtube.com/watch?v=eAkgkaO8B9o)
|
||||
[New features in Tracy Profiler v0.5](https://www.youtube.com/watch?v=P6E7qLMmzTQ)
|
||||
[New features in Tracy Profiler v0.6](https://www.youtube.com/watch?v=uJkrFgriuOo)
|
||||
[New features in Tracy Profiler v0.7](https://www.youtube.com/watch?v=_hU7vw00MZ4)
|
||||
[New features in Tracy Profiler v0.8](https://www.youtube.com/watch?v=30wpRpHTTag)
|
||||
- MSVC
|
||||
- gcc
|
||||
- clang
|
||||
|
||||
### High-level overview
|
||||
|
||||

|
||||
|
||||
Tracy is split into a client side and a server side. The client side collects events using a high-efficiency queue and waits for an incoming connection. The server part connects to the client and receives the collected data, which is then reconstructed into a viewable timeline. The transfer is performed using a TCP connection.
|
||||
|
||||
### Performance impact
|
||||
|
||||
To check how much slowdown is introduced by using tracy, I have profiled [etcpak](https://bitbucket.org/wolfpld/etcpak), which is the fastest ETC texture compression utility there is. I used an 8192×8192 test image as input data and instrumented everything down to the 4×4 pixel block compression function (that's 4 million blocks to compress). It should be noted that tracy needs to calibrate its internal timers at each run. This introduces a delay of 115 ms (on my machine), which is negligible when doing lengthy profiling runs, but it skews the results of etcpak timing. The following times have this delay subtracted, to give focus on zone collection impact, which is the thing that really matters here.
|
||||
|
||||
| Scenario | Zones | Clean run | Profiling run | Difference |
|
||||
|-------------------------------------------------------|---------|-----------|---------------|------------|
|
||||
| Compression of an image to ETC1 format | 4194568 | 0.94 s | 1.003 s | +0.063 s |
|
||||
| Compression of an image to ETC2 format, with mip-maps | 5592822 | 1.034 s | 1.119 s | +0.085 s |
|
||||
|
||||
In both scenarios the per-zone time cost is at ~15 ns. This is in line with the measured 8 ns single event collection time (each zone has to report start and end event).
|
||||
|
||||
## Usage instructions
|
||||
|
||||
#### Initial client setup
|
||||
|
||||
Copy files from `tracy/client` and `tracy/common` to your project. Add `tracy/TracyClient.cpp` to source files list. That's all. Tracy is now integrated into your application.
|
||||
|
||||
In the default configuration tracy is disabled. To enable it, add a `TRACY_ENABLE` define.
|
||||
|
||||
If you want to profile a short-lived application, add a `TRACY_NO_EXIT` define. In this configuration tracy will not exit until an incoming connection is made, even if the application has already finished.
|
||||
|
||||
#### Marking zones
|
||||
|
||||
To begin data collection, tracy requires that you manually instrument your application (automatic tracing of every entered function is not feasible due to the amount of data that would generate). All the user-facing interface is contained in the `tracy/Tracy.hpp` header file.
|
||||
|
||||
To slice the program's execution recording into frame-sized chunks, put the `FrameMark` macro after you have completed rendering the frame. Ideally that would be right after the swap buffers command. Note that this step is optional, as some applications (for example: a compression utility) do not have the concept of a frame.
|
||||
|
||||
To record a zone's execution time add the `ZoneScoped` macro at the beginning of the scope you want to measure. This will automatically record function name, source file name and location. Optionally you may use the `ZoneScopedC( 0xRRGGBB )` macro to set a custom color for the zone. Note that the color value will be constant in the recording (don't try to parametrize it). You may also set a custom name for the zone, using the `ZoneScopedN( name )` macro, where name is a string literal. Color and name may be combined by using the `ZoneScopedNC( name, color )` macro.
|
||||
|
||||
Use the `ZoneText( const char* text, size_t size )` macro to add a custom text string that will be displayed along the zone information (for example, name of the file you are opening). Note that every time `ZoneText` is invoked, a memory allocation is performed to store an internal copy of the data. The provided string is not used by tracy after ZoneText returns.
|
||||
|
||||
#### Marking locks
|
||||
|
||||
Tracy can collect and display lock interactions in threads.
|
||||
|
||||

|
||||
|
||||
To mark a lock (mutex) for event reporting, use the `TracyLockable( type, varname )` macro. Note that the lock must implement the [Lockable concept](http://en.cppreference.com/w/cpp/concept/Lockable) (i.e. there's no support for timed mutexes). For a concrete example, you would replace the line `std::mutex m_lock` with `TracyLockable( std::mutex, m_lock )`. You may use `TracyLockableN( type, varname, description )` to provide a custom lock name.
|
||||
|
||||
The standard `std::lock_guard` and `std::unique_lock` wrappers should use the `LockableBase( type )` macro for their template parameter (unless you're using C++17, with improved template argument deduction). For example, `std::lock_guard<LockableBase( std::mutex )> lock( m_lock )`.
|
||||
|
||||
To mark the location of lock being held, use the `LockMark( varname )` macro, after you have obtained the lock. Note that the varname must be a lock variable (a reference is also valid). This step is optional.
|
||||
|
||||
Similarly, you can use `TracySharedLockable`, `TracySharedLockableN` and `SharedLockableBase` to mark locks implementing the [SharedMutex concept](http://en.cppreference.com/w/cpp/concept/SharedMutex). Note that while there's no support for timed mutexes in tracy, both `std::shared_mutex` and `std::shared_timed_mutex` may be used.
|
||||
|
||||
#### Plotting data
|
||||
|
||||
Tracy is able to capture and draw value changes over time. You may use it to analyse memory usage, draw call count, etc. To report data, use the `TracyPlot( name, value )` macro.
|
||||
|
||||

|
||||
|
||||
#### Message log
|
||||
|
||||
Fast navigation in a large data set and correlation of zones with what was happening in the application may be difficult. To ease these issues tracy provides a message log. You can send messages (for example, your typical debug output) using the `TracyMessage( text, size )` macro (tracy will allocate memory for message storage). Alternatively, use `TracyMessageL( text )` for string literal messages. Messages are displayed in a chronological list and in the zone view.
|
||||
|
||||

|
||||
|
||||
#### Running the server
|
||||
|
||||
The easiest way to get going is to build the standalone server, available in the `standalone` directory. You can connect to localhost or remote clients and view the collected data right away.
|
||||
|
||||
Alternatively, you may want to embed the server in your application, the same one that runs the client part of tracy. Doing so requires that you also include the `server` and `imgui` directories. Include the `tracy/server/TracyView.hpp` header file, create an instance of the `tracy::View` class and call its `Draw()` method every frame. Unfortunately, there's also the hard part - you need to integrate the imgui library into the innards of your program. How to do so is outside the scope of this document.
|
||||
|
||||
#### Lua support
|
||||
|
||||
To profile Lua code using tracy, include the `tracy/TracyLua.hpp` header file in your Lua wrapper and execute `tracy::LuaRegister( lua_State* )` function to add instrumentation support. In your Lua code, add `tracy.ZoneBegin()` and `tracy.ZoneEnd()` calls to mark execution zones. Double check if you have included all return paths! Use `tracy.ZoneBeginN( name )` to set zone name. Use `tracy.ZoneText( text )` to set zone text. Use `tracy.Message( text )` to send messages.
|
||||
|
||||
Even if tracy is disabled, you still have to pay the no-op function call cost. To prevent that you may want to use the `tracy::LuaRemove( char* script )` function, which will replace instrumentation calls with whitespace. This function does nothing if profiler is enabled.
|
||||
|
||||
#### GPU profiling
|
||||
|
||||
Tracy provides bindings for profiling OpenGL execution time on GPU. To use it, you will need to include the `tracy/TracyOpenGL.hpp` header file and declare each of your rendering contexts using the `TracyGpuContext` macro (typically you will only have one context). Tracy expects no more than one context per thread and no context migration.
|
||||
|
||||
To mark a GPU zone use the `TracyGpuZone( name )` macro, where `name` is a string literal name of the zone. Alternatively you may use `TracyGpuZoneC( name, color )` to specify zone color.
|
||||
|
||||
You also need to periodically collect the GPU events using the `TracyGpuCollect` macro. A good place to do it is after swap buffers function call.
|
||||
|
||||
GPU profiling is not supported on OSX, iOS (because Apple is unable to implement standards properly). Android devices do work, if GPU drivers are not broken. Disjoint events are not currently handled, so some readings may be a bit spotty. NVIDIA drivers are unable to provide consistent timing results when two OpenGL contexts are used simultaneously.
|
||||
|
||||
## Good practices
|
||||
|
||||
- Remember to set thread names for proper identification of threads. You may use the functions exposed in the `tracy/common/TracySystem.hpp` header to do so. Note that the max thread name length in pthreads is limited to 15 characters. Proper thread naming support is available in MSVC only if you are using Windows SDK 10.0.15063 or newer.
|
||||
- Enable the MSVC String Pooling option (`/GF`) or the gcc counterpart, `-fmerge-constants`. This will reduce the number of queries the server needs to send to the client. Note that these options are enabled in optimized builds by default.
|
||||
|
||||
## Practical considerations
|
||||
|
||||
Tracy's time measurement precision is not infinite. It's only as good as the system-provided timers are.
|
||||
|
||||
- On embedded ARM-based systems you can expect to have 1 µs time resolution. Some hardware is able to provide a resolution of tens to hundreds of nanoseconds.
|
||||
- On x86 the time resolution depends on the hardware implementation of the RDTSCP instruction and typically is a couple of nanoseconds. This may vary from one micro-architecture to another and requires a fairly modern (Sandy Bridge) processor for reliable results.
|
||||
|
||||
While the data collection is very lightweight, it is not completely free. Each recorded zone event has a cost, which tracy tries to calculate and display on the timeline view, as a red zone. Note that this is an *approximation* of the real cost, which ignores many important factors. For example, you can't determine the impact of cache effects. The CPU frequency may be reduced in some situations, which will increase the recorded time, but the displayed profiler cost will not compensate for that.
|
||||
|
||||

|
||||
|
||||
Lua instrumentation needs to perform additional work (including memory allocation) to store source location. This approximately doubles the data collection cost.
|
||||
|
||||
You may use named colors predefined in `common/TracyColor.hpp` (included by `Tracy.hpp`). Visual reference: [wikipedia](https://en.wikipedia.org/wiki/X11_color_names).
|
||||
|
||||
62
Tracy.hpp
Normal file
@@ -0,0 +1,62 @@
|
||||
#ifndef __TRACY_HPP__
#define __TRACY_HPP__

// User-facing instrumentation macros.
// Without TRACY_ENABLE the zone/plot/message macros expand to nothing and the lock
// macros declare the plain lock type; with TRACY_ENABLE they forward to the tracy::
// client classes.

#include "common/TracyColor.hpp"
#include "common/TracySystem.hpp"

#ifndef TRACY_ENABLE

// ---- Zones (disabled) ----
#define ZoneScoped
#define ZoneScopedN(x)
#define ZoneScopedC(x)
#define ZoneScopedNC(x,y)

#define ZoneText(x,y)
// NOTE(review): ZoneName is defined only in this branch; the TRACY_ENABLE branch below
// has no counterpart.
#define ZoneName(x)

// ---- Frames (disabled) ----
#define FrameMark

// ---- Locks (disabled): declare the wrapped type directly ----
#define TracyLockable( type, varname ) type varname;
#define TracyLockableN( type, varname, desc ) type varname;
#define TracySharedLockable( type, varname ) type varname;
#define TracySharedLockableN( type, varname, desc ) type varname;
#define LockableBase( type ) type
#define SharedLockableBase( type ) type
// Still evaluates the argument so the variable does not become unused.
#define LockMark(x) (void)x;

// ---- Plots and messages (disabled) ----
#define TracyPlot(x,y)

#define TracyMessage(x,y)
#define TracyMessageL(x)

#else

#include "client/TracyLock.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyScoped.hpp"

// ---- Zones ----
// Each variant declares a function-local static SourceLocation and a ScopedZone bound to it.
#define ZoneScoped \
    static const tracy::SourceLocation __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; \
    tracy::ScopedZone ___tracy_scoped_zone( &__tracy_source_location );
#define ZoneScopedN( name ) \
    static const tracy::SourceLocation __tracy_source_location { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; \
    tracy::ScopedZone ___tracy_scoped_zone( &__tracy_source_location );
#define ZoneScopedC( color ) \
    static const tracy::SourceLocation __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; \
    tracy::ScopedZone ___tracy_scoped_zone( &__tracy_source_location );
#define ZoneScopedNC( name, color ) \
    static const tracy::SourceLocation __tracy_source_location { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; \
    tracy::ScopedZone ___tracy_scoped_zone( &__tracy_source_location );

// Requires a ZoneScoped* macro earlier in the same scope (uses its ___tracy_scoped_zone variable).
#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size );

// ---- Frames ----
#define FrameMark tracy::Profiler::FrameMark();

// ---- Locks ----
// The immediately-invoked lambda gives every lock its own static SourceLocation.
#define TracyLockable( type, varname ) \
    tracy::Lockable<type> varname { \
        [] () -> const tracy::SourceLocation* { \
            static const tracy::SourceLocation srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; \
            return &srcloc; \
        }() \
    };
#define TracyLockableN( type, varname, desc ) \
    tracy::Lockable<type> varname { \
        [] () -> const tracy::SourceLocation* { \
            static const tracy::SourceLocation srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; \
            return &srcloc; \
        }() \
    };
#define TracySharedLockable( type, varname ) \
    tracy::SharedLockable<type> varname { \
        [] () -> const tracy::SourceLocation* { \
            static const tracy::SourceLocation srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; \
            return &srcloc; \
        }() \
    };
#define TracySharedLockableN( type, varname, desc ) \
    tracy::SharedLockable<type> varname { \
        [] () -> const tracy::SourceLocation* { \
            static const tracy::SourceLocation srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; \
            return &srcloc; \
        }() \
    };
#define LockableBase( type ) tracy::Lockable<type>
#define SharedLockableBase( type ) tracy::SharedLockable<type>
#define LockMark( varname ) \
    static const tracy::SourceLocation __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; \
    varname.Mark( &__tracy_lock_location_##varname );

// ---- Plots and messages ----
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val );

#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size );
#define TracyMessageL( txt ) tracy::Profiler::Message( txt );

#endif

#endif
|
||||
25
TracyClient.cpp
Normal file
@@ -0,0 +1,25 @@
|
||||
//
|
||||
// Tracy profiler
|
||||
// ----------------
|
||||
//
|
||||
// For fast integration, compile and
|
||||
// link with this source file (and none
|
||||
// other).
|
||||
//
|
||||
|
||||
// Define TRACY_ENABLE to enable profiler.
|
||||
|
||||
#include "common/TracySystem.cpp"
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
|
||||
#include "client/TracyProfiler.cpp"
|
||||
#include "common/tracy_lz4.cpp"
|
||||
#include "common/TracySocket.cpp"
|
||||
#include "client/tracy_rpmalloc.cpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma comment(lib, "ws2_32.lib")
|
||||
#endif
|
||||
|
||||
#endif
|
||||
275
TracyLua.hpp
Normal file
@@ -0,0 +1,275 @@
|
||||
#ifndef __TRACYLUA_HPP__
|
||||
#define __TRACYLUA_HPP__
|
||||
|
||||
// Include this file after you include lua headers.
|
||||
|
||||
#ifndef TRACY_ENABLE
|
||||
|
||||
#include <string.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
static inline int noop( lua_State* L ) { return 0; }
|
||||
}
|
||||
|
||||
// Installs a global Lua table named "tracy" whose entries are all bound to
// detail::noop, so instrumented scripts still run when profiling is disabled.
static inline void LuaRegister( lua_State* L )
{
    const char* const entryPoints[] = { "ZoneBegin", "ZoneBeginN", "ZoneEnd", "ZoneText", "Message" };

    lua_newtable( L );
    for( const char* fieldName : entryPoints )
    {
        lua_pushcfunction( L, detail::noop );
        // -2: the table sits just below the function that was pushed above.
        lua_setfield( L, -2, fieldName );
    }
    lua_setglobal( L, "tracy" );
}
|
||||
|
||||
// Scans forward from `ptr`, which must point just past an already-consumed '(',
// and returns the position one past the matching ')'. Nested parentheses are
// tracked with a depth counter.
// If the string ends before the parenthesis is closed, the scan stops there and
// the returned pointer addresses the terminating '\0' instead of running past
// the end of the buffer.
static inline char* FindEnd( char* ptr )
{
    unsigned int depth = 1;
    while( depth != 0 && *ptr != '\0' )
    {
        if( *ptr == '(' ) depth++;
        else if( *ptr == ')' ) depth--;
        ptr++;
    }
    return ptr;
}
|
||||
|
||||
static inline void LuaRemove( char* script )
|
||||
{
|
||||
while( *script )
|
||||
{
|
||||
if( strncmp( script, "tracy.", 6 ) == 0 )
|
||||
{
|
||||
if( strncmp( script + 6, "Zone", 4 ) == 0 )
|
||||
{
|
||||
if( strncmp( script + 10, "End()", 5 ) == 0 )
|
||||
{
|
||||
memset( script, ' ', 15 );
|
||||
script += 15;
|
||||
}
|
||||
else if( strncmp( script + 10, "Begin()", 7 ) == 0 )
|
||||
{
|
||||
memset( script, ' ', 17 );
|
||||
script += 17;
|
||||
}
|
||||
else if( strncmp( script + 10, "Text(", 5 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 15 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else if( strncmp( script + 10, "BeginN(", 7 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 17 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else
|
||||
{
|
||||
script += 10;
|
||||
}
|
||||
}
|
||||
else if( strncmp( script + 6, "Message(", 8 ) == 0 )
|
||||
{
|
||||
auto end = FindEnd( script + 14 );
|
||||
memset( script, ' ', end - script );
|
||||
script = end;
|
||||
}
|
||||
else
|
||||
{
|
||||
script += 6;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
script++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include "common/TracyColor.hpp"
|
||||
#include "common/TracySystem.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// Lua binding for tracy.ZoneBegin(): builds a heap-allocated source location
// record from the Lua debug info at stack level 1 and queues a
// ZoneBeginAllocSrcLoc event that points at it.
static inline int LuaZoneBegin( lua_State* L )
{
    lua_Debug ar;
    lua_getstack( L, 1, &ar );
    lua_getinfo( L, "Snl", &ar );

    // Fall back to the short source name when the function has no name.
    const char* funcName = ar.short_src;
    if( ar.name ) funcName = ar.name;
    const auto funcLen = strlen( funcName );
    const auto srcLen = strlen( ar.source );

    const uint32_t zoneColor = Color::DeepSkyBlue3;
    const uint32_t srcLine = ar.currentline;

    // Record layout:
    //   4 bytes   total record size
    //   4 bytes   color
    //   4 bytes   source line
    //   funcLen   function name, followed by a NUL byte
    //   srcLen    source file name, followed by a NUL byte
    const uint32_t total = 4 + 4 + 4 + funcLen + 1 + srcLen + 1;
    auto buf = (char*)tracy_malloc( total );
    auto out = buf;
    memcpy( out, &total, 4 );               out += 4;
    memcpy( out, &zoneColor, 4 );           out += 4;
    memcpy( out, &srcLine, 4 );             out += 4;
    memcpy( out, funcName, funcLen + 1 );   out += funcLen + 1;
    memcpy( out, ar.source, srcLen + 1 );

    Magic magic;
    auto& producer = s_token.ptr;
    auto& tailIdx = producer->get_tail_index();
    auto ev = producer->enqueue_begin<moodycamel::CanAlloc>( magic );
    ev->hdr.type = QueueType::ZoneBeginAllocSrcLoc;
    ev->zoneBegin.time = Profiler::GetTime( ev->zoneBegin.cpu );
    ev->zoneBegin.thread = GetThreadHandle();
    ev->zoneBegin.srcloc = (uint64_t)buf;
    // The release store of the new tail index is the last step, after every field is written.
    tailIdx.store( magic + 1, std::memory_order_release );
    return 0;
}
|
||||
|
||||
// Lua binding for tracy.ZoneBeginN( name ): same as LuaZoneBegin, but the
// record additionally carries the zone name taken from Lua argument 1.
static inline int LuaZoneBeginN( lua_State* L )
{
    lua_Debug ar;
    lua_getstack( L, 1, &ar );
    lua_getinfo( L, "Snl", &ar );

    // Fall back to the short source name when the function has no name.
    const char* funcName = ar.short_src;
    if( ar.name ) funcName = ar.name;

    size_t nameLen;
    const auto zoneName = lua_tolstring( L, 1, &nameLen );
    const auto funcLen = strlen( funcName );
    const auto srcLen = strlen( ar.source );

    const uint32_t zoneColor = Color::DeepSkyBlue3;
    const uint32_t srcLine = ar.currentline;

    // Record layout:
    //   4 bytes   total record size
    //   4 bytes   color
    //   4 bytes   source line
    //   funcLen   function name, followed by a NUL byte
    //   srcLen    source file name, followed by a NUL byte
    //   nameLen   zone name (no terminator is stored)
    const uint32_t total = 4 + 4 + 4 + funcLen + 1 + srcLen + 1 + nameLen;
    auto buf = (char*)tracy_malloc( total );
    auto out = buf;
    memcpy( out, &total, 4 );               out += 4;
    memcpy( out, &zoneColor, 4 );           out += 4;
    memcpy( out, &srcLine, 4 );             out += 4;
    memcpy( out, funcName, funcLen + 1 );   out += funcLen + 1;
    memcpy( out, ar.source, srcLen + 1 );   out += srcLen + 1;
    memcpy( out, zoneName, nameLen );

    Magic magic;
    auto& producer = s_token.ptr;
    auto& tailIdx = producer->get_tail_index();
    auto ev = producer->enqueue_begin<moodycamel::CanAlloc>( magic );
    ev->hdr.type = QueueType::ZoneBeginAllocSrcLoc;
    ev->zoneBegin.time = Profiler::GetTime( ev->zoneBegin.cpu );
    ev->zoneBegin.thread = GetThreadHandle();
    ev->zoneBegin.srcloc = (uint64_t)buf;
    // The release store of the new tail index is the last step, after every field is written.
    tailIdx.store( magic + 1, std::memory_order_release );
    return 0;
}
|
||||
|
||||
// Lua binding for tracy.ZoneEnd(): queues a ZoneEnd event carrying the value of
// Profiler::GetTime and the calling thread's handle. Pushes nothing to Lua.
static inline int LuaZoneEnd( lua_State* L )
{
    Magic magic;
    auto& producer = s_token.ptr;
    auto& tailIdx = producer->get_tail_index();

    auto ev = producer->enqueue_begin<moodycamel::CanAlloc>( magic );
    ev->hdr.type = QueueType::ZoneEnd;
    ev->zoneEnd.time = Profiler::GetTime( ev->zoneEnd.cpu );
    ev->zoneEnd.thread = GetThreadHandle();

    // The release store of the new tail index is the last step, after every field is written.
    tailIdx.store( magic + 1, std::memory_order_release );
    return 0;
}
|
||||
|
||||
// Lua binding for tracy.ZoneText( text ): copies the text into a tracy_malloc'd,
// NUL-terminated buffer and queues a ZoneText event that points at the copy.
// A call whose first argument cannot be converted to a string is ignored.
static inline int LuaZoneText( lua_State* L )
{
    auto txt = lua_tostring( L, 1 );
    // lua_tostring returns NULL for values that are neither strings nor numbers
    // (nil, tables, a missing argument, ...); strlen( NULL ) would be undefined.
    if( !txt ) return 0;
    const auto size = strlen( txt );

    Magic magic;
    auto& token = s_token.ptr;
    auto ptr = (char*)tracy_malloc( size+1 );
    memcpy( ptr, txt, size );
    ptr[size] = '\0';
    auto& tail = token->get_tail_index();
    auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
    item->hdr.type = QueueType::ZoneText;
    item->zoneText.thread = GetThreadHandle();
    item->zoneText.text = (uint64_t)ptr;
    // The release store of the new tail index is the last step, after every field is written.
    tail.store( magic + 1, std::memory_order_release );
    return 0;
}
|
||||
|
||||
// Lua binding for tracy.Message( text ): copies the text into a tracy_malloc'd,
// NUL-terminated buffer and queues a Message event with the value of
// Profiler::GetTime and the calling thread's handle.
// A call whose first argument cannot be converted to a string is ignored.
static inline int LuaMessage( lua_State* L )
{
    auto txt = lua_tostring( L, 1 );
    // lua_tostring returns NULL for values that are neither strings nor numbers
    // (nil, tables, a missing argument, ...); strlen( NULL ) would be undefined.
    if( !txt ) return 0;
    const auto size = strlen( txt );

    Magic magic;
    auto& token = s_token.ptr;
    auto ptr = (char*)tracy_malloc( size+1 );
    memcpy( ptr, txt, size );
    ptr[size] = '\0';
    auto& tail = token->get_tail_index();
    auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
    item->hdr.type = QueueType::Message;
    item->message.time = Profiler::GetTime();
    item->message.thread = GetThreadHandle();
    item->message.text = (uint64_t)ptr;
    // The release store of the new tail index is the last step, after every field is written.
    tail.store( magic + 1, std::memory_order_release );
    return 0;
}
|
||||
|
||||
}
|
||||
|
||||
// Installs a global Lua table named "tracy" that maps each instrumentation
// entry point to its C implementation in namespace detail.
static inline void LuaRegister( lua_State* L )
{
    struct Binding
    {
        const char* name;
        int (*fn)( lua_State* );
    };
    const Binding bindings[] = {
        { "ZoneBegin",  detail::LuaZoneBegin },
        { "ZoneBeginN", detail::LuaZoneBeginN },
        { "ZoneEnd",    detail::LuaZoneEnd },
        { "ZoneText",   detail::LuaZoneText },
        { "Message",    detail::LuaMessage },
    };

    lua_newtable( L );
    for( const auto& binding : bindings )
    {
        lua_pushcfunction( L, binding.fn );
        // -2: the table sits just below the function that was pushed above.
        lua_setfield( L, -2, binding.name );
    }
    lua_setglobal( L, "tracy" );
}
|
||||
|
||||
// TRACY_ENABLE variant: the script is left untouched.
static inline void LuaRemove( char* /*script*/ )
{
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
181
TracyOpenGL.hpp
Normal file
@@ -0,0 +1,181 @@
|
||||
#ifndef __TRACYOPENGL_HPP__
|
||||
#define __TRACYOPENGL_HPP__
|
||||
|
||||
// Include this file after you include OpenGL 3.2 headers.
|
||||
|
||||
#if !defined TRACY_ENABLE || defined __APPLE__
|
||||
|
||||
#define TracyGpuContext
|
||||
#define TracyGpuZone(x)
|
||||
#define TracyGpuZoneC(x,y)
|
||||
#define TracyGpuCollect
|
||||
|
||||
#else
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "Tracy.hpp"
|
||||
#include "client/TracyProfiler.hpp"
|
||||
#include "common/TracyAlloc.hpp"
|
||||
|
||||
// Allocates storage for the thread-local GPU context with tracy_malloc and
// constructs a GpuCtx in it with placement new.
#define TracyGpuContext \
    tracy::s_gpuCtx.ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); \
    new(tracy::s_gpuCtx.ptr) tracy::GpuCtx;
// Declares a function-local static SourceLocation and a GpuCtxScope bound to it.
#define TracyGpuZone( name ) \
    static const tracy::SourceLocation __tracy_gpu_source_location { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; \
    tracy::GpuCtxScope ___tracy_gpu_zone( &__tracy_gpu_source_location );
#define TracyGpuZoneC( name, color ) \
    static const tracy::SourceLocation __tracy_gpu_source_location { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; \
    tracy::GpuCtxScope ___tracy_gpu_zone( &__tracy_gpu_source_location );
// Reads back finished timestamp queries on the thread-local GPU context.
#define TracyGpuCollect tracy::s_gpuCtx.ptr->Collect();
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
extern std::atomic<uint16_t> s_gpuCtxCounter;
|
||||
|
||||
class GpuCtx
|
||||
{
|
||||
friend class GpuCtxScope;
|
||||
|
||||
enum { QueryCount = 64 * 1024 };
|
||||
|
||||
public:
|
||||
GpuCtx()
|
||||
: m_context( s_gpuCtxCounter.fetch_add( 1, std::memory_order_relaxed ) )
|
||||
, m_head( 0 )
|
||||
, m_tail( 0 )
|
||||
{
|
||||
glGenQueries( QueryCount, m_query );
|
||||
|
||||
int64_t tgpu;
|
||||
glGetInteger64v( GL_TIMESTAMP, &tgpu );
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
|
||||
GLint bits;
|
||||
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::GpuNewContext;
|
||||
item->gpuNewContext.cpuTime = tcpu;
|
||||
item->gpuNewContext.gpuTime = tgpu;
|
||||
item->gpuNewContext.thread = GetThreadHandle();
|
||||
item->gpuNewContext.context = m_context;
|
||||
item->gpuNewContext.accuracyBits = bits;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
void Collect()
|
||||
{
|
||||
ZoneScopedC( Color::Red4 );
|
||||
|
||||
auto start = m_tail;
|
||||
auto end = m_head + QueryCount;
|
||||
auto cnt = ( end - start ) % QueryCount;
|
||||
while( cnt > 1 )
|
||||
{
|
||||
auto mid = start + cnt / 2;
|
||||
GLint available;
|
||||
glGetQueryObjectiv( m_query[mid % QueryCount], GL_QUERY_RESULT_AVAILABLE, &available );
|
||||
if( available )
|
||||
{
|
||||
start = mid;
|
||||
}
|
||||
else
|
||||
{
|
||||
end = mid;
|
||||
}
|
||||
cnt = ( end - start ) % QueryCount;
|
||||
}
|
||||
|
||||
start %= QueryCount;
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
|
||||
while( m_tail != start )
|
||||
{
|
||||
uint64_t time;
|
||||
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
|
||||
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::GpuTime;
|
||||
item->gpuTime.gpuTime = (int64_t)time;
|
||||
item->gpuTime.context = m_context;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
m_tail = ( m_tail + 1 ) % QueryCount;
|
||||
}
|
||||
|
||||
{
|
||||
int64_t tgpu;
|
||||
glGetInteger64v( GL_TIMESTAMP, &tgpu );
|
||||
int64_t tcpu = Profiler::GetTime();
|
||||
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::GpuResync;
|
||||
item->gpuResync.cpuTime = tcpu;
|
||||
item->gpuResync.gpuTime = tgpu;
|
||||
item->gpuResync.context = m_context;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
tracy_force_inline unsigned int NextQueryId()
|
||||
{
|
||||
const auto id = m_head;
|
||||
m_head = ( m_head + 1 ) % QueryCount;
|
||||
assert( m_head != m_tail );
|
||||
return m_query[id];
|
||||
}
|
||||
|
||||
tracy_force_inline uint16_t GetId() const
|
||||
{
|
||||
return m_context;
|
||||
}
|
||||
|
||||
unsigned int m_query[QueryCount];
|
||||
uint16_t m_context;
|
||||
|
||||
unsigned int m_head;
|
||||
unsigned int m_tail;
|
||||
};
|
||||
|
||||
extern thread_local GpuCtxWrapper s_gpuCtx;
|
||||
|
||||
class GpuCtxScope
|
||||
{
|
||||
public:
|
||||
tracy_force_inline GpuCtxScope( const SourceLocation* srcloc )
|
||||
{
|
||||
glQueryCounter( s_gpuCtx.ptr->NextQueryId(), GL_TIMESTAMP );
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::GpuZoneBegin;
|
||||
item->gpuZoneBegin.cpuTime = Profiler::GetTime();
|
||||
item->gpuZoneBegin.srcloc = (uint64_t)srcloc;
|
||||
item->gpuZoneBegin.context = s_gpuCtx.ptr->GetId();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
tracy_force_inline ~GpuCtxScope()
|
||||
{
|
||||
glQueryCounter( s_gpuCtx.ptr->NextQueryId(), GL_TIMESTAMP );
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::GpuZoneEnd;
|
||||
item->gpuZoneEnd.cpuTime = Profiler::GetTime();
|
||||
item->gpuZoneEnd.context = s_gpuCtx.ptr->GetId();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,28 +0,0 @@
|
||||
# Build script for the tracy-capture command-line executable.
cmake_minimum_required(VERSION 3.16)

# User-selectable build switches.
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
option(NO_STATISTICS "Disable calculation of statistics" ON)

# Included ahead of project() because project() reads TRACY_VERSION_STRING
# (presumably defined by version.cmake -- confirm).
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)

set(CMAKE_CXX_STANDARD 20)

project(tracy-capture LANGUAGES C CXX VERSION ${TRACY_VERSION_STRING})

# Shared build fragments from the repository-level cmake directory.
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)

# Sources owned by this tool; COMMON_FILES and SERVER_FILES are not set in this file.
set(PROGRAM_FILES src/capture.cpp)

add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})

install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
12
capture/build/unix/Makefile
Normal file
@@ -0,0 +1,12 @@
|
||||
# Front-end makefile: each target delegates to a configuration-specific makefile.
# Sub-makes are started through $(MAKE) rather than a hard-coded `make`, so the
# same make program that is running this file (and its flags) is reused.

# The default goal builds the debug configuration.
all: debug

debug:
	@+$(MAKE) -f debug.mk all

release:
	@+$(MAKE) -f release.mk all

clean:
	@+$(MAKE) -f build.mk clean

.PHONY: all clean debug release
|
||||
51
capture/build/unix/build.mk
Normal file
@@ -0,0 +1,51 @@
|
||||
# Shared build rules for the capture tool. debug.mk and release.mk set CFLAGS
# and DEFINES and then include this file.
CFLAGS +=
CXXFLAGS := $(CFLAGS) -std=gnu++14
DEFINES +=
INCLUDES :=
LIBS := -lpthread
IMAGE := capture

# Source paths listed here are removed from the build.
FILTER :=

# The source lists are extracted from the Visual Studio project: select the
# ClCompile entries, keep the quoted path, and turn backslashes into slashes.
# `grep -E` is used instead of the obsolescent `egrep`, which current GNU grep
# releases warn about.
BASE := $(shell grep -E 'ClCompile.*cpp"' ../win32/$(IMAGE).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
BASE2 := $(shell grep -E 'ClCompile.*c"' ../win32/$(IMAGE).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')

SRC := $(filter-out $(FILTER),$(BASE))
SRC2 := $(filter-out $(FILTER),$(BASE2))

OBJ := $(SRC:%.cpp=%.o)
OBJ2 := $(SRC2:%.c=%.o)

all: $(IMAGE)

%.o: %.cpp
	$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@

# Generate a .d dependency file per C++ source; the sed step rewrites the
# compiler's rule so that both the object and the .d file depend on the headers.
%.d : %.cpp
	@echo Resolving dependencies of $<
	@mkdir -p $(@D)
	@$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
	sed 's,.*\.o[ :]*,$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
	rm -f $@.$$$$

%.o: %.c
	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@

# Same dependency generation for C sources.
%.d : %.c
	@echo Resolving dependencies of $<
	@mkdir -p $(@D)
	@$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
	sed 's,.*\.o[ :]*,$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
	rm -f $@.$$$$

$(IMAGE): $(OBJ) $(OBJ2)
	$(CXX) $(CXXFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(LIBS) -o $@

# Skip dependency files when the only goal is `clean`, so cleaning does not
# regenerate them first.
ifneq "$(MAKECMDGOALS)" "clean"
-include $(SRC:.cpp=.d) $(SRC2:.c=.d)
endif

clean:
	rm -f $(OBJ) $(OBJ2) $(SRC:.cpp=.d) $(SRC2:.c=.d) $(IMAGE)

.PHONY: clean all
|
||||
10
capture/build/unix/debug.mk
Normal file
@@ -0,0 +1,10 @@
|
||||
# Debug configuration: debug information and warnings enabled, DEBUG defined.
# The actual build rules come from build.mk.
ARCH := $(shell uname -m)

DEFINES := -DDEBUG
CFLAGS := -g3 -Wall

# On x86_64 hosts additionally pass -msse4.1.
ifeq "$(ARCH)" "x86_64"
CFLAGS += -msse4.1
endif

include build.mk
|
||||
10
capture/build/unix/release.mk
Normal file
@@ -0,0 +1,10 @@
|
||||
# Release configuration: optimisation flags set, NDEBUG defined.
# The actual build rules come from build.mk.
ARCH := $(shell uname -m)

DEFINES := -DNDEBUG
CFLAGS := -O3 -s -fomit-frame-pointer

# On x86_64 hosts additionally pass -msse4.1.
ifeq "$(ARCH)" "x86_64"
CFLAGS += -msse4.1
endif

include build.mk
|
||||
25
capture/build/win32/capture.sln
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.27428.2002
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "capture", "capture.vcxproj", "{447D58BF-94CD-4469-BB90-549C05D03E00}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Debug|x64.Build.0 = Debug|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.ActiveCfg = Release|x64
|
||||
{447D58BF-94CD-4469-BB90-549C05D03E00}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {3E51386C-43EA-44AC-9F24-AFAFE4D63ADE}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
163
capture/build/win32/capture.vcxproj
Normal file
@@ -0,0 +1,163 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>15.0</VCProjectVersion>
|
||||
<ProjectGuid>{447D58BF-94CD-4469-BB90-549C05D03E00}</ProjectGuid>
|
||||
<RootNamespace>capture</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<PreprocessorDefinitions>_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
<PreprocessorDefinitions>NDEBUG;_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;WIN32_LEAN_AND_MEAN;NOMINMAX;_USE_MATH_DEFINES;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<SubSystem>Console</SubSystem>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\TracySocket.cpp" />
|
||||
<ClCompile Include="..\..\..\common\TracySystem.cpp" />
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyMemory.cpp" />
|
||||
<ClCompile Include="..\..\..\server\TracyWorker.cpp" />
|
||||
<ClCompile Include="..\..\src\capture.cpp" />
|
||||
<ClCompile Include="..\..\src\getopt.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\TracyAlloc.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyColor.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyForceInline.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyProtocol.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySocket.hpp" />
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp" />
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyEvent.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyMemory.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracySlab.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyVector.hpp" />
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp" />
|
||||
<ClInclude Include="..\..\..\server\tracy_benaphore.h" />
|
||||
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp" />
|
||||
<ClInclude Include="..\..\..\server\tracy_sema.h" />
|
||||
<ClInclude Include="..\..\src\getopt.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
99
capture/build/win32/capture.vcxproj.filters
Normal file
@@ -0,0 +1,99 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="src">
|
||||
<UniqueIdentifier>{729c80ee-4d26-4a5e-8f1f-6c075783eb56}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="server">
|
||||
<UniqueIdentifier>{cf23ef7b-7694-4154-830b-00cf053350ea}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="common">
|
||||
<UniqueIdentifier>{e39d3623-47cd-4752-8da9-3ea324f964c1}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\common\tracy_lz4.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\TracySocket.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\common\TracySystem.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyMemory.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\..\server\TracyWorker.cpp">
|
||||
<Filter>server</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\src\capture.cpp">
|
||||
<Filter>src</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\src\getopt.c">
|
||||
<Filter>src</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\..\common\tracy_lz4.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyAlloc.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyColor.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyForceInline.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyProtocol.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracyQueue.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracySocket.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\common\TracySystem.hpp">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\tracy_benaphore.h">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\tracy_flat_hash_map.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\tracy_sema.h">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyEvent.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyFileWrite.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyMemory.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyPopcnt.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracySlab.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyVector.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\..\server\TracyWorker.hpp">
|
||||
<Filter>server</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\src\getopt.h">
|
||||
<Filter>src</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -1,98 +1,105 @@
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
# include <io.h>
|
||||
#else
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <inttypes.h>
|
||||
#include <mutex>
|
||||
#include <signal.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "../../public/common/TracyProtocol.hpp"
|
||||
#include "../../public/common/TracyStackFrames.hpp"
|
||||
#include "../../server/tracy_benaphore.h"
|
||||
#include "../../server/TracyFileWrite.hpp"
|
||||
#include "../../server/TracyMemory.hpp"
|
||||
#include "../../server/TracyPrint.hpp"
|
||||
#include "../../server/TracySysUtil.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
#include "getopt.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
# include "../../getopt/getopt.h"
|
||||
#endif
|
||||
|
||||
|
||||
// This atomic is written by a signal handler (SigInt). Traditionally that would
|
||||
// have had to be `volatile sig_atomic_t`, and annoyingly, `bool` was
|
||||
// technically not allowed there, even though in practice it would work.
|
||||
// The good thing with C++11 atomics is that we can use atomic<bool> instead
|
||||
// here and be on the actually supported path.
|
||||
static std::atomic<bool> s_disconnect { false };
|
||||
|
||||
void SigInt( int )
|
||||
static const char* TimeToString( int64_t ns )
|
||||
{
|
||||
// Relaxed order is closest to a traditional `volatile` write.
|
||||
// We don't need stronger ordering since this signal handler doesn't do
|
||||
// anything else that would need to be ordered relatively to this.
|
||||
s_disconnect.store(true, std::memory_order_relaxed);
|
||||
}
|
||||
enum { Pool = 8 };
|
||||
static char bufpool[Pool][64];
|
||||
static int bufsel = 0;
|
||||
char* buf = bufpool[bufsel];
|
||||
bufsel = ( bufsel + 1 ) % Pool;
|
||||
|
||||
static bool s_isStdoutATerminal = false;
|
||||
|
||||
void InitIsStdoutATerminal() {
|
||||
#ifdef _WIN32
|
||||
s_isStdoutATerminal = _isatty( fileno( stdout ) );
|
||||
#else
|
||||
s_isStdoutATerminal = isatty( fileno( stdout ) );
|
||||
#endif
|
||||
}
|
||||
|
||||
bool IsStdoutATerminal() { return s_isStdoutATerminal; }
|
||||
|
||||
#define ANSI_RESET "\033[0m"
|
||||
#define ANSI_BOLD "\033[1m"
|
||||
#define ANSI_BLACK "\033[30m"
|
||||
#define ANSI_RED "\033[31m"
|
||||
#define ANSI_GREEN "\033[32m"
|
||||
#define ANSI_YELLOW "\033[33m"
|
||||
#define ANSI_BLUE "\033[34m"
|
||||
#define ANSI_MAGENTA "\033[35m"
|
||||
#define ANSI_CYAN "\033[36m"
|
||||
#define ANSI_ERASE_LINE "\033[2K"
|
||||
|
||||
// Like printf, but if stdout is a terminal, prepends the output with
|
||||
// the given `ansiEscape` and appends ANSI_RESET.
|
||||
void AnsiPrintf( const char* ansiEscape, const char* format, ... ) {
|
||||
if( IsStdoutATerminal() )
|
||||
const char* sign = "";
|
||||
if( ns < 0 )
|
||||
{
|
||||
// Prepend ansiEscape and append ANSI_RESET.
|
||||
char buf[256];
|
||||
va_list args;
|
||||
va_start( args, format );
|
||||
vsnprintf( buf, sizeof buf, format, args );
|
||||
va_end( args );
|
||||
printf( "%s%s" ANSI_RESET, ansiEscape, buf );
|
||||
sign = "-";
|
||||
ns = -ns;
|
||||
}
|
||||
|
||||
if( ns < 1000 )
|
||||
{
|
||||
sprintf( buf, "%s%" PRIi64 " ns", sign, ns );
|
||||
}
|
||||
else if( ns < 1000ll * 1000 )
|
||||
{
|
||||
sprintf( buf, "%s%.2f us", sign, ns / 1000. );
|
||||
}
|
||||
else if( ns < 1000ll * 1000 * 1000 )
|
||||
{
|
||||
sprintf( buf, "%s%.2f ms", sign, ns / ( 1000. * 1000. ) );
|
||||
}
|
||||
else if( ns < 1000ll * 1000 * 1000 * 60 )
|
||||
{
|
||||
sprintf( buf, "%s%.2f s", sign, ns / ( 1000. * 1000. * 1000. ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Just a normal printf.
|
||||
va_list args;
|
||||
va_start( args, format );
|
||||
vfprintf( stdout, format, args );
|
||||
va_end( args );
|
||||
const auto m = int64_t( ns / ( 1000ll * 1000 * 1000 * 60 ) );
|
||||
const auto s = int64_t( ns - m * ( 1000ll * 1000 * 1000 * 60 ) );
|
||||
sprintf( buf, "%s%" PRIi64 ":%04.1f", sign, m, s / ( 1000. * 1000. * 1000. ) );
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
[[noreturn]] void Usage()
|
||||
static const char* RealToString( double val, bool separator )
|
||||
{
|
||||
printf( "Usage: capture -o output.tracy [-a address] [-p port] [-f] [-s seconds] [-m memlimit]\n" );
|
||||
enum { Pool = 8 };
|
||||
static char bufpool[Pool][64];
|
||||
static int bufsel = 0;
|
||||
char* buf = bufpool[bufsel];
|
||||
bufsel = ( bufsel + 1 ) % Pool;
|
||||
|
||||
sprintf( buf, "%f", val );
|
||||
auto ptr = buf;
|
||||
if( *ptr == '-' ) ptr++;
|
||||
|
||||
const auto vbegin = ptr;
|
||||
|
||||
if( separator )
|
||||
{
|
||||
while( *ptr != '\0' && *ptr != ',' && *ptr != '.' ) ptr++;
|
||||
auto end = ptr;
|
||||
while( *end != '\0' ) end++;
|
||||
auto sz = end - ptr;
|
||||
|
||||
while( ptr - vbegin > 3 )
|
||||
{
|
||||
ptr -= 3;
|
||||
memmove( ptr+1, ptr, sz );
|
||||
*ptr = ',';
|
||||
sz += 4;
|
||||
}
|
||||
}
|
||||
|
||||
while( *ptr != '\0' && *ptr != ',' && *ptr != '.' ) ptr++;
|
||||
|
||||
if( *ptr == '\0' ) return buf;
|
||||
while( *ptr != '\0' ) ptr++;
|
||||
ptr--;
|
||||
while( *ptr == '0' && *ptr != ',' && *ptr != '.' ) ptr--;
|
||||
if( *ptr != '.' && *ptr != ',' ) ptr++;
|
||||
*ptr = '\0';
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
// Writes the command-line synopsis to stdout and terminates the process with
// exit status 1. Does not return to the caller.
void Usage()
{
    fputs( "Usage: capture -a address -o output.tracy\n", stdout );
    exit( 1 );
}
|
||||
|
||||
@@ -106,17 +113,11 @@ int main( int argc, char** argv )
|
||||
}
|
||||
#endif
|
||||
|
||||
InitIsStdoutATerminal();
|
||||
|
||||
bool overwrite = false;
|
||||
const char* address = "127.0.0.1";
|
||||
const char* address = nullptr;
|
||||
const char* output = nullptr;
|
||||
int port = 8086;
|
||||
int seconds = -1;
|
||||
int64_t memoryLimit = -1;
|
||||
|
||||
int c;
|
||||
while( ( c = getopt( argc, argv, "a:o:p:fs:m:" ) ) != -1 )
|
||||
while( ( c = getopt( argc, argv, "a:o:" ) ) != -1 )
|
||||
{
|
||||
switch( c )
|
||||
{
|
||||
@@ -126,18 +127,6 @@ int main( int argc, char** argv )
|
||||
case 'o':
|
||||
output = optarg;
|
||||
break;
|
||||
case 'p':
|
||||
port = atoi( optarg );
|
||||
break;
|
||||
case 'f':
|
||||
overwrite = true;
|
||||
break;
|
||||
case 's':
|
||||
seconds = atoi(optarg);
|
||||
break;
|
||||
case 'm':
|
||||
memoryLimit = std::clamp( atoll( optarg ), 1ll, 999ll ) * tracy::GetPhysicalMemorySize() / 100;
|
||||
break;
|
||||
default:
|
||||
Usage();
|
||||
break;
|
||||
@@ -146,218 +135,46 @@ int main( int argc, char** argv )
|
||||
|
||||
if( !address || !output ) Usage();
|
||||
|
||||
struct stat st;
|
||||
if( stat( output, &st ) == 0 && !overwrite )
|
||||
{
|
||||
printf( "Output file %s already exists! Use -f to force overwrite.\n", output );
|
||||
return 4;
|
||||
}
|
||||
|
||||
FILE* test = fopen( output, "wb" );
|
||||
if( !test )
|
||||
{
|
||||
printf( "Cannot open output file %s for writing!\n", output );
|
||||
return 5;
|
||||
}
|
||||
fclose( test );
|
||||
unlink( output );
|
||||
|
||||
printf( "Connecting to %s:%i...", address, port );
|
||||
printf( "Connecting to %s...", address );
|
||||
fflush( stdout );
|
||||
tracy::Worker worker( address, port, memoryLimit );
|
||||
while( !worker.HasData() )
|
||||
{
|
||||
const auto handshake = worker.GetHandshakeStatus();
|
||||
if( handshake == tracy::HandshakeProtocolMismatch )
|
||||
{
|
||||
printf( "\nThe client you are trying to connect to uses incompatible protocol version.\nMake sure you are using the same Tracy version on both client and server.\n" );
|
||||
return 1;
|
||||
}
|
||||
if( handshake == tracy::HandshakeNotAvailable )
|
||||
{
|
||||
// Shown when the handshake reports HandshakeNotAvailable.
printf( "\nThe client you are trying to connect to is no longer able to send profiling data,\nbecause another server was already connected to it.\nYou can do the following:\n\n  1. Restart the client application.\n  2. Rebuild the client application with on-demand mode enabled.\n" );
|
||||
return 2;
|
||||
}
|
||||
if( handshake == tracy::HandshakeDropped )
|
||||
{
|
||||
printf( "\nThe client you are trying to connect to has disconnected during the initial\nconnection handshake. Please check your network configuration.\n" );
|
||||
return 3;
|
||||
}
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
}
|
||||
printf( "\nQueue delay: %s\nTimer resolution: %s\n", tracy::TimeToString( worker.GetDelay() ), tracy::TimeToString( worker.GetResolution() ) );
|
||||
tracy::Worker worker( address );
|
||||
while( !worker.HasData() ) std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
printf( "\nQueue delay: %s\nTimer resolution: %s\n", TimeToString( worker.GetDelay() ), TimeToString( worker.GetResolution() ) );
|
||||
|
||||
#ifdef _WIN32
|
||||
signal( SIGINT, SigInt );
|
||||
#else
|
||||
struct sigaction sigint, oldsigint;
|
||||
memset( &sigint, 0, sizeof( sigint ) );
|
||||
sigint.sa_handler = SigInt;
|
||||
sigaction( SIGINT, &sigint, &oldsigint );
|
||||
#endif
|
||||
|
||||
const auto firstTime = worker.GetFirstTime();
|
||||
auto& lock = worker.GetMbpsDataLock();
|
||||
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
while( worker.IsConnected() )
|
||||
{
|
||||
// Relaxed order is sufficient here because `s_disconnect` is only ever
|
||||
// set by this thread or by the SigInt handler, and that handler does
|
||||
// nothing else than storing `s_disconnect`.
|
||||
if( s_disconnect.load( std::memory_order_relaxed ) )
|
||||
{
|
||||
worker.Disconnect();
|
||||
// Relaxed order is sufficient because only this thread ever reads
|
||||
// this value.
|
||||
s_disconnect.store(false, std::memory_order_relaxed );
|
||||
break;
|
||||
}
|
||||
|
||||
lock.lock();
|
||||
const auto mbps = worker.GetMbpsData().back();
|
||||
const auto compRatio = worker.GetCompRatio();
|
||||
const auto netTotal = worker.GetDataTransferred();
|
||||
lock.unlock();
|
||||
|
||||
// Output progress info only if destination is a TTY to avoid bloating
|
||||
// log files (so this is not just about usage of ANSI color codes).
|
||||
if( IsStdoutATerminal() )
|
||||
if( mbps < 0.1f )
|
||||
{
|
||||
const char* unit = "Mbps";
|
||||
float unitsPerMbps = 1.f;
|
||||
if( mbps < 0.1f )
|
||||
{
|
||||
unit = "Kbps";
|
||||
unitsPerMbps = 1000.f;
|
||||
}
|
||||
AnsiPrintf( ANSI_ERASE_LINE ANSI_CYAN ANSI_BOLD, "\r%7.2f %s", mbps * unitsPerMbps, unit );
|
||||
printf( " /");
|
||||
AnsiPrintf( ANSI_CYAN ANSI_BOLD, "%5.1f%%", compRatio * 100.f );
|
||||
printf( " =");
|
||||
AnsiPrintf( ANSI_YELLOW ANSI_BOLD, "%7.2f Mbps", mbps / compRatio );
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_YELLOW, "Tx: ");
|
||||
AnsiPrintf( ANSI_GREEN, "%s", tracy::MemSizeToString( netTotal ) );
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, "%s", tracy::MemSizeToString( tracy::memUsage.load( std::memory_order_relaxed ) ) );
|
||||
if( memoryLimit > 0 )
|
||||
{
|
||||
printf( " / " );
|
||||
AnsiPrintf( ANSI_BLUE ANSI_BOLD, "%s", tracy::MemSizeToString( memoryLimit ) );
|
||||
}
|
||||
printf( " | ");
|
||||
AnsiPrintf( ANSI_RED, "%s", tracy::TimeToString( worker.GetLastTime() - firstTime ) );
|
||||
fflush( stdout );
|
||||
printf( "\33[2K\r\033[36;1m%7.2f Kbps", mbps * 1000.f );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\33[2K\r\033[36;1m%7.2f Mbps", mbps );
|
||||
}
|
||||
printf( " \033[0m| Ratio: \033[36;1m%5.1f%% \033[0m| Real: \033[33;1m%7.2f Mbps \033[0m| Mem: \033[31;1m%.2f MB\033[0m", compRatio * 100.f, mbps / compRatio, tracy::memUsage.load( std::memory_order_relaxed ) / ( 1024.f * 1024.f ) );
|
||||
fflush( stdout );
|
||||
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 100 ) );
|
||||
if( seconds != -1 )
|
||||
{
|
||||
const auto dur = std::chrono::high_resolution_clock::now() - t0;
|
||||
if( std::chrono::duration_cast<std::chrono::seconds>(dur).count() >= seconds )
|
||||
{
|
||||
// Relaxed order is sufficient because only this thread ever reads
|
||||
// this value.
|
||||
s_disconnect.store(true, std::memory_order_relaxed );
|
||||
}
|
||||
}
|
||||
}
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
const auto& failure = worker.GetFailureType();
|
||||
if( failure != tracy::Worker::Failure::None )
|
||||
{
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, "\nInstrumentation failure: %s", tracy::Worker::GetFailureString( failure ) );
|
||||
auto& fd = worker.GetFailureData();
|
||||
if( !fd.message.empty() )
|
||||
{
|
||||
printf( "\nContext: %s", fd.message.c_str() );
|
||||
}
|
||||
if( fd.callstack != 0 )
|
||||
{
|
||||
AnsiPrintf( ANSI_BOLD, "\nFailure callstack:\n" );
|
||||
auto& cs = worker.GetCallstack( fd.callstack );
|
||||
int fidx = 0;
|
||||
for( auto& entry : cs )
|
||||
{
|
||||
auto frameData = worker.GetCallstackFrame( entry );
|
||||
if( !frameData )
|
||||
{
|
||||
printf( "%3i. %p\n", fidx++, (void*)worker.GetCanonicalPointer( entry ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto fsz = frameData->size;
|
||||
for( uint8_t f=0; f<fsz; f++ )
|
||||
{
|
||||
const auto& frame = frameData->data[f];
|
||||
auto txt = worker.GetString( frame.name );
|
||||
|
||||
if( fidx == 0 && f != fsz-1 )
|
||||
{
|
||||
auto test = tracy::s_tracyStackFrames;
|
||||
bool match = false;
|
||||
do
|
||||
{
|
||||
if( strcmp( txt, *test ) == 0 )
|
||||
{
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
while( *++test );
|
||||
if( match ) continue;
|
||||
}
|
||||
|
||||
if( f == fsz-1 )
|
||||
{
|
||||
printf( "%3i. ", fidx++ );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_BLACK ANSI_BOLD, "inl. " );
|
||||
}
|
||||
AnsiPrintf( ANSI_CYAN, "%s ", txt );
|
||||
txt = worker.GetString( frame.file );
|
||||
if( frame.line == 0 )
|
||||
{
|
||||
AnsiPrintf( ANSI_YELLOW, "(%s)", txt );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_YELLOW, "(%s:%" PRIu32 ")", txt, frame.line );
|
||||
}
|
||||
if( frameData->imageName.Active() )
|
||||
{
|
||||
AnsiPrintf( ANSI_MAGENTA, " %s\n", worker.GetString( frameData->imageName ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
printf( "\n" );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nElapsed time: %s\nSaving trace...",
|
||||
worker.GetFrameCount( *worker.GetFramesBase() ), tracy::TimeToString( worker.GetLastTime() - firstTime ), tracy::RealToString( worker.GetZoneCount() ),
|
||||
tracy::TimeToString( std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count() ) );
|
||||
printf( "\nFrames: %" PRIu64 "\nTime span: %s\nZones: %s\nSaving trace...", worker.GetFrameCount(), TimeToString( worker.GetLastTime() - worker.GetFrameBegin( 0 ) ), RealToString( worker.GetZoneCount(), true ) );
|
||||
fflush( stdout );
|
||||
auto f = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, tracy::FileCompression::Zstd, 3, 4 ) );
|
||||
auto f = tracy::FileWrite::Open( output );
|
||||
if( f )
|
||||
{
|
||||
worker.Write( *f, false );
|
||||
AnsiPrintf( ANSI_GREEN ANSI_BOLD, " done!\n" );
|
||||
f->Finish();
|
||||
const auto stats = f->GetCompressionStatistics();
|
||||
printf( "Trace size %s (%.2f%% ratio)\n", tracy::MemSizeToString( stats.second ), 100.f * stats.second / stats.first );
|
||||
worker.Write( *f );
|
||||
printf( " \033[32;1mdone!\033[0m\n" );
|
||||
}
|
||||
else
|
||||
{
|
||||
AnsiPrintf( ANSI_RED ANSI_BOLD, " failed!\n");
|
||||
printf( " \033[31;1failed!\033[0m\n" );
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2012-2023, Kim Grasman <kim.grasman@gmail.com>
|
||||
* Copyright (c) 2012-2017, Kim Grasman <kim.grasman@gmail.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -159,7 +159,6 @@ int getopt_long(int argc, char* const argv[], const char* optstring,
|
||||
const struct option* match = NULL;
|
||||
int num_matches = 0;
|
||||
size_t argument_name_length = 0;
|
||||
size_t option_length = 0;
|
||||
const char* current_argument = NULL;
|
||||
int retval = -1;
|
||||
|
||||
@@ -176,16 +175,6 @@ int getopt_long(int argc, char* const argv[], const char* optstring,
|
||||
current_argument = argv[optind] + 2;
|
||||
argument_name_length = strcspn(current_argument, "=");
|
||||
for (; o->name; ++o) {
|
||||
/* Check for exact match first. */
|
||||
option_length = strlen(o->name);
|
||||
if (option_length == argument_name_length &&
|
||||
strncmp(o->name, current_argument, option_length) == 0) {
|
||||
match = o;
|
||||
num_matches = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
/* If not exact, count the number of abbreviated matches. */
|
||||
if (strncmp(o->name, current_argument, argument_name_length) == 0) {
|
||||
match = o;
|
||||
++num_matches;
|
||||
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2012-2023, Kim Grasman <kim.grasman@gmail.com>
|
||||
* Copyright (c) 2012-2017, Kim Grasman <kim.grasman@gmail.com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
293
client/TracyLock.hpp
Normal file
@@ -0,0 +1,293 @@
|
||||
#ifndef __TRACYLOCK_HPP__
|
||||
#define __TRACYLOCK_HPP__
|
||||
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
|
||||
#include "../common/TracySystem.hpp"
|
||||
#include "TracyProfiler.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
extern std::atomic<uint32_t> s_lockCounter;
|
||||
|
||||
template<class T>
|
||||
class Lockable
|
||||
{
|
||||
public:
|
||||
tracy_force_inline Lockable( const SourceLocation* srcloc )
|
||||
: m_id( s_lockCounter.fetch_add( 1, std::memory_order_relaxed ) )
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockAnnounce;
|
||||
item->lockAnnounce.id = m_id;
|
||||
item->lockAnnounce.lckloc = (uint64_t)srcloc;
|
||||
item->lockAnnounce.type = LockType::Lockable;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
Lockable( const Lockable& ) = delete;
|
||||
Lockable& operator=( const Lockable& ) = delete;
|
||||
|
||||
tracy_force_inline void lock()
|
||||
{
|
||||
const auto thread = GetThreadHandle();
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockWait;
|
||||
item->lockWait.id = m_id;
|
||||
item->lockWait.thread = thread;
|
||||
item->lockWait.time = Profiler::GetTime();
|
||||
item->lockWait.type = LockType::Lockable;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
m_lockable.lock();
|
||||
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockObtain;
|
||||
item->lockObtain.id = m_id;
|
||||
item->lockObtain.thread = thread;
|
||||
item->lockObtain.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void unlock()
|
||||
{
|
||||
m_lockable.unlock();
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockRelease;
|
||||
item->lockRelease.id = m_id;
|
||||
item->lockRelease.thread = GetThreadHandle();
|
||||
item->lockRelease.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
tracy_force_inline bool try_lock()
|
||||
{
|
||||
const auto ret = m_lockable.try_lock();
|
||||
if( ret )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockObtain;
|
||||
item->lockObtain.id = (uint64_t)&m_lockable;
|
||||
item->lockObtain.thread = GetThreadHandle();
|
||||
item->lockObtain.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
tracy_force_inline void Mark( const SourceLocation* srcloc ) const
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockMark;
|
||||
item->lockMark.id = m_id;
|
||||
item->lockMark.thread = GetThreadHandle();
|
||||
item->lockMark.srcloc = (uint64_t)srcloc;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
uint32_t m_id;
|
||||
};
|
||||
|
||||
|
||||
template<class T>
|
||||
class SharedLockable
|
||||
{
|
||||
public:
|
||||
tracy_force_inline SharedLockable( const SourceLocation* srcloc )
|
||||
: m_id( s_lockCounter.fetch_add( 1, std::memory_order_relaxed ) )
|
||||
{
|
||||
assert( m_id != std::numeric_limits<uint32_t>::max() );
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockAnnounce;
|
||||
item->lockAnnounce.id = m_id;
|
||||
item->lockAnnounce.lckloc = (uint64_t)srcloc;
|
||||
item->lockAnnounce.type = LockType::SharedLockable;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
SharedLockable( const SharedLockable& ) = delete;
|
||||
SharedLockable& operator=( const SharedLockable& ) = delete;
|
||||
|
||||
tracy_force_inline void lock()
|
||||
{
|
||||
const auto thread = GetThreadHandle();
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockWait;
|
||||
item->lockWait.id = m_id;
|
||||
item->lockWait.thread = thread;
|
||||
item->lockWait.time = Profiler::GetTime();
|
||||
item->lockWait.type = LockType::SharedLockable;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
m_lockable.lock();
|
||||
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockObtain;
|
||||
item->lockObtain.id = m_id;
|
||||
item->lockObtain.thread = thread;
|
||||
item->lockObtain.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void unlock()
|
||||
{
|
||||
m_lockable.unlock();
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockRelease;
|
||||
item->lockRelease.id = m_id;
|
||||
item->lockRelease.thread = GetThreadHandle();
|
||||
item->lockRelease.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
tracy_force_inline bool try_lock()
|
||||
{
|
||||
const auto ret = m_lockable.try_lock();
|
||||
if( ret )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockObtain;
|
||||
item->lockObtain.id = (uint64_t)&m_lockable;
|
||||
item->lockObtain.thread = GetThreadHandle();
|
||||
item->lockObtain.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
tracy_force_inline void lock_shared()
|
||||
{
|
||||
const auto thread = GetThreadHandle();
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockSharedWait;
|
||||
item->lockWait.id = m_id;
|
||||
item->lockWait.thread = thread;
|
||||
item->lockWait.time = Profiler::GetTime();
|
||||
item->lockWait.type = LockType::SharedLockable;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
m_lockable.lock_shared();
|
||||
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockSharedObtain;
|
||||
item->lockObtain.id = m_id;
|
||||
item->lockObtain.thread = thread;
|
||||
item->lockObtain.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
}
|
||||
|
||||
tracy_force_inline void unlock_shared()
|
||||
{
|
||||
m_lockable.unlock_shared();
|
||||
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockSharedRelease;
|
||||
item->lockRelease.id = m_id;
|
||||
item->lockRelease.thread = GetThreadHandle();
|
||||
item->lockRelease.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
tracy_force_inline bool try_lock_shared()
|
||||
{
|
||||
const auto ret = m_lockable.try_lock_shared();
|
||||
if( ret )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockSharedObtain;
|
||||
item->lockObtain.id = (uint64_t)&m_lockable;
|
||||
item->lockObtain.thread = GetThreadHandle();
|
||||
item->lockObtain.time = Profiler::GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
tracy_force_inline void Mark( const SourceLocation* srcloc ) const
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::LockMark;
|
||||
item->lockMark.id = m_id;
|
||||
item->lockMark.thread = GetThreadHandle();
|
||||
item->lockMark.srcloc = (uint64_t)srcloc;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
private:
|
||||
T m_lockable;
|
||||
uint32_t m_id;
|
||||
};
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
587
client/TracyProfiler.cpp
Normal file
@@ -0,0 +1,587 @@
|
||||
#ifdef TRACY_ENABLE
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <winsock2.h>
|
||||
# include <windows.h>
|
||||
#else
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#ifdef _GNU_SOURCE
|
||||
# include <errno.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <assert.h>
|
||||
#include <chrono>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../common/TracyProtocol.hpp"
|
||||
#include "../common/TracySocket.hpp"
|
||||
#include "../common/TracySystem.hpp"
|
||||
#include "tracy_rpmalloc.hpp"
|
||||
#include "TracyScoped.hpp"
|
||||
#include "TracyProfiler.hpp"
|
||||
#include "TracyThread.hpp"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define init_order( val ) __attribute__ ((init_priority(val)))
|
||||
#else
|
||||
#define init_order(x)
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
struct RPMallocInit
|
||||
{
|
||||
RPMallocInit() { rpmalloc_initialize(); }
|
||||
};
|
||||
|
||||
struct RPMallocThreadInit
|
||||
{
|
||||
RPMallocThreadInit() { rpmalloc_thread_initialize(); }
|
||||
};
|
||||
|
||||
// Plain holder for a single 64-bit timestamp value.
struct InitTimeWrapper
{
    int64_t val;    // stored timestamp
};
|
||||
|
||||
// Returns a short name for the running program, or "unknown" when the platform
// offers none of the mechanisms used below.
static const char* GetProcessName()
{
#if defined _MSC_VER
    // Ask for the full module path, then return the part after the last path separator.
    static char buf[_MAX_PATH];
    GetModuleFileNameA( nullptr, buf, _MAX_PATH );
    const char* cursor = buf + strlen( buf );
    for( ; cursor > buf; cursor-- )
    {
        if( *cursor == '\\' || *cursor == '/' ) break;
    }
    return cursor > buf ? cursor + 1 : cursor;
#elif defined __ANDROID__
#  if __ANDROID_API__ >= 21
    auto progname = getprogname();
    if( progname ) return progname;
#  endif
#elif defined _GNU_SOURCE || defined __CYGWIN__
    return program_invocation_short_name;
#endif
    return "unknown";
}
|
||||
|
||||
enum { QueuePrealloc = 256 * 1024 };
|
||||
|
||||
// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this.
|
||||
|
||||
static Profiler* s_instance = nullptr;
|
||||
static Thread* s_thread = nullptr;
|
||||
|
||||
// 1a. But s_queue is needed for initialization of variables in point 2.
|
||||
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
|
||||
|
||||
static thread_local RPMallocThreadInit init_order(106) s_rpmalloc_thread_init;
|
||||
|
||||
// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread.
|
||||
static thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue );
|
||||
thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) };
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// 1. Initialize these static variables before all other variables.
|
||||
# pragma warning( disable : 4075 )
|
||||
# pragma init_seg( ".CRT$XCB" )
|
||||
#endif
|
||||
|
||||
static InitTimeWrapper init_order(101) s_initTime { Profiler::GetTime() };
|
||||
static RPMallocInit init_order(102) s_rpmalloc_init;
|
||||
moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
|
||||
std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
|
||||
std::atomic<uint16_t> init_order(104) s_gpuCtxCounter( 0 );
|
||||
|
||||
thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr };
|
||||
|
||||
#ifdef TRACY_COLLECT_THREAD_NAMES
|
||||
struct ThreadNameData;
|
||||
std::atomic<ThreadNameData*> init_order(104) s_threadNameData( nullptr );
|
||||
#endif
|
||||
|
||||
static Profiler init_order(105) s_profiler;
|
||||
|
||||
|
||||
enum { BulkSize = TargetFrameSize / QueueItemSize };
|
||||
|
||||
// Allocates the transmit buffers, runs both calibration passes and starts the worker
// thread. m_timeBegin is written last: Worker() polls it until it becomes non-zero.
Profiler::Profiler()
    : m_timeBegin( 0 )
    , m_mainThread( GetThreadHandle() )
    , m_epoch( std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count() )
    , m_shutdown( false )
    , m_sock( nullptr )
    , m_stream( LZ4_createStream() )
    , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) )
    , m_bufferOffset( 0 )
    , m_bufferStart( 0 )
    , m_itemBuf( (QueueItem*)tracy_malloc( sizeof( QueueItem ) * BulkSize ) )
    , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) )
{
    // Only one profiler instance may exist at a time.
    assert( !s_instance );
    s_instance = this;

#ifdef _MSC_VER
    // 3. These thread-local variables must be initialized in the main thread within
    //    the .CRT$XCB section, so it is done here.
    s_token_detail = moodycamel::ProducerToken( s_queue );
    s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) };
#endif

    CalibrateTimer();
    CalibrateDelay();

    // The worker thread object lives in tracy_malloc'd storage and is built in place.
    s_thread = (Thread*)tracy_malloc( sizeof( Thread ) );
    new( s_thread ) Thread( LaunchWorker, this );
    SetThreadName( s_thread->Handle(), "Tracy Profiler" );

    m_timeBegin.store( GetTime(), std::memory_order_relaxed );
}
|
||||
|
||||
// Raises the shutdown flag, destroys the worker thread object, then releases every
// buffer and the socket (if one exists).
Profiler::~Profiler()
{
    m_shutdown.store( true, std::memory_order_relaxed );

    // The Thread was built with placement new, so it is destroyed and freed by hand.
    s_thread->~Thread();
    tracy_free( s_thread );

    tracy_free( m_lz4Buf );
    tracy_free( m_itemBuf );
    tracy_free( m_buffer );
    LZ4_freeStream( m_stream );

    if( m_sock != nullptr )
    {
        m_sock->~Socket();
        tracy_free( m_sock );
    }

    assert( s_instance );
    s_instance = nullptr;
}
|
||||
|
||||
bool Profiler::ShouldExit()
|
||||
{
|
||||
return s_instance->m_shutdown.load( std::memory_order_relaxed );
|
||||
}
|
||||
|
||||
void Profiler::Worker()
|
||||
{
|
||||
rpmalloc_thread_initialize();
|
||||
|
||||
const auto procname = GetProcessName();
|
||||
const auto pnsz = std::min<size_t>( strlen( procname ), WelcomeMessageProgramNameSize - 1 );
|
||||
|
||||
while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
|
||||
|
||||
WelcomeMessage welcome;
|
||||
welcome.timerMul = m_timerMul;
|
||||
welcome.initBegin = s_initTime.val;
|
||||
welcome.initEnd = m_timeBegin.load( std::memory_order_relaxed );
|
||||
welcome.delay = m_delay;
|
||||
welcome.resolution = m_resolution;
|
||||
welcome.epoch = m_epoch;
|
||||
memcpy( welcome.programName, procname, pnsz );
|
||||
memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz );
|
||||
|
||||
moodycamel::ConsumerToken token( s_queue );
|
||||
|
||||
ListenSocket listen;
|
||||
listen.Listen( "8086", 8 );
|
||||
|
||||
for(;;)
|
||||
{
|
||||
for(;;)
|
||||
{
|
||||
#ifndef TRACY_NO_EXIT
|
||||
if( ShouldExit() ) return;
|
||||
#endif
|
||||
m_sock = listen.Accept();
|
||||
if( m_sock ) break;
|
||||
}
|
||||
|
||||
m_sock->Send( &welcome, sizeof( welcome ) );
|
||||
LZ4_resetStream( m_stream );
|
||||
|
||||
for(;;)
|
||||
{
|
||||
const auto status = Dequeue( token );
|
||||
if( status == ConnectionLost )
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if( status == QueueEmpty )
|
||||
{
|
||||
if( ShouldExit() ) break;
|
||||
if( m_bufferOffset != m_bufferStart ) CommitData();
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
|
||||
}
|
||||
|
||||
while( m_sock->HasData() )
|
||||
{
|
||||
if( !HandleServerQuery() ) break;
|
||||
}
|
||||
}
|
||||
if( ShouldExit() ) break;
|
||||
}
|
||||
|
||||
for(;;)
|
||||
{
|
||||
const auto status = Dequeue( token );
|
||||
if( status == ConnectionLost )
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if( status == QueueEmpty )
|
||||
{
|
||||
if( m_bufferOffset != m_bufferStart ) CommitData();
|
||||
break;
|
||||
}
|
||||
|
||||
while( m_sock->HasData() )
|
||||
{
|
||||
if( !HandleServerQuery() ) break;
|
||||
}
|
||||
}
|
||||
|
||||
QueueItem terminate;
|
||||
terminate.hdr.type = QueueType::Terminate;
|
||||
if( !SendData( (const char*)&terminate, 1 ) ) return;
|
||||
for(;;)
|
||||
{
|
||||
if( m_sock->HasData() )
|
||||
{
|
||||
while( m_sock->HasData() )
|
||||
{
|
||||
if( !HandleServerQuery() )
|
||||
{
|
||||
if( m_bufferOffset != m_bufferStart ) CommitData();
|
||||
return;
|
||||
}
|
||||
}
|
||||
while( Dequeue( token ) == Success ) {}
|
||||
if( m_bufferOffset != m_bufferStart )
|
||||
{
|
||||
if( !CommitData() ) return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( m_bufferOffset != m_bufferStart ) CommitData();
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pulls up to BulkSize items from the queue and appends each one to the transmit
// buffer. Items whose type index is below QueueType::Terminate carry a heap pointer:
// the data it points to is sent first and then released with tracy_free().
//
// Returns QueueEmpty when nothing was dequeued, ConnectionLost when appending an item
// fails, and Success otherwise.
Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
{
    const auto count = s_queue.try_dequeue_bulk( token, m_itemBuf, BulkSize );
    if( count == 0 ) return QueueEmpty;

    const auto last = m_itemBuf + count;
    for( auto cur = m_itemBuf; cur != last; ++cur )
    {
        if( cur->hdr.idx < (int)QueueType::Terminate )
        {
            uint64_t payload;
            switch( cur->hdr.type )
            {
            case QueueType::ZoneText:
                payload = cur->zoneText.text;
                SendString( payload, (const char*)payload, QueueType::CustomStringData );
                tracy_free( (void*)payload );
                break;
            case QueueType::Message:
                payload = cur->message.text;
                SendString( payload, (const char*)payload, QueueType::CustomStringData );
                tracy_free( (void*)payload );
                break;
            case QueueType::ZoneBeginAllocSrcLoc:
                payload = cur->zoneBegin.srcloc;
                SendSourceLocationPayload( payload );
                tracy_free( (void*)payload );
                break;
            default:
                assert( false );
                break;
            }
        }

        if( !AppendData( cur, QueueDataSize[cur->hdr.idx] ) ) return ConnectionLost;
    }

    return Success;
}
|
||||
|
||||
bool Profiler::AppendData( const void* data, size_t len )
|
||||
{
|
||||
auto ret = true;
|
||||
ret = NeedDataSize( len );
|
||||
memcpy( m_buffer + m_bufferOffset, data, len );
|
||||
m_bufferOffset += int( len );
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Profiler::CommitData()
|
||||
{
|
||||
bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart );
|
||||
if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0;
|
||||
m_bufferStart = m_bufferOffset;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Profiler::NeedDataSize( size_t len )
|
||||
{
|
||||
bool ret = true;
|
||||
if( m_bufferOffset - m_bufferStart + len > TargetFrameSize )
|
||||
{
|
||||
ret = CommitData();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Profiler::SendData( const char* data, size_t len )
|
||||
{
|
||||
const lz4sz_t lz4sz = LZ4_compress_fast_continue( m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 );
|
||||
memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) );
|
||||
return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1;
|
||||
}
|
||||
|
||||
bool Profiler::SendString( uint64_t str, const char* ptr, QueueType type )
|
||||
{
|
||||
assert( type == QueueType::StringData || type == QueueType::ThreadName || type == QueueType::CustomStringData || type == QueueType::PlotName );
|
||||
|
||||
QueueItem item;
|
||||
item.hdr.type = type;
|
||||
item.stringTransfer.ptr = str;
|
||||
|
||||
auto len = strlen( ptr );
|
||||
assert( len <= std::numeric_limits<uint16_t>::max() );
|
||||
auto l16 = uint16_t( len );
|
||||
|
||||
NeedDataSize( QueueDataSize[item.hdr.idx] + sizeof( l16 ) + l16 );
|
||||
|
||||
AppendData( &item, QueueDataSize[item.hdr.idx] );
|
||||
AppendData( &l16, sizeof( l16 ) );
|
||||
AppendData( ptr, l16 );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Profiler::SendSourceLocation( uint64_t ptr )
|
||||
{
|
||||
auto srcloc = (const SourceLocation*)ptr;
|
||||
QueueItem item;
|
||||
item.hdr.type = QueueType::SourceLocation;
|
||||
item.srcloc.name = (uint64_t)srcloc->name;
|
||||
item.srcloc.file = (uint64_t)srcloc->file;
|
||||
item.srcloc.function = (uint64_t)srcloc->function;
|
||||
item.srcloc.line = srcloc->line;
|
||||
item.srcloc.r = ( srcloc->color ) & 0xFF;
|
||||
item.srcloc.g = ( srcloc->color >> 8 ) & 0xFF;
|
||||
item.srcloc.b = ( srcloc->color >> 16 ) & 0xFF;
|
||||
AppendData( &item, QueueDataSize[item.hdr.idx] );
|
||||
}
|
||||
|
||||
bool Profiler::SendSourceLocationPayload( uint64_t _ptr )
|
||||
{
|
||||
auto ptr = (const char*)_ptr;
|
||||
|
||||
QueueItem item;
|
||||
item.hdr.type = QueueType::SourceLocationPayload;
|
||||
item.stringTransfer.ptr = _ptr;
|
||||
|
||||
const auto len = *((uint32_t*)ptr);
|
||||
assert( len <= std::numeric_limits<uint16_t>::max() );
|
||||
assert( len > 4 );
|
||||
const auto l16 = uint16_t( len - 4 );
|
||||
|
||||
NeedDataSize( QueueDataSize[item.hdr.idx] + sizeof( l16 ) + l16 );
|
||||
|
||||
AppendData( &item, QueueDataSize[item.hdr.idx] );
|
||||
AppendData( &l16, sizeof( l16 ) );
|
||||
AppendData( ptr + 4, l16 );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Predicate that always answers "no"; passed to Socket::Read() by HandleServerQuery().
static bool DontExit()
{
    return false;
}
|
||||
|
||||
bool Profiler::HandleServerQuery()
|
||||
{
|
||||
timeval tv;
|
||||
tv.tv_sec = 0;
|
||||
tv.tv_usec = 10000;
|
||||
|
||||
uint8_t type;
|
||||
if( !m_sock->Read( &type, sizeof( type ), &tv, DontExit ) ) return false;
|
||||
|
||||
uint64_t ptr;
|
||||
if( !m_sock->Read( &ptr, sizeof( ptr ), &tv, DontExit ) ) return false;
|
||||
|
||||
switch( type )
|
||||
{
|
||||
case ServerQueryString:
|
||||
SendString( ptr, (const char*)ptr, QueueType::StringData );
|
||||
break;
|
||||
case ServerQueryThreadString:
|
||||
if( ptr == m_mainThread )
|
||||
{
|
||||
SendString( ptr, "Main thread", QueueType::ThreadName );
|
||||
}
|
||||
else
|
||||
{
|
||||
SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName );
|
||||
}
|
||||
break;
|
||||
case ServerQuerySourceLocation:
|
||||
SendSourceLocation( ptr );
|
||||
break;
|
||||
case ServerQueryPlotName:
|
||||
SendString( ptr, (const char*)ptr, QueueType::PlotName );
|
||||
break;
|
||||
case ServerQueryTerminate:
|
||||
return false;
|
||||
default:
|
||||
assert( false );
|
||||
break;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Profiler::CalibrateTimer()
|
||||
{
|
||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||
uint32_t cpu;
|
||||
std::atomic_signal_fence( std::memory_order_acq_rel );
|
||||
const auto t0 = std::chrono::high_resolution_clock::now();
|
||||
const auto r0 = tracy_rdtscp( cpu );
|
||||
std::atomic_signal_fence( std::memory_order_acq_rel );
|
||||
std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) );
|
||||
std::atomic_signal_fence( std::memory_order_acq_rel );
|
||||
const auto t1 = std::chrono::high_resolution_clock::now();
|
||||
const auto r1 = tracy_rdtscp( cpu );
|
||||
std::atomic_signal_fence( std::memory_order_acq_rel );
|
||||
|
||||
const auto dt = std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count();
|
||||
const auto dr = r1 - r0;
|
||||
|
||||
m_timerMul = double( dt ) / double( dr );
|
||||
#else
|
||||
m_timerMul = 1.;
|
||||
#endif
|
||||
}
|
||||
|
||||
class FakeZone
|
||||
{
|
||||
public:
|
||||
FakeZone( const SourceLocation* srcloc ) : m_id( (uint64_t)srcloc ) {}
|
||||
~FakeZone() {}
|
||||
|
||||
private:
|
||||
volatile uint64_t m_id;
|
||||
};
|
||||
|
||||
void Profiler::CalibrateDelay()
|
||||
{
|
||||
enum { Iterations = 50000 };
|
||||
enum { Events = Iterations * 2 }; // start + end
|
||||
static_assert( Events * 2 < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
|
||||
|
||||
moodycamel::ProducerToken ptoken_detail( s_queue );
|
||||
moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptoken = s_queue.get_explicit_producer( ptoken_detail );
|
||||
for( int i=0; i<Iterations; i++ )
|
||||
{
|
||||
static const tracy::SourceLocation __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
|
||||
{
|
||||
Magic magic;
|
||||
auto& tail = ptoken->get_tail_index();
|
||||
auto item = ptoken->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::ZoneBegin;
|
||||
item->zoneBegin.thread = GetThreadHandle();
|
||||
item->zoneBegin.time = GetTime( item->zoneBegin.cpu );
|
||||
item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
{
|
||||
Magic magic;
|
||||
auto& tail = ptoken->get_tail_index();
|
||||
auto item = ptoken->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::ZoneEnd;
|
||||
item->zoneEnd.thread = 0;
|
||||
item->zoneEnd.time = GetTime( item->zoneEnd.cpu );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
}
|
||||
const auto f0 = GetTime();
|
||||
for( int i=0; i<Iterations; i++ )
|
||||
{
|
||||
static const tracy::SourceLocation __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
|
||||
FakeZone ___tracy_scoped_zone( &__tracy_source_location );
|
||||
}
|
||||
const auto t0 = GetTime();
|
||||
for( int i=0; i<Iterations; i++ )
|
||||
{
|
||||
static const tracy::SourceLocation __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
|
||||
{
|
||||
Magic magic;
|
||||
auto& tail = ptoken->get_tail_index();
|
||||
auto item = ptoken->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::ZoneBegin;
|
||||
item->zoneBegin.thread = GetThreadHandle();
|
||||
item->zoneBegin.time = GetTime( item->zoneBegin.cpu );
|
||||
item->zoneBegin.srcloc = (uint64_t)&__tracy_source_location;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
{
|
||||
Magic magic;
|
||||
auto& tail = ptoken->get_tail_index();
|
||||
auto item = ptoken->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::ZoneEnd;
|
||||
item->zoneEnd.thread = 0;
|
||||
item->zoneEnd.time = GetTime( item->zoneEnd.cpu );
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
}
|
||||
const auto t1 = GetTime();
|
||||
const auto dt = t1 - t0;
|
||||
const auto df = t0 - f0;
|
||||
m_delay = ( dt - df ) / Events;
|
||||
|
||||
auto mindiff = std::numeric_limits<int64_t>::max();
|
||||
for( int i=0; i<Iterations * 10; i++ )
|
||||
{
|
||||
const auto t0 = GetTime();
|
||||
const auto t1 = GetTime();
|
||||
const auto dt = t1 - t0;
|
||||
if( dt > 0 && dt < mindiff ) mindiff = dt;
|
||||
}
|
||||
|
||||
m_resolution = mindiff;
|
||||
|
||||
enum { Bulk = 1000 };
|
||||
moodycamel::ConsumerToken token( s_queue );
|
||||
int left = Events * 2;
|
||||
QueueItem item[Bulk];
|
||||
while( left != 0 )
|
||||
{
|
||||
const auto sz = s_queue.try_dequeue_bulk( token, item, std::min( left, (int)Bulk ) );
|
||||
assert( sz > 0 );
|
||||
left -= (int)sz;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
231
client/TracyProfiler.hpp
Normal file
@@ -0,0 +1,231 @@
|
||||
#ifndef __TRACYPROFILER_HPP__
|
||||
#define __TRACYPROFILER_HPP__
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "concurrentqueue.h"
|
||||
#include "../common/tracy_lz4.hpp"
|
||||
#include "../common/TracyQueue.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "../common/TracySystem.hpp"
|
||||
|
||||
#if defined _MSC_VER || defined __CYGWIN__
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if defined _MSC_VER || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ )
|
||||
# define TRACY_RDTSCP_SUPPORTED
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class Socket;
|
||||
|
||||
// Description of a code location. Field order matters: instances are created with
// aggregate initialization.
struct SourceLocation
{
    const char* name;       // optional name; may be nullptr
    const char* function;   // function name
    const char* file;       // source file name
    uint32_t line;          // line number
    uint32_t color;         // colour value; the low three bytes are sent as r, g, b
};
|
||||
|
||||
// Thin holder for a pointer to an explicit producer of the QueueItem
// concurrent queue. A thread_local instance named s_token is declared in this
// header; the event-recording functions reach the producer via s_token.ptr.
struct ProducerWrapper
|
||||
{
|
||||
// Producer used for enqueue_begin() / get_tail_index() by the recording functions.
moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
|
||||
};
|
||||
|
||||
extern thread_local ProducerWrapper s_token;
|
||||
|
||||
class GpuCtx;
|
||||
// Thin holder for a raw pointer to a GpuCtx. GpuCtx is only forward-declared
// in this header, so its definition lives elsewhere.
struct GpuCtxWrapper
|
||||
{
|
||||
// NOTE(review): ownership and lifetime of the pointee are not visible here -- confirm.
GpuCtx* ptr;
|
||||
};
|
||||
|
||||
using Magic = moodycamel::ConcurrentQueueDefaultTraits::index_t;
|
||||
|
||||
class Profiler
|
||||
{
|
||||
public:
|
||||
Profiler();
|
||||
~Profiler();
|
||||
|
||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||
static tracy_force_inline int64_t tracy_rdtscp( uint32_t& cpu )
|
||||
{
|
||||
#if defined _MSC_VER || defined __CYGWIN__
|
||||
const auto t = int64_t( __rdtscp( &cpu ) );
|
||||
return t;
|
||||
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
|
||||
uint32_t eax, edx;
|
||||
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx), "=c" (cpu) :: );
|
||||
return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||
static tracy_force_inline int64_t tracy_rdtscp()
|
||||
{
|
||||
#if defined _MSC_VER || defined __CYGWIN__
|
||||
static unsigned int dontcare;
|
||||
const auto t = int64_t( __rdtscp( &dontcare ) );
|
||||
return t;
|
||||
#elif defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
|
||||
uint32_t eax, edx;
|
||||
asm volatile ( "rdtscp" : "=a" (eax), "=d" (edx) :: "%ecx" );
|
||||
return ( uint64_t( edx ) << 32 ) + uint64_t( eax );
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
static tracy_force_inline int64_t GetTime( uint32_t& cpu )
|
||||
{
|
||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||
return tracy_rdtscp( cpu );
|
||||
#else
|
||||
cpu = 0xFFFFFFFF;
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline int64_t GetTime()
|
||||
{
|
||||
#ifdef TRACY_RDTSCP_SUPPORTED
|
||||
return tracy_rdtscp();
|
||||
#else
|
||||
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
|
||||
#endif
|
||||
}
|
||||
|
||||
static tracy_force_inline void FrameMark()
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::FrameMarkMsg;
|
||||
item->frameMark.time = GetTime();
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, int64_t val )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::PlotData;
|
||||
item->plotData.name = (uint64_t)name;
|
||||
item->plotData.time = GetTime();
|
||||
item->plotData.type = PlotDataType::Int;
|
||||
item->plotData.data.i = val;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, float val )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::PlotData;
|
||||
item->plotData.name = (uint64_t)name;
|
||||
item->plotData.time = GetTime();
|
||||
item->plotData.type = PlotDataType::Float;
|
||||
item->plotData.data.f = val;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void PlotData( const char* name, double val )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::PlotData;
|
||||
item->plotData.name = (uint64_t)name;
|
||||
item->plotData.time = GetTime();
|
||||
item->plotData.type = PlotDataType::Double;
|
||||
item->plotData.data.d = val;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt, size_t size )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::Message;
|
||||
item->message.time = GetTime();
|
||||
item->message.thread = GetThreadHandle();
|
||||
item->message.text = (uint64_t)ptr;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static tracy_force_inline void Message( const char* txt )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::MessageLiteral;
|
||||
item->message.time = GetTime();
|
||||
item->message.thread = GetThreadHandle();
|
||||
item->message.text = (uint64_t)txt;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
static bool ShouldExit();
|
||||
|
||||
private:
|
||||
enum DequeueStatus { Success, ConnectionLost, QueueEmpty };
|
||||
|
||||
static void LaunchWorker( void* ptr ) { ((Profiler*)ptr)->Worker(); }
|
||||
void Worker();
|
||||
|
||||
DequeueStatus Dequeue( moodycamel::ConsumerToken& token );
|
||||
bool AppendData( const void* data, size_t len );
|
||||
bool CommitData();
|
||||
bool NeedDataSize( size_t len );
|
||||
|
||||
bool SendData( const char* data, size_t len );
|
||||
bool SendString( uint64_t ptr, const char* str, QueueType type );
|
||||
void SendSourceLocation( uint64_t ptr );
|
||||
bool SendSourceLocationPayload( uint64_t ptr );
|
||||
|
||||
bool HandleServerQuery();
|
||||
|
||||
void CalibrateTimer();
|
||||
void CalibrateDelay();
|
||||
|
||||
double m_timerMul;
|
||||
uint64_t m_resolution;
|
||||
uint64_t m_delay;
|
||||
std::atomic<int64_t> m_timeBegin;
|
||||
uint64_t m_mainThread;
|
||||
uint64_t m_epoch;
|
||||
std::atomic<bool> m_shutdown;
|
||||
Socket* m_sock;
|
||||
|
||||
LZ4_stream_t* m_stream;
|
||||
char* m_buffer;
|
||||
int m_bufferOffset;
|
||||
int m_bufferStart;
|
||||
|
||||
QueueItem* m_itemBuf;
|
||||
char* m_lz4Buf;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
65
client/TracyScoped.hpp
Normal file
@@ -0,0 +1,65 @@
|
||||
#ifndef __TRACYSCOPED_HPP__
|
||||
#define __TRACYSCOPED_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "../common/TracySystem.hpp"
|
||||
#include "../common/TracyAlloc.hpp"
|
||||
#include "TracyProfiler.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
class ScopedZone
|
||||
{
|
||||
public:
|
||||
tracy_force_inline ScopedZone( const SourceLocation* srcloc )
|
||||
{
|
||||
const auto thread = GetThreadHandle();
|
||||
m_thread = thread;
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::ZoneBegin;
|
||||
item->zoneBegin.time = Profiler::GetTime( item->zoneBegin.cpu );
|
||||
item->zoneBegin.thread = thread;
|
||||
item->zoneBegin.srcloc = (uint64_t)srcloc;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
tracy_force_inline ~ScopedZone()
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::ZoneEnd;
|
||||
item->zoneEnd.time = Profiler::GetTime( item->zoneEnd.cpu );
|
||||
item->zoneEnd.thread = m_thread;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
tracy_force_inline void Text( const char* txt, size_t size )
|
||||
{
|
||||
Magic magic;
|
||||
auto& token = s_token.ptr;
|
||||
auto ptr = (char*)tracy_malloc( size+1 );
|
||||
memcpy( ptr, txt, size );
|
||||
ptr[size] = '\0';
|
||||
auto& tail = token->get_tail_index();
|
||||
auto item = token->enqueue_begin<moodycamel::CanAlloc>( magic );
|
||||
item->hdr.type = QueueType::ZoneText;
|
||||
item->zoneText.thread = m_thread;
|
||||
item->zoneText.text = (uint64_t)ptr;
|
||||
tail.store( magic + 1, std::memory_order_release );
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t m_thread;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,36 +1,16 @@
|
||||
#ifndef __TRACYTHREAD_HPP__
|
||||
#define __TRACYTHREAD_HPP__
|
||||
|
||||
#if defined _WIN32
|
||||
#ifdef _MSC_VER
|
||||
# include <windows.h>
|
||||
#else
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
# include "tracy_rpmalloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
extern thread_local bool RpThreadInitDone;
|
||||
#endif
|
||||
|
||||
// Object whose only behaviour is in its destructor: when TRACY_MANUAL_LIFETIME
// is defined, destruction finalizes rpmalloc for the current thread and clears
// the thread_local RpThreadInitDone flag. Without that macro the destructor
// body is empty.
// NOTE(review): presumably instantiated as a thread_local so the destructor runs at thread exit -- confirm at the use site.
class ThreadExitHandler
|
||||
{
|
||||
public:
|
||||
~ThreadExitHandler()
|
||||
{
|
||||
#ifdef TRACY_MANUAL_LIFETIME
|
||||
rpmalloc_thread_finalize( 1 );
|
||||
// Mark rpmalloc as not initialized for this thread.
RpThreadInitDone = false;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
#if defined _WIN32
|
||||
#ifdef _MSC_VER
|
||||
|
||||
class Thread
|
||||
{
|
||||
3672
client/concurrentqueue.h
Normal file
2090
client/tracy_rpmalloc.cpp
Normal file
151
client/tracy_rpmalloc.hpp
Normal file
@@ -0,0 +1,151 @@
|
||||
/* rpmalloc.h - Memory allocator - Public Domain - 2016 Mattias Jansson / Rampant Pixels
|
||||
*
|
||||
* This library provides a cross-platform lock free thread caching malloc implementation in C11.
|
||||
* The latest source code is always available at
|
||||
*
|
||||
* https://github.com/rampantpixels/rpmalloc
|
||||
*
|
||||
* This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
# define RPMALLOC_ATTRIBUTE __attribute__((__malloc__))
|
||||
# define RPMALLOC_RESTRICT
|
||||
# define RPMALLOC_CDECL
|
||||
#elif defined(_MSC_VER)
|
||||
# define RPMALLOC_ATTRIBUTE
|
||||
# define RPMALLOC_RESTRICT __declspec(restrict)
|
||||
# define RPMALLOC_CDECL __cdecl
|
||||
#else
|
||||
# define RPMALLOC_ATTRIBUTE
|
||||
# define RPMALLOC_RESTRICT
|
||||
# define RPMALLOC_CDECL
|
||||
#endif
|
||||
|
||||
//! Flag to rpaligned_realloc to not preserve content in reallocation
|
||||
#define RPMALLOC_NO_PRESERVE 1
|
||||
|
||||
//! Process-wide allocator statistics, filled in by rpmalloc_global_statistics()
struct rpmalloc_global_statistics_t {
	//! Virtual memory currently mapped (only if ENABLE_STATISTICS=1)
	size_t mapped;
	//! Memory currently held in the global caches for small and medium sizes (<64KiB)
	size_t cached;
	//! Running total of memory mapped (only if ENABLE_STATISTICS=1)
	size_t mapped_total;
	//! Running total of memory unmapped (only if ENABLE_STATISTICS=1)
	size_t unmapped_total;
};
|
||||
|
||||
//! Per-thread allocator statistics, filled in by rpmalloc_thread_statistics()
struct rpmalloc_thread_statistics_t {
	//! Bytes currently available for allocation from active spans
	size_t active;
	//! Bytes currently available in the thread's size class caches
	size_t sizecache;
	//! Bytes currently available in the thread's span caches
	size_t spancache;
	//! Bytes currently sitting in pending deferred deallocations
	size_t deferred;
	//! Running total of bytes moved from the thread cache to the global cache
	size_t thread_to_global;
	//! Running total of bytes moved from the global cache to the thread cache
	size_t global_to_thread;
};
|
||||
|
||||
//! Allocator configuration, passed to rpmalloc_initialize_config()
struct rpmalloc_config_t {
	//! Callback that maps memory pages for the given number of bytes. The address
	//  it returns MUST be aligned to the rpmalloc span size, which is always a
	//  power of two. If the callback aligns the region itself, so that the
	//  returned pointer is offset from the real start of the mapping, it can
	//  store that alignment offset in the offset variable; the offset is handed
	//  back to the memory unmap function. The alignment offset MUST NOT exceed
	//  65535 (it has to fit in an uint16_t); if it would, use natural alignment
	//  to shift it into 16 bits.
	void* (*memory_map)(size_t size, size_t* offset);
	//! Callback that unmaps the memory pages starting at address and spanning
	//  the given number of bytes. With release set to 1 the unmap covers an
	//  entire span range as returned by an earlier memory_map call, and the
	//  whole range should be released. With release set to 0 the unmap is a
	//  partial decommit of a subset of the mapped memory range.
	void (*memory_unmap)(void* address, size_t size, size_t offset, int release);
	//! Size of memory pages. MUST be a power of two in the [512,16384] range
	//  (2^9 to 2^14) unless 0 - set to 0 to use the system page size. Every
	//  request to memory_map is made with size set to a multiple of the page size.
	size_t page_size;
	//! Size of a span of memory pages. MUST be a multiple of the page size and
	//  in the [4096,262144] range (unless 0 - set to 0 to use the default span size).
	size_t span_size;
	//! Number of spans to map per request for new virtual memory blocks. Can be
	//  used to reduce system call overhead at the cost of virtual address space.
	//  The extra mapped pages are not written until actually used, so physical
	//  committed memory should not be affected in the default implementation.
	size_t span_map_count;
	//! Debug callback used when memory guards are enabled. Called if a memory overwrite is detected
	void (*memory_overwrite)(void* address);
};
|
||||
|
||||
extern int
|
||||
rpmalloc_initialize(void);
|
||||
|
||||
extern int
|
||||
rpmalloc_initialize_config(const rpmalloc_config_t* config);
|
||||
|
||||
extern const rpmalloc_config_t*
|
||||
rpmalloc_config(void);
|
||||
|
||||
extern void
|
||||
rpmalloc_finalize(void);
|
||||
|
||||
extern void
|
||||
rpmalloc_thread_initialize(void);
|
||||
|
||||
extern void
|
||||
rpmalloc_thread_finalize(void);
|
||||
|
||||
extern void
|
||||
rpmalloc_thread_collect(void);
|
||||
|
||||
extern int
|
||||
rpmalloc_is_thread_initialized(void);
|
||||
|
||||
extern void
|
||||
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats);
|
||||
|
||||
extern void
|
||||
rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats);
|
||||
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpmalloc(size_t size) RPMALLOC_ATTRIBUTE;
|
||||
|
||||
extern void
|
||||
rpfree(void* ptr);
|
||||
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
|
||||
extern void*
|
||||
rprealloc(void* ptr, size_t size);
|
||||
|
||||
extern void*
|
||||
rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize, unsigned int flags);
|
||||
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
|
||||
extern RPMALLOC_RESTRICT void*
|
||||
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIBUTE;
|
||||
|
||||
extern int
|
||||
rpposix_memalign(void **memptr, size_t alignment, size_t size);
|
||||
|
||||
extern size_t
|
||||
rpmalloc_usable_size(void* ptr);
|
||||
|
||||
}
|
||||
1269
cmake/CPM.cmake
@@ -1,300 +0,0 @@
|
||||
#.rst:
|
||||
# ECMFindModuleHelpers
|
||||
# --------------------
|
||||
#
|
||||
# Helper macros for find modules: ecm_find_package_version_check(),
|
||||
# ecm_find_package_parse_components() and
|
||||
# ecm_find_package_handle_library_components().
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_find_package_version_check(<name>)
|
||||
#
|
||||
# Fails with a fatal error if the CMake version is older than that required by
|
||||
# extra-cmake-modules, and prints an author warning if the project's required CMake version is older than that.
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_find_package_parse_components(<name>
|
||||
# RESULT_VAR <variable>
|
||||
# KNOWN_COMPONENTS <component1> [<component2> [...]]
|
||||
# [SKIP_DEPENDENCY_HANDLING])
|
||||
#
|
||||
# This macro will populate <variable> with a list of components found in
|
||||
# <name>_FIND_COMPONENTS, after checking that all those components are in the
|
||||
# list of KNOWN_COMPONENTS; if there are any unknown components, it will print
|
||||
# an error or warning (depending on the value of <name>_FIND_REQUIRED) and call
|
||||
# return().
|
||||
#
|
||||
# The order of components in <variable> is guaranteed to match the order they
|
||||
# are listed in the KNOWN_COMPONENTS argument.
|
||||
#
|
||||
# If SKIP_DEPENDENCY_HANDLING is not set, for each component the variable
|
||||
# <name>_<component>_component_deps will be checked for dependent components.
|
||||
# If <component> is listed in <name>_FIND_COMPONENTS, then all its (transitive)
|
||||
# dependencies will also be added to <variable>.
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_find_package_handle_library_components(<name>
|
||||
# COMPONENTS <component> [<component> [...]]
|
||||
# [SKIP_DEPENDENCY_HANDLING]
|
||||
# [SKIP_PKG_CONFIG])
|
||||
#
|
||||
# Creates an imported library target for each component. The operation of this
|
||||
# macro depends on the presence of a number of CMake variables.
|
||||
#
|
||||
# The <name>_<component>_lib variable should contain the name of this library,
|
||||
# and <name>_<component>_header variable should contain the name of a header
|
||||
# file associated with it (whatever relative path is normally passed to
|
||||
# '#include'). <name>_<component>_header_subdir variable can be used to specify
|
||||
# which subdirectory of the include path the headers will be found in.
|
||||
# ecm_find_package_handle_library_components() will then search for the library
|
||||
# and include directory (creating appropriate cache variables) and create an
|
||||
# imported library target named <name>::<component>.
|
||||
#
|
||||
# Additional variables can be used to provide additional information:
|
||||
#
|
||||
# If SKIP_PKG_CONFIG is not given, the <name>_<component>_pkg_config variable is set, and
|
||||
# pkg-config is found, the pkg-config module given by
|
||||
# <name>_<component>_pkg_config will be searched for and used to help locate the
|
||||
# library and header file. It will also be used to set
|
||||
# <name>_<component>_VERSION.
|
||||
#
|
||||
# Note that if version information is found via pkg-config,
|
||||
# <name>_<component>_FIND_VERSION can be set to require a particular version
|
||||
# for each component.
|
||||
#
|
||||
# If SKIP_DEPENDENCY_HANDLING is not set, the INTERFACE_LINK_LIBRARIES property
|
||||
# of the imported target for <component> will be set to contain the imported
|
||||
# targets for the components listed in <name>_<component>_component_deps.
|
||||
# <name>_<component>_FOUND will also be set to false if any of the components in
|
||||
# <name>_<component>_component_deps are not found. This requires the components
|
||||
# in <name>_<component>_component_deps to be listed before <component> in the
|
||||
# COMPONENTS argument.
|
||||
#
|
||||
# The following variables will be set:
|
||||
#
|
||||
# ``<name>_TARGETS``
|
||||
# the imported targets
|
||||
# ``<name>_LIBRARIES``
|
||||
# the found libraries
|
||||
# ``<name>_INCLUDE_DIRS``
|
||||
# the combined required include directories for the components
|
||||
# ``<name>_DEFINITIONS``
|
||||
# the "other" CFLAGS provided by pkg-config, if any
|
||||
# ``<name>_VERSION``
|
||||
# the value of ``<name>_<component>_VERSION`` for the first component that
|
||||
# has this variable set (note that components are searched for in the order
|
||||
# they are passed to the macro), although if it is already set, it will not
|
||||
# be altered
|
||||
#
|
||||
# Note that these variables are never cleared, so if
|
||||
# ecm_find_package_handle_library_components() is called multiple times with
|
||||
# different components (typically because of multiple find_package() calls) then
|
||||
# ``<name>_TARGETS``, for example, will contain all the targets found in any
|
||||
# call (although no duplicates).
|
||||
#
|
||||
# Since pre-1.0.0.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2014 Alex Merry <alex.merry@kde.org>
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# 3. The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# ecm_find_package_version_check(<module_name>)
#
# Guards a Find<module_name>.cmake module against CMake versions that are too
# old. <module_name> is used only to build the diagnostic text.
macro(ecm_find_package_version_check module_name)
|
||||
# Hard stop: the running CMake itself is older than 2.8.12.
if(CMAKE_VERSION VERSION_LESS 2.8.12)
|
||||
message(FATAL_ERROR "CMake 2.8.12 is required by Find${module_name}.cmake")
|
||||
endif()
|
||||
# Soft warning for project authors: the project's declared minimum is older than 2.8.12.
if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 2.8.12)
|
||||
message(AUTHOR_WARNING "Your project should require at least CMake 2.8.12 to use Find${module_name}.cmake")
|
||||
endif()
|
||||
endmacro()
|
||||
|
||||
# ecm_find_package_parse_components(<module_name>
#     RESULT_VAR <variable>
#     KNOWN_COMPONENTS <component>...
#     [DEFAULT_COMPONENTS <component>...]
#     [SKIP_DEPENDENCY_HANDLING])
#
# Resolves the component list for a find module into <variable>. Requested
# components come from <module_name>_FIND_COMPONENTS; when none are requested,
# DEFAULT_COMPONENTS (falling back to KNOWN_COMPONENTS) is used. Because this
# is a macro, the return() on unknown components returns from the caller.
macro(ecm_find_package_parse_components module_name)
    set(ecm_fppc_options SKIP_DEPENDENCY_HANDLING)
    set(ecm_fppc_oneValueArgs RESULT_VAR)
    set(ecm_fppc_multiValueArgs KNOWN_COMPONENTS DEFAULT_COMPONENTS)
    cmake_parse_arguments(ECM_FPPC "${ecm_fppc_options}" "${ecm_fppc_oneValueArgs}" "${ecm_fppc_multiValueArgs}" ${ARGN})

    # Argument validation.
    if(ECM_FPPC_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unexpected arguments to ecm_find_package_parse_components: ${ECM_FPPC_UNPARSED_ARGUMENTS}")
    endif()
    if(NOT ECM_FPPC_RESULT_VAR)
        message(FATAL_ERROR "Missing RESULT_VAR argument to ecm_find_package_parse_components")
    endif()
    if(NOT ECM_FPPC_KNOWN_COMPONENTS)
        message(FATAL_ERROR "Missing KNOWN_COMPONENTS argument to ecm_find_package_parse_components")
    endif()
    if(NOT ECM_FPPC_DEFAULT_COMPONENTS)
        set(ECM_FPPC_DEFAULT_COMPONENTS ${ECM_FPPC_KNOWN_COMPONENTS})
    endif()

    if(${module_name}_FIND_COMPONENTS)
        set(ecm_fppc_requestedComps ${${module_name}_FIND_COMPONENTS})

        if(NOT ECM_FPPC_SKIP_DEPENDENCY_HANDLING)
            # Pull in the declared dependencies of every requested component.
            foreach(ecm_fppc_comp ${ecm_fppc_requestedComps})
                foreach(ecm_fppc_dep_comp ${${module_name}_${ecm_fppc_comp}_component_deps})
                    list(FIND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}" ecm_fppc_index)
                    if("${ecm_fppc_index}" STREQUAL "-1")
                        if(NOT ${module_name}_FIND_QUIETLY)
                            message(STATUS "${module_name}: ${ecm_fppc_comp} requires ${${module_name}_${ecm_fppc_comp}_component_deps}")
                        endif()
                        list(APPEND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}")
                    endif()
                endforeach()
            endforeach()
        else()
            message(STATUS "Skipping dependency handling for ${module_name}")
        endif()
        list(REMOVE_DUPLICATES ecm_fppc_requestedComps)

        # Emit the result in KNOWN_COMPONENTS order (potentially important for
        # inter-dependencies), consuming matched entries from the request list.
        set(${ECM_FPPC_RESULT_VAR})
        foreach(ecm_fppc_comp ${ECM_FPPC_KNOWN_COMPONENTS})
            list(FIND ecm_fppc_requestedComps "${ecm_fppc_comp}" ecm_fppc_index)
            if(NOT "${ecm_fppc_index}" STREQUAL "-1")
                list(APPEND ${ECM_FPPC_RESULT_VAR} "${ecm_fppc_comp}")
                list(REMOVE_AT ecm_fppc_requestedComps ${ecm_fppc_index})
            endif()
        endforeach()

        # Whatever is still in the request list was not a known component.
        if(ecm_fppc_requestedComps)
            if(${module_name}_FIND_REQUIRED)
                set(ecm_fppc_msgType FATAL_ERROR)
            else()
                set(ecm_fppc_msgType STATUS)
            endif()
            if(NOT ${module_name}_FIND_QUIETLY)
                message(${ecm_fppc_msgType} "${module_name}: requested unknown components ${ecm_fppc_requestedComps}")
            endif()
            return()
        endif()
    else()
        set(${ECM_FPPC_RESULT_VAR} ${ECM_FPPC_DEFAULT_COMPONENTS})
    endif()
endmacro()
|
||||
|
||||
# ecm_find_package_handle_library_components(<module_name>
#     COMPONENTS <component>...
#     [SKIP_PKG_CONFIG]
#     [SKIP_DEPENDENCY_HANDLING])
#
# For every listed component: optionally queries pkg-config, locates the header
# directory and the library, runs find_package_handle_standard_args(), and on
# success creates the imported target <module_name>::<component> and appends to
# <module_name>_LIBRARIES / _INCLUDE_DIRS / _DEFINITIONS / _TARGETS.
#
# Unless SKIP_DEPENDENCY_HANDLING is given, each entry of
# <module_name>_<component>_component_deps becomes both a required
# <module_name>_<dep>_FOUND variable and a link dependency of the target.
macro(ecm_find_package_handle_library_components module_name)
    set(ecm_fpwc_options SKIP_PKG_CONFIG SKIP_DEPENDENCY_HANDLING)
    set(ecm_fpwc_oneValueArgs)
    set(ecm_fpwc_multiValueArgs COMPONENTS)
    cmake_parse_arguments(ECM_FPWC "${ecm_fpwc_options}" "${ecm_fpwc_oneValueArgs}" "${ecm_fpwc_multiValueArgs}" ${ARGN})

    if(ECM_FPWC_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Unexpected arguments to ecm_find_package_handle_library_components: ${ECM_FPWC_UNPARSED_ARGUMENTS}")
    endif()
    if(NOT ECM_FPWC_COMPONENTS)
        message(FATAL_ERROR "Missing COMPONENTS argument to ecm_find_package_handle_library_components")
    endif()

    include(FindPackageHandleStandardArgs)
    find_package(PkgConfig QUIET)
    foreach(ecm_fpwc_comp ${ECM_FPWC_COMPONENTS})
        set(ecm_fpwc_dep_vars)
        set(ecm_fpwc_dep_targets)
        # The parsed option carries the ECM_FPWC_ prefix; testing the bare name
        # SKIP_DEPENDENCY_HANDLING would never see the caller's option.
        if(NOT ECM_FPWC_SKIP_DEPENDENCY_HANDLING)
            foreach(ecm_fpwc_dep ${${module_name}_${ecm_fpwc_comp}_component_deps})
                list(APPEND ecm_fpwc_dep_vars "${module_name}_${ecm_fpwc_dep}_FOUND")
                list(APPEND ecm_fpwc_dep_targets "${module_name}::${ecm_fpwc_dep}")
            endforeach()
        endif()

        if(NOT ECM_FPWC_SKIP_PKG_CONFIG AND ${module_name}_${ecm_fpwc_comp}_pkg_config)
            pkg_check_modules(PKG_${module_name}_${ecm_fpwc_comp} QUIET
                              ${${module_name}_${ecm_fpwc_comp}_pkg_config})
        endif()

        # pkg-config results, when present, are used only as search hints.
        find_path(${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
            NAMES ${${module_name}_${ecm_fpwc_comp}_header}
            HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_INCLUDE_DIRS}
            PATH_SUFFIXES ${${module_name}_${ecm_fpwc_comp}_header_subdir}
        )
        find_library(${module_name}_${ecm_fpwc_comp}_LIBRARY
            NAMES ${${module_name}_${ecm_fpwc_comp}_lib}
            HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_LIBRARY_DIRS}
        )

        set(${module_name}_${ecm_fpwc_comp}_VERSION "${PKG_${module_name}_${ecm_fpwc_comp}_VERSION}")
        # The first component that reports a version defines the package
        # version; an already-set value is left alone.
        if(NOT ${module_name}_VERSION)
            set(${module_name}_VERSION ${${module_name}_${ecm_fpwc_comp}_VERSION})
        endif()

        # NAME_MISMATCHED is passed only on CMake 3.17 and newer, since the
        # name given below is <module_name>_<component>, not the package name.
        set(_name_mismatched_arg)
        if(NOT CMAKE_VERSION VERSION_LESS 3.17)
            set(_name_mismatched_arg NAME_MISMATCHED)
        endif()
        find_package_handle_standard_args(${module_name}_${ecm_fpwc_comp}
            FOUND_VAR
                ${module_name}_${ecm_fpwc_comp}_FOUND
            REQUIRED_VARS
                ${module_name}_${ecm_fpwc_comp}_LIBRARY
                ${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
                ${ecm_fpwc_dep_vars}
            VERSION_VAR
                ${module_name}_${ecm_fpwc_comp}_VERSION
            ${_name_mismatched_arg}
        )

        mark_as_advanced(
            ${module_name}_${ecm_fpwc_comp}_LIBRARY
            ${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR
        )

        if(${module_name}_${ecm_fpwc_comp}_FOUND)
            list(APPEND ${module_name}_LIBRARIES
                        "${${module_name}_${ecm_fpwc_comp}_LIBRARY}")
            list(APPEND ${module_name}_INCLUDE_DIRS
                        "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}")
            set(${module_name}_DEFINITIONS
                ${${module_name}_DEFINITIONS}
                ${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS})
            # Repeated calls must not redefine an existing imported target.
            if(NOT TARGET ${module_name}::${ecm_fpwc_comp})
                add_library(${module_name}::${ecm_fpwc_comp} UNKNOWN IMPORTED)
                set_target_properties(${module_name}::${ecm_fpwc_comp} PROPERTIES
                    IMPORTED_LOCATION "${${module_name}_${ecm_fpwc_comp}_LIBRARY}"
                    INTERFACE_COMPILE_OPTIONS "${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS}"
                    INTERFACE_INCLUDE_DIRECTORIES "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}"
                    INTERFACE_LINK_LIBRARIES "${ecm_fpwc_dep_targets}"
                )
            endif()
            list(APPEND ${module_name}_TARGETS
                        "${module_name}::${ecm_fpwc_comp}")
        endif()
    endforeach()

    # The aggregate variables accumulate across calls; keep them duplicate-free.
    if(${module_name}_LIBRARIES)
        list(REMOVE_DUPLICATES ${module_name}_LIBRARIES)
    endif()
    if(${module_name}_INCLUDE_DIRS)
        list(REMOVE_DUPLICATES ${module_name}_INCLUDE_DIRS)
    endif()
    if(${module_name}_DEFINITIONS)
        list(REMOVE_DUPLICATES ${module_name}_DEFINITIONS)
    endif()
    if(${module_name}_TARGETS)
        list(REMOVE_DUPLICATES ${module_name}_TARGETS)
    endif()
endmacro()
|
||||
@@ -1,170 +0,0 @@
|
||||
#.rst:
|
||||
# FindWaylandScanner
|
||||
# ------------------
|
||||
#
|
||||
# Try to find wayland-scanner.
|
||||
#
|
||||
# If the wayland-scanner executable is not in your PATH, you can provide
|
||||
# an alternative name or full path location with the ``WaylandScanner_EXECUTABLE``
|
||||
# variable.
|
||||
#
|
||||
# This will define the following variables:
|
||||
#
|
||||
# ``WaylandScanner_FOUND``
|
||||
# True if wayland-scanner is available.
|
||||
#
|
||||
# ``WaylandScanner_EXECUTABLE``
|
||||
# The wayland-scanner executable.
|
||||
#
|
||||
# If ``WaylandScanner_FOUND`` is TRUE, it will also define the following imported
|
||||
# target:
|
||||
#
|
||||
# ``Wayland::Scanner``
|
||||
# The wayland-scanner executable.
|
||||
#
|
||||
# This module provides the following functions to generate C protocol
|
||||
# implementations:
|
||||
#
|
||||
# - ``ecm_add_wayland_client_protocol``
|
||||
# - ``ecm_add_wayland_server_protocol``
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_add_wayland_client_protocol(<source_files_var>
|
||||
# PROTOCOL <xmlfile>
|
||||
# BASENAME <basename>)
|
||||
#
|
||||
# Generate Wayland client protocol files from ``<xmlfile>`` XML
|
||||
# definition for the ``<basename>`` interface and append those files
|
||||
# to ``<source_files_var>``.
|
||||
#
|
||||
# ::
|
||||
#
|
||||
# ecm_add_wayland_server_protocol(<source_files_var>
|
||||
# PROTOCOL <xmlfile>
|
||||
# BASENAME <basename>)
|
||||
#
|
||||
# Generate Wayland server protocol files from ``<xmlfile>`` XML
|
||||
# definition for the ``<basename>`` interface and append those files
|
||||
# to ``<source_files_var>``.
|
||||
#
|
||||
# Since 1.4.0.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2012-2014 Pier Luigi Fiorini <pierluigi.fiorini@gmail.com>
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# 3. The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#=============================================================================
|
||||
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpers.cmake)
|
||||
|
||||
ecm_find_package_version_check(WaylandScanner)
|
||||
|
||||
# Find wayland-scanner
|
||||
find_program(WaylandScanner_EXECUTABLE NAMES wayland-scanner)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(WaylandScanner
|
||||
FOUND_VAR
|
||||
WaylandScanner_FOUND
|
||||
REQUIRED_VARS
|
||||
WaylandScanner_EXECUTABLE
|
||||
)
|
||||
|
||||
mark_as_advanced(WaylandScanner_EXECUTABLE)
|
||||
|
||||
if(NOT TARGET Wayland::Scanner AND WaylandScanner_FOUND)
|
||||
add_executable(Wayland::Scanner IMPORTED)
|
||||
set_target_properties(Wayland::Scanner PROPERTIES
|
||||
IMPORTED_LOCATION "${WaylandScanner_EXECUTABLE}"
|
||||
)
|
||||
endif()
|
||||
|
||||
include(FeatureSummary)
|
||||
set_package_properties(WaylandScanner PROPERTIES
|
||||
URL "https://wayland.freedesktop.org/"
|
||||
DESCRIPTION "Executable that converts XML protocol files to C code"
|
||||
)
|
||||
|
||||
function(ecm_add_wayland_client_protocol out_var)
|
||||
# Parse arguments
|
||||
set(oneValueArgs PROTOCOL BASENAME)
|
||||
cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN})
|
||||
|
||||
if(ARGS_UNPARSED_ARGUMENTS)
|
||||
message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_client_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"")
|
||||
endif()
|
||||
|
||||
get_filename_component(_infile ${ARGS_PROTOCOL} ABSOLUTE)
|
||||
set(_client_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-client-protocol.h")
|
||||
set(_code "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-protocol.c")
|
||||
|
||||
set_source_files_properties(${_client_header} GENERATED)
|
||||
set_source_files_properties(${_code} GENERATED)
|
||||
set_property(SOURCE ${_client_header} PROPERTY SKIP_AUTOMOC ON)
|
||||
|
||||
add_custom_command(OUTPUT "${_client_header}"
|
||||
COMMAND ${WaylandScanner_EXECUTABLE} client-header ${_infile} ${_client_header}
|
||||
DEPENDS ${WaylandScanner_EXECUTABLE} ${_infile}
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT "${_code}"
|
||||
COMMAND ${WaylandScanner_EXECUTABLE} private-code ${_infile} ${_code}
|
||||
DEPENDS ${WaylandScanner_EXECUTABLE} ${_infile} ${_client_header}
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
list(APPEND ${out_var} "${_client_header}" "${_code}")
|
||||
set(${out_var} ${${out_var}} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
|
||||
function(ecm_add_wayland_server_protocol out_var)
|
||||
# Parse arguments
|
||||
set(oneValueArgs PROTOCOL BASENAME)
|
||||
cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN})
|
||||
|
||||
if(ARGS_UNPARSED_ARGUMENTS)
|
||||
message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_server_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"")
|
||||
endif()
|
||||
|
||||
ecm_add_wayland_client_protocol(${out_var}
|
||||
PROTOCOL ${ARGS_PROTOCOL}
|
||||
BASENAME ${ARGS_BASENAME})
|
||||
|
||||
get_filename_component(_infile ${ARGS_PROTOCOL} ABSOLUTE)
|
||||
set(_server_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-server-protocol.h")
|
||||
set_property(SOURCE ${_server_header} PROPERTY SKIP_AUTOMOC ON)
|
||||
set_source_files_properties(${_server_header} GENERATED)
|
||||
|
||||
add_custom_command(OUTPUT "${_server_header}"
|
||||
COMMAND ${WaylandScanner_EXECUTABLE} server-header ${_infile} ${_server_header}
|
||||
DEPENDS ${WaylandScanner_EXECUTABLE} ${_infile}
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
list(APPEND ${out_var} "${_server_header}")
|
||||
set(${out_var} ${${out_var}} PARENT_SCOPE)
|
||||
endfunction()
|
||||
@@ -1,62 +0,0 @@
|
||||
# Host-native instruction-set flags. Skipped entirely when NO_ISA_EXTENSIONS is set.
if (NOT NO_ISA_EXTENSIONS)
    include(CheckCXXCompilerFlag)
    if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
        # aarch64/arm64 branch: probe -mcpu=native.
        CHECK_CXX_COMPILER_FLAG("-mcpu=native" COMPILER_SUPPORTS_MCPU_NATIVE)
        # Test the result variable of the probe above. The previous code tested
        # COMPILER_SUPPORTS_MARCH_NATIVE, which this branch never sets, so the
        # flag was never added.
        if(COMPILER_SUPPORTS_MCPU_NATIVE)
            add_compile_options(-mcpu=native)
        endif()
    else()
        # Every other processor: probe -march=native.
        CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
        if(COMPILER_SUPPORTS_MARCH_NATIVE)
            add_compile_options(-march=native)
        endif()
    endif()
    # On WIN32 the /arch:AVX2 option is added unconditionally (no probe).
    if(WIN32)
        add_compile_options(/arch:AVX2)
    endif()
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT LEGACY)
|
||||
set(USE_WAYLAND ON)
|
||||
else()
|
||||
set(USE_WAYLAND OFF)
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fexperimental-library>)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
add_definitions(-DNOMINMAX -DWIN32_LEAN_AND_MEAN -D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR)
|
||||
add_compile_options(/MP)
|
||||
endif()
|
||||
|
||||
if(EMSCRIPTEN)
|
||||
add_compile_options(-pthread -DIMGUI_IMPL_OPENGL_ES2)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug" AND NOT EMSCRIPTEN)
|
||||
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
find_program(MOLD_LINKER mold)
|
||||
if(MOLD_LINKER)
|
||||
set(CMAKE_LINKER_TYPE "MOLD")
|
||||
endif()
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
add_compile_options(-fno-eliminate-unused-debug-types)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_program(CCACHE ccache)
|
||||
if(CCACHE)
|
||||
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
|
||||
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
|
||||
endif()
|
||||
|
||||
file(GENERATE OUTPUT .gitignore CONTENT "*")
|
||||
|
||||
set(CMAKE_COLOR_DIAGNOSTICS ON)
|
||||
@@ -1,12 +0,0 @@
|
||||
diff --git a/extra_symbols.txt b/extra_symbols.txt
|
||||
index b95bb58..6b8f616 100644
|
||||
--- a/extra_symbols.txt
|
||||
+++ b/extra_symbols.txt
|
||||
@@ -1,3 +1,7 @@
|
||||
+glCompressedTexImage2D
|
||||
+GL_LINEAR_MIPMAP_LINEAR
|
||||
+GL_TEXTURE_WRAP_S
|
||||
+GL_TEXTURE_WRAP_T
|
||||
glReadPixels
|
||||
glClearColor
|
||||
glClear
|
||||
@@ -1,14 +0,0 @@
|
||||
diff '--color=auto' -ruN 72d8f61727dc878102157113d1998f86b852d20e/imconfig.h new/imconfig.h
|
||||
--- 72d8f61727dc878102157113d1998f86b852d20e/imconfig.h 2024-09-27 14:28:05.568760349 +0200
|
||||
+++ new/imconfig.h 2024-09-27 14:29:47.310243707 +0200
|
||||
@@ -113,6 +113,10 @@
|
||||
// Read about ImGuiBackendFlags_RendererHasVtxOffset for details.
|
||||
//#define ImDrawIdx unsigned int
|
||||
|
||||
+#ifdef __EMSCRIPTEN__
|
||||
+#define ImDrawIdx unsigned int
|
||||
+#endif
|
||||
+
|
||||
//---- Override ImDrawCallback signature (will need to modify renderer backends accordingly)
|
||||
//struct ImDrawList;
|
||||
//struct ImDrawCmd;
|
||||
@@ -1,56 +0,0 @@
|
||||
diff --git a/backends/imgui_impl_opengl3_loader.h b/backends/imgui_impl_opengl3_loader.h
|
||||
index d6ffa5a2d..e48372c64 100644
|
||||
--- a/backends/imgui_impl_opengl3_loader.h
|
||||
+++ b/backends/imgui_impl_opengl3_loader.h
|
||||
@@ -179,6 +179,7 @@ typedef khronos_uint8_t GLubyte;
|
||||
#define GL_VERSION 0x1F02
|
||||
#define GL_EXTENSIONS 0x1F03
|
||||
#define GL_LINEAR 0x2601
|
||||
+#define GL_LINEAR_MIPMAP_LINEAR 0x2703
|
||||
#define GL_TEXTURE_MAG_FILTER 0x2800
|
||||
#define GL_TEXTURE_MIN_FILTER 0x2801
|
||||
#define GL_TEXTURE_WRAP_S 0x2802
|
||||
@@ -241,8 +242,10 @@ GLAPI void APIENTRY glGenTextures (GLsizei n, GLuint *textures);
|
||||
#define GL_TEXTURE0 0x84C0
|
||||
#define GL_ACTIVE_TEXTURE 0x84E0
|
||||
typedef void (APIENTRYP PFNGLACTIVETEXTUREPROC) (GLenum texture);
|
||||
+typedef void (APIENTRYP PFNGLCOMPRESSEDTEXIMAGE2DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data);
|
||||
#ifdef GL_GLEXT_PROTOTYPES
|
||||
GLAPI void APIENTRY glActiveTexture (GLenum texture);
|
||||
+GLAPI void APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data);
|
||||
#endif
|
||||
#endif /* GL_VERSION_1_3 */
|
||||
#ifndef GL_VERSION_1_4
|
||||
@@ -478,7 +481,7 @@ GL3W_API GL3WglProc imgl3wGetProcAddress(const char *proc);
|
||||
|
||||
/* gl3w internal state */
|
||||
union ImGL3WProcs {
|
||||
- GL3WglProc ptr[59];
|
||||
+ GL3WglProc ptr[60];
|
||||
struct {
|
||||
PFNGLACTIVETEXTUREPROC ActiveTexture;
|
||||
PFNGLATTACHSHADERPROC AttachShader;
|
||||
@@ -494,6 +497,7 @@ union ImGL3WProcs {
|
||||
PFNGLCLEARPROC Clear;
|
||||
PFNGLCLEARCOLORPROC ClearColor;
|
||||
PFNGLCOMPILESHADERPROC CompileShader;
|
||||
+ PFNGLCOMPRESSEDTEXIMAGE2DPROC CompressedTexImage2D;
|
||||
PFNGLCREATEPROGRAMPROC CreateProgram;
|
||||
PFNGLCREATESHADERPROC CreateShader;
|
||||
PFNGLDELETEBUFFERSPROC DeleteBuffers;
|
||||
@@ -559,6 +563,7 @@ GL3W_API extern union ImGL3WProcs imgl3wProcs;
|
||||
#define glClear imgl3wProcs.gl.Clear
|
||||
#define glClearColor imgl3wProcs.gl.ClearColor
|
||||
#define glCompileShader imgl3wProcs.gl.CompileShader
|
||||
+#define glCompressedTexImage2D imgl3wProcs.gl.CompressedTexImage2D
|
||||
#define glCreateProgram imgl3wProcs.gl.CreateProgram
|
||||
#define glCreateShader imgl3wProcs.gl.CreateShader
|
||||
#define glDeleteBuffers imgl3wProcs.gl.DeleteBuffers
|
||||
@@ -854,6 +859,7 @@ static const char *proc_names[] = {
|
||||
"glClear",
|
||||
"glClearColor",
|
||||
"glCompileShader",
|
||||
+ "glCompressedTexImage2D",
|
||||
"glCreateProgram",
|
||||
"glCreateShader",
|
||||
"glDeleteBuffers",
|
||||
@@ -1,14 +0,0 @@
|
||||
diff --git i/include/ppqsort/parameters.h w/include/ppqsort/parameters.h
|
||||
index 115c3a1..3f4b669 100644
|
||||
--- i/include/ppqsort/parameters.h
|
||||
+++ w/include/ppqsort/parameters.h
|
||||
@@ -3,7 +3,8 @@
|
||||
#include <bit>
|
||||
#include <execution>
|
||||
|
||||
-#ifndef NDEBUG
|
||||
+//#ifndef NDEBUG
|
||||
+#if 0
|
||||
#include <bitset>
|
||||
#include <iostream>
|
||||
#include <syncstream>
|
||||
@@ -1,35 +0,0 @@
|
||||
set(TRACY_COMMON_DIR ${CMAKE_CURRENT_LIST_DIR}/../public/common)
|
||||
|
||||
set(TRACY_COMMON_SOURCES
|
||||
tracy_lz4.cpp
|
||||
tracy_lz4hc.cpp
|
||||
TracySocket.cpp
|
||||
TracyStackFrames.cpp
|
||||
TracySystem.cpp
|
||||
)
|
||||
|
||||
list(TRANSFORM TRACY_COMMON_SOURCES PREPEND "${TRACY_COMMON_DIR}/")
|
||||
|
||||
|
||||
set(TRACY_SERVER_DIR ${CMAKE_CURRENT_LIST_DIR}/../server)
|
||||
|
||||
set(TRACY_SERVER_SOURCES
|
||||
TracyMemory.cpp
|
||||
TracyMmap.cpp
|
||||
TracyPrint.cpp
|
||||
TracySysUtil.cpp
|
||||
TracyTaskDispatch.cpp
|
||||
TracyTextureCompression.cpp
|
||||
TracyThreadCompress.cpp
|
||||
TracyWorker.cpp
|
||||
)
|
||||
|
||||
list(TRANSFORM TRACY_SERVER_SOURCES PREPEND "${TRACY_SERVER_DIR}/")
|
||||
|
||||
|
||||
add_library(TracyServer STATIC EXCLUDE_FROM_ALL ${TRACY_COMMON_SOURCES} ${TRACY_SERVER_SOURCES})
|
||||
target_include_directories(TracyServer PUBLIC ${TRACY_COMMON_DIR} ${TRACY_SERVER_DIR})
|
||||
target_link_libraries(TracyServer PUBLIC TracyCapstone libzstd PPQSort::PPQSort)
|
||||
if(NO_STATISTICS)
|
||||
target_compile_definitions(TracyServer PUBLIC TRACY_NO_STATISTICS)
|
||||
endif()
|
||||
@@ -1,193 +0,0 @@
|
||||
# Vendor Specific CMake
|
||||
# The Tracy project keeps most vendor source locally
|
||||
|
||||
set (ROOT_DIR "${CMAKE_CURRENT_LIST_DIR}/../")
|
||||
|
||||
# Dependencies are taken from the system first and if not found, they are pulled with CPM and built from source
|
||||
|
||||
include(FindPkgConfig)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/CPM.cmake)
|
||||
|
||||
option(DOWNLOAD_CAPSTONE "Force download capstone" ON)
|
||||
option(DOWNLOAD_GLFW "Force download glfw" OFF)
|
||||
option(DOWNLOAD_FREETYPE "Force download freetype" OFF)
|
||||
|
||||
# capstone
|
||||
|
||||
# Provide the TracyCapstone interface target, either from a system capstone
# located through pkg-config or from a CPM-fetched source build.
pkg_check_modules(CAPSTONE capstone)
if(CAPSTONE_FOUND AND NOT DOWNLOAD_CAPSTONE)
    # Report the version found by pkg-config; the previous message expanded
    # ${CAPSTONE}, which pkg_check_modules() does not define.
    message(STATUS "Capstone found: ${CAPSTONE_VERSION}")
    add_library(TracyCapstone INTERFACE)
    target_include_directories(TracyCapstone INTERFACE ${CAPSTONE_INCLUDE_DIRS})
    target_link_libraries(TracyCapstone INTERFACE ${CAPSTONE_LINK_LIBRARIES})
else()
    # Build capstone from source with the listed architecture back-ends switched off.
    CPMAddPackage(
        NAME capstone
        GITHUB_REPOSITORY capstone-engine/capstone
        GIT_TAG 6.0.0-Alpha1
        OPTIONS
            "CAPSTONE_X86_ATT_DISABLE ON"
            "CAPSTONE_ALPHA_SUPPORT OFF"
            "CAPSTONE_HPPA_SUPPORT OFF"
            "CAPSTONE_LOONGARCH_SUPPORT OFF"
            "CAPSTONE_M680X_SUPPORT OFF"
            "CAPSTONE_M68K_SUPPORT OFF"
            "CAPSTONE_MIPS_SUPPORT OFF"
            "CAPSTONE_MOS65XX_SUPPORT OFF"
            "CAPSTONE_PPC_SUPPORT OFF"
            "CAPSTONE_SPARC_SUPPORT OFF"
            "CAPSTONE_SYSTEMZ_SUPPORT OFF"
            "CAPSTONE_XCORE_SUPPORT OFF"
            "CAPSTONE_TRICORE_SUPPORT OFF"
            "CAPSTONE_TMS320C64X_SUPPORT OFF"
            "CAPSTONE_EVM_SUPPORT OFF"
            "CAPSTONE_WASM_SUPPORT OFF"
            "CAPSTONE_BPF_SUPPORT OFF"
            "CAPSTONE_RISCV_SUPPORT OFF"
            "CAPSTONE_SH_SUPPORT OFF"
            "CAPSTONE_XTENSA_SUPPORT OFF"
            "CAPSTONE_BUILD_MACOS_THIN ON"
        EXCLUDE_FROM_ALL TRUE
    )
    add_library(TracyCapstone INTERFACE)
    target_include_directories(TracyCapstone INTERFACE ${capstone_SOURCE_DIR}/include/capstone)
    target_link_libraries(TracyCapstone INTERFACE capstone)
endif()
|
||||
|
||||
# GLFW
|
||||
|
||||
if(NOT USE_WAYLAND AND NOT EMSCRIPTEN)
|
||||
pkg_check_modules(GLFW glfw3)
|
||||
if (GLFW_FOUND AND NOT DOWNLOAD_GLFW)
|
||||
add_library(TracyGlfw3 INTERFACE)
|
||||
target_include_directories(TracyGlfw3 INTERFACE ${GLFW_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyGlfw3 INTERFACE ${GLFW_LINK_LIBRARIES})
|
||||
else()
|
||||
CPMAddPackage(
|
||||
NAME glfw
|
||||
GITHUB_REPOSITORY glfw/glfw
|
||||
GIT_TAG 3.4
|
||||
OPTIONS
|
||||
"GLFW_BUILD_EXAMPLES OFF"
|
||||
"GLFW_BUILD_TESTS OFF"
|
||||
"GLFW_BUILD_DOCS OFF"
|
||||
"GLFW_INSTALL OFF"
|
||||
EXCLUDE_FROM_ALL TRUE
|
||||
)
|
||||
add_library(TracyGlfw3 INTERFACE)
|
||||
target_link_libraries(TracyGlfw3 INTERFACE glfw)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# freetype
|
||||
|
||||
pkg_check_modules(FREETYPE freetype2)
|
||||
if (FREETYPE_FOUND AND NOT DOWNLOAD_FREETYPE)
|
||||
add_library(TracyFreetype INTERFACE)
|
||||
target_include_directories(TracyFreetype INTERFACE ${FREETYPE_INCLUDE_DIRS})
|
||||
target_link_libraries(TracyFreetype INTERFACE ${FREETYPE_LINK_LIBRARIES})
|
||||
else()
|
||||
CPMAddPackage(
|
||||
NAME freetype
|
||||
GITHUB_REPOSITORY freetype/freetype
|
||||
GIT_TAG VER-2-13-3
|
||||
OPTIONS
|
||||
"FT_DISABLE_HARFBUZZ ON"
|
||||
"FT_WITH_HARFBUZZ OFF"
|
||||
EXCLUDE_FROM_ALL TRUE
|
||||
)
|
||||
add_library(TracyFreetype INTERFACE)
|
||||
target_link_libraries(TracyFreetype INTERFACE freetype)
|
||||
endif()
|
||||
|
||||
# Zstd
|
||||
|
||||
CPMAddPackage(
|
||||
NAME zstd
|
||||
GITHUB_REPOSITORY facebook/zstd
|
||||
GIT_TAG v1.5.7
|
||||
OPTIONS
|
||||
"ZSTD_BUILD_SHARED OFF"
|
||||
EXCLUDE_FROM_ALL TRUE
|
||||
SOURCE_SUBDIR build/cmake
|
||||
)
|
||||
|
||||
# Diff Template Library
|
||||
|
||||
set(DTL_DIR "${ROOT_DIR}/dtl")
|
||||
file(GLOB_RECURSE DTL_HEADERS CONFIGURE_DEPENDS RELATIVE ${DTL_DIR} "*.hpp")
|
||||
add_library(TracyDtl INTERFACE)
|
||||
target_sources(TracyDtl INTERFACE ${DTL_HEADERS})
|
||||
target_include_directories(TracyDtl INTERFACE ${DTL_DIR})
|
||||
|
||||
# Get Opt
|
||||
|
||||
set(GETOPT_DIR "${ROOT_DIR}/getopt")
|
||||
set(GETOPT_SOURCES ${GETOPT_DIR}/getopt.c)
|
||||
set(GETOPT_HEADERS ${GETOPT_DIR}/getopt.h)
|
||||
add_library(TracyGetOpt STATIC EXCLUDE_FROM_ALL ${GETOPT_SOURCES} ${GETOPT_HEADERS})
|
||||
target_include_directories(TracyGetOpt PUBLIC ${GETOPT_DIR})
|
||||
|
||||
# ImGui
|
||||
|
||||
CPMAddPackage(
|
||||
NAME ImGui
|
||||
GITHUB_REPOSITORY ocornut/imgui
|
||||
GIT_TAG v1.91.9b-docking
|
||||
DOWNLOAD_ONLY TRUE
|
||||
PATCHES
|
||||
"${CMAKE_CURRENT_LIST_DIR}/imgui-emscripten.patch"
|
||||
"${CMAKE_CURRENT_LIST_DIR}/imgui-loader.patch"
|
||||
)
|
||||
|
||||
set(IMGUI_SOURCES
|
||||
imgui_widgets.cpp
|
||||
imgui_draw.cpp
|
||||
imgui_demo.cpp
|
||||
imgui.cpp
|
||||
imgui_tables.cpp
|
||||
misc/freetype/imgui_freetype.cpp
|
||||
backends/imgui_impl_opengl3.cpp
|
||||
)
|
||||
|
||||
list(TRANSFORM IMGUI_SOURCES PREPEND "${ImGui_SOURCE_DIR}/")
|
||||
|
||||
add_library(TracyImGui STATIC EXCLUDE_FROM_ALL ${IMGUI_SOURCES})
|
||||
target_include_directories(TracyImGui PUBLIC ${ImGui_SOURCE_DIR})
|
||||
target_link_libraries(TracyImGui PUBLIC TracyFreetype)
|
||||
target_compile_definitions(TracyImGui PRIVATE "IMGUI_ENABLE_FREETYPE")
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
target_compile_definitions(TracyImGui PRIVATE "IMGUI_DISABLE_DEBUG_TOOLS")
|
||||
endif()
|
||||
|
||||
# NFD
|
||||
|
||||
if(NOT NO_FILESELECTOR AND NOT EMSCRIPTEN)
|
||||
if(GTK_FILESELECTOR)
|
||||
set(NFD_PORTAL OFF)
|
||||
else()
|
||||
set(NFD_PORTAL ON)
|
||||
endif()
|
||||
|
||||
CPMAddPackage(
|
||||
NAME nfd
|
||||
GITHUB_REPOSITORY btzy/nativefiledialog-extended
|
||||
GIT_TAG v1.2.1
|
||||
EXCLUDE_FROM_ALL TRUE
|
||||
OPTIONS
|
||||
"NFD_PORTAL ${NFD_PORTAL}"
|
||||
)
|
||||
endif()
|
||||
|
||||
# PPQSort
|
||||
|
||||
CPMAddPackage(
|
||||
NAME PPQSort
|
||||
GITHUB_REPOSITORY GabTux/PPQSort
|
||||
VERSION 1.0.5
|
||||
PATCHES
|
||||
"${CMAKE_CURRENT_LIST_DIR}/ppqsort-nodebug.patch"
|
||||
EXCLUDE_FROM_ALL TRUE
|
||||
)
|
||||
@@ -1,24 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
message("Parsing public/common/TracyVersion.hpp file")
|
||||
|
||||
file(READ "${CMAKE_CURRENT_LIST_DIR}/../public/common/TracyVersion.hpp" version)
|
||||
|
||||
# Note: This looks for a specific pattern in TracyVersion.hpp, if it changes
|
||||
# this needs updating.
|
||||
string(REGEX MATCH "Major = ([0-9]+)" _ ${version})
|
||||
|
||||
# This works due to the above () subexpression selection. See
|
||||
# https://cmake.org/cmake/help/latest/command/string.html#regex-match for more
|
||||
# details
|
||||
set(TRACY_VERSION_MAJOR ${CMAKE_MATCH_1})
|
||||
|
||||
string(REGEX MATCH "Minor = ([0-9]+)" _ ${version})
|
||||
set(TRACY_VERSION_MINOR ${CMAKE_MATCH_1})
|
||||
|
||||
string(REGEX MATCH "Patch = ([0-9]+)" _ ${version})
|
||||
set(TRACY_VERSION_PATCH ${CMAKE_MATCH_1})
|
||||
|
||||
set(TRACY_VERSION_STRING "${TRACY_VERSION_MAJOR}.${TRACY_VERSION_MINOR}.${TRACY_VERSION_PATCH}")
|
||||
|
||||
message("VERSION ${TRACY_VERSION_STRING}")
|
||||
31
common/TracyAlloc.hpp
Normal file
@@ -0,0 +1,31 @@
|
||||
#ifndef __TRACYALLOC_HPP__
|
||||
#define __TRACYALLOC_HPP__
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# include "../client/tracy_rpmalloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
static inline void* tracy_malloc( size_t size )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
return rpmalloc( size );
|
||||
#else
|
||||
return malloc( size );
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void tracy_free( void* ptr )
|
||||
{
|
||||
#ifdef TRACY_ENABLE
|
||||
rpfree( ptr );
|
||||
#else
|
||||
free( ptr );
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -2,7 +2,7 @@
|
||||
#define __TRACYFORCEINLINE_HPP__
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define tracy_force_inline __attribute__((always_inline)) inline
|
||||
# define tracy_force_inline __attribute__((always_inline))
|
||||
#elif defined(_MSC_VER)
|
||||
# define tracy_force_inline __forceinline
|
||||
#else
|
||||
47
common/TracyProtocol.hpp
Normal file
@@ -0,0 +1,47 @@
|
||||
#ifndef __TRACYPROTOCOL_HPP__
|
||||
#define __TRACYPROTOCOL_HPP__
|
||||
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../common/tracy_lz4.hpp"
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
using lz4sz_t = uint32_t;
|
||||
|
||||
enum { TargetFrameSize = 256 * 1024 };
|
||||
enum { LZ4Size = LZ4_COMPRESSBOUND( TargetFrameSize ) };
|
||||
static_assert( LZ4Size <= std::numeric_limits<lz4sz_t>::max(), "LZ4Size greater than lz4sz_t" );
|
||||
static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" );
|
||||
|
||||
enum ServerQuery : uint8_t
|
||||
{
|
||||
ServerQueryTerminate,
|
||||
ServerQueryString,
|
||||
ServerQueryThreadString,
|
||||
ServerQuerySourceLocation,
|
||||
ServerQueryPlotName,
|
||||
};
|
||||
|
||||
enum { WelcomeMessageProgramNameSize = 64 };
|
||||
|
||||
#pragma pack( 1 )
|
||||
struct WelcomeMessage
|
||||
{
|
||||
double timerMul;
|
||||
uint64_t initBegin;
|
||||
uint64_t initEnd;
|
||||
uint64_t delay;
|
||||
uint64_t resolution;
|
||||
uint64_t epoch;
|
||||
char programName[WelcomeMessageProgramNameSize];
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
enum { WelcomeMessageSize = sizeof( WelcomeMessage ) };
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
268
common/TracyQueue.hpp
Normal file
@@ -0,0 +1,268 @@
|
||||
#ifndef __TRACYQUEUE_HPP__
|
||||
#define __TRACYQUEUE_HPP__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
enum class QueueType : uint8_t
|
||||
{
|
||||
ZoneText,
|
||||
Message,
|
||||
ZoneBeginAllocSrcLoc,
|
||||
Terminate,
|
||||
ZoneBegin,
|
||||
ZoneEnd,
|
||||
FrameMarkMsg,
|
||||
SourceLocation,
|
||||
LockAnnounce,
|
||||
LockWait,
|
||||
LockObtain,
|
||||
LockRelease,
|
||||
LockSharedWait,
|
||||
LockSharedObtain,
|
||||
LockSharedRelease,
|
||||
LockMark,
|
||||
PlotData,
|
||||
MessageLiteral,
|
||||
GpuNewContext,
|
||||
GpuZoneBegin,
|
||||
GpuZoneEnd,
|
||||
GpuTime,
|
||||
GpuResync,
|
||||
StringData,
|
||||
ThreadName,
|
||||
CustomStringData,
|
||||
PlotName,
|
||||
SourceLocationPayload,
|
||||
NUM_TYPES
|
||||
};
|
||||
|
||||
#pragma pack( 1 )
|
||||
|
||||
struct QueueZoneBegin
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
uint64_t srcloc; // ptr
|
||||
uint32_t cpu;
|
||||
};
|
||||
|
||||
struct QueueZoneEnd
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
uint32_t cpu;
|
||||
};
|
||||
|
||||
struct QueueStringTransfer
|
||||
{
|
||||
uint64_t ptr;
|
||||
};
|
||||
|
||||
struct QueueFrameMark
|
||||
{
|
||||
int64_t time;
|
||||
};
|
||||
|
||||
struct QueueSourceLocation
|
||||
{
|
||||
uint64_t name;
|
||||
uint64_t function; // ptr
|
||||
uint64_t file; // ptr
|
||||
uint32_t line;
|
||||
uint8_t r;
|
||||
uint8_t g;
|
||||
uint8_t b;
|
||||
};
|
||||
|
||||
struct QueueZoneText
|
||||
{
|
||||
uint64_t thread;
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
enum class LockType : uint8_t
|
||||
{
|
||||
Lockable,
|
||||
SharedLockable
|
||||
};
|
||||
|
||||
struct QueueLockAnnounce
|
||||
{
|
||||
uint32_t id;
|
||||
uint64_t lckloc; // ptr
|
||||
LockType type;
|
||||
};
|
||||
|
||||
struct QueueLockWait
|
||||
{
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
LockType type;
|
||||
};
|
||||
|
||||
struct QueueLockObtain
|
||||
{
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueLockRelease
|
||||
{
|
||||
uint32_t id;
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
};
|
||||
|
||||
struct QueueLockMark
|
||||
{
|
||||
uint32_t id;
|
||||
uint64_t thread;
|
||||
uint64_t srcloc; // ptr
|
||||
};
|
||||
|
||||
enum class PlotDataType : uint8_t
|
||||
{
|
||||
Float,
|
||||
Double,
|
||||
Int
|
||||
};
|
||||
|
||||
struct QueuePlotData
|
||||
{
|
||||
uint64_t name; // ptr
|
||||
int64_t time;
|
||||
PlotDataType type;
|
||||
union
|
||||
{
|
||||
double d;
|
||||
float f;
|
||||
int64_t i;
|
||||
} data;
|
||||
};
|
||||
|
||||
struct QueueMessage
|
||||
{
|
||||
int64_t time;
|
||||
uint64_t thread;
|
||||
uint64_t text; // ptr
|
||||
};
|
||||
|
||||
struct QueueGpuNewContext
|
||||
{
|
||||
int64_t cpuTime;
|
||||
int64_t gpuTime;
|
||||
uint64_t thread;
|
||||
uint16_t context;
|
||||
uint8_t accuracyBits;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneBegin
|
||||
{
|
||||
int64_t cpuTime;
|
||||
uint64_t srcloc;
|
||||
uint16_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuZoneEnd
|
||||
{
|
||||
int64_t cpuTime;
|
||||
uint16_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuTime
|
||||
{
|
||||
int64_t gpuTime;
|
||||
uint16_t context;
|
||||
};
|
||||
|
||||
struct QueueGpuResync
|
||||
{
|
||||
int64_t cpuTime;
|
||||
int64_t gpuTime;
|
||||
uint16_t context;
|
||||
};
|
||||
|
||||
struct QueueHeader
|
||||
{
|
||||
union
|
||||
{
|
||||
QueueType type;
|
||||
uint8_t idx;
|
||||
};
|
||||
};
|
||||
|
||||
struct QueueItem
|
||||
{
|
||||
QueueHeader hdr;
|
||||
union
|
||||
{
|
||||
QueueZoneBegin zoneBegin;
|
||||
QueueZoneEnd zoneEnd;
|
||||
QueueStringTransfer stringTransfer;
|
||||
QueueFrameMark frameMark;
|
||||
QueueSourceLocation srcloc;
|
||||
QueueZoneText zoneText;
|
||||
QueueLockAnnounce lockAnnounce;
|
||||
QueueLockWait lockWait;
|
||||
QueueLockObtain lockObtain;
|
||||
QueueLockRelease lockRelease;
|
||||
QueueLockMark lockMark;
|
||||
QueuePlotData plotData;
|
||||
QueueMessage message;
|
||||
QueueGpuNewContext gpuNewContext;
|
||||
QueueGpuZoneBegin gpuZoneBegin;
|
||||
QueueGpuZoneEnd gpuZoneEnd;
|
||||
QueueGpuTime gpuTime;
|
||||
QueueGpuResync gpuResync;
|
||||
};
|
||||
};
|
||||
|
||||
#pragma pack()
|
||||
|
||||
enum { QueueItemSize = sizeof( QueueItem ) };
|
||||
|
||||
static const size_t QueueDataSize[] = {
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneText ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location
|
||||
// above items must be first
|
||||
sizeof( QueueHeader ), // terminate
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueZoneEnd ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueFrameMark ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueSourceLocation ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockWait ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockObtain ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockRelease ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockWait ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockObtain ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockRelease ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueLockMark ),
|
||||
sizeof( QueueHeader ) + sizeof( QueuePlotData ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
|
||||
sizeof( QueueHeader ) + sizeof( QueueGpuResync ),
|
||||
// keep all QueueStringTransfer below
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // custom string data
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
|
||||
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
|
||||
};
|
||||
|
||||
static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
|
||||
static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType::NUM_TYPES, "QueueDataSize mismatch" );
|
||||
static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" );
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
286
common/TracySocket.cpp
Normal file
@@ -0,0 +1,286 @@
|
||||
#include <assert.h>
|
||||
#include <new>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "TracyAlloc.hpp"
|
||||
#include "TracySocket.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# include <winsock2.h>
|
||||
# include <ws2tcpip.h>
|
||||
#else
|
||||
# include <sys/socket.h>
|
||||
# include <netdb.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifndef MSG_NOSIGNAL
|
||||
# define MSG_NOSIGNAL 0
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef _MSC_VER
|
||||
struct __wsinit
|
||||
{
|
||||
__wsinit()
|
||||
{
|
||||
WSADATA wsaData;
|
||||
if( WSAStartup( MAKEWORD( 2, 2 ), &wsaData ) != 0 )
|
||||
{
|
||||
fprintf( stderr, "Cannot init winsock.\n" );
|
||||
exit( 1 );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static __wsinit InitWinSock()
|
||||
{
|
||||
static __wsinit init;
|
||||
return init;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Creates a socket object with no descriptor attached (m_sock is -1).
Socket::Socket()
    : m_sock( -1 )
{
#ifdef _MSC_VER
    // MSVC builds need Winsock started before any socket call.
    InitWinSock();
#endif
}
|
||||
|
||||
// Wraps an already existing descriptor; the destructor will close it
// if it is not -1.
Socket::Socket( int sock )
    : m_sock( sock )
{
}
|
||||
|
||||
// Closes the descriptor if one is still attached.
Socket::~Socket()
{
    if( m_sock == -1 ) return;
    Close();
}
|
||||
|
||||
bool Socket::Connect( const char* addr, const char* port )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
|
||||
struct addrinfo hints;
|
||||
struct addrinfo *res, *ptr;
|
||||
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = AF_UNSPEC;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
|
||||
if( getaddrinfo( addr, port, &hints, &res ) != 0 ) return false;
|
||||
int sock;
|
||||
for( ptr = res; ptr; ptr = ptr->ai_next )
|
||||
{
|
||||
if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
|
||||
#if defined __APPLE__
|
||||
int val = 1;
|
||||
setsockopt( m_sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
|
||||
#endif
|
||||
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 )
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
closesocket( sock );
|
||||
#else
|
||||
close( sock );
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
freeaddrinfo( res );
|
||||
if( !ptr ) return false;
|
||||
|
||||
m_sock = sock;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Socket::Close()
|
||||
{
|
||||
assert( m_sock != -1 );
|
||||
#ifdef _MSC_VER
|
||||
closesocket( m_sock );
|
||||
#else
|
||||
close( m_sock );
|
||||
#endif
|
||||
m_sock = -1;
|
||||
}
|
||||
|
||||
int Socket::Send( const void* _buf, int len )
|
||||
{
|
||||
auto buf = (const char*)_buf;
|
||||
assert( m_sock != -1 );
|
||||
auto start = buf;
|
||||
while( len > 0 )
|
||||
{
|
||||
auto ret = send( m_sock, buf, len, MSG_NOSIGNAL );
|
||||
if( ret == -1 ) return -1;
|
||||
len -= ret;
|
||||
buf += ret;
|
||||
}
|
||||
return int( buf - start );
|
||||
}
|
||||
|
||||
int Socket::Recv( void* _buf, int len, const timeval* tv )
|
||||
{
|
||||
auto buf = (char*)_buf;
|
||||
|
||||
fd_set fds;
|
||||
FD_ZERO( &fds );
|
||||
FD_SET( m_sock, &fds );
|
||||
|
||||
#ifndef _WIN32
|
||||
timeval _tv = *tv;
|
||||
select( m_sock+1, &fds, nullptr, nullptr, &_tv );
|
||||
#else
|
||||
select( m_sock+1, &fds, nullptr, nullptr, tv );
|
||||
#endif
|
||||
if( FD_ISSET( m_sock, &fds ) )
|
||||
{
|
||||
return recv( m_sock, buf, len, 0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
bool Socket::Read( void* _buf, int len, const timeval* tv, std::function< bool() > exitCb )
|
||||
{
|
||||
auto buf = (char*)_buf;
|
||||
|
||||
while( len > 0 )
|
||||
{
|
||||
if( exitCb() ) return false;
|
||||
const auto sz = Recv( buf, len, tv );
|
||||
switch( sz )
|
||||
{
|
||||
case 0:
|
||||
return false;
|
||||
case -1:
|
||||
#ifdef _WIN32
|
||||
{
|
||||
auto err = WSAGetLastError();
|
||||
if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
len -= sz;
|
||||
buf += sz;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Socket::HasData()
|
||||
{
|
||||
struct timeval tv;
|
||||
memset( &tv, 0, sizeof( tv ) );
|
||||
|
||||
fd_set fds;
|
||||
FD_ZERO( &fds );
|
||||
FD_SET( m_sock, &fds );
|
||||
|
||||
select( m_sock+1, &fds, nullptr, nullptr, &tv );
|
||||
return FD_ISSET( m_sock, &fds );
|
||||
}
|
||||
|
||||
|
||||
// Creates a listening-socket object with no descriptor attached (m_sock is -1).
ListenSocket::ListenSocket()
    : m_sock( -1 )
{
#ifdef _MSC_VER
    // MSVC builds need Winsock started before any socket call.
    InitWinSock();
#endif
}
|
||||
|
||||
// Closes the listening descriptor if it is still open, mirroring
// Socket::~Socket, so that destroying the object never leaks it.
// Callers that already called Close() are unaffected (m_sock is -1 then).
ListenSocket::~ListenSocket()
{
    if( m_sock != -1 )
    {
        Close();
    }
}
|
||||
|
||||
bool ListenSocket::Listen( const char* port, int backlog )
|
||||
{
|
||||
assert( m_sock == -1 );
|
||||
|
||||
struct addrinfo* res;
|
||||
struct addrinfo hints;
|
||||
|
||||
memset( &hints, 0, sizeof( hints ) );
|
||||
hints.ai_family = AF_INET6;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
hints.ai_flags = AI_PASSIVE;
|
||||
|
||||
if( getaddrinfo( nullptr, port, &hints, &res ) != 0 ) return false;
|
||||
|
||||
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
|
||||
#if defined _MSC_VER || defined __CYGWIN__
|
||||
unsigned long val = 0;
|
||||
setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
|
||||
#endif
|
||||
if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) return false;
|
||||
if( listen( m_sock, backlog ) == -1 ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Waits up to 10 ms for a pending connection and accepts it.
//
// Returns a Socket constructed in memory obtained from tracy_malloc() that
// wraps the accepted descriptor, or nullptr when nothing was pending within
// the wait, select() failed, or accept() failed.
Socket* ListenSocket::Accept()
{
    struct timeval tv;
    tv.tv_sec = 0;
    tv.tv_usec = 10000;

    fd_set fds;
    FD_ZERO( &fds );
    FD_SET( m_sock, &fds );

    // When select() fails the descriptor set is not updated, so FD_ISSET alone
    // would wrongly report a pending connection and accept() could block.
    if( select( m_sock+1, &fds, nullptr, nullptr, &tv ) <= 0 ) return nullptr;
    if( !FD_ISSET( m_sock, &fds ) ) return nullptr;

    struct sockaddr_storage remote;
    socklen_t sz = sizeof( remote );

    int sock = accept( m_sock, (sockaddr*)&remote, &sz );
    if( sock == -1 ) return nullptr;

#if defined __APPLE__
    // Only touch socket options once the descriptor is known to be valid.
    int val = 1;
    setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif

    auto ptr = (Socket*)tracy_malloc( sizeof( Socket ) );
    new(ptr) Socket( sock );
    return ptr;
}
|
||||
|
||||
void ListenSocket::Close()
|
||||
{
|
||||
assert( m_sock != -1 );
|
||||
#ifdef _MSC_VER
|
||||
closesocket( m_sock );
|
||||
#else
|
||||
close( m_sock );
|
||||
#endif
|
||||
m_sock = -1;
|
||||
}
|
||||
|
||||
}
|
||||
57
common/TracySocket.hpp
Normal file
@@ -0,0 +1,57 @@
|
||||
#ifndef __TRACYSOCKET_HPP__
|
||||
#define __TRACYSOCKET_HPP__
|
||||
|
||||
#include <functional>
|
||||
|
||||
struct timeval;
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Thin wrapper around one stream-socket descriptor.
// Objects can be neither copied nor moved.
class Socket
{
public:
    // Starts without a descriptor.
    Socket();
    // Wraps an existing descriptor.
    Socket( int sock );
    // Closes the descriptor if one is attached.
    ~Socket();

    Socket( const Socket& ) = delete;
    Socket( Socket&& ) = delete;
    Socket& operator=( const Socket& ) = delete;
    Socket& operator=( Socket&& ) = delete;

    // Resolves addr:port and connects; true on success.
    bool Connect( const char* addr, const char* port );
    // Closes the attached descriptor.
    void Close();

    // Sends len bytes; returns the byte count or -1 on error.
    int Send( const void* buf, int len );
    // Waits up to *tv for data, then does a single receive; -1 if nothing was read.
    int Recv( void* buf, int len, const timeval* tv );

    // Reads exactly len bytes; exitCb is polled between attempts and aborts the read when it returns true.
    bool Read( void* buf, int len, const timeval* tv, std::function< bool() > exitCb );
    // Non-blocking check for readable data.
    bool HasData();

private:
    // Descriptor, or -1 when none is attached.
    int m_sock;
};
|
||||
|
||||
class ListenSocket
|
||||
{
|
||||
public:
|
||||
ListenSocket();
|
||||
~ListenSocket();
|
||||
|
||||
bool Listen( const char* port, int backlog );
|
||||
Socket* Accept();
|
||||
void Close();
|
||||
|
||||
ListenSocket( const ListenSocket& ) = delete;
|
||||
ListenSocket( ListenSocket&& ) = delete;
|
||||
ListenSocket& operator=( const ListenSocket& ) = delete;
|
||||
ListenSocket& operator=( ListenSocket&& ) = delete;
|
||||
|
||||
private:
|
||||
int m_sock;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
147
common/TracySystem.cpp
Normal file
@@ -0,0 +1,147 @@
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#else
|
||||
# include <pthread.h>
|
||||
# include <string.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "TracySystem.hpp"
|
||||
|
||||
#ifdef TRACY_COLLECT_THREAD_NAMES
|
||||
# include <atomic>
|
||||
# include "TracyAlloc.hpp"
|
||||
#endif
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
#ifdef TRACY_COLLECT_THREAD_NAMES
|
||||
// One entry of the singly linked list of thread names recorded by
// SetThreadName() when TRACY_COLLECT_THREAD_NAMES is defined.
struct ThreadNameData
{
    // Thread identifier the name belongs to.
    uint64_t id;
    // Copy of the thread name, allocated with tracy_malloc().
    const char* name;
    // Entry that was the list head when this one was inserted; may be null.
    ThreadNameData* next;
};
|
||||
extern std::atomic<ThreadNameData*> s_threadNameData;
|
||||
#endif
|
||||
|
||||
void SetThreadName( std::thread& thread, const char* name )
|
||||
{
|
||||
SetThreadName( thread.native_handle(), name );
|
||||
}
|
||||
|
||||
// Assigns a name to the thread identified by handle.
//
// - Windows with an SDK defining NTDDI_WIN10_RS2: uses SetThreadDescription().
// - Other Windows builds: raises the 0x406D1388 exception carrying a
//   THREADNAME_INFO record.
// - _GNU_SOURCE builds (except Emscripten): uses pthread_setname_np(),
//   truncating the name to 15 characters.
// - When TRACY_COLLECT_THREAD_NAMES is defined, the name is additionally
//   copied and pushed onto the s_threadNameData list so GetThreadName()
//   can find it.
//
// name must be a valid, null-terminated string.
void SetThreadName( std::thread::native_handle_type handle, const char* name )
{
#ifdef _WIN32
#  ifdef NTDDI_WIN10_RS2
    // Zero-filled buffer plus a 255-character limit guarantees the wide
    // string is terminated even when the name is truncated or conversion fails.
    wchar_t buf[256] = {};
    mbstowcs( buf, name, 255 );
    SetThreadDescription( static_cast<HANDLE>( handle ), buf );
#  else
    const DWORD MS_VC_EXCEPTION=0x406D1388;
#    pragma pack( push, 8 )
    struct THREADNAME_INFO
    {
        DWORD dwType;
        LPCSTR szName;
        DWORD dwThreadID;
        DWORD dwFlags;
    };
#    pragma pack(pop)

    DWORD ThreadId = GetThreadId( static_cast<HANDLE>( handle ) );
    THREADNAME_INFO info;
    info.dwType = 0x1000;
    info.szName = name;
    info.dwThreadID = ThreadId;
    info.dwFlags = 0;

    __try
    {
        RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
    }
#  endif
#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__
    const auto sz = strlen( name );
    if( sz <= 15 )
    {
        pthread_setname_np( handle, name );
    }
    else
    {
        // Longer names are cut down to 15 characters plus the terminator.
        char buf[16];
        memcpy( buf, name, 15 );
        buf[15] = '\0';
        pthread_setname_np( handle, buf );
    }
#endif
#ifdef TRACY_COLLECT_THREAD_NAMES
    {
        const auto sz = strlen( name );
        char* buf = (char*)tracy_malloc( sz+1 );
        memcpy( buf, name, sz );
        // The terminator belongs at index sz, the last byte of the sz+1
        // allocation. Index sz+1 would be out of bounds and would leave
        // buf[sz] uninitialised.
        buf[sz] = '\0';
        auto data = (ThreadNameData*)tracy_malloc( sizeof( ThreadNameData ) );
#  ifdef _WIN32
        data->id = GetThreadId( static_cast<HANDLE>( handle ) );
#  elif defined __APPLE__
        pthread_threadid_np( handle, &data->id );
#  else
        data->id = (uint64_t)handle;
#  endif
        data->name = buf;
        // Push the new entry at the head of the list; on CAS failure
        // data->next is refreshed with the current head and the exchange is retried.
        data->next = s_threadNameData.load( std::memory_order_relaxed );
        while( !s_threadNameData.compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {}
    }
#endif
}
|
||||
|
||||
// Returns a printable name for the thread with the given id.
//
// - When TRACY_COLLECT_THREAD_NAMES is defined, the s_threadNameData list is
//   searched for a matching id.
// - Otherwise, on Windows with NTDDI_WIN10_RS2, GetThreadDescription() is used;
//   on _GNU_SOURCE builds (except Android and Emscripten), pthread_getname_np().
// - If no name is found, the id is formatted as a decimal number.
//
// The result points into a function-local static buffer of 256 bytes, so it is
// overwritten by the next call.
const char* GetThreadName( uint64_t id )
{
    static char buf[256];
#ifdef TRACY_COLLECT_THREAD_NAMES
    auto ptr = s_threadNameData.load( std::memory_order_relaxed );
    while( ptr )
    {
        if( ptr->id == id )
        {
            // Stored names have no length limit; a bounded copy keeps long
            // names from overrunning the buffer (they are truncated instead).
            snprintf( buf, sizeof( buf ), "%s", ptr->name );
            return buf;
        }
        ptr = ptr->next;
    }
#else
#  ifdef _WIN32
#    ifdef NTDDI_WIN10_RS2
    auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
    if( hnd != 0 )
    {
        PWSTR tmp = nullptr;
        const auto hr = GetThreadDescription( hnd, &tmp );
        CloseHandle( hnd );
        if( SUCCEEDED( hr ) && tmp )
        {
            // Leave room for the terminator: wcstombs() does not write one
            // when the output fills the requested length.
            const auto ret = wcstombs( buf, tmp, sizeof( buf ) - 1 );
            // The description string is owned by the caller and must be released.
            LocalFree( tmp );
            // 0 means an empty description, (size_t)-1 a conversion failure;
            // both fall through to the numeric fallback.
            if( ret != 0 && ret != (size_t)-1 )
            {
                buf[ret] = '\0';
                return buf;
            }
        }
    }
#    endif
#  elif defined _GNU_SOURCE && !defined __ANDROID__ && !defined __EMSCRIPTEN__
    if( pthread_getname_np( (pthread_t)id, buf, 256 ) == 0 )
    {
        return buf;
    }
#  endif
#endif
    sprintf( buf, "%" PRIu64, id );
    return buf;
}
|
||||
|
||||
}
|
||||
43
common/TracySystem.hpp
Normal file
@@ -0,0 +1,43 @@
|
||||
#ifndef __TRACYSYSTEM_HPP__
|
||||
#define __TRACYSYSTEM_HPP__
|
||||
|
||||
#ifdef TRACY_ENABLE
|
||||
# if defined __ANDROID__ || defined __CYGWIN__ || defined __APPLE__
|
||||
# define TRACY_COLLECT_THREAD_NAMES
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
|
||||
#else
|
||||
# include <pthread.h>
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <thread>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
// Returns a 64-bit identifier for the calling thread:
// GetCurrentThreadId() on Windows, pthread_threadid_np() on Apple platforms,
// and the pthread_self() value everywhere else.
static inline uint64_t GetThreadHandle()
{
#ifdef _WIN32
    static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
    const auto tid = GetCurrentThreadId();
    return uint64_t( tid );
#elif defined __APPLE__
    uint64_t tid;
    pthread_threadid_np( pthread_self(), &tid );
    return tid;
#else
    static_assert( sizeof( decltype( pthread_self() ) ) <= sizeof( uint64_t ), "Thread handle too big to fit in protocol" );
    const auto self = pthread_self();
    return uint64_t( self );
#endif
}
|
||||
|
||||
void SetThreadName( std::thread& thread, const char* name );
|
||||
void SetThreadName( std::thread::native_handle_type handle, const char* name );
|
||||
const char* GetThreadName( uint64_t id );
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
1567
common/tracy_lz4.cpp
Normal file
476
common/tracy_lz4.hpp
Normal file
@@ -0,0 +1,476 @@
|
||||
/*
|
||||
* LZ4 - Fast LZ compression algorithm
|
||||
* Header File
|
||||
* Copyright (C) 2011-2017, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 homepage : http://www.lz4.org
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
*/
|
||||
|
||||
|
||||
#ifndef TRACY_LZ4_H_2983827168210
|
||||
#define TRACY_LZ4_H_2983827168210
|
||||
|
||||
/* --- Dependency --- */
|
||||
#include <stddef.h> /* size_t */
|
||||
#include <stdint.h>
|
||||
|
||||
namespace tracy
|
||||
{
|
||||
|
||||
/**
|
||||
Introduction
|
||||
|
||||
LZ4 is a lossless compression algorithm, providing compression speed at 400 MB/s per core,
|
||||
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
|
||||
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
|
||||
|
||||
The LZ4 compression library provides in-memory compression and decompression functions.
|
||||
Compression can be done in:
|
||||
- a single step (described as Simple Functions)
|
||||
- a single step, reusing a context (described in Advanced Functions)
|
||||
- unbounded multiple steps (described as Streaming compression)
|
||||
|
||||
lz4.h provides block compression functions. It gives full buffer control to user.
|
||||
Decompressing an lz4-compressed block also requires metadata (such as compressed size).
|
||||
Each application is free to encode such metadata in whichever way it wants.
|
||||
|
||||
An additional format, called LZ4 frame specification (doc/lz4_Frame_format.md),
|
||||
takes care of encoding standard metadata alongside LZ4-compressed blocks.
|
||||
If your application requires interoperability, it's recommended to use it.
|
||||
A library is provided to take care of it, see lz4frame.h.
|
||||
*/
|
||||
|
||||
/*^***************************************************************
|
||||
* Export parameters
|
||||
*****************************************************************/
|
||||
/*
|
||||
* LZ4_DLL_EXPORT :
|
||||
* Enable exporting of functions when building a Windows DLL
|
||||
* LZ4LIB_VISIBILITY :
|
||||
* Control library symbols visibility.
|
||||
*/
|
||||
#ifndef LZ4LIB_VISIBILITY
|
||||
# if defined(__GNUC__) && (__GNUC__ >= 4)
|
||||
# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
|
||||
# else
|
||||
# define LZ4LIB_VISIBILITY
|
||||
# endif
|
||||
#endif
|
||||
#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
|
||||
# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
|
||||
#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
|
||||
# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
||||
#else
|
||||
# define LZ4LIB_API LZ4LIB_VISIBILITY
|
||||
#endif
|
||||
|
||||
/*------ Version ------*/
|
||||
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
|
||||
#define LZ4_VERSION_MINOR 8 /* for new (non-breaking) interface capabilities */
|
||||
#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
|
||||
|
||||
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
|
||||
|
||||
#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
|
||||
#define LZ4_QUOTE(str) #str
|
||||
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
|
||||
#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
|
||||
|
||||
LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; to be used when checking dll version */
|
||||
LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; to be used when checking dll version */
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Tuning parameter
|
||||
**************************************/
|
||||
/*!
|
||||
* LZ4_MEMORY_USAGE :
|
||||
* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
|
||||
* Increasing memory usage improves compression ratio
|
||||
* Reduced memory usage can improve speed, due to cache effect
|
||||
* The default set below is 12, for 4KB (the original LZ4 comment cites 14, for 16KB, which nicely fits into Intel x86 L1 cache)
|
||||
*/
|
||||
#ifndef LZ4_MEMORY_USAGE
|
||||
# define LZ4_MEMORY_USAGE 12
|
||||
#endif
|
||||
|
||||
/*-************************************
|
||||
* Simple Functions
|
||||
**************************************/
|
||||
/*! LZ4_compress_default() :
|
||||
Compresses 'srcSize' bytes from buffer 'src'
|
||||
into already allocated 'dst' buffer of size 'dstCapacity'.
|
||||
Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
|
||||
It also runs faster, so it's a recommended setting.
|
||||
If the function cannot compress 'src' into a limited 'dst' budget,
|
||||
compression stops *immediately*, and the function result is zero.
|
||||
As a consequence, 'dst' content is not valid.
|
||||
This function never writes outside 'dst' buffer, nor read outside 'source' buffer.
|
||||
srcSize : supported max value is LZ4_MAX_INPUT_SIZE
|
||||
dstCapacity : full or partial size of buffer 'dst' (which must be already allocated)
|
||||
return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
|
||||
or 0 if compression fails */
|
||||
LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
|
||||
|
||||
/*! LZ4_decompress_safe() :
|
||||
compressedSize : is the exact complete size of the compressed block.
|
||||
dstCapacity : is the size of destination buffer, which must be already allocated.
|
||||
return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
|
||||
If destination buffer is not large enough, decoding will stop and output an error code (negative value).
|
||||
If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
This function is protected against buffer overflow exploits, including malicious data packets.
|
||||
It never writes outside output buffer, nor reads outside input buffer.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Advanced Functions
|
||||
**************************************/
|
||||
#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
|
||||
#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
|
||||
|
||||
/*!
|
||||
LZ4_compressBound() :
|
||||
Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
|
||||
This function is primarily useful for memory allocation purposes (destination buffer size).
|
||||
Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
|
||||
Note that LZ4_compress_default() compresses faster when dest buffer size is >= LZ4_compressBound(srcSize)
|
||||
inputSize : max supported value is LZ4_MAX_INPUT_SIZE
|
||||
return : maximum output size in a "worst case" scenario
|
||||
or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compressBound(int inputSize);
|
||||
|
||||
/*!
|
||||
LZ4_compress_fast() :
|
||||
Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
|
||||
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
|
||||
It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
|
||||
An acceleration value of "1" is the same as regular LZ4_compress_default()
|
||||
Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
|
||||
|
||||
|
||||
/*!
|
||||
LZ4_compress_fast_extState() :
|
||||
Same compression function, just using an externally allocated memory space to store compression state.
|
||||
Use LZ4_sizeofState() to know how much memory must be allocated,
|
||||
and allocate it on 8-bytes boundaries (using malloc() typically).
|
||||
Then, provide it as 'void* state' to compression function.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_sizeofState(void);
|
||||
LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
|
||||
|
||||
|
||||
/*!
|
||||
LZ4_compress_destSize() :
|
||||
Reverse the logic : compresses as much data as possible from 'src' buffer
|
||||
into already allocated buffer 'dst' of size 'targetDestSize'.
|
||||
This function either compresses the entire 'src' content into 'dst' if it's large enough,
|
||||
or fill 'dst' buffer completely with as much data as possible from 'src'.
|
||||
*srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
|
||||
New value is necessarily <= old value.
|
||||
return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
|
||||
or 0 if compression fails
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
|
||||
|
||||
|
||||
/*!
|
||||
LZ4_decompress_fast() : (unsafe!!)
|
||||
originalSize : is the original uncompressed size
|
||||
return : the number of bytes read from the source buffer (in other words, the compressed size)
|
||||
If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
Destination buffer must be already allocated. Its size must be >= 'originalSize' bytes.
|
||||
note : This function respects memory boundaries for *properly formed* compressed data.
|
||||
It is a bit faster than LZ4_decompress_safe().
|
||||
However, it does not provide any protection against intentionally modified data stream (malicious input).
|
||||
Use this function in trusted environment only (data to decode comes from a trusted source).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
|
||||
|
||||
/*!
|
||||
LZ4_decompress_safe_partial() :
|
||||
This function decompresses a compressed block of size 'srcSize' at position 'src'
|
||||
into destination buffer 'dst' of size 'dstCapacity'.
|
||||
The function will decompress a minimum of 'targetOutputSize' bytes, and stop after that.
|
||||
However, it's not accurate, and may write more than 'targetOutputSize' (but <= dstCapacity).
|
||||
@return : the number of bytes decoded in the destination buffer (necessarily <= dstCapacity)
|
||||
Note : this number can be < 'targetOutputSize' should the compressed block contain less data.
|
||||
Always control how many bytes were decoded.
|
||||
If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
|
||||
|
||||
|
||||
/*-*********************************************
|
||||
* Streaming Compression Functions
|
||||
***********************************************/
|
||||
typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
|
||||
|
||||
/*! LZ4_createStream() and LZ4_freeStream() :
|
||||
* LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
|
||||
* LZ4_freeStream() releases its memory.
|
||||
*/
|
||||
LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
|
||||
LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
|
||||
|
||||
/*! LZ4_resetStream() :
|
||||
* An LZ4_stream_t structure can be allocated once and re-used multiple times.
|
||||
* Use this function to start compressing a new stream.
|
||||
*/
|
||||
LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
|
||||
|
||||
/*! LZ4_loadDict() :
|
||||
* Use this function to load a static dictionary into LZ4_stream_t.
|
||||
* Any previous data will be forgotten, only 'dictionary' will remain in memory.
|
||||
* Loading a size of 0 is allowed, and is the same as reset.
|
||||
* @return : dictionary size, in bytes (necessarily <= 64 KB)
|
||||
*/
|
||||
LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
|
||||
|
||||
/*! LZ4_compress_fast_continue() :
|
||||
* Compress 'src' content into 'dst' using data from previously compressed blocks, improving compression ratio.
|
||||
* 'dst' buffer must be already allocated.
|
||||
* If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
|
||||
*
|
||||
* Important : Up to 64KB of previously compressed data is assumed to remain present and unmodified in memory !
|
||||
* Special 1 : If input buffer is a double-buffer, it can have any size, including < 64 KB.
|
||||
* Special 2 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
|
||||
*
|
||||
* @return : size of compressed block
|
||||
* or 0 if there is an error (typically, compressed data cannot fit into 'dst')
|
||||
* After an error, the stream status is invalid, it can only be reset or freed.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
|
||||
|
||||
/*! LZ4_saveDict() :
|
||||
* If previously compressed data block is not guaranteed to remain available at its current memory location,
|
||||
* save it into a safer place (char* safeBuffer).
|
||||
* Note : it's not necessary to call LZ4_loadDict() after LZ4_saveDict(), dictionary is immediately usable.
|
||||
* @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
|
||||
|
||||
|
||||
/*-**********************************************
|
||||
* Streaming Decompression Functions
|
||||
* Bufferless synchronous API
|
||||
************************************************/
|
||||
typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* incomplete type (defined later) */
|
||||
|
||||
/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
|
||||
* creation / destruction of streaming decompression tracking structure.
|
||||
* A tracking structure can be re-used multiple times sequentially. */
|
||||
LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
|
||||
LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
|
||||
|
||||
/*! LZ4_setStreamDecode() :
|
||||
* An LZ4_streamDecode_t structure can be allocated once and re-used multiple times.
|
||||
* Use this function to start decompression of a new stream of blocks.
|
||||
* A dictionary can optionally be set. Use NULL or size 0 for a simple reset order.
|
||||
* @return : 1 if OK, 0 if error
|
||||
*/
|
||||
LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
|
||||
|
||||
/*! LZ4_decompress_*_continue() :
|
||||
* These decoding functions allow decompression of consecutive blocks in "streaming" mode.
|
||||
* A block is an unsplittable entity, it must be presented entirely to a decompression function.
|
||||
* Decompression functions only accept one block at a time.
|
||||
* Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB).
|
||||
*
|
||||
* Special : if application sets a ring buffer for decompression, it must respect one of the following conditions :
|
||||
* - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
|
||||
* In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
|
||||
* - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
|
||||
* maxBlockSize is implementation dependent. It's the maximum size of any single block.
|
||||
* In which case, encoding and decoding buffers do not need to be synchronized,
|
||||
* and encoding ring buffer can have any size, including small ones ( < 64 KB).
|
||||
* - _At least_ 64 KB + 8 bytes + maxBlockSize.
|
||||
* In which case, encoding and decoding buffers do not need to be synchronized,
|
||||
* and encoding ring buffer can have any size, including larger than decoding buffer.
|
||||
* Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
|
||||
* and indicate where it is saved using LZ4_setStreamDecode() before decompressing next block.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
|
||||
LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
|
||||
|
||||
|
||||
/*! LZ4_decompress_*_usingDict() :
|
||||
* These decoding functions work the same as
|
||||
* a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
|
||||
* They are stand-alone, and don't need an LZ4_streamDecode_t structure.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
|
||||
LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
|
||||
|
||||
|
||||
/*^**********************************************
|
||||
* !!!!!! STATIC LINKING ONLY !!!!!!
|
||||
***********************************************/
|
||||
/*-************************************
|
||||
* Private definitions
|
||||
**************************************
|
||||
* Do not use these definitions.
|
||||
* They are exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
|
||||
* Using these definitions will expose code to API and/or ABI break in future versions of the library.
|
||||
**************************************/
|
||||
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
|
||||
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
|
||||
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
|
||||
|
||||
/* Private stream state, declared twice with parallel field lists:
 * this branch (C++ or C99 and later) spells the fields with fixed-width
 * uintN_t types, the #else branch with plain `unsigned int` / `unsigned char`.
 * NOTE(review): both branches are presumably meant to have the same layout - confirm. */
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
|
||||
/* Compression-side state; overlaid by `union LZ4_stream_u` as `internal_donotuse`. */
typedef struct {
|
||||
uint32_t hashTable[LZ4_HASH_SIZE_U32];
|
||||
uint32_t currentOffset;
|
||||
uint32_t initCheck;
|
||||
const uint8_t* dictionary;
|
||||
uint8_t* bufferStart; /* obsolete, used for slideInputBuffer */
|
||||
uint32_t dictSize;
|
||||
} LZ4_stream_t_internal;
|
||||
|
||||
/* Decompression-side state; overlaid by `union LZ4_streamDecode_u` as `internal_donotuse`. */
typedef struct {
|
||||
const uint8_t* externalDict;
|
||||
size_t extDictSize;
|
||||
const uint8_t* prefixEnd;
|
||||
size_t prefixSize;
|
||||
} LZ4_streamDecode_t_internal;
|
||||
|
||||
#else
|
||||
|
||||
/* Compression-side state (pre-C99 spelling of the same fields). */
typedef struct {
|
||||
unsigned int hashTable[LZ4_HASH_SIZE_U32];
|
||||
unsigned int currentOffset;
|
||||
unsigned int initCheck;
|
||||
const unsigned char* dictionary;
|
||||
unsigned char* bufferStart; /* obsolete, used for slideInputBuffer */
|
||||
unsigned int dictSize;
|
||||
} LZ4_stream_t_internal;
|
||||
|
||||
/* Decompression-side state (pre-C99 spelling of the same fields). */
typedef struct {
|
||||
const unsigned char* externalDict;
|
||||
size_t extDictSize;
|
||||
const unsigned char* prefixEnd;
|
||||
size_t prefixSize;
|
||||
} LZ4_streamDecode_t_internal;
|
||||
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* LZ4_stream_t :
|
||||
* information structure to track an LZ4 stream.
|
||||
* init this structure before first use.
|
||||
* note : only use in association with static linking !
|
||||
* this definition is not API/ABI safe,
|
||||
* it may change in a future version !
|
||||
*/
|
||||
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
|
||||
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
|
||||
/* Storage for one compression stream.
 * `table` holds LZ4_STREAMSIZE_U64 unsigned long long entries, so the union is
 * at least LZ4_STREAMSIZE bytes; `internal_donotuse` overlays the private state.
 * NOTE(review): `table` presumably exists to fix the size/alignment for static
 * allocation - confirm. */
union LZ4_stream_u {
|
||||
unsigned long long table[LZ4_STREAMSIZE_U64];
|
||||
LZ4_stream_t_internal internal_donotuse;
|
||||
} ; /* previously typedef'd to LZ4_stream_t */
|
||||
|
||||
|
||||
/*!
|
||||
* LZ4_streamDecode_t :
|
||||
* information structure to track an LZ4 stream during decompression.
|
||||
* init this structure using LZ4_setStreamDecode (or memset()) before first use
|
||||
* note : only use in association with static linking !
|
||||
* this definition is not API/ABI safe,
|
||||
* and may change in a future version !
|
||||
*/
|
||||
#define LZ4_STREAMDECODESIZE_U64 4
|
||||
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
|
||||
/* Storage for one decompression stream.
 * `table` holds LZ4_STREAMDECODESIZE_U64 unsigned long long entries, so the
 * union is at least LZ4_STREAMDECODESIZE bytes; `internal_donotuse` overlays
 * the private decoding state. */
union LZ4_streamDecode_u {
|
||||
unsigned long long table[LZ4_STREAMDECODESIZE_U64];
|
||||
LZ4_streamDecode_t_internal internal_donotuse;
|
||||
} ; /* previously typedef'd to LZ4_streamDecode_t */
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Obsolete Functions
|
||||
**************************************/
|
||||
|
||||
/*! Deprecation warnings
|
||||
Should deprecation warnings be a problem,
|
||||
it is generally possible to disable them,
|
||||
typically with -Wno-deprecated-declarations for gcc
|
||||
or _CRT_SECURE_NO_WARNINGS in Visual.
|
||||
Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
|
||||
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
|
||||
# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
|
||||
#else
|
||||
# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
||||
# if defined(__clang__) /* clang doesn't handle mixed C++11 and GNU attributes */
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
# elif defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
||||
# define LZ4_DEPRECATED(message) [[deprecated(message)]]
|
||||
# elif (LZ4_GCC_VERSION >= 405)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
# elif (LZ4_GCC_VERSION >= 301)
|
||||
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
|
||||
# elif defined(_MSC_VER)
|
||||
# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
|
||||
# else
|
||||
# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
|
||||
# define LZ4_DEPRECATED(message)
|
||||
# endif
|
||||
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
|
||||
|
||||
/* Obsolete compression functions */
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress (const char* source, char* dest, int sourceSize);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_default() instead") int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
|
||||
/* Obsolete decompression functions */
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast() instead") int LZ4_uncompress (const char* source, char* dest, int outputSize);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe() instead") int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
|
||||
|
||||
/* Obsolete streaming functions; use new streaming interface whenever possible */
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStream() instead") int LZ4_sizeofStreamState(void);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStream() instead") int LZ4_resetStreamState(void* state, char* inputBuffer);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDict() instead") char* LZ4_slideInputBuffer (void* state);
|
||||
|
||||
/* Obsolete streaming decoding functions */
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
|
||||
LZ4LIB_API LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
|
||||
|
||||
}
|
||||
|
||||
#endif /* LZ4_H_2983827168210 */
|
||||
@@ -1,29 +0,0 @@
|
||||
# Build script for the tracy-csvexport command-line tool (src/csvexport.cpp).
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
# User-facing switch; defaults to OFF.
option(NO_ISA_EXTENSIONS "Disable ISA extensions (don't pass -march=native or -mcpu=native to the compiler)" OFF)
|
||||
|
||||
# NOTE(review): presumably read by the shared cmake includes below - confirm.
set(NO_STATISTICS OFF)
|
||||
|
||||
# NOTE(review): presumably defines TRACY_VERSION_STRING used by project() - confirm.
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/version.cmake)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
|
||||
project(
|
||||
tracy-csvexport
|
||||
LANGUAGES C CXX
|
||||
VERSION ${TRACY_VERSION_STRING}
|
||||
)
|
||||
|
||||
# Shared project configuration. NOTE(review): these includes presumably provide
# COMMON_FILES, SERVER_FILES and the TracyServer / TracyGetOpt targets used
# below - confirm.
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/config.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/vendor.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/../cmake/server.cmake)
|
||||
|
||||
# Sources specific to this tool.
set(PROGRAM_FILES
|
||||
src/csvexport.cpp
|
||||
)
|
||||
|
||||
# The executable is named after the project (tracy-csvexport).
add_executable(${PROJECT_NAME} ${PROGRAM_FILES} ${COMMON_FILES} ${SERVER_FILES})
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE TracyServer TracyGetOpt)
|
||||
# Make this target the Visual Studio startup project for this directory.
set_property(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})
|
||||
|
||||
install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
@@ -1,454 +0,0 @@
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../../server/TracyFileRead.hpp"
|
||||
#include "../../server/TracyWorker.hpp"
|
||||
#include "../../getopt/getopt.h"
|
||||
|
||||
// Prints the command-line help to stderr and terminates the process.
//
// @param e  Exit status passed to exit().
//
// NOTE(review): the usage line names the program "extract"; the build script
// names the executable differently - confirm which name should be shown.
void print_usage_exit(int e)
{
    fprintf(stderr, "Extract statistics from a trace to a CSV format\n");
    fprintf(stderr, "Usage:\n");
    fprintf(stderr, " extract [OPTION...] <trace file>\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " -h, --help Print usage\n");
    // The quotes must be escaped: an unescaped "" ends and reopens the
    // literal, which made the help read `(default: )`.
    fprintf(stderr, " -f, --filter arg Filter zone names (default: \"\")\n");
    fprintf(stderr, " -s, --sep arg CSV separator (default: ,)\n");
    fprintf(stderr, " -c, --case Case sensitive filtering\n");
    fprintf(stderr, " -e, --self Get self times\n");
    fprintf(stderr, " -u, --unwrap Report each cpu zone event\n");
    fprintf(stderr, " -g, --gpu Report each gpu zone event\n");
    fprintf(stderr, " -m, --messages Report only messages\n");
    fprintf(stderr, " -p, --plot Report plot data (only with -u)\n");

    exit(e);
}
|
||||
|
||||
// Parsed command-line options.
// Field order matters: parse_args() fills this struct with a positional
// aggregate initializer.
struct Args {
|
||||
// Zone-name filter (-f/--filter); an empty string selects every zone.
const char* filter;
|
||||
// Column separator for the CSV output (-s/--sep); defaults to ",".
const char* separator;
|
||||
// Path of the trace file (the single positional argument).
const char* trace_file;
|
||||
// -c/--case: match the filter case-sensitively.
bool case_sensitive;
|
||||
// -e/--self: report self times.
bool self_time;
|
||||
// -u/--unwrap: emit one row per CPU zone event instead of aggregates.
bool unwrap;
|
||||
// -g/--gpu: emit one row per GPU zone event.
bool show_gpu;
|
||||
// -m/--messages: emit only the trace messages.
bool unwrapMessages;
|
||||
// -p/--plot: also emit plot data (only honoured together with unwrap).
bool plot;
|
||||
};
|
||||
|
||||
// Parses the command line into an Args value.
//
// Prints the usage text and exits (status 1) when no arguments are given, an
// unknown option is seen, or the number of positional arguments is not
// exactly one; `-h` prints the usage text and exits with status 0.
//
// @param argc  Argument count as received by main().
// @param argv  Argument vector as received by main().
// @return      The parsed options; `trace_file` points into `argv`.
Args parse_args(int argc, char** argv)
{
    if (argc == 1)
    {
        print_usage_exit(1);
    }

    // Defaults: no filter, "," separator, every flag off.
    Args args = { "", ",", "", false, false, false, false, false, false };

    struct option long_opts[] = {
        { "help", no_argument, NULL, 'h' },
        { "filter", optional_argument, NULL, 'f' },
        { "sep", optional_argument, NULL, 's' },
        { "case", no_argument, NULL, 'c' },
        { "self", no_argument, NULL, 'e' },
        { "unwrap", no_argument, NULL, 'u' },
        { "gpu", no_argument, NULL, 'g' },
        { "messages", no_argument, NULL, 'm' },
        { "plot", no_argument, NULL, 'p' },
        { NULL, 0, NULL, 0 }
    };

    int c;
    while ((c = getopt_long(argc, argv, "hf:s:ceugmp", long_opts, NULL)) != -1)
    {
        switch (c)
        {
        case 'h':
            print_usage_exit(0);
            break;
        case 'f':
            // The long form takes an *optional* argument, so `--filter`
            // without `=value` leaves optarg NULL; keep the default then
            // instead of storing a null pointer that is dereferenced later.
            if (optarg != NULL)
            {
                args.filter = optarg;
            }
            break;
        case 's':
            // Same NULL case as for the filter.
            if (optarg != NULL)
            {
                args.separator = optarg;
            }
            break;
        case 'c':
            args.case_sensitive = true;
            break;
        case 'e':
            args.self_time = true;
            break;
        case 'u':
            args.unwrap = true;
            break;
        case 'g':
            args.show_gpu = true;
            break;
        case 'm':
            args.unwrapMessages = true;
            break;
        case 'p':
            args.plot = true;
            break;
        default:
            print_usage_exit(1);
            break;
        }
    }

    // Exactly one positional argument (the trace file) must remain.
    if (argc != optind + 1)
    {
        print_usage_exit(1);
    }

    args.trace_file = argv[optind];

    return args;
}
|
||||
|
||||
// Tells whether `term` occurs anywhere inside `s`.
//
// @param term            Text to look for (NUL-terminated).
// @param s               Text to search in (NUL-terminated).
// @param case_sensitive  When false (the default) both strings are lowercased
//                        with std::tolower before the comparison.
// @return                true when `term` is found in `s`.
bool is_substring(
    const char* term,
    const char* s,
    bool case_sensitive = false
){
    std::string needle(term);
    std::string haystack(s);

    if (case_sensitive) {
        return haystack.find(needle) != std::string::npos;
    }

    // Lowercase in place; going through unsigned char keeps std::tolower
    // well-defined for bytes outside the ASCII range.
    const auto lowercase = [](std::string& text) {
        for (char& ch : text) {
            ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
        }
    };
    lowercase(needle);
    lowercase(haystack);

    return haystack.find(needle) != std::string::npos;
}
|
||||
|
||||
// Returns the display name of a source location: its explicit name when one
// is active, otherwise its function name.
//
// @param id      Source location id understood by `worker`.
// @param worker  Worker used to look up the source location and its strings.
const char* get_name(int32_t id, const tracy::Worker& worker)
{
    const auto& location = worker.GetSourceLocation(id);
    const auto& label = location.name.active ? location.name : location.function;
    return worker.GetString(label);
}
|
||||
|
||||
// Concatenates the elements of `v`, streamed with operator<<, placing `sep`
// between consecutive elements.
//
// Works with any range usable in a range-based for loop; it does not require
// operator[] on the container.
//
// @param v    Range of streamable elements; may be empty.
// @param sep  Separator text written between elements.
// @return     The joined string ("" for an empty range).
template <typename T>
std::string join(const T& v, const char* sep) {
    std::ostringstream s;
    bool first = true;
    for (const auto& i : v) {
        if (!first) {
            s << sep;
        }
        first = false;
        s << i;
    }
    return s.str();
}
|
||||
|
||||
// From TracyView.cpp
|
||||
int64_t GetZoneChildTimeFast(
|
||||
const tracy::Worker& worker,
|
||||
const tracy::ZoneEvent& zone
|
||||
){
|
||||
int64_t time = 0;
|
||||
if( zone.HasChildren() )
|
||||
{
|
||||
auto& children = worker.GetZoneChildren( zone.Child() );
|
||||
if( children.is_magic() )
|
||||
{
|
||||
auto& vec = *(tracy::Vector<tracy::ZoneEvent>*)&children;
|
||||
for( auto& v : vec )
|
||||
{
|
||||
assert( v.IsEndValid() );
|
||||
time += v.End() - v.Start();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( auto& v : children )
|
||||
{
|
||||
assert( v->IsEndValid() );
|
||||
time += v->End() - v->Start();
|
||||
}
|
||||
}
|
||||
}
|
||||
return time;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (!AttachConsole(ATTACH_PARENT_PROCESS))
|
||||
{
|
||||
AllocConsole();
|
||||
SetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), 0x07);
|
||||
}
|
||||
#endif
|
||||
|
||||
Args args = parse_args(argc, argv);
|
||||
|
||||
auto f = std::unique_ptr<tracy::FileRead>(
|
||||
tracy::FileRead::Open(args.trace_file)
|
||||
);
|
||||
if (!f)
|
||||
{
|
||||
fprintf(stderr, "Could not open file %s\n", args.trace_file);
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto worker = tracy::Worker(*f);
|
||||
|
||||
if (args.unwrapMessages)
|
||||
{
|
||||
const auto& msgs = worker.GetMessages();
|
||||
|
||||
if (msgs.size() > 0)
|
||||
{
|
||||
std::vector<const char*> columnsForMessages;
|
||||
columnsForMessages = {
|
||||
"MessageName", "total_ns"
|
||||
};
|
||||
std::string headerForMessages = join(columnsForMessages, args.separator);
|
||||
printf("%s\n", headerForMessages.data());
|
||||
|
||||
for(auto& it : msgs)
|
||||
{
|
||||
std::vector<std::string> values(columnsForMessages.size());
|
||||
|
||||
values[0] = worker.GetString(it->ref);
|
||||
values[1] = std::to_string(it->time);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("There are currently no messages!\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (!worker.AreSourceLocationZonesReady())
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
|
||||
if (args.show_gpu)
|
||||
{
|
||||
auto& gpu_slz = worker.GetGpuSourceLocationZones();
|
||||
tracy::Vector<decltype( gpu_slz.begin() )> gpu_slz_selected;
|
||||
gpu_slz_selected.reserve( gpu_slz.size() );
|
||||
|
||||
uint32_t total_cnt = 0;
|
||||
for (auto it = gpu_slz.begin(); it != gpu_slz.end(); ++it)
|
||||
{
|
||||
if (it->second.total != 0)
|
||||
{
|
||||
++total_cnt;
|
||||
if (args.filter[0] == '\0')
|
||||
{
|
||||
gpu_slz_selected.push_back_no_space_check( it );
|
||||
}
|
||||
else
|
||||
{
|
||||
auto name = get_name( it->first, worker );
|
||||
if (is_substring( args.filter, name, args.case_sensitive))
|
||||
{
|
||||
gpu_slz_selected.push_back_no_space_check( it );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char*> columns;
|
||||
columns = {"name", "src_file", "Time from start of program", "GPU execution time"};
|
||||
|
||||
std::string header = join(columns, args.separator);
|
||||
printf("%s\n", header.data());
|
||||
|
||||
const auto last_time = worker.GetLastTime();
|
||||
for (auto& it : gpu_slz_selected)
|
||||
{
|
||||
std::vector<std::string> values( columns.size() );
|
||||
|
||||
values[0] = get_name( it->first, worker );
|
||||
|
||||
const auto& srcloc = worker.GetSourceLocation( it->first );
|
||||
values[1] = worker.GetString( srcloc.file );
|
||||
|
||||
const auto& zone_data = it->second;
|
||||
for (const auto& zone_thread_data : zone_data.zones)
|
||||
{
|
||||
tracy::GpuEvent* gpu_event = zone_thread_data.Zone();
|
||||
const auto start = gpu_event->GpuStart();
|
||||
const auto end = gpu_event->GpuEnd();
|
||||
|
||||
values[2] = std::to_string( start );
|
||||
|
||||
auto timespan = end - start;
|
||||
values[3] = std::to_string( timespan );
|
||||
|
||||
std::string row = join( values, args.separator );
|
||||
printf( "%s\n", row.data() );
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto& slz = worker.GetSourceLocationZones();
|
||||
tracy::Vector<decltype(slz.begin())> slz_selected;
|
||||
slz_selected.reserve(slz.size());
|
||||
|
||||
uint32_t total_cnt = 0;
|
||||
for(auto it = slz.begin(); it != slz.end(); ++it)
|
||||
{
|
||||
if(it->second.total != 0)
|
||||
{
|
||||
++total_cnt;
|
||||
if(args.filter[0] == '\0')
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto name = get_name(it->first, worker);
|
||||
if(is_substring(args.filter, name, args.case_sensitive))
|
||||
{
|
||||
slz_selected.push_back_no_space_check(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char*> columns;
|
||||
if (args.unwrap)
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "ns_since_start", "exec_time_ns", "thread", "value"
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
columns = {
|
||||
"name", "src_file", "src_line", "total_ns", "total_perc",
|
||||
"counts", "mean_ns", "min_ns", "max_ns", "std_ns"
|
||||
};
|
||||
}
|
||||
std::string header = join(columns, args.separator);
|
||||
printf("%s\n", header.data());
|
||||
|
||||
const auto last_time = worker.GetLastTime();
|
||||
for(auto& it : slz_selected)
|
||||
{
|
||||
std::vector<std::string> values(columns.size());
|
||||
|
||||
values[0] = get_name(it->first, worker);
|
||||
|
||||
const auto& srcloc = worker.GetSourceLocation(it->first);
|
||||
values[1] = worker.GetString(srcloc.file);
|
||||
values[2] = std::to_string(srcloc.line);
|
||||
|
||||
const auto& zone_data = it->second;
|
||||
|
||||
if (args.unwrap)
|
||||
{
|
||||
int i = 0;
|
||||
for (const auto& zone_thread_data : zone_data.zones) {
|
||||
const auto zone_event = zone_thread_data.Zone();
|
||||
const auto tId = zone_thread_data.Thread();
|
||||
const auto start = zone_event->Start();
|
||||
const auto end = zone_event->End();
|
||||
|
||||
values[3] = std::to_string(start);
|
||||
|
||||
auto timespan = end - start;
|
||||
if (args.self_time) {
|
||||
timespan -= GetZoneChildTimeFast(worker, *zone_event);
|
||||
}
|
||||
values[4] = std::to_string(timespan);
|
||||
values[5] = std::to_string(tId);
|
||||
if (worker.HasZoneExtra(*zone_event)) {
|
||||
const auto& text = worker.GetZoneExtra(*zone_event).text;
|
||||
if (text.Active()) {
|
||||
values[6] = worker.GetString(text);
|
||||
}
|
||||
}
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto time = args.self_time ? zone_data.selfTotal : zone_data.total;
|
||||
values[3] = std::to_string(time);
|
||||
values[4] = std::to_string(100. * time / last_time);
|
||||
|
||||
values[5] = std::to_string(zone_data.zones.size());
|
||||
|
||||
const auto avg = (args.self_time ? zone_data.selfTotal : zone_data.total)
|
||||
/ zone_data.zones.size();
|
||||
values[6] = std::to_string(avg);
|
||||
|
||||
const auto tmin = args.self_time ? zone_data.selfMin : zone_data.min;
|
||||
const auto tmax = args.self_time ? zone_data.selfMax : zone_data.max;
|
||||
values[7] = std::to_string(tmin);
|
||||
values[8] = std::to_string(tmax);
|
||||
|
||||
const auto sz = zone_data.zones.size();
|
||||
const auto ss = zone_data.sumSq
|
||||
- 2. * zone_data.total * avg
|
||||
+ avg * avg * sz;
|
||||
double std = 0;
|
||||
if( sz > 1 )
|
||||
std = sqrt(ss / (sz - 1));
|
||||
values[9] = std::to_string(std);
|
||||
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
|
||||
if(args.plot && args.unwrap)
|
||||
{
|
||||
auto& plots = worker.GetPlots();
|
||||
for(const auto& plot : plots)
|
||||
{
|
||||
std::vector<std::string> values(columns.size());
|
||||
values[0] = worker.GetString(plot->name);
|
||||
|
||||
for(const auto& val : plot->data)
|
||||
{
|
||||
if (args.unwrap)
|
||||
{
|
||||
values[3] = std::to_string(val.time.Val());
|
||||
values[6] = std::to_string(val.val);
|
||||
}
|
||||
std::string row = join(values, args.separator);
|
||||
printf("%s\n", row.data());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
BIN
doc/cost.png
Normal file
|
After Width: | Height: | Size: 2.4 KiB |
370
doc/design.svg
Normal file
@@ -0,0 +1,370 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
id="svg8"
|
||||
version="1.1"
|
||||
viewBox="0 0 139.17125 37.041668"
|
||||
height="140"
|
||||
width="526.00159">
|
||||
<defs
|
||||
id="defs2">
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker6660"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path6658" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker6158"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path6156" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow1Send"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.2,0,0,-0.2,-1.2,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path4694" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker5984"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path5982" />
|
||||
</marker>
|
||||
<marker
|
||||
orient="auto"
|
||||
refY="0"
|
||||
refX="0"
|
||||
id="marker5482"
|
||||
style="overflow:visible">
|
||||
<path
|
||||
id="path5480"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)" />
|
||||
</marker>
|
||||
<marker
|
||||
orient="auto"
|
||||
refY="0"
|
||||
refX="0"
|
||||
id="marker5472"
|
||||
style="overflow:visible">
|
||||
<path
|
||||
id="path5470"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker5378"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path5376" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker5308"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path5306" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="Arrow1Mend"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path4688" />
|
||||
</marker>
|
||||
<marker
|
||||
orient="auto"
|
||||
refY="0"
|
||||
refX="0"
|
||||
id="marker5170"
|
||||
style="overflow:visible">
|
||||
<path
|
||||
id="path5168"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
transform="matrix(-0.8,0,0,-0.8,-10,0)" />
|
||||
</marker>
|
||||
<marker
|
||||
orient="auto"
|
||||
refY="0"
|
||||
refX="0"
|
||||
id="marker4963"
|
||||
style="overflow:visible">
|
||||
<path
|
||||
id="path4961"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
transform="matrix(-0.8,0,0,-0.8,-10,0)" />
|
||||
</marker>
|
||||
<marker
|
||||
style="overflow:visible"
|
||||
id="marker6158-2"
|
||||
refX="0"
|
||||
refY="0"
|
||||
orient="auto">
|
||||
<path
|
||||
transform="matrix(-0.4,0,0,-0.4,-4,0)"
|
||||
style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt;stroke-opacity:1"
|
||||
d="M 0,0 5,-5 -12.5,0 5,5 Z"
|
||||
id="path6156-2" />
|
||||
</marker>
|
||||
</defs>
|
||||
<metadata
|
||||
id="metadata5">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
transform="translate(-18.388332,-17.864582)"
|
||||
id="layer1">
|
||||
<g
|
||||
id="g4666">
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect4607"
|
||||
width="17.197916"
|
||||
height="6.614583"
|
||||
x="18.520834"
|
||||
y="20.510416" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="20.968229"
|
||||
y="24.869841"
|
||||
id="text4611"><tspan
|
||||
id="tspan4609"
|
||||
x="20.968229"
|
||||
y="24.869841"
|
||||
style="stroke-width:0.26458332px">Thread 1</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g4661">
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect4607-4"
|
||||
width="17.197916"
|
||||
height="6.6145835"
|
||||
x="18.520834"
|
||||
y="32.416668" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458349px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="20.979254"
|
||||
y="36.776093"
|
||||
id="text4611-8"><tspan
|
||||
id="tspan4609-9"
|
||||
x="20.979254"
|
||||
y="36.776093"
|
||||
style="stroke-width:0.26458332px">Thread 2</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g4671">
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="rect4607-8"
|
||||
width="17.197916"
|
||||
height="6.6145835"
|
||||
x="18.520832"
|
||||
y="44.322918" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458349px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="20.951002"
|
||||
y="48.682343"
|
||||
id="text4611-89"><tspan
|
||||
id="tspan4609-6"
|
||||
x="20.951002"
|
||||
y="48.682343"
|
||||
style="stroke-width:0.26458332px">Thread 3</tspan></text>
|
||||
</g>
|
||||
<g
|
||||
id="g5096">
|
||||
<ellipse
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="path4644"
|
||||
cx="67.775978"
|
||||
cy="36.3787"
|
||||
rx="10.583333"
|
||||
ry="4.6302085" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="67.733261"
|
||||
y="35.623535"
|
||||
id="text4648"><tspan
|
||||
id="tspan4646"
|
||||
x="67.733261"
|
||||
y="35.623535"
|
||||
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px">Tracy</tspan><tspan
|
||||
x="67.733261"
|
||||
y="39.151314"
|
||||
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px"
|
||||
id="tspan4650">client</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path4673"
|
||||
d="m 37.041666,24.479166 19.84375,7.937502"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5472)" />
|
||||
<path
|
||||
id="path4675"
|
||||
d="m 37.041666,46.968751 19.84375,-6.614584"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5482)" />
|
||||
<path
|
||||
id="path4677"
|
||||
d="M 37.041667,36.385417 H 55.5625"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker5378)" />
|
||||
<path
|
||||
id="path5059"
|
||||
d="M 84.666667,17.864582 V 54.90625"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:1.05833327, 2.11666654;stroke-dashoffset:0;stroke-opacity:1" />
|
||||
<g
|
||||
id="g5106">
|
||||
<ellipse
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
id="path4644-1"
|
||||
cx="101.98283"
|
||||
cy="36.56768"
|
||||
rx="10.583333"
|
||||
ry="4.6302085" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="101.90772"
|
||||
y="35.812515"
|
||||
id="text4648-2"><tspan
|
||||
id="tspan4646-5"
|
||||
x="101.90772"
|
||||
y="35.812515"
|
||||
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px">Tracy</tspan><tspan
|
||||
x="101.90772"
|
||||
y="39.340294"
|
||||
style="text-align:center;text-anchor:middle;stroke-width:0.26458332px"
|
||||
id="tspan4650-1">server</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path5108"
|
||||
d="M 79.375,37.708333 H 89.958333"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6660)" />
|
||||
<path
|
||||
id="path5110"
|
||||
d="M 89.958333,35.0625 H 79.375"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#marker5308)" />
|
||||
<g
|
||||
transform="translate(-2.64619,-1.3704153)"
|
||||
id="g6152">
|
||||
<ellipse
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
id="path6114"
|
||||
cx="128.98439"
|
||||
cy="33.692333"
|
||||
rx="4.6302085"
|
||||
ry="1.2756696" />
|
||||
<path
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
d="m 124.36251,41.677042 c -0.004,0.01582 -0.007,0.03168 -0.008,0.04754 5.3e-4,0.704384 2.07327,1.275328 4.62995,1.275373 2.55689,3.5e-5 4.62988,-0.570931 4.63048,-1.275373 -10e-4,-0.01585 -0.003,-0.03171 -0.006,-0.04754"
|
||||
id="path6114-1" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 124.35417,33.739583 v 8.021022"
|
||||
id="path6138" />
|
||||
<path
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 133.61458,33.739582 v 8.051744"
|
||||
id="path6140" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="126.89217"
|
||||
y="39.409225"
|
||||
id="text6144"><tspan
|
||||
id="tspan6142"
|
||||
x="126.89217"
|
||||
y="39.409225"
|
||||
style="stroke-width:0.26458332px">DB</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path6154"
|
||||
d="m 113.77082,36.385418 h 6.61459"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6158)" />
|
||||
<g
|
||||
transform="translate(2.6458333,1.2715659e-6)"
|
||||
id="g6241">
|
||||
<rect
|
||||
style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.26499999;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
|
||||
id="rect6232"
|
||||
width="17.197916"
|
||||
height="5.291667"
|
||||
x="137.58333"
|
||||
y="33.739582" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-weight:normal;font-size:2.82222223px;line-height:6.61458302px;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26458332px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
x="140.8851"
|
||||
y="37.164005"
|
||||
id="text6236"><tspan
|
||||
id="tspan6234"
|
||||
x="140.8851"
|
||||
y="37.164005"
|
||||
style="stroke-width:0.26458332px">Display</tspan></text>
|
||||
</g>
|
||||
<path
|
||||
id="path6154-3"
|
||||
d="m 132.29166,36.385417 h 6.61459"
|
||||
style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.26499999;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker6158-2)" />
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 15 KiB |
BIN
doc/locks.png
Normal file
|
After Width: | Height: | Size: 8.3 KiB |
BIN
doc/messages.png
Normal file
|
After Width: | Height: | Size: 4.6 KiB |
BIN
doc/plot.png
Normal file
|
After Width: | Height: | Size: 9.3 KiB |
BIN
doc/profiler.png
|
Before Width: | Height: | Size: 213 KiB After Width: | Height: | Size: 28 KiB |
|
Before Width: | Height: | Size: 250 KiB |
|
Before Width: | Height: | Size: 234 KiB |
706
dtl/Diff.hpp
@@ -1,706 +0,0 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_DIFF_H
|
||||
#define DTL_DIFF_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* diff class template
|
||||
* sequence must support random_access_iterator.
|
||||
*/
|
||||
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
|
||||
class Diff
|
||||
{
|
||||
private :
|
||||
dtl_typedefs(elem, sequence)
|
||||
sequence A;
|
||||
sequence B;
|
||||
size_t M;
|
||||
size_t N;
|
||||
size_t delta;
|
||||
size_t offset;
|
||||
long long *fp;
|
||||
long long editDistance;
|
||||
Lcs< elem > lcs;
|
||||
Ses< elem > ses;
|
||||
editPath path;
|
||||
editPathCordinates pathCordinates;
|
||||
bool swapped;
|
||||
bool huge;
|
||||
bool trivial;
|
||||
bool editDistanceOnly;
|
||||
uniHunkVec uniHunks;
|
||||
comparator cmp;
|
||||
long long ox;
|
||||
long long oy;
|
||||
public :
|
||||
Diff () {}
|
||||
|
||||
Diff (const sequence& a,
|
||||
const sequence& b) : A(a), B(b), ses(false) {
|
||||
init();
|
||||
}
|
||||
|
||||
Diff (const sequence& a,
|
||||
const sequence& b,
|
||||
bool deletesFirst) : A(a), B(b), ses(deletesFirst) {
|
||||
init();
|
||||
}
|
||||
|
||||
Diff (const sequence& a,
|
||||
const sequence& b,
|
||||
const comparator& comp) : A(a), B(b), ses(false), cmp(comp) {
|
||||
init();
|
||||
}
|
||||
|
||||
Diff (const sequence& a,
|
||||
const sequence& b,
|
||||
bool deleteFirst,
|
||||
const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) {
|
||||
init();
|
||||
}
|
||||
|
||||
~Diff() {}
|
||||
|
||||
long long getEditDistance () const {
|
||||
return editDistance;
|
||||
}
|
||||
|
||||
Lcs< elem > getLcs () const {
|
||||
return lcs;
|
||||
}
|
||||
|
||||
elemVec getLcsVec () const {
|
||||
return lcs.getSequence();
|
||||
}
|
||||
|
||||
Ses< elem > getSes () const {
|
||||
return ses;
|
||||
}
|
||||
|
||||
uniHunkVec getUniHunks () const {
|
||||
return uniHunks;
|
||||
}
|
||||
|
||||
/* These should be deprecated */
|
||||
bool isHuge () const {
|
||||
return huge;
|
||||
}
|
||||
|
||||
void onHuge () {
|
||||
this->huge = true;
|
||||
}
|
||||
|
||||
void offHuge () {
|
||||
this->huge = false;
|
||||
}
|
||||
|
||||
bool isUnserious () const {
|
||||
return trivial;
|
||||
}
|
||||
|
||||
void onUnserious () {
|
||||
this->trivial = true;
|
||||
}
|
||||
|
||||
void offUnserious () {
|
||||
this->trivial = false;
|
||||
}
|
||||
|
||||
void onOnlyEditDistance () {
|
||||
this->editDistanceOnly = true;
|
||||
}
|
||||
|
||||
/* These are the replacements for the above */
|
||||
bool hugeEnabled () const {
|
||||
return huge;
|
||||
}
|
||||
|
||||
void enableHuge () {
|
||||
this->huge = true;
|
||||
}
|
||||
|
||||
void disableHuge () {
|
||||
this->huge = false;
|
||||
}
|
||||
|
||||
bool trivialEnabled () const {
|
||||
return trivial;
|
||||
}
|
||||
|
||||
void enableTrivial () {
|
||||
this->trivial = true;
|
||||
}
|
||||
|
||||
void disableTrivial () {
|
||||
this->trivial = false;
|
||||
}
|
||||
|
||||
void editDistanceOnlyEnabled () {
|
||||
this->editDistanceOnly = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* patching with Unified Format Hunks
|
||||
*/
|
||||
sequence uniPatch (const sequence& seq) {
|
||||
elemList seqLst(seq.begin(), seq.end());
|
||||
sesElemVec shunk;
|
||||
sesElemVec_iter vsesIt;
|
||||
elemList_iter lstIt = seqLst.begin();
|
||||
long long inc_dec_total = 0;
|
||||
long long gap = 1;
|
||||
for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
|
||||
joinSesVec(shunk, it->common[0]);
|
||||
joinSesVec(shunk, it->change);
|
||||
joinSesVec(shunk, it->common[1]);
|
||||
it->a += inc_dec_total;
|
||||
inc_dec_total += it->inc_dec_count;
|
||||
for (long long i=0;i<it->a - gap;++i) {
|
||||
++lstIt;
|
||||
}
|
||||
gap = it->a + it->b + it->inc_dec_count;
|
||||
vsesIt = shunk.begin();
|
||||
while (vsesIt!=shunk.end()) {
|
||||
switch (vsesIt->second.type) {
|
||||
case SES_ADD :
|
||||
seqLst.insert(lstIt, vsesIt->first);
|
||||
break;
|
||||
case SES_DELETE :
|
||||
if (lstIt != seqLst.end()) {
|
||||
lstIt = seqLst.erase(lstIt);
|
||||
}
|
||||
break;
|
||||
case SES_COMMON :
|
||||
if (lstIt != seqLst.end()) {
|
||||
++lstIt;
|
||||
}
|
||||
break;
|
||||
default :
|
||||
// no fall-through
|
||||
break;
|
||||
}
|
||||
++vsesIt;
|
||||
}
|
||||
shunk.clear();
|
||||
}
|
||||
|
||||
sequence patchedSeq(seqLst.begin(), seqLst.end());
|
||||
return patchedSeq;
|
||||
}
|
||||
|
||||
/**
|
||||
* patching with Shortest Edit Script (SES)
|
||||
*/
|
||||
sequence patch (const sequence& seq) const {
|
||||
sesElemVec sesSeq = ses.getSequence();
|
||||
elemList seqLst(seq.begin(), seq.end());
|
||||
elemList_iter lstIt = seqLst.begin();
|
||||
for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) {
|
||||
switch (sesIt->second.type) {
|
||||
case SES_ADD :
|
||||
seqLst.insert(lstIt, sesIt->first);
|
||||
break;
|
||||
case SES_DELETE :
|
||||
lstIt = seqLst.erase(lstIt);
|
||||
break;
|
||||
case SES_COMMON :
|
||||
++lstIt;
|
||||
break;
|
||||
default :
|
||||
// no through
|
||||
break;
|
||||
}
|
||||
}
|
||||
sequence patchedSeq(seqLst.begin(), seqLst.end());
|
||||
return patchedSeq;
|
||||
}
|
||||
|
||||
/**
|
||||
* compose Longest Common Subsequence and Shortest Edit Script.
|
||||
* The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
|
||||
* described by Sun Wu, Udi Manber and Gene Myers
|
||||
*/
|
||||
void compose() {
|
||||
|
||||
if (isHuge()) {
|
||||
pathCordinates.reserve(MAX_CORDINATES_SIZE);
|
||||
}
|
||||
ox = 0;
|
||||
oy = 0;
|
||||
long long p = -1;
|
||||
fp = new long long[M + N + 3];
|
||||
fill(&fp[0], &fp[M + N + 3], -1);
|
||||
path = editPath(M + N + 3);
|
||||
fill(path.begin(), path.end(), -1);
|
||||
ONP:
|
||||
do {
|
||||
++p;
|
||||
for (long long k=-p;k<=static_cast<long long>(delta)-1;++k) {
|
||||
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
|
||||
}
|
||||
for (long long k=static_cast<long long>(delta)+p;k>=static_cast<long long>(delta)+1;--k) {
|
||||
fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
|
||||
}
|
||||
fp[delta+offset] = snake(static_cast<long long>(delta), fp[delta-1+offset]+1, fp[delta+1+offset]);
|
||||
} while (fp[delta+offset] != static_cast<long long>(N) && pathCordinates.size() < MAX_CORDINATES_SIZE);
|
||||
|
||||
editDistance += static_cast<long long>(delta) + 2 * p;
|
||||
long long r = path[delta+offset];
|
||||
P cordinate;
|
||||
editPathCordinates epc(0);
|
||||
|
||||
// recording edit distance only
|
||||
if (editDistanceOnly) {
|
||||
delete[] this->fp;
|
||||
return;
|
||||
}
|
||||
|
||||
while(r != -1) {
|
||||
cordinate.x = pathCordinates[(size_t)r].x;
|
||||
cordinate.y = pathCordinates[(size_t)r].y;
|
||||
epc.push_back(cordinate);
|
||||
r = pathCordinates[(size_t)r].k;
|
||||
}
|
||||
|
||||
// record Longest Common Subsequence & Shortest Edit Script
|
||||
if (!recordSequence(epc)) {
|
||||
pathCordinates.resize(0);
|
||||
epc.resize(0);
|
||||
p = -1;
|
||||
goto ONP;
|
||||
}
|
||||
delete[] this->fp;
|
||||
}
|
||||
|
||||
/**
|
||||
* print difference between A and B as an SES
|
||||
*/
|
||||
template < typename stream >
|
||||
void printSES (stream& out) const {
|
||||
sesElemVec ses_v = ses.getSequence();
|
||||
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
|
||||
}
|
||||
|
||||
void printSES (ostream& out = cout) const {
|
||||
printSES< ostream >(out);
|
||||
}
|
||||
|
||||
/**
|
||||
* print differences given an SES
|
||||
*/
|
||||
template < typename stream >
|
||||
static void printSES (const Ses< elem >& s, stream& out) {
|
||||
sesElemVec ses_v = s.getSequence();
|
||||
for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
|
||||
}
|
||||
|
||||
static void printSES (const Ses< elem >& s, ostream& out = cout) {
|
||||
printSES< ostream >(s, out);
|
||||
}
|
||||
|
||||
/**
|
||||
* print difference between A and B as an SES with custom printer
|
||||
*/
|
||||
template < typename stream, template < typename SEET, typename STRT > class PT >
|
||||
void printSES (stream& out) const {
|
||||
sesElemVec ses_v = ses.getSequence ();
|
||||
for_each (ses_v.begin (), ses_v.end(), PT < sesElem, stream > (out));
|
||||
}
|
||||
|
||||
/**
|
||||
* store difference between A and B as an SES with custom storage
|
||||
*/
|
||||
template < typename storedData, template < typename SEET, typename STRT > class ST >
|
||||
void storeSES(storedData& sd) const {
|
||||
sesElemVec ses_v = ses.getSequence();
|
||||
for_each(ses_v.begin(), ses_v.end(), ST < sesElem, storedData >(sd));
|
||||
}
|
||||
|
||||
/**
|
||||
* print difference between A and B in the Unified Format
|
||||
*/
|
||||
template < typename stream >
|
||||
void printUnifiedFormat (stream& out) const {
|
||||
for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out));
|
||||
}
|
||||
|
||||
void printUnifiedFormat (ostream& out = cout) const {
|
||||
printUnifiedFormat< ostream >(out);
|
||||
}
|
||||
|
||||
/**
|
||||
* print unified format difference with given unified format hunks
|
||||
*/
|
||||
template < typename stream >
|
||||
static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) {
|
||||
for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out));
|
||||
}
|
||||
|
||||
static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) {
|
||||
printUnifiedFormat< ostream >(hunks, out);
|
||||
}
|
||||
|
||||
/**
|
||||
* compose Unified Format Hunks from Shortest Edit Script
* Walks the recorded SES once and appends one uniHunk to uniHunks for
* every group of changes. a, b, c, d are the numbers of the hunk header
* "@@ -a,b +c,d @@". Leading context is collected in common[0]; common[1]
* is never appended to in this function and is therefore stored empty.
|
||||
*/
|
||||
void composeUnifiedHunks () {
|
||||
sesElemVec common[2];
|
||||
sesElemVec change;
|
||||
sesElemVec ses_v = ses.getSequence();
|
||||
// l_cnt is the 1-based position of the current element in ses_v
long long l_cnt = 1;
|
||||
long long length = distance(ses_v.begin(), ses_v.end());
|
||||
// middle counts the common elements seen since the last add/delete
long long middle = 0;
|
||||
bool isMiddle, isAfter;
|
||||
elemInfo einfo;
|
||||
long long a, b, c, d; // @@ -a,b +c,d @@
|
||||
long long inc_dec_count = 0;
|
||||
uniHunk< sesElem > hunk;
|
||||
sesElemVec adds;
|
||||
sesElemVec deletes;
|
||||
|
||||
isMiddle = isAfter = false;
|
||||
a = b = c = d = 0;
|
||||
|
||||
for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) {
|
||||
einfo = it->second;
|
||||
switch (einfo.type) {
|
||||
case SES_ADD :
|
||||
middle = 0;
|
||||
++inc_dec_count;
|
||||
adds.push_back(*it);
|
||||
if (!isMiddle) isMiddle = true;
|
||||
if (isMiddle) ++d;
|
||||
// last element of the script: flush the pending changes and close the hunk
if (l_cnt >= length) {
|
||||
joinSesVec(change, deletes);
|
||||
joinSesVec(change, adds);
|
||||
isAfter = true;
|
||||
}
|
||||
break;
|
||||
case SES_DELETE :
|
||||
middle = 0;
|
||||
--inc_dec_count;
|
||||
deletes.push_back(*it);
|
||||
if (!isMiddle) isMiddle = true;
|
||||
if (isMiddle) ++b;
|
||||
// last element of the script: flush the pending changes and close the hunk
if (l_cnt >= length) {
|
||||
joinSesVec(change, deletes);
|
||||
joinSesVec(change, adds);
|
||||
isAfter = true;
|
||||
}
|
||||
break;
|
||||
case SES_COMMON :
|
||||
++b;++d;
|
||||
// no change seen yet for this hunk: the element is leading context
if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) {
|
||||
if (static_cast<long long>(common[0].size()) < DTL_CONTEXT_SIZE) {
|
||||
if (a == 0 && c == 0) {
|
||||
if (!wasSwapped()) {
|
||||
a = einfo.beforeIdx;
|
||||
c = einfo.afterIdx;
|
||||
} else {
|
||||
a = einfo.afterIdx;
|
||||
c = einfo.beforeIdx;
|
||||
}
|
||||
}
|
||||
common[0].push_back(*it);
|
||||
} else {
|
||||
// context window is full: drop its oldest element, append the new
// one, and move the hunk start forward by one line
rotate(common[0].begin(), common[0].begin() + 1, common[0].end());
|
||||
common[0].pop_back();
|
||||
common[0].push_back(*it);
|
||||
++a;++c;
|
||||
--b;--d;
|
||||
}
|
||||
}
|
||||
// common element following a change: it becomes part of the change block
if (isMiddle && !isAfter) {
|
||||
++middle;
|
||||
joinSesVec(change, deletes);
|
||||
joinSesVec(change, adds);
|
||||
change.push_back(*it);
|
||||
if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) {
|
||||
isAfter = true;
|
||||
}
|
||||
adds.clear();
|
||||
deletes.clear();
|
||||
}
|
||||
break;
|
||||
default :
|
||||
// no fall-through
|
||||
break;
|
||||
}
|
||||
// compose unified format hunk
|
||||
if (isAfter && !change.empty()) {
|
||||
// look ahead: count the common elements among the next
// DTL_SEPARATE_SIZE entries, starting at the current one
sesElemVec_iter cit = it;
|
||||
long long cnt = 0;
|
||||
for (long long i=0;i<DTL_SEPARATE_SIZE && (cit != ses_v.end());++i, ++cit) {
|
||||
if (cit->second.type == SES_COMMON) {
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
// another change follows closely and the script is not finished:
// keep extending the current hunk instead of emitting it
if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) {
|
||||
middle = 0;
|
||||
isAfter = false;
|
||||
continue;
|
||||
}
|
||||
// keep only the last DTL_SEPARATE_SIZE leading context elements
if (static_cast<long long>(common[0].size()) >= DTL_SEPARATE_SIZE) {
|
||||
long long c0size = static_cast<long long>(common[0].size());
|
||||
rotate(common[0].begin(),
|
||||
common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE,
|
||||
common[0].end());
|
||||
for (long long i=0;i<c0size - DTL_SEPARATE_SIZE;++i) {
|
||||
common[0].pop_back();
|
||||
}
|
||||
a += c0size - DTL_SEPARATE_SIZE;
|
||||
c += c0size - DTL_SEPARATE_SIZE;
|
||||
}
|
||||
if (a == 0) ++a;
|
||||
if (c == 0) ++c;
|
||||
if (wasSwapped()) swap(a, c);
|
||||
hunk.a = a;
|
||||
hunk.b = b;
|
||||
hunk.c = c;
|
||||
hunk.d = d;
|
||||
hunk.common[0] = common[0];
|
||||
hunk.change = change;
|
||||
hunk.common[1] = common[1];
|
||||
hunk.inc_dec_count = inc_dec_count;
|
||||
uniHunks.push_back(hunk);
|
||||
// reset all per-hunk state before scanning for the next hunk
isMiddle = false;
|
||||
isAfter = false;
|
||||
common[0].clear();
|
||||
common[1].clear();
|
||||
adds.clear();
|
||||
deletes.clear();
|
||||
change.clear();
|
||||
a = b = c = d = middle = inc_dec_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* compose ses from stream
|
||||
*/
|
||||
template <typename stream>
|
||||
static Ses< elem > composeSesFromStream (stream& st)
|
||||
{
|
||||
elem line;
|
||||
Ses< elem > ret;
|
||||
long long x_idx, y_idx;
|
||||
x_idx = y_idx = 1;
|
||||
while (getline(st, line)) {
|
||||
elem mark(line.begin(), line.begin() + 1);
|
||||
elem e(line.begin() + 1, line.end());
|
||||
if (mark == SES_MARK_DELETE) {
|
||||
ret.addSequence(e, x_idx, 0, SES_DELETE);
|
||||
++x_idx;
|
||||
} else if (mark == SES_MARK_ADD) {
|
||||
ret.addSequence(e, y_idx, 0, SES_ADD);
|
||||
++y_idx;
|
||||
} else if (mark == SES_MARK_COMMON) {
|
||||
ret.addSequence(e, x_idx, y_idx, SES_COMMON);
|
||||
++x_idx;
|
||||
++y_idx;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
private :
|
||||
/**
|
||||
* initialize
|
||||
*/
|
||||
void init () {
|
||||
M = distance(A.begin(), A.end());
|
||||
N = distance(B.begin(), B.end());
|
||||
if (M < N) {
|
||||
swapped = false;
|
||||
} else {
|
||||
swap(A, B);
|
||||
swap(M, N);
|
||||
swapped = true;
|
||||
}
|
||||
editDistance = 0;
|
||||
delta = N - M;
|
||||
offset = M + 1;
|
||||
huge = false;
|
||||
trivial = false;
|
||||
editDistanceOnly = false;
|
||||
fp = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* search shortest path and record the path
|
||||
*/
|
||||
long long snake(const long long& k, const long long& above, const long long& below) {
|
||||
long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset];
|
||||
long long y = max(above, below);
|
||||
long long x = y - k;
|
||||
while ((size_t)x < M && (size_t)y < N && (swapped ? cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) {
|
||||
++x;++y;
|
||||
}
|
||||
|
||||
path[(size_t)k+offset] = static_cast<long long>(pathCordinates.size());
|
||||
if (!editDistanceOnly) {
|
||||
P p;
|
||||
p.x = x;p.y = y;p.k = r;
|
||||
pathCordinates.push_back(p);
|
||||
}
|
||||
return y;
|
||||
}
|
||||
|
||||
/**
|
||||
* record SES and LCS
|
||||
*/
|
||||
bool recordSequence (const editPathCordinates& v) {
|
||||
sequence_const_iter x(A.begin());
|
||||
sequence_const_iter y(B.begin());
|
||||
long long x_idx, y_idx; // line number for Unified Format
|
||||
long long px_idx, py_idx; // cordinates
|
||||
bool complete = false;
|
||||
x_idx = y_idx = 1;
|
||||
px_idx = py_idx = 0;
|
||||
for (size_t i=v.size()-1;!complete;--i) {
|
||||
while(px_idx < v[i].x || py_idx < v[i].y) {
|
||||
if (v[i].y - v[i].x > py_idx - px_idx) {
|
||||
if (!wasSwapped()) {
|
||||
ses.addSequence(*y, 0, y_idx + oy, SES_ADD);
|
||||
} else {
|
||||
ses.addSequence(*y, y_idx + oy, 0, SES_DELETE);
|
||||
}
|
||||
++y;
|
||||
++y_idx;
|
||||
++py_idx;
|
||||
} else if (v[i].y - v[i].x < py_idx - px_idx) {
|
||||
if (!wasSwapped()) {
|
||||
ses.addSequence(*x, x_idx + ox, 0, SES_DELETE);
|
||||
} else {
|
||||
ses.addSequence(*x, 0, x_idx + ox, SES_ADD);
|
||||
}
|
||||
++x;
|
||||
++x_idx;
|
||||
++px_idx;
|
||||
} else {
|
||||
if (!wasSwapped()) {
|
||||
lcs.addSequence(*x);
|
||||
ses.addSequence(*x, x_idx + ox, y_idx + oy, SES_COMMON);
|
||||
} else {
|
||||
lcs.addSequence(*y);
|
||||
ses.addSequence(*y, y_idx + oy, x_idx + ox, SES_COMMON);
|
||||
}
|
||||
++x;
|
||||
++y;
|
||||
++x_idx;
|
||||
++y_idx;
|
||||
++px_idx;
|
||||
++py_idx;
|
||||
}
|
||||
}
|
||||
if (i == 0) complete = true;
|
||||
}
|
||||
|
||||
if (x_idx > static_cast<long long>(M) && y_idx > static_cast<long long>(N)) {
|
||||
// all recording succeeded
|
||||
} else {
|
||||
// trivial difference
|
||||
if (trivialEnabled()) {
|
||||
if (!wasSwapped()) {
|
||||
recordOddSequence(x_idx, M, x, SES_DELETE);
|
||||
recordOddSequence(y_idx, N, y, SES_ADD);
|
||||
} else {
|
||||
recordOddSequence(x_idx, M, x, SES_ADD);
|
||||
recordOddSequence(y_idx, N, y, SES_DELETE);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// nontrivial difference
|
||||
sequence A_(A.begin() + (size_t)x_idx - 1, A.end());
|
||||
sequence B_(B.begin() + (size_t)y_idx - 1, B.end());
|
||||
A = A_;
|
||||
B = B_;
|
||||
M = distance(A.begin(), A.end());
|
||||
N = distance(B.begin(), B.end());
|
||||
delta = N - M;
|
||||
offset = M + 1;
|
||||
delete[] fp;
|
||||
fp = new long long[M + N + 3];
|
||||
fill(&fp[0], &fp[M + N + 3], -1);
|
||||
fill(path.begin(), path.end(), -1);
|
||||
ox = x_idx - 1;
|
||||
oy = y_idx - 1;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* record odd sequence in SES
|
||||
*/
|
||||
void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) {
|
||||
while(idx < length){
|
||||
ses.addSequence(*it, idx, 0, et);
|
||||
++it;
|
||||
++idx;
|
||||
++editDistance;
|
||||
}
|
||||
ses.addSequence(*it, idx, 0, et);
|
||||
++editDistance;
|
||||
}
|
||||
|
||||
/**
|
||||
* join SES vectors
|
||||
*/
|
||||
void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const {
|
||||
if (!s2.empty()) {
|
||||
for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
|
||||
s1.push_back(*vit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* check if the sequences have been swapped
|
||||
*/
|
||||
bool inline wasSwapped () const {
|
||||
return swapped;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif // DTL_DIFF_H
|
||||
245
dtl/Diff3.hpp
@@ -1,245 +0,0 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_DIFF3_H
|
||||
#define DTL_DIFF3_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* diff3 class template
|
||||
* sequence must support random_access_iterator.
|
||||
*/
|
||||
template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
|
||||
class Diff3
|
||||
{
|
||||
private:
|
||||
dtl_typedefs(elem, sequence)
|
||||
sequence A;
|
||||
sequence B;
|
||||
sequence C;
|
||||
sequence S;
|
||||
Diff< elem, sequence, comparator > diff_ba;
|
||||
Diff< elem, sequence, comparator > diff_bc;
|
||||
bool conflict;
|
||||
elem csepabegin;
|
||||
elem csepa;
|
||||
elem csepaend;
|
||||
public :
|
||||
Diff3 () {}
|
||||
Diff3 (const sequence& a,
|
||||
const sequence& b,
|
||||
const sequence& c) : A(a), B(b), C(c),
|
||||
diff_ba(b, a), diff_bc(b, c),
|
||||
conflict(false) {}
|
||||
|
||||
~Diff3 () {}
|
||||
|
||||
bool isConflict () const {
|
||||
return conflict;
|
||||
}
|
||||
|
||||
sequence getMergedSequence () const {
|
||||
return S;
|
||||
}
|
||||
|
||||
/**
|
||||
* merge changes B and C into A
|
||||
*/
|
||||
bool merge () {
|
||||
if (diff_ba.getEditDistance() == 0) { // A == B
|
||||
if (diff_bc.getEditDistance() == 0) { // A == B == C
|
||||
S = B;
|
||||
return true;
|
||||
}
|
||||
S = C;
|
||||
return true;
|
||||
} else { // A != B
|
||||
if (diff_bc.getEditDistance() == 0) { // A != B == C
|
||||
S = A;
|
||||
return true;
|
||||
} else { // A != B != C
|
||||
S = merge_();
|
||||
if (isConflict()) { // conflict occured
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* compose differences
|
||||
*/
|
||||
void compose () {
|
||||
diff_ba.compose();
|
||||
diff_bc.compose();
|
||||
}
|
||||
|
||||
private :
|
||||
/**
|
||||
* merge implementation
|
||||
*/
|
||||
sequence merge_ () {
|
||||
elemVec seq;
|
||||
Ses< elem > ses_ba = diff_ba.getSes();
|
||||
Ses< elem > ses_bc = diff_bc.getSes();
|
||||
sesElemVec ses_ba_v = ses_ba.getSequence();
|
||||
sesElemVec ses_bc_v = ses_bc.getSequence();
|
||||
sesElemVec_iter ba_it = ses_ba_v.begin();
|
||||
sesElemVec_iter bc_it = ses_bc_v.begin();
|
||||
sesElemVec_iter ba_end = ses_ba_v.end();
|
||||
sesElemVec_iter bc_end = ses_bc_v.end();
|
||||
|
||||
while (!isEnd(ba_end, ba_it) || !isEnd(bc_end, bc_it)) {
|
||||
while (true) {
|
||||
if (!isEnd(ba_end, ba_it) &&
|
||||
!isEnd(bc_end, bc_it) &&
|
||||
ba_it->first == bc_it->first &&
|
||||
ba_it->second.type == SES_COMMON &&
|
||||
bc_it->second.type == SES_COMMON) {
|
||||
// do nothing
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
if (!isEnd(ba_end, ba_it)) seq.push_back(ba_it->first);
|
||||
else if (!isEnd(bc_end, bc_it)) seq.push_back(bc_it->first);
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
}
|
||||
if (isEnd(ba_end, ba_it) || isEnd(bc_end, bc_it)) break;
|
||||
if ( ba_it->second.type == SES_COMMON
|
||||
&& bc_it->second.type == SES_DELETE) {
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else if (ba_it->second.type == SES_COMMON &&
|
||||
bc_it->second.type == SES_ADD) {
|
||||
seq.push_back(bc_it->first);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else if (ba_it->second.type == SES_DELETE &&
|
||||
bc_it->second.type == SES_COMMON) {
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else if (ba_it->second.type == SES_DELETE &&
|
||||
bc_it->second.type == SES_DELETE) {
|
||||
if (ba_it->first == bc_it->first) {
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else {
|
||||
// conflict
|
||||
conflict = true;
|
||||
return B;
|
||||
}
|
||||
} else if (ba_it->second.type == SES_DELETE &&
|
||||
bc_it->second.type == SES_ADD) {
|
||||
// conflict
|
||||
conflict = true;
|
||||
return B;
|
||||
} else if (ba_it->second.type == SES_ADD &&
|
||||
bc_it->second.type == SES_COMMON) {
|
||||
seq.push_back(ba_it->first);
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
} else if (ba_it->second.type == SES_ADD &&
|
||||
bc_it->second.type == SES_DELETE) {
|
||||
// conflict
|
||||
conflict = true;
|
||||
return B;
|
||||
} else if (ba_it->second.type == SES_ADD &&
|
||||
bc_it->second.type == SES_ADD) {
|
||||
if (ba_it->first == bc_it->first) {
|
||||
seq.push_back(ba_it->first);
|
||||
forwardUntilEnd(ba_end, ba_it);
|
||||
forwardUntilEnd(bc_end, bc_it);
|
||||
} else {
|
||||
// conflict
|
||||
conflict = true;
|
||||
return B;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isEnd(ba_end, ba_it)) {
|
||||
addDecentSequence(bc_end, bc_it, seq);
|
||||
} else if (isEnd(bc_end, bc_it)) {
|
||||
addDecentSequence(ba_end, ba_it, seq);
|
||||
}
|
||||
|
||||
sequence mergedSeq(seq.begin(), seq.end());
|
||||
return mergedSeq;
|
||||
}
|
||||
|
||||
/**
|
||||
* join elem vectors
|
||||
*/
|
||||
void inline joinElemVec (elemVec& s1, elemVec& s2) const {
|
||||
if (!s2.empty()) {
|
||||
for (elemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
|
||||
s1.push_back(*vit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* check if sequence is at end
|
||||
*/
|
||||
template <typename T_iter>
|
||||
bool inline isEnd (const T_iter& end, const T_iter& it) const {
|
||||
return it == end ? true : false;
|
||||
}
|
||||
|
||||
/**
|
||||
* increment iterator until iterator is at end
|
||||
*/
|
||||
template <typename T_iter>
|
||||
void inline forwardUntilEnd (const T_iter& end, T_iter& it) const {
|
||||
if (!isEnd(end, it)) ++it;
|
||||
}
|
||||
|
||||
/**
|
||||
* add elements whose SES's type is ADD
|
||||
*/
|
||||
void inline addDecentSequence (const sesElemVec_iter& end, sesElemVec_iter& it, elemVec& seq) const {
|
||||
while (!isEnd(end, it)) {
|
||||
if (it->second.type == SES_ADD) seq.push_back(it->first);
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif // DTL_DIFF3_H
|
||||
55
dtl/Lcs.hpp
@@ -1,55 +0,0 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_LCS_H
|
||||
#define DTL_LCS_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* Longest Common Subsequence template class
|
||||
*/
|
||||
template <typename elem>
|
||||
class Lcs : public Sequence< elem >
|
||||
{
|
||||
public :
|
||||
Lcs () {}
|
||||
~Lcs () {}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // DTL_LCS_H
|
||||
@@ -1,65 +0,0 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_SEQUENCE_H
|
||||
#define DTL_SEQUENCE_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* sequence class template
|
||||
*/
|
||||
template <typename elem>
class Sequence {
public:
    typedef vector< elem > elemVec;

    Sequence() {}
    virtual ~Sequence() {}

    // Returns a copy of the stored elements.
    elemVec getSequence() const { return sequence; }

    // Appends `e` to the end of the stored elements.
    void addSequence(elem e) { sequence.push_back(e); }

protected:
    elemVec sequence; // stored elements, in insertion order
};
|
||||
}
|
||||
|
||||
#endif // DTL_SEQUENCE_H
|
||||
132
dtl/Ses.hpp
@@ -1,132 +0,0 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_SES_H
|
||||
#define DTL_SES_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* Shortest Edit Script template class
|
||||
*/
|
||||
template <typename elem>
|
||||
class Ses : public Sequence< elem >
|
||||
{
|
||||
private :
|
||||
typedef pair< elem, elemInfo > sesElem;
|
||||
typedef vector< sesElem > sesElemVec;
|
||||
public :
|
||||
|
||||
Ses () : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(false) {
|
||||
nextDeleteIdx = 0;
|
||||
}
|
||||
Ses (bool moveDel) : onlyAdd(true), onlyDelete(true), onlyCopy(true), deletesFirst(moveDel) {
|
||||
nextDeleteIdx = 0;
|
||||
}
|
||||
~Ses () {}
|
||||
|
||||
bool isOnlyAdd () const {
|
||||
return onlyAdd;
|
||||
}
|
||||
|
||||
bool isOnlyDelete () const {
|
||||
return onlyDelete;
|
||||
}
|
||||
|
||||
bool isOnlyCopy () const {
|
||||
return onlyCopy;
|
||||
}
|
||||
|
||||
bool isOnlyOneOperation () const {
|
||||
return isOnlyAdd() || isOnlyDelete() || isOnlyCopy();
|
||||
}
|
||||
|
||||
bool isChange () const {
|
||||
return !onlyCopy;
|
||||
}
|
||||
|
||||
using Sequence< elem >::addSequence;
|
||||
void addSequence (elem e, long long beforeIdx, long long afterIdx, const edit_t type) {
|
||||
elemInfo info;
|
||||
info.beforeIdx = beforeIdx;
|
||||
info.afterIdx = afterIdx;
|
||||
info.type = type;
|
||||
sesElem pe(e, info);
|
||||
if (!deletesFirst) {
|
||||
sequence.push_back(pe);
|
||||
}
|
||||
switch (type) {
|
||||
case SES_DELETE:
|
||||
onlyCopy = false;
|
||||
onlyAdd = false;
|
||||
if (deletesFirst) {
|
||||
sequence.insert(sequence.begin() + nextDeleteIdx, pe);
|
||||
nextDeleteIdx++;
|
||||
}
|
||||
break;
|
||||
case SES_COMMON:
|
||||
onlyAdd = false;
|
||||
onlyDelete = false;
|
||||
if (deletesFirst) {
|
||||
sequence.push_back(pe);
|
||||
nextDeleteIdx = sequence.size();
|
||||
}
|
||||
break;
|
||||
case SES_ADD:
|
||||
onlyDelete = false;
|
||||
onlyCopy = false;
|
||||
if (deletesFirst) {
|
||||
sequence.push_back(pe);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sesElemVec getSequence () const {
|
||||
return sequence;
|
||||
}
|
||||
private :
|
||||
sesElemVec sequence;
|
||||
bool onlyAdd;
|
||||
bool onlyDelete;
|
||||
bool onlyCopy;
|
||||
bool deletesFirst;
|
||||
size_t nextDeleteIdx;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // DTL_SES_H
|
||||
47
dtl/dtl.hpp
@@ -1,47 +0,0 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DTL_H
|
||||
#define DTL_H
|
||||
|
||||
#include "variables.hpp"
|
||||
#include "functors.hpp"
|
||||
#include "Sequence.hpp"
|
||||
#include "Lcs.hpp"
|
||||
#include "Ses.hpp"
|
||||
#include "Diff.hpp"
|
||||
#include "Diff3.hpp"
|
||||
|
||||
#endif // DTL_H
|
||||
151
dtl/functors.hpp
@@ -1,151 +0,0 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_FUNCTORS_H
|
||||
#define DTL_FUNCTORS_H
|
||||
|
||||
namespace dtl {
|
||||
|
||||
/**
|
||||
* printer class template
|
||||
*/
|
||||
template <typename sesElem, typename stream = ostream >
class Printer {
public:
    // Without an explicit stream the printer writes to cout.
    Printer() : out_(cout) {}
    Printer(stream& out) : out_(out) {}
    virtual ~Printer() {}

    // Derived printers define how one SES element is written to out_.
    virtual void operator() (const sesElem& se) const = 0;

protected:
    stream& out_; // destination stream; referenced, not owned
};
|
||||
|
||||
/**
|
||||
* common element printer class template
|
||||
*/
|
||||
template <typename sesElem, typename stream = ostream >
|
||||
class CommonPrinter : public Printer < sesElem, stream >
|
||||
{
|
||||
public :
|
||||
CommonPrinter () : Printer < sesElem, stream > () {}
|
||||
CommonPrinter (stream& out) : Printer < sesElem, stream > (out) {}
|
||||
~CommonPrinter () {}
|
||||
void operator() (const sesElem& se) const {
|
||||
this->out_ << SES_MARK_COMMON << se.first << endl;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* ses element printer class template
|
||||
*/
|
||||
template <typename sesElem, typename stream = ostream >
|
||||
class ChangePrinter : public Printer < sesElem, stream >
|
||||
{
|
||||
public :
|
||||
ChangePrinter () : Printer < sesElem, stream > () {}
|
||||
ChangePrinter (stream& out) : Printer < sesElem, stream > (out) {}
|
||||
~ChangePrinter () {}
|
||||
void operator() (const sesElem& se) const {
|
||||
switch (se.second.type) {
|
||||
case SES_ADD:
|
||||
this->out_ << SES_MARK_ADD << se.first << endl;
|
||||
break;
|
||||
case SES_DELETE:
|
||||
this->out_ << SES_MARK_DELETE << se.first << endl;
|
||||
break;
|
||||
case SES_COMMON:
|
||||
this->out_ << SES_MARK_COMMON << se.first << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* unified format element printer class template
|
||||
*/
|
||||
template <typename sesElem, typename stream = ostream >
|
||||
class UniHunkPrinter
|
||||
{
|
||||
public :
|
||||
UniHunkPrinter () : out_(cout) {}
|
||||
UniHunkPrinter (stream& out) : out_(out) {}
|
||||
~UniHunkPrinter () {}
|
||||
void operator() (const uniHunk< sesElem >& hunk) const {
|
||||
out_ << "@@"
|
||||
<< " -" << hunk.a << "," << hunk.b
|
||||
<< " +" << hunk.c << "," << hunk.d
|
||||
<< " @@" << endl;
|
||||
|
||||
for_each(hunk.common[0].begin(), hunk.common[0].end(), CommonPrinter< sesElem, stream >(out_));
|
||||
for_each(hunk.change.begin(), hunk.change.end(), ChangePrinter< sesElem, stream >(out_));
|
||||
for_each(hunk.common[1].begin(), hunk.common[1].end(), CommonPrinter< sesElem, stream >(out_));
|
||||
}
|
||||
private :
|
||||
stream& out_;
|
||||
};
|
||||
|
||||
/**
|
||||
* storage class template
|
||||
*/
|
||||
template <typename sesElem, typename storedData >
class Storage {
public:
    // Keeps a reference to `sd`; the referenced object is not copied.
    Storage(storedData& sd) : storedData_(sd) {}
    virtual ~Storage() {}

    // Derived classes define what is done with one SES element.
    virtual void operator() (const sesElem& se) const = 0;

protected:
    storedData& storedData_; // caller-provided storage; referenced, not owned
};
|
||||
|
||||
/**
|
||||
* compare class template
|
||||
*/
|
||||
template <typename elem>
class Compare {
public:
    Compare() {}
    virtual ~Compare() {}

    // Default comparison: operator== on the two elements.
    // Virtual, so a subclass can supply a different equality.
    virtual inline bool impl (const elem& e1, const elem& e2) const {
        const bool same = (e1 == e2);
        return same;
    }
};
|
||||
}
|
||||
|
||||
#endif // DTL_FUNCTORS_H
|
||||
@@ -1,142 +0,0 @@
|
||||
/**
|
||||
dtl -- Diff Template Library
|
||||
|
||||
In short, Diff Template Library is distributed under so called "BSD license",
|
||||
|
||||
Copyright (c) 2015 Tatsuhiko Kubo <cubicdaiya@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the authors nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* If you use this library, you must include dtl.hpp only. */
|
||||
|
||||
#ifndef DTL_VARIABLES_H
|
||||
#define DTL_VARIABLES_H
|
||||
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
|
||||
namespace dtl {
|
||||
|
||||
using std::vector;
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::ostream;
|
||||
using std::list;
|
||||
using std::for_each;
|
||||
using std::distance;
|
||||
using std::fill;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::rotate;
|
||||
using std::swap;
|
||||
using std::max;
|
||||
|
||||
/**
|
||||
* version string
|
||||
*/
|
||||
const string version = "1.20";
|
||||
|
||||
/**
|
||||
* type of edit for SES
|
||||
*/
|
||||
typedef int edit_t;
|
||||
const edit_t SES_DELETE = -1;
|
||||
const edit_t SES_COMMON = 0;
|
||||
const edit_t SES_ADD = 1;
|
||||
|
||||
/**
|
||||
* mark of SES
|
||||
*/
|
||||
#define SES_MARK_DELETE "-"
|
||||
#define SES_MARK_COMMON " "
|
||||
#define SES_MARK_ADD "+"
|
||||
|
||||
/**
|
||||
* info for Unified Format
|
||||
*/
|
||||
typedef struct eleminfo {
|
||||
long long beforeIdx; // index of prev sequence
|
||||
long long afterIdx; // index of after sequence
|
||||
edit_t type; // type of edit(Add, Delete, Common)
|
||||
bool operator==(const eleminfo& other) const{
|
||||
return (this->beforeIdx == other.beforeIdx && this->afterIdx == other.afterIdx && this->type == other.type);
|
||||
}
|
||||
} elemInfo;
|
||||
|
||||
const long long DTL_SEPARATE_SIZE = 3;
|
||||
const long long DTL_CONTEXT_SIZE = 3;
|
||||
|
||||
/**
|
||||
* coordinate for registering route
|
||||
*/
|
||||
typedef struct Point {
    long long x;                         // x coordinate
    long long y;                         // y coordinate
    long long k;                         // vertex
} P;
|
||||
|
||||
/**
|
||||
* limit of coordinate size
|
||||
*/
|
||||
const unsigned long long MAX_CORDINATES_SIZE = 2000000;
|
||||
|
||||
typedef vector< long long > editPath;
|
||||
typedef vector< P > editPathCordinates;
|
||||
|
||||
/**
|
||||
* Structure of Unified Format Hunk
|
||||
*/
|
||||
template <typename sesElem>
struct uniHunk {
    // Header numbers: @@ -a,b +c,d @@
    long long a;
    long long b;
    long long c;
    long long d;
    vector< sesElem > common[2];          // anteroposterior commons on changes
    vector< sesElem > change;             // changes
    long long inc_dec_count;              // count of increase and decrease
};
|
||||
|
||||
// Declares the container and iterator typedefs for a given element type and
// sequence type. The iterator typedefs use `typename`.
#define dtl_typedefs(elem, sequence)                                    \
    typedef pair< elem, elemInfo >                sesElem;              \
    typedef vector< sesElem >                     sesElemVec;           \
    typedef vector< uniHunk< sesElem > >          uniHunkVec;           \
    typedef list< elem >                          elemList;             \
    typedef vector< elem >                        elemVec;              \
    typedef typename uniHunkVec::iterator         uniHunkVec_iter;      \
    typedef typename sesElemVec::iterator         sesElemVec_iter;      \
    typedef typename elemList::iterator           elemList_iter;        \
    typedef typename sequence::iterator           sequence_iter;        \
    typedef typename sequence::const_iterator     sequence_const_iter;  \
    typedef typename elemVec::iterator            elemVec_iter;
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // DTL_VARIABLES_H
|
||||
@@ -1,17 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.10)

project(OpenCLVectorAdd)

# External dependencies: threads (preferring the -pthread flag) and OpenCL.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
find_package(OpenCL REQUIRED)

# The example program itself.
add_executable(OpenCLVectorAdd OpenCLVectorAdd.cpp)

# Tracy client built as a static library with TRACY_ENABLE defined;
# its include directory and definition propagate to targets linking it.
add_library(TracyClient STATIC
    ../../public/TracyClient.cpp
    ../../public/tracy/TracyOpenCL.hpp)
target_include_directories(TracyClient PUBLIC ../../public/tracy)
target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE=1)

target_link_libraries(OpenCLVectorAdd PUBLIC
    OpenCL::OpenCL
    TracyClient
    ${CMAKE_DL_LIBS}
    Threads::Threads)
|
||||
@@ -1,220 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <math.h>
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <Tracy.hpp>
|
||||
#include <TracyOpenCL.hpp>
|
||||
|
||||
// Checks the result of an OpenCL call: on anything other than CL_SUCCESS the
// code is printed to std::cerr and assert(false) fires.
// The argument is evaluated exactly once and stored, so a failing call such
// as CL_ASSERT(clEnqueueWriteBuffer(...)) is not issued a second time while
// printing. The do/while(false) wrapper makes the macro a single statement,
// so it is safe inside an unbraced if/else.
#define CL_ASSERT(err)                                                        \
    do                                                                        \
    {                                                                         \
        const cl_int clAssertResult_ = (err);                                 \
        if(clAssertResult_ != CL_SUCCESS)                                     \
        {                                                                     \
            std::cerr << "OpenCL Call Returned " << clAssertResult_ << std::endl; \
            assert(false);                                                    \
        }                                                                     \
    } while(false)
|
||||
|
||||
const char kernelSource[] =
|
||||
" void __kernel vectorAdd(global float* C, global float* A, global float* B, int N) "
|
||||
" { "
|
||||
" int i = get_global_id(0); "
|
||||
" if (i < N) { "
|
||||
" C[i] = A[i] + B[i]; "
|
||||
" } "
|
||||
" } ";
|
||||
|
||||
int main()
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id device;
|
||||
cl_context context;
|
||||
cl_command_queue commandQueue;
|
||||
cl_kernel vectorAddKernel;
|
||||
cl_program program;
|
||||
cl_int err;
|
||||
cl_mem bufferA, bufferB, bufferC;
|
||||
|
||||
TracyCLCtx tracyCLCtx;
|
||||
|
||||
{
|
||||
ZoneScopedN("OpenCL Init");
|
||||
|
||||
cl_uint numPlatforms = 0;
|
||||
CL_ASSERT(clGetPlatformIDs(0, nullptr, &numPlatforms));
|
||||
|
||||
if (numPlatforms == 0)
|
||||
{
|
||||
std::cerr << "Cannot find OpenCL platform to run this application" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
CL_ASSERT(clGetPlatformIDs(1, &platform, nullptr));
|
||||
|
||||
size_t platformNameBufferSize = 0;
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, nullptr, &platformNameBufferSize));
|
||||
std::string platformName(platformNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformNameBufferSize, &platformName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Platform: " << platformName << std::endl;
|
||||
|
||||
CL_ASSERT(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr));
|
||||
size_t deviceNameBufferSize = 0;
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, 0, nullptr, &deviceNameBufferSize));
|
||||
std::string deviceName(deviceNameBufferSize, '\0');
|
||||
CL_ASSERT(clGetDeviceInfo(device, CL_DEVICE_NAME, deviceNameBufferSize, &deviceName[0], nullptr));
|
||||
|
||||
std::cout << "OpenCL Device: " << deviceName << std::endl;
|
||||
|
||||
err = CL_SUCCESS;
|
||||
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
size_t kernelSourceLength = sizeof(kernelSource);
|
||||
const char* kernelSourceArray = { kernelSource };
|
||||
program = clCreateProgramWithSource(context, 1, &kernelSourceArray, &kernelSourceLength, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
if (clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr) != CL_SUCCESS)
|
||||
{
|
||||
size_t programBuildLogBufferSize = 0;
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &programBuildLogBufferSize));
|
||||
std::string programBuildLog(programBuildLogBufferSize, '\0');
|
||||
CL_ASSERT(clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, programBuildLogBufferSize, &programBuildLog[0], nullptr));
|
||||
std::clog << programBuildLog << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vectorAddKernel = clCreateKernel(program, "vectorAdd", &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
|
||||
CL_ASSERT(err);
|
||||
}
|
||||
|
||||
tracyCLCtx = TracyCLContext(context, device);
|
||||
|
||||
size_t N = 10 * 1024 * 1024 / sizeof(float); // 10MB of floats
|
||||
std::vector<float> hostA, hostB, hostC;
|
||||
|
||||
{
|
||||
ZoneScopedN("Host Data Init");
|
||||
hostA.resize(N);
|
||||
hostB.resize(N);
|
||||
hostC.resize(N);
|
||||
|
||||
std::iota(std::begin(hostA), std::end(hostA), 0.0f);
|
||||
std::iota(std::begin(hostB), std::end(hostB), 0.0f);
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Host to Device Memory Copy");
|
||||
|
||||
bufferA = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferB = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
bufferC = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), nullptr, &err);
|
||||
CL_ASSERT(err);
|
||||
|
||||
cl_event writeBufferAEvent, writeBufferBEvent;
|
||||
{
|
||||
ZoneScopedN("Write Buffer A");
|
||||
TracyCLZoneS(tracyCLCtx, "Write BufferA", 5);
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferA, CL_FALSE, 0, N * sizeof(float), hostA.data(), 0, nullptr, &writeBufferAEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferAEvent);
|
||||
}
|
||||
{
|
||||
ZoneScopedN("Write Buffer B");
|
||||
TracyCLZone(tracyCLCtx, "Write BufferB");
|
||||
|
||||
CL_ASSERT(clEnqueueWriteBuffer(commandQueue, bufferB, CL_FALSE, 0, N * sizeof(float), hostB.data(), 0, nullptr, &writeBufferBEvent));
|
||||
|
||||
TracyCLZoneSetEvent(writeBufferBEvent);
|
||||
}
|
||||
}
|
||||
|
||||
cl_int clN = static_cast<cl_int>(N);
|
||||
const int numFrames = 10;
|
||||
const int launchsPerFrame = 10;
|
||||
constexpr int numLaunchs = numFrames * launchsPerFrame;
|
||||
std::vector<cl_event> kernelLaunchEvts;
|
||||
kernelLaunchEvts.reserve(numLaunchs);
|
||||
for (int i = 0; i < numFrames; ++i)
|
||||
{
|
||||
FrameMark;
|
||||
for (int j = 0; j < launchsPerFrame; ++j) {
|
||||
ZoneScopedN("VectorAdd Kernel Launch");
|
||||
TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4);
|
||||
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB));
|
||||
CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(cl_int), &clN));
|
||||
|
||||
cl_event vectorAddKernelEvent;
|
||||
CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent));
|
||||
TracyCLZoneSetEvent(vectorAddKernelEvent);
|
||||
CL_ASSERT(clRetainEvent(vectorAddKernelEvent));
|
||||
kernelLaunchEvts.push_back(vectorAddKernelEvent);
|
||||
std::cout << "VectorAdd Kernel Enqueued" << std::endl;
|
||||
}
|
||||
{
|
||||
// Wait frame events to be finished
|
||||
ZoneScopedN("clFinish");
|
||||
CL_ASSERT(clFinish(commandQueue));
|
||||
}
|
||||
// You should collect on each 'frame' ends, so that streaming can be achieved.
|
||||
TracyCLCollect(tracyCLCtx);
|
||||
}
|
||||
|
||||
{
|
||||
ZoneScopedN("Device to Host Memory Copy");
|
||||
TracyCLZone(tracyCLCtx, "Read Buffer C");
|
||||
|
||||
cl_event readbufferCEvent;
|
||||
CL_ASSERT(clEnqueueReadBuffer(commandQueue, bufferC, CL_TRUE, 0, N * sizeof(float), hostC.data(), 0, nullptr, &readbufferCEvent));
|
||||
TracyCLZoneSetEvent(readbufferCEvent);
|
||||
}
|
||||
|
||||
CL_ASSERT(clFinish(commandQueue));
|
||||
std::vector<float> durations(kernelLaunchEvts.size());
|
||||
for (int i=0; i<kernelLaunchEvts.size(); i++) {
|
||||
cl_event evt = kernelLaunchEvts[i];
|
||||
cl_ulong start;
|
||||
cl_ulong end;
|
||||
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr));
|
||||
CL_ASSERT(clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr));
|
||||
CL_ASSERT(clReleaseEvent(evt));
|
||||
durations[i] = (end - start) * 0.001f;
|
||||
std::cout << "VectorAdd Kernel " << i << " tooks " << static_cast<int>(durations[i]) << "us" << std::endl;
|
||||
};
|
||||
float avg = std::accumulate(durations.cbegin(), durations.cend(), 0.0f) / durations.size();
|
||||
float stddev2 = std::accumulate(durations.cbegin(), durations.cend(), 0.0f, [avg](const float& acc, const float& v) {
|
||||
auto d = v - avg;
|
||||
return acc + d*d;
|
||||
}) / (durations.size() - 1.0f);
|
||||
std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl;
|
||||
|
||||
// User should ensure all events are finished, in this case, collect after the clFinish will do the trick.
|
||||
TracyCLCollect(tracyCLCtx);
|
||||
|
||||
{
|
||||
ZoneScopedN("Checking results");
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
assert(hostC[i] == hostA[i] + hostB[i]);
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Results are correct!" << std::endl;
|
||||
|
||||
TracyCLDestroy(tracyCLCtx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
1
examples/ToyPathTracer/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
Windows/Compiled*Shader.h
|
||||
@@ -1,4 +0,0 @@
|
||||
https://github.com/aras-p/ToyPathTracer
|
||||
|
||||
Modified to render only 10 frames. Client part requires 12 GB, server part
|
||||
requires 6.4 GB.
|
||||
@@ -1,33 +0,0 @@
|
||||
|
||||
#if defined(__APPLE__) && !defined(__METAL_VERSION__)
|
||||
#include <TargetConditionals.h>
|
||||
#endif
|
||||
|
||||
#define kBackbufferWidth 1280
|
||||
#define kBackbufferHeight 720
|
||||
|
||||
// Capability switches for the CPU code path: Emscripten builds turn off both
// SIMD and threading, every other target turns both on.
#if defined(__EMSCRIPTEN__)
|
||||
#define CPU_CAN_DO_SIMD 0
|
||||
#define CPU_CAN_DO_THREADS 0
|
||||
#else
|
||||
#define CPU_CAN_DO_SIMD 1
|
||||
#define CPU_CAN_DO_THREADS 1
|
||||
#endif
|
||||
|
||||
|
||||
#define DO_SAMPLES_PER_PIXEL 4
|
||||
#define DO_ANIMATE_SMOOTHING 0.9f
|
||||
#define DO_LIGHT_SAMPLING 1
|
||||
#define DO_MITSUBA_COMPARE 0
|
||||
|
||||
// Should path tracing be done on the GPU with a compute shader?
|
||||
#define DO_COMPUTE_GPU 0
|
||||
#define kCSGroupSizeX 8
|
||||
#define kCSGroupSizeY 8
|
||||
#define kCSMaxObjects 64
|
||||
|
||||
// Should float3 struct use SSE/NEON?
// Only enabled when GPU compute is off and the CPU supports SIMD; the trailing
// "&& 1" is a manual on/off toggle.
|
||||
#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && CPU_CAN_DO_SIMD && 1)
|
||||
|
||||
// Should HitSpheres function use SSE/NEON?
// Requires CPU SIMD support; the trailing "&& 1" is a manual on/off toggle.
|
||||
#define DO_HIT_SPHERES_SIMD (CPU_CAN_DO_SIMD && 1)
|
||||
@@ -1,192 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
// VM_INLINE: compiler-specific "always inline" decoration used on the small
// vector-math helpers below (MSVC spelling vs. GCC/Clang attribute spelling).
#if defined(_MSC_VER)
|
||||
#define VM_INLINE __forceinline
|
||||
#else
|
||||
#define VM_INLINE __attribute__((unused, always_inline, nodebug)) inline
|
||||
#endif
|
||||
|
||||
#define kSimdWidth 4
|
||||
|
||||
#if !defined(__arm__) && !defined(__arm64__) && !defined(__EMSCRIPTEN__)
|
||||
|
||||
// ---- SSE implementation
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
// SHUFFLE4(v, X,Y,Z,W) builds a float4 whose lanes 0..3 are taken from lanes
// X, Y, Z, W of v; e.g. SHUFFLE4(v, 0,0,0,0) broadcasts lane 0 to every lane.
#define SHUFFLE4(V, X,Y,Z,W) float4(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X)))
|
||||
|
||||
struct float4
|
||||
{
|
||||
VM_INLINE float4() {}
|
||||
VM_INLINE explicit float4(const float *p) { m = _mm_loadu_ps(p); }
|
||||
VM_INLINE explicit float4(float x, float y, float z, float w) { m = _mm_set_ps(w, z, y, x); }
|
||||
VM_INLINE explicit float4(float v) { m = _mm_set_ps1(v); }
|
||||
VM_INLINE explicit float4(__m128 v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
|
||||
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
|
||||
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
|
||||
VM_INLINE float getW() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(3, 3, 3, 3))); }
|
||||
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
typedef float4 bool4;
|
||||
|
||||
// Lane-wise arithmetic.
VM_INLINE float4 operator+ (float4 a, float4 b) { return float4(_mm_add_ps(a.m, b.m)); }
VM_INLINE float4 operator- (float4 a, float4 b) { return float4(_mm_sub_ps(a.m, b.m)); }
VM_INLINE float4 operator* (float4 a, float4 b) { return float4(_mm_mul_ps(a.m, b.m)); }

// Lane-wise comparisons; each result lane is all-ones when true, all-zeros otherwise.
VM_INLINE bool4 operator==(float4 a, float4 b) { return bool4(_mm_cmpeq_ps(a.m, b.m)); }
VM_INLINE bool4 operator!=(float4 a, float4 b) { return bool4(_mm_cmpneq_ps(a.m, b.m)); }
VM_INLINE bool4 operator< (float4 a, float4 b) { return bool4(_mm_cmplt_ps(a.m, b.m)); }
VM_INLINE bool4 operator> (float4 a, float4 b) { return bool4(_mm_cmpgt_ps(a.m, b.m)); }
VM_INLINE bool4 operator<=(float4 a, float4 b) { return bool4(_mm_cmple_ps(a.m, b.m)); }
VM_INLINE bool4 operator>=(float4 a, float4 b) { return bool4(_mm_cmpge_ps(a.m, b.m)); }

// Bitwise combination of comparison masks.
VM_INLINE bool4 operator&(bool4 a, bool4 b) { return bool4(_mm_and_ps(a.m, b.m)); }
VM_INLINE bool4 operator|(bool4 a, bool4 b) { return bool4(_mm_or_ps(a.m, b.m)); }

// Negation flips the sign bit of every lane.
VM_INLINE float4 operator- (float4 a) { return float4(_mm_xor_ps(a.m, _mm_set1_ps(-0.0f))); }

// Lane-wise minimum / maximum.
VM_INLINE float4 min(float4 a, float4 b) { return float4(_mm_min_ps(a.m, b.m)); }
VM_INLINE float4 max(float4 a, float4 b) { return float4(_mm_max_ps(a.m, b.m)); }
|
||||
|
||||
// Horizontal minimum: smallest of the four lanes.
VM_INLINE float hmin(float4 v)
{
    // Lane 0 <- min(x, z), lane 1 <- min(y, w).
    const float4 pairwise = min(v, SHUFFLE4(v, 2, 3, 0, 0));
    // Lane 0 <- min(lane 0, lane 1).
    const float4 folded = min(pairwise, SHUFFLE4(pairwise, 1, 0, 0, 0));
    return folded.getX();
}
|
||||
|
||||
// Returns a 4-bit code where bit0..bit3 is X..W
|
||||
VM_INLINE unsigned mask(float4 v) { return _mm_movemask_ps(v.m); }
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
|
||||
|
||||
// "select", i.e. hibit(cond) ? b : a
|
||||
// on SSE4.1 and up this can be done easily via "blend" instruction;
|
||||
// on older SSEs has to do a bunch of hoops, see
|
||||
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
|
||||
|
||||
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
|
||||
{
|
||||
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
|
||||
a.m = _mm_blendv_ps(a.m, b.m, cond.m);
|
||||
#else
|
||||
__m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31));
|
||||
a.m = _mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m));
|
||||
#endif
|
||||
return a;
|
||||
}
|
||||
// Integer flavour of select: picks b where the high bit of the matching cond
// lane is set, a otherwise.
VM_INLINE __m128i select(__m128i a, __m128i b, bool4 cond)
{
    const __m128i condBits = _mm_castps_si128(cond.m);
#if defined(__SSE4_1__) || defined(_MSC_VER) // on windows assume we always have SSE4.1
    return _mm_blendv_epi8(a, b, condBits);
#else
    // Smear each lane's sign bit across the lane, then blend with and/andnot/or.
    const __m128i laneMask = _mm_srai_epi32(condBits, 31);
    const __m128i takenFromB = _mm_and_si128(laneMask, b);
    const __m128i takenFromA = _mm_andnot_si128(laneMask, a);
    return _mm_or_si128(takenFromB, takenFromA);
#endif
}
|
||||
|
||||
// Lane-wise square root.
VM_INLINE float4 sqrtf(float4 v)
{
    v.m = _mm_sqrt_ps(v.m);
    return v;
}
|
||||
|
||||
#elif !defined(__EMSCRIPTEN__)
|
||||
|
||||
// ---- NEON implementation
|
||||
|
||||
#define USE_NEON 1
|
||||
#include <arm_neon.h>
|
||||
|
||||
struct float4
|
||||
{
|
||||
VM_INLINE float4() {}
|
||||
VM_INLINE explicit float4(const float *p) { m = vld1q_f32(p); }
|
||||
VM_INLINE explicit float4(float x, float y, float z, float w) { float v[4] = {x, y, z, w}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float4(float v) { m = vdupq_n_f32(v); }
|
||||
VM_INLINE explicit float4(float32x4_t v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
|
||||
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
|
||||
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
|
||||
VM_INLINE float getW() const { return vgetq_lane_f32(m, 3); }
|
||||
|
||||
float32x4_t m;
|
||||
};
|
||||
|
||||
typedef float4 bool4;
|
||||
|
||||
// Lane-wise arithmetic.
VM_INLINE float4 operator+ (float4 a, float4 b) { return float4(vaddq_f32(a.m, b.m)); }
VM_INLINE float4 operator- (float4 a, float4 b) { return float4(vsubq_f32(a.m, b.m)); }
VM_INLINE float4 operator* (float4 a, float4 b) { return float4(vmulq_f32(a.m, b.m)); }

// Lane-wise comparisons. NEON compares yield uint32x4_t masks (all-ones / all-zeros
// per lane); they are reinterpreted explicitly as float lanes so the code does not
// depend on the compiler accepting implicit ("lax") vector conversions.
VM_INLINE bool4 operator==(float4 a, float4 b) { return bool4(vreinterpretq_f32_u32(vceqq_f32(a.m, b.m))); }
VM_INLINE bool4 operator!=(float4 a, float4 b) { return bool4(vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.m, b.m)))); }
VM_INLINE bool4 operator< (float4 a, float4 b) { return bool4(vreinterpretq_f32_u32(vcltq_f32(a.m, b.m))); }
VM_INLINE bool4 operator> (float4 a, float4 b) { return bool4(vreinterpretq_f32_u32(vcgtq_f32(a.m, b.m))); }
VM_INLINE bool4 operator<=(float4 a, float4 b) { return bool4(vreinterpretq_f32_u32(vcleq_f32(a.m, b.m))); }
VM_INLINE bool4 operator>=(float4 a, float4 b) { return bool4(vreinterpretq_f32_u32(vcgeq_f32(a.m, b.m))); }

// Bitwise combination of comparison masks, done on the integer view of the lanes.
VM_INLINE bool4 operator&(bool4 a, bool4 b)
{
    const uint32x4_t bits = vandq_u32(vreinterpretq_u32_f32(a.m), vreinterpretq_u32_f32(b.m));
    return bool4(vreinterpretq_f32_u32(bits));
}
VM_INLINE bool4 operator|(bool4 a, bool4 b)
{
    const uint32x4_t bits = vorrq_u32(vreinterpretq_u32_f32(a.m), vreinterpretq_u32_f32(b.m));
    return bool4(vreinterpretq_f32_u32(bits));
}

// Lane-wise negation.
VM_INLINE float4 operator- (float4 a) { return float4(vnegq_f32(a.m)); }

// Lane-wise minimum / maximum.
VM_INLINE float4 min(float4 a, float4 b) { return float4(vminq_f32(a.m, b.m)); }
VM_INLINE float4 max(float4 a, float4 b) { return float4(vmaxq_f32(a.m, b.m)); }
|
||||
|
||||
// Horizontal minimum: smallest of the four lanes.
VM_INLINE float hmin(float4 v)
{
    // Pairwise min of the two halves gives (min(x,y), min(z,w)).
    const float32x2_t halfMins = vpmin_f32(vget_low_f32(v.m), vget_high_f32(v.m));
    // A second pairwise min leaves the overall minimum in lane 0.
    const float32x2_t overall = vpmin_f32(halfMins, halfMins);
    return vget_lane_f32(overall, 0);
}
|
||||
|
||||
// Returns a 4-bit code where bit0..bit3 is X..W
|
||||
VM_INLINE unsigned mask(float4 v)
|
||||
{
|
||||
static const uint32x4_t movemask = { 1, 2, 4, 8 };
|
||||
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
|
||||
uint32x4_t t1 = vtstq_u32(t0, highbit);
|
||||
uint32x4_t t2 = vandq_u32(t1, movemask);
|
||||
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
|
||||
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
|
||||
}
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool4 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool4 v) { return mask(v) == 15; }
|
||||
|
||||
// "select", i.e. hibit(cond) ? b : a
|
||||
// on SSE4.1 and up this can be done easily via "blend" instruction;
|
||||
// on older SSEs has to do a bunch of hoops, see
|
||||
// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/
|
||||
|
||||
VM_INLINE float4 select(float4 a, float4 b, bool4 cond)
|
||||
{
|
||||
a.m = vbslq_f32(cond.m, b.m, a.m);
|
||||
return a;
|
||||
}
|
||||
// Integer flavour of select: bitwise pick of b where cond is set, a otherwise.
// Uses the s32 variant of vbsl with an explicitly reinterpreted mask so the
// argument and return types match the intrinsic's signature.
VM_INLINE int32x4_t select(int32x4_t a, int32x4_t b, bool4 cond)
{
    const uint32x4_t bits = vreinterpretq_u32_f32(cond.m);
    return vbslq_s32(bits, b, a);
}
|
||||
|
||||
// Lane-wise square root computed as v * rsqrt(v): start from the hardware
// reciprocal-square-root estimate and refine it three times with vrsqrtsq_f32.
VM_INLINE float4 sqrtf(float4 v)
{
    const float32x4_t x = v.m;
    float32x4_t invRoot = vrsqrteq_f32(x);
    for (int pass = 0; pass < 3; ++pass)
    {
        const float32x4_t xTimesEstimate = vmulq_f32(x, invRoot);
        const float32x4_t correction = vrsqrtsq_f32(xTimesEstimate, invRoot);
        invRoot = vmulq_f32(invRoot, correction);
    }
    return float4(vmulq_f32(x, invRoot));
}
|
||||
|
||||
// Broadcast a single lane of v to all four lanes of the result.
VM_INLINE float4 splatX(float32x4_t v)
{
    const float32x2_t xy = vget_low_f32(v);
    return float4(vdupq_lane_f32(xy, 0));
}
VM_INLINE float4 splatY(float32x4_t v)
{
    const float32x2_t xy = vget_low_f32(v);
    return float4(vdupq_lane_f32(xy, 1));
}
VM_INLINE float4 splatZ(float32x4_t v)
{
    const float32x2_t zw = vget_high_f32(v);
    return float4(vdupq_lane_f32(zw, 0));
}
VM_INLINE float4 splatW(float32x4_t v)
{
    const float32x2_t zw = vget_high_f32(v);
    return float4(vdupq_lane_f32(zw, 1));
}
|
||||
|
||||
#endif
|
||||
@@ -1,203 +0,0 @@
|
||||
#include "Maths.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Advances a 32-bit xorshift generator in place (shifts 13, 17, 15) and
// returns the new state.
static uint32_t XorShift32(uint32_t& state)
{
    state ^= state << 13;
    state ^= state >> 17;
    state ^= state << 15;
    return state;
}
|
||||
|
||||
float RandomFloat01(uint32_t& state)
|
||||
{
|
||||
return (XorShift32(state) & 0xFFFFFF) / 16777216.0f;
|
||||
}
|
||||
|
||||
// Rejection-samples a point strictly inside the unit disk in the XY plane (z = 0).
float3 RandomInUnitDisk(uint32_t& state)
{
    const float3 recenter(1, 1, 0);
    float3 candidate;
    for (;;)
    {
        // Map two [0,1) samples to [-1,1) x [-1,1).
        candidate = 2.0f * float3(RandomFloat01(state), RandomFloat01(state), 0) - recenter;
        if (!(dot(candidate, candidate) >= 1.0f))
            break;
    }
    return candidate;
}
|
||||
|
||||
// Rejection-samples a point strictly inside the unit sphere.
float3 RandomInUnitSphere(uint32_t& state)
{
    const float3 recenter(1, 1, 1);
    float3 candidate;
    for (;;)
    {
        // Map three [0,1) samples to the cube [-1,1)^3.
        candidate = 2.0f * float3(RandomFloat01(state), RandomFloat01(state), RandomFloat01(state)) - recenter;
        if (!(sqLength(candidate) >= 1.0f))
            break;
    }
    return candidate;
}
|
||||
|
||||
// Builds a random direction from a random height z in [-1, 1) and a random
// angle around the Z axis.
float3 RandomUnitVector(uint32_t& state)
{
    const float height = RandomFloat01(state) * 2.0f - 1.0f;
    const float angle = RandomFloat01(state) * 2.0f * kPI;
    // Radius of the circle at that height on the unit sphere.
    const float ringRadius = sqrtf(1.0f - height * height);
    return float3(ringRadius * cosf(angle), ringRadius * sinf(angle), height);
}
|
||||
|
||||
|
||||
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit)
|
||||
{
|
||||
#if DO_HIT_SPHERES_SIMD
|
||||
float4 hitT = float4(tMax);
|
||||
#if USE_NEON
|
||||
int32x4_t id = vdupq_n_s32(-1);
|
||||
#else
|
||||
__m128i id = _mm_set1_epi32(-1);
|
||||
#endif
|
||||
|
||||
#if DO_FLOAT3_WITH_SIMD && !USE_NEON
|
||||
float4 rOrigX = SHUFFLE4(r.orig, 0, 0, 0, 0);
|
||||
float4 rOrigY = SHUFFLE4(r.orig, 1, 1, 1, 1);
|
||||
float4 rOrigZ = SHUFFLE4(r.orig, 2, 2, 2, 2);
|
||||
float4 rDirX = SHUFFLE4(r.dir, 0, 0, 0, 0);
|
||||
float4 rDirY = SHUFFLE4(r.dir, 1, 1, 1, 1);
|
||||
float4 rDirZ = SHUFFLE4(r.dir, 2, 2, 2, 2);
|
||||
#elif DO_FLOAT3_WITH_SIMD
|
||||
float4 rOrigX = splatX(r.orig.m);
|
||||
float4 rOrigY = splatY(r.orig.m);
|
||||
float4 rOrigZ = splatZ(r.orig.m);
|
||||
float4 rDirX = splatX(r.dir.m);
|
||||
float4 rDirY = splatY(r.dir.m);
|
||||
float4 rDirZ = splatZ(r.dir.m);
|
||||
#else
|
||||
float4 rOrigX = float4(r.orig.x);
|
||||
float4 rOrigY = float4(r.orig.y);
|
||||
float4 rOrigZ = float4(r.orig.z);
|
||||
float4 rDirX = float4(r.dir.x);
|
||||
float4 rDirY = float4(r.dir.y);
|
||||
float4 rDirZ = float4(r.dir.z);
|
||||
#endif
|
||||
float4 tMin4 = float4(tMin);
|
||||
#if USE_NEON
|
||||
int32x4_t curId = vcombine_u32(vcreate_u32(0ULL | (1ULL<<32)), vcreate_u32(2ULL | (3ULL<<32)));
|
||||
#else
|
||||
__m128i curId = _mm_set_epi32(3, 2, 1, 0);
|
||||
#endif
|
||||
// process 4 spheres at once
|
||||
for (int i = 0; i < spheres.simdCount; i += kSimdWidth)
|
||||
{
|
||||
// load data for 4 spheres
|
||||
float4 sCenterX = float4(spheres.centerX + i);
|
||||
float4 sCenterY = float4(spheres.centerY + i);
|
||||
float4 sCenterZ = float4(spheres.centerZ + i);
|
||||
float4 sSqRadius = float4(spheres.sqRadius + i);
|
||||
// note: we flip this vector and calculate -b (nb) since that happens to be slightly preferable computationally
|
||||
float4 coX = sCenterX - rOrigX;
|
||||
float4 coY = sCenterY - rOrigY;
|
||||
float4 coZ = sCenterZ - rOrigZ;
|
||||
float4 nb = coX * rDirX + coY * rDirY + coZ * rDirZ;
|
||||
float4 c = coX * coX + coY * coY + coZ * coZ - sSqRadius;
|
||||
float4 discr = nb * nb - c;
|
||||
bool4 discrPos = discr > float4(0.0f);
|
||||
// if ray hits any of the 4 spheres
|
||||
if (any(discrPos))
|
||||
{
|
||||
float4 discrSq = sqrtf(discr);
|
||||
|
||||
// ray could hit spheres at t0 & t1
|
||||
float4 t0 = nb - discrSq;
|
||||
float4 t1 = nb + discrSq;
|
||||
|
||||
float4 t = select(t1, t0, t0 > tMin4); // if t0 is above min, take it (since it's the earlier hit); else try t1.
|
||||
bool4 msk = discrPos & (t > tMin4) & (t < hitT);
|
||||
// if hit, take it
|
||||
id = select(id, curId, msk);
|
||||
hitT = select(hitT, t, msk);
|
||||
}
|
||||
#if USE_NEON
|
||||
curId = vaddq_s32(curId, vdupq_n_s32(kSimdWidth));
|
||||
#else
|
||||
curId = _mm_add_epi32(curId, _mm_set1_epi32(kSimdWidth));
|
||||
#endif
|
||||
}
|
||||
// now we have up to 4 hits, find and return closest one
|
||||
float minT = hmin(hitT);
|
||||
if (minT < tMax) // any actual hits?
|
||||
{
|
||||
int minMask = mask(hitT == float4(minT));
|
||||
if (minMask != 0)
|
||||
{
|
||||
int id_scalar[4];
|
||||
float hitT_scalar[4];
|
||||
#if USE_NEON
|
||||
vst1q_s32(id_scalar, id);
|
||||
vst1q_f32(hitT_scalar, hitT.m);
|
||||
#else
|
||||
_mm_storeu_si128((__m128i *)id_scalar, id);
|
||||
_mm_storeu_ps(hitT_scalar, hitT.m);
|
||||
#endif
|
||||
|
||||
// In general, you would do this with a bit scan (first set/trailing zero count).
|
||||
// But who cares, it's only 16 options.
|
||||
static const int laneId[16] =
|
||||
{
|
||||
0, 0, 1, 0, // 00xx
|
||||
2, 0, 1, 0, // 01xx
|
||||
3, 0, 1, 0, // 10xx
|
||||
2, 0, 1, 0, // 11xx
|
||||
};
|
||||
|
||||
int lane = laneId[minMask];
|
||||
int hitId = id_scalar[lane];
|
||||
float finalHitT = hitT_scalar[lane];
|
||||
|
||||
outHit.pos = r.pointAt(finalHitT);
|
||||
outHit.normal = (outHit.pos - float3(spheres.centerX[hitId], spheres.centerY[hitId], spheres.centerZ[hitId])) * spheres.invRadius[hitId];
|
||||
outHit.t = finalHitT;
|
||||
return hitId;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
|
||||
#else // #if DO_HIT_SPHERES_SIMD
|
||||
|
||||
float hitT = tMax;
|
||||
int id = -1;
|
||||
for (int i = 0; i < spheres.count; ++i)
|
||||
{
|
||||
float coX = spheres.centerX[i] - r.orig.getX();
|
||||
float coY = spheres.centerY[i] - r.orig.getY();
|
||||
float coZ = spheres.centerZ[i] - r.orig.getZ();
|
||||
float nb = coX * r.dir.getX() + coY * r.dir.getY() + coZ * r.dir.getZ();
|
||||
float c = coX * coX + coY * coY + coZ * coZ - spheres.sqRadius[i];
|
||||
float discr = nb * nb - c;
|
||||
if (discr > 0)
|
||||
{
|
||||
float discrSq = sqrtf(discr);
|
||||
|
||||
// Try earlier t
|
||||
float t = nb - discrSq;
|
||||
if (t <= tMin) // before min, try later t!
|
||||
t = nb + discrSq;
|
||||
|
||||
if (t > tMin && t < hitT)
|
||||
{
|
||||
id = i;
|
||||
hitT = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (id != -1)
|
||||
{
|
||||
outHit.pos = r.pointAt(hitT);
|
||||
outHit.normal = (outHit.pos - float3(spheres.centerX[id], spheres.centerY[id], spheres.centerZ[id])) * spheres.invRadius[id];
|
||||
outHit.t = hitT;
|
||||
return id;
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
#endif // #else of #if DO_HIT_SPHERES_SIMD
|
||||
}
|
||||
@@ -1,436 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include "Config.h"
|
||||
#include "MathSimd.h"
|
||||
|
||||
#define kPI 3.1415926f
|
||||
|
||||
// SSE/SIMD vector largely based on http://www.codersnotes.com/notes/maths-lib-2016/
|
||||
#if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
|
||||
#if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
// ---- SSE implementation
|
||||
|
||||
// SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz).
|
||||
// SHUFFLE3(v, 0,0,0) splats the X (v.xxx).
|
||||
#define SHUFFLE3(V, X,Y,Z) float3(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(Z,Z,Y,X)))
|
||||
|
||||
struct float3
|
||||
{
|
||||
VM_INLINE float3() {}
|
||||
VM_INLINE explicit float3(const float *p) { m = _mm_set_ps(p[2], p[2], p[1], p[0]); }
|
||||
VM_INLINE explicit float3(float x, float y, float z) { m = _mm_set_ps(z, z, y, x); }
|
||||
VM_INLINE explicit float3(float v) { m = _mm_set1_ps(v); }
|
||||
VM_INLINE explicit float3(__m128 v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return _mm_cvtss_f32(m); }
|
||||
VM_INLINE float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
|
||||
VM_INLINE float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
|
||||
|
||||
VM_INLINE float3 yzx() const { return SHUFFLE3(*this, 1, 2, 0); }
|
||||
VM_INLINE float3 zxy() const { return SHUFFLE3(*this, 2, 0, 1); }
|
||||
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
void setX(float x)
|
||||
{
|
||||
m = _mm_move_ss(m, _mm_set_ss(x));
|
||||
}
|
||||
void setY(float y)
|
||||
{
|
||||
__m128 t = _mm_move_ss(m, _mm_set_ss(y));
|
||||
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 2, 0, 0));
|
||||
m = _mm_move_ss(t, m);
|
||||
}
|
||||
void setZ(float z)
|
||||
{
|
||||
__m128 t = _mm_move_ss(m, _mm_set_ss(z));
|
||||
t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 0, 1, 0));
|
||||
m = _mm_move_ss(t, m);
|
||||
}
|
||||
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
typedef float3 bool3;
|
||||
|
||||
// Lane-wise arithmetic between two vectors.
VM_INLINE float3 operator+ (float3 a, float3 b) { return float3(_mm_add_ps(a.m, b.m)); }
VM_INLINE float3 operator- (float3 a, float3 b) { return float3(_mm_sub_ps(a.m, b.m)); }
VM_INLINE float3 operator* (float3 a, float3 b) { return float3(_mm_mul_ps(a.m, b.m)); }
VM_INLINE float3 operator/ (float3 a, float3 b) { return float3(_mm_div_ps(a.m, b.m)); }

// Vector-scalar and scalar-vector forms; the scalar is broadcast to every lane.
VM_INLINE float3 operator* (float3 a, float b) { return float3(_mm_mul_ps(a.m, _mm_set1_ps(b))); }
VM_INLINE float3 operator/ (float3 a, float b) { return float3(_mm_div_ps(a.m, _mm_set1_ps(b))); }
VM_INLINE float3 operator* (float a, float3 b) { return float3(_mm_mul_ps(_mm_set1_ps(a), b.m)); }
VM_INLINE float3 operator/ (float a, float3 b) { return float3(_mm_div_ps(_mm_set1_ps(a), b.m)); }

// Compound assignment, expressed through the binary operators above.
VM_INLINE float3& operator+= (float3 &a, float3 b) { return a = a + b; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { return a = a - b; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { return a = a * b; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { return a = a / b; }
VM_INLINE float3& operator*= (float3 &a, float b) { return a = a * b; }
VM_INLINE float3& operator/= (float3 &a, float b) { return a = a / b; }

// Lane-wise comparisons; each result lane is all-ones when true, all-zeros otherwise.
VM_INLINE bool3 operator==(float3 a, float3 b) { return bool3(_mm_cmpeq_ps(a.m, b.m)); }
VM_INLINE bool3 operator!=(float3 a, float3 b) { return bool3(_mm_cmpneq_ps(a.m, b.m)); }
VM_INLINE bool3 operator< (float3 a, float3 b) { return bool3(_mm_cmplt_ps(a.m, b.m)); }
VM_INLINE bool3 operator> (float3 a, float3 b) { return bool3(_mm_cmpgt_ps(a.m, b.m)); }
VM_INLINE bool3 operator<=(float3 a, float3 b) { return bool3(_mm_cmple_ps(a.m, b.m)); }
VM_INLINE bool3 operator>=(float3 a, float3 b) { return bool3(_mm_cmpge_ps(a.m, b.m)); }

// Lane-wise minimum / maximum.
VM_INLINE float3 min(float3 a, float3 b) { return float3(_mm_min_ps(a.m, b.m)); }
VM_INLINE float3 max(float3 a, float3 b) { return float3(_mm_max_ps(a.m, b.m)); }

// Negation computed as (0 - a).
VM_INLINE float3 operator- (float3 a) { return float3(_mm_sub_ps(_mm_setzero_ps(), a.m)); }
|
||||
|
||||
// Horizontal minimum of X, Y and Z.
VM_INLINE float hmin(float3 v)
{
    // Lane 0 <- min(x, y).
    const float3 xyMin = min(v, SHUFFLE3(v, 1, 0, 2));
    // Lane 0 <- min(min(x, y), z).
    const float3 xyzMin = min(xyMin, SHUFFLE3(xyMin, 2, 0, 1));
    return xyzMin.getX();
}
|
||||
// Horizontal maximum of X, Y and Z.
VM_INLINE float hmax(float3 v)
{
    // Lane 0 <- max(x, y).
    const float3 xyMax = max(v, SHUFFLE3(v, 1, 0, 2));
    // Lane 0 <- max(max(x, y), z).
    const float3 xyzMax = max(xyMax, SHUFFLE3(xyMax, 2, 0, 1));
    return xyzMax.getX();
}
|
||||
|
||||
// Cross product.
//   x <- a.y*b.z - a.z*b.y
//   y <- a.z*b.x - a.x*b.z
//   z <- a.x*b.y - a.y*b.x
// Computing the rotated result first and rotating once at the end saves a shuffle.
VM_INLINE float3 cross(float3 a, float3 b)
{
    const float3 rotated = a.zxy() * b - a * b.zxy();
    return rotated.zxy();
}
|
||||
|
||||
// Returns a 3-bit code where bit0..bit2 is X..Z
|
||||
VM_INLINE unsigned mask(float3 v) { return _mm_movemask_ps(v.m) & 7; }
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
|
||||
|
||||
// Clamps every component of t to the range [a, b].
VM_INLINE float3 clamp(float3 t, float3 a, float3 b)
{
    const float3 raised = max(t, a);
    return min(raised, b);
}
// Sum of the three components.
VM_INLINE float sum(float3 v)
{
    return v.getX() + v.getY() + v.getZ();
}
// Dot product.
VM_INLINE float dot(float3 a, float3 b)
{
    return sum(a * b);
}
|
||||
|
||||
#else // #if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
// ---- NEON implementation
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
struct float3
|
||||
{
|
||||
VM_INLINE float3() {}
|
||||
VM_INLINE explicit float3(const float *p) { float v[4] = {p[0], p[1], p[2], 0}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float3(float x, float y, float z) { float v[4] = {x, y, z, 0}; m = vld1q_f32(v); }
|
||||
VM_INLINE explicit float3(float v) { m = vdupq_n_f32(v); }
|
||||
VM_INLINE explicit float3(float32x4_t v) { m = v; }
|
||||
|
||||
VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); }
|
||||
VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); }
|
||||
VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); }
|
||||
|
||||
VM_INLINE float3 yzx() const
|
||||
{
|
||||
float32x2_t low = vget_low_f32(m);
|
||||
float32x4_t yzx = vcombine_f32(vext_f32(low, vget_high_f32(m), 1), low);
|
||||
return float3(yzx);
|
||||
}
|
||||
VM_INLINE float3 zxy() const
|
||||
{
|
||||
float32x4_t p = m;
|
||||
p = vuzpq_f32(vreinterpretq_f32_s32(vextq_s32(vreinterpretq_s32_f32(p), vreinterpretq_s32_f32(p), 1)), p).val[1];
|
||||
return float3(p);
|
||||
}
|
||||
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
void setX(float x)
|
||||
{
|
||||
m = vsetq_lane_f32(x, m, 0);
|
||||
}
|
||||
void setY(float y)
|
||||
{
|
||||
m = vsetq_lane_f32(y, m, 1);
|
||||
}
|
||||
void setZ(float z)
|
||||
{
|
||||
m = vsetq_lane_f32(z, m, 2);
|
||||
}
|
||||
|
||||
float32x4_t m;
|
||||
};
|
||||
|
||||
typedef float3 bool3;
|
||||
|
||||
// Lane-wise reciprocal: hardware estimate refined by two vrecpsq_f32 steps.
VM_INLINE float32x4_t rcp_2(float32x4_t v)
{
    float32x4_t estimate = vrecpeq_f32(v);
    for (int pass = 0; pass < 2; ++pass)
    {
        const float32x4_t correction = vrecpsq_f32(estimate, v);
        estimate = vmulq_f32(correction, estimate);
    }
    return estimate;
}
|
||||
|
||||
// Lane-wise arithmetic between two vectors. Division multiplies by the refined
// reciprocal from rcp_2, so it is approximate rather than an exact divide.
VM_INLINE float3 operator+ (float3 a, float3 b) { return float3(vaddq_f32(a.m, b.m)); }
VM_INLINE float3 operator- (float3 a, float3 b) { return float3(vsubq_f32(a.m, b.m)); }
VM_INLINE float3 operator* (float3 a, float3 b) { return float3(vmulq_f32(a.m, b.m)); }
VM_INLINE float3 operator/ (float3 a, float3 b) { return float3(vmulq_f32(a.m, rcp_2(b.m))); }

// Vector-scalar and scalar-vector forms; the scalar is broadcast to every lane.
VM_INLINE float3 operator* (float3 a, float b) { return float3(vmulq_f32(a.m, vdupq_n_f32(b))); }
VM_INLINE float3 operator/ (float3 a, float b) { return float3(vmulq_f32(a.m, rcp_2(vdupq_n_f32(b)))); }
VM_INLINE float3 operator* (float a, float3 b) { return float3(vmulq_f32(vdupq_n_f32(a), b.m)); }
VM_INLINE float3 operator/ (float a, float3 b) { return float3(vmulq_f32(vdupq_n_f32(a), rcp_2(b.m))); }

// Compound assignment, expressed through the binary operators above.
VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; }
VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; }

// Lane-wise comparisons. NEON compares yield uint32x4_t masks (all-ones / all-zeros
// per lane); they are reinterpreted explicitly as float lanes so the code does not
// depend on the compiler accepting implicit ("lax") vector conversions.
VM_INLINE bool3 operator==(float3 a, float3 b) { return bool3(vreinterpretq_f32_u32(vceqq_f32(a.m, b.m))); }
VM_INLINE bool3 operator!=(float3 a, float3 b) { return bool3(vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.m, b.m)))); }
VM_INLINE bool3 operator< (float3 a, float3 b) { return bool3(vreinterpretq_f32_u32(vcltq_f32(a.m, b.m))); }
VM_INLINE bool3 operator> (float3 a, float3 b) { return bool3(vreinterpretq_f32_u32(vcgtq_f32(a.m, b.m))); }
VM_INLINE bool3 operator<=(float3 a, float3 b) { return bool3(vreinterpretq_f32_u32(vcleq_f32(a.m, b.m))); }
VM_INLINE bool3 operator>=(float3 a, float3 b) { return bool3(vreinterpretq_f32_u32(vcgeq_f32(a.m, b.m))); }

// Lane-wise minimum / maximum.
VM_INLINE float3 min(float3 a, float3 b) { return float3(vminq_f32(a.m, b.m)); }
VM_INLINE float3 max(float3 a, float3 b) { return float3(vmaxq_f32(a.m, b.m)); }

// Lane-wise negation.
VM_INLINE float3 operator- (float3 a) { return float3(vnegq_f32(a.m)); }
|
||||
|
||||
// Horizontal minimum of X, Y and Z.
// The fourth lane is not part of the vector (the component constructors set it
// to 0), so it is replaced by a copy of Z before reducing; otherwise the
// minimum of an all-positive vector would come out as 0.
VM_INLINE float hmin(float3 v)
{
    const float32x2_t xy = vget_low_f32(v.m);
    const float32x2_t zz = vdup_lane_f32(vget_high_f32(v.m), 0);
    const float32x2_t pairMin = vpmin_f32(xy, zz); // (min(x,y), z)
    const float32x2_t overall = vpmin_f32(pairMin, pairMin);
    return vget_lane_f32(overall, 0);
}
|
||||
// Horizontal maximum of X, Y and Z.
// The fourth lane is not part of the vector (the component constructors set it
// to 0), so it is replaced by a copy of Z before reducing; otherwise the
// maximum of an all-negative vector would come out as 0.
VM_INLINE float hmax(float3 v)
{
    const float32x2_t xy = vget_low_f32(v.m);
    const float32x2_t zz = vdup_lane_f32(vget_high_f32(v.m), 0);
    const float32x2_t pairMax = vpmax_f32(xy, zz); // (max(x,y), z)
    const float32x2_t overall = vpmax_f32(pairMax, pairMax);
    return vget_lane_f32(overall, 0);
}
|
||||
|
||||
// Cross product.
//   x <- a.y*b.z - a.z*b.y
//   y <- a.z*b.x - a.x*b.z
//   z <- a.x*b.y - a.y*b.x
// Computing the rotated result first and rotating once at the end saves a shuffle.
VM_INLINE float3 cross(float3 a, float3 b)
{
    const float3 rotated = a.zxy() * b - a * b.zxy();
    return rotated.zxy();
}
|
||||
|
||||
// Returns a 3-bit code where bit0..bit2 is X..Z
|
||||
VM_INLINE unsigned mask(float3 v)
|
||||
{
|
||||
static const uint32x4_t movemask = { 1, 2, 4, 8 };
|
||||
static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
||||
uint32x4_t t0 = vreinterpretq_u32_f32(v.m);
|
||||
uint32x4_t t1 = vtstq_u32(t0, highbit);
|
||||
uint32x4_t t2 = vandq_u32(t1, movemask);
|
||||
uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2));
|
||||
return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1);
|
||||
}
|
||||
// Once we have a comparison, we can branch based on its results:
|
||||
VM_INLINE bool any(bool3 v) { return mask(v) != 0; }
|
||||
VM_INLINE bool all(bool3 v) { return mask(v) == 7; }
|
||||
|
||||
// Clamps every component of t to the range [a, b].
VM_INLINE float3 clamp(float3 t, float3 a, float3 b)
{
    const float3 raised = max(t, a);
    return min(raised, b);
}
// Sum of the three components.
VM_INLINE float sum(float3 v)
{
    return v.getX() + v.getY() + v.getZ();
}
// Dot product.
VM_INLINE float dot(float3 a, float3 b)
{
    return sum(a * b);
}
|
||||
|
||||
|
||||
#endif // #else of #if !defined(__arm__) && !defined(__arm64__)
|
||||
|
||||
#else // #if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
// ---- Simple scalar C implementation
|
||||
|
||||
|
||||
struct float3
|
||||
{
|
||||
float3() : x(0), y(0), z(0) {}
|
||||
float3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}
|
||||
|
||||
float3 operator-() const { return float3(-x, -y, -z); }
|
||||
float3& operator+=(const float3& o) { x+=o.x; y+=o.y; z+=o.z; return *this; }
|
||||
float3& operator-=(const float3& o) { x-=o.x; y-=o.y; z-=o.z; return *this; }
|
||||
float3& operator*=(const float3& o) { x*=o.x; y*=o.y; z*=o.z; return *this; }
|
||||
float3& operator*=(float o) { x*=o; y*=o; z*=o; return *this; }
|
||||
|
||||
VM_INLINE float getX() const { return x; }
|
||||
VM_INLINE float getY() const { return y; }
|
||||
VM_INLINE float getZ() const { return z; }
|
||||
VM_INLINE void setX(float x_) { x = x_; }
|
||||
VM_INLINE void setY(float y_) { y = y_; }
|
||||
VM_INLINE void setZ(float z_) { z = z_; }
|
||||
VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
|
||||
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
// Component-wise sum.
VM_INLINE float3 operator+(const float3& a, const float3& b)
{
    return float3(a.x + b.x, a.y + b.y, a.z + b.z);
}
// Component-wise difference.
VM_INLINE float3 operator-(const float3& a, const float3& b)
{
    return float3(a.x - b.x, a.y - b.y, a.z - b.z);
}
// Component-wise product.
VM_INLINE float3 operator*(const float3& a, const float3& b)
{
    return float3(a.x * b.x, a.y * b.y, a.z * b.z);
}
// Uniform scale (vector * scalar and scalar * vector).
VM_INLINE float3 operator*(const float3& a, float b)
{
    return float3(a.x * b, a.y * b, a.z * b);
}
VM_INLINE float3 operator*(float a, const float3& b)
{
    return float3(a * b.x, a * b.y, a * b.z);
}
// Dot product.
VM_INLINE float dot(const float3& a, const float3& b)
{
    return a.x * b.x + a.y * b.y + a.z * b.z;
}
|
||||
// Cross product; the Y component is written as the negated (x,z) minor.
VM_INLINE float3 cross(const float3& a, const float3& b)
{
    const float cx = a.y*b.z - a.z*b.y;
    const float cy = -(a.x*b.z - a.z*b.x);
    const float cz = a.x*b.y - a.y*b.x;
    return float3(cx, cy, cz);
}
|
||||
#endif // #else of #if DO_FLOAT3_WITH_SIMD
|
||||
|
||||
// Euclidean length of v.
VM_INLINE float length(float3 v)
{
    const float squared = dot(v, v);
    return sqrtf(squared);
}
// Squared length of v (no square root).
VM_INLINE float sqLength(float3 v)
{
    return dot(v, v);
}
// v scaled to unit length; no guard against a zero-length input.
VM_INLINE float3 normalize(float3 v)
{
    const float invLength = 1.0f / length(v);
    return v * invLength;
}
// Linear interpolation: a at t = 0, b at t = 1.
VM_INLINE float3 lerp(float3 a, float3 b, float t)
{
    const float3 delta = b - a;
    return a + delta * t;
}
|
||||
|
||||
|
||||
// Debug check that v is (approximately) unit length: its squared length must
// be within 0.01 of 1. Compiles to nothing when asserts are disabled.
inline void AssertUnit(float3 v)
{
    assert(fabsf(sqLength(v) - 1.0f) < 0.01f);
    (void)v; // keeps release builds free of unused-parameter warnings
}
|
||||
|
||||
// Mirrors v about the plane with normal n: v - 2 (v . n) n.
inline float3 reflect(float3 v, float3 n)
{
    const float twiceProjection = 2*dot(v,n);
    return v - twiceProjection*n;
}
|
||||
|
||||
// Refracts unit vector v through a surface with normal n, where nint is the
// ratio of refraction indices.
// Returns true and writes outRefracted when refraction is possible; returns
// false (outRefracted untouched) when the discriminant is not positive.
inline bool refract(float3 v, float3 n, float nint, float3& outRefracted)
{
    AssertUnit(v);
    const float cosIncident = dot(v, n);
    const float discr = 1.0f - nint*nint*(1-cosIncident*cosIncident);
    if (!(discr > 0))
        return false;

    outRefracted = nint * (v - n*cosIncident) - n*sqrtf(discr);
    return true;
}
|
||||
// Schlick-style reflectance approximation: r0 + (1 - r0) * (1 - cosine)^5,
// with r0 = ((1 - ri) / (1 + ri))^2.
inline float schlick(float cosine, float ri)
{
    const float ratio = (1-ri) / (1+ri);
    const float r0 = ratio*ratio;
    const float falloff = powf(1-cosine, 5);
    return r0 + (1-r0)*falloff;
}
|
||||
|
||||
struct Ray
|
||||
{
|
||||
Ray() {}
|
||||
Ray(float3 orig_, float3 dir_) : orig(orig_), dir(dir_) { AssertUnit(dir); }
|
||||
|
||||
float3 pointAt(float t) const { return orig + dir * t; }
|
||||
|
||||
float3 orig;
|
||||
float3 dir;
|
||||
};
|
||||
|
||||
|
||||
struct Hit
|
||||
{
|
||||
float3 pos;
|
||||
float3 normal;
|
||||
float t;
|
||||
};
|
||||
|
||||
|
||||
struct Sphere
|
||||
{
|
||||
Sphere() : radius(1.0f), invRadius(0.0f) {}
|
||||
Sphere(float3 center_, float radius_) : center(center_), radius(radius_), invRadius(0.0f) {}
|
||||
|
||||
void UpdateDerivedData() { invRadius = 1.0f/radius; }
|
||||
|
||||
float3 center;
|
||||
float radius;
|
||||
float invRadius;
|
||||
};
|
||||
|
||||
|
||||
// data for all spheres in a "structure of arrays" layout
|
||||
struct SpheresSoA
|
||||
{
|
||||
SpheresSoA(int c)
|
||||
{
|
||||
count = c;
|
||||
// we'll be processing spheres in kSimdWidth chunks, so make sure to allocate
|
||||
// enough space
|
||||
simdCount = (c + (kSimdWidth - 1)) / kSimdWidth * kSimdWidth;
|
||||
centerX = new float[simdCount];
|
||||
centerY = new float[simdCount];
|
||||
centerZ = new float[simdCount];
|
||||
sqRadius = new float[simdCount];
|
||||
invRadius = new float[simdCount];
|
||||
// set all data to "impossible sphere" state
|
||||
for (int i = count; i < simdCount; ++i)
|
||||
{
|
||||
centerX[i] = centerY[i] = centerZ[i] = 10000.0f;
|
||||
sqRadius[i] = 0.0f;
|
||||
invRadius[i] = 0.0f;
|
||||
}
|
||||
}
|
||||
~SpheresSoA()
|
||||
{
|
||||
delete[] centerX;
|
||||
delete[] centerY;
|
||||
delete[] centerZ;
|
||||
delete[] sqRadius;
|
||||
delete[] invRadius;
|
||||
}
|
||||
float* centerX;
|
||||
float* centerY;
|
||||
float* centerZ;
|
||||
float* sqRadius;
|
||||
float* invRadius;
|
||||
int simdCount;
|
||||
int count;
|
||||
};
|
||||
|
||||
|
||||
int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit);
|
||||
|
||||
float RandomFloat01(uint32_t& state);
|
||||
float3 RandomInUnitDisk(uint32_t& state);
|
||||
float3 RandomInUnitSphere(uint32_t& state);
|
||||
float3 RandomUnitVector(uint32_t& state);
|
||||
|
||||
struct Camera
|
||||
{
|
||||
Camera() {}
|
||||
// vfov is top to bottom in degrees
|
||||
Camera(const float3& lookFrom, const float3& lookAt, const float3& vup, float vfov, float aspect, float aperture, float focusDist)
|
||||
{
|
||||
lensRadius = aperture / 2;
|
||||
float theta = vfov*kPI/180;
|
||||
float halfHeight = tanf(theta/2);
|
||||
float halfWidth = aspect * halfHeight;
|
||||
origin = lookFrom;
|
||||
w = normalize(lookFrom - lookAt);
|
||||
u = normalize(cross(vup, w));
|
||||
v = cross(w, u);
|
||||
lowerLeftCorner = origin - halfWidth*focusDist*u - halfHeight*focusDist*v - focusDist*w;
|
||||
horizontal = 2*halfWidth*focusDist*u;
|
||||
vertical = 2*halfHeight*focusDist*v;
|
||||
}
|
||||
|
||||
Ray GetRay(float s, float t, uint32_t& state) const
|
||||
{
|
||||
float3 rd = lensRadius * RandomInUnitDisk(state);
|
||||
float3 offset = u * rd.getX() + v * rd.getY();
|
||||
return Ray(origin + offset, normalize(lowerLeftCorner + s*horizontal + t*vertical - origin - offset));
|
||||
}
|
||||
|
||||
float3 origin;
|
||||
float3 lowerLeftCorner;
|
||||
float3 horizontal;
|
||||
float3 vertical;
|
||||
float3 u, v, w;
|
||||
float lensRadius;
|
||||
};
|
||||
|
||||
@@ -1,392 +0,0 @@
|
||||
#include "Config.h"
|
||||
#include "Test.h"
|
||||
#include "Maths.h"
|
||||
#include <algorithm>
|
||||
#if CPU_CAN_DO_THREADS
|
||||
#include "enkiTS/TaskScheduler_c.h"
|
||||
#include <thread>
|
||||
#endif
|
||||
#include <atomic>
|
||||
|
||||
#include "../../../public/tracy/Tracy.hpp"
|
||||
|
||||
// 46 spheres (2 emissive) when enabled; 9 spheres (1 emissive) when disabled
|
||||
#define DO_BIG_SCENE 1
|
||||
|
||||
static Sphere s_Spheres[] =
|
||||
{
|
||||
{float3(0,-100.5,-1), 100},
|
||||
{float3(2,0,-1), 0.5f},
|
||||
{float3(0,0,-1), 0.5f},
|
||||
{float3(-2,0,-1), 0.5f},
|
||||
{float3(2,0,1), 0.5f},
|
||||
{float3(0,0,1), 0.5f},
|
||||
{float3(-2,0,1), 0.5f},
|
||||
{float3(0.5f,1,0.5f), 0.5f},
|
||||
{float3(-1.5f,1.5f,0.f), 0.3f},
|
||||
#if DO_BIG_SCENE
|
||||
{float3(4,0,-3), 0.5f}, {float3(3,0,-3), 0.5f}, {float3(2,0,-3), 0.5f}, {float3(1,0,-3), 0.5f}, {float3(0,0,-3), 0.5f}, {float3(-1,0,-3), 0.5f}, {float3(-2,0,-3), 0.5f}, {float3(-3,0,-3), 0.5f}, {float3(-4,0,-3), 0.5f},
|
||||
{float3(4,0,-4), 0.5f}, {float3(3,0,-4), 0.5f}, {float3(2,0,-4), 0.5f}, {float3(1,0,-4), 0.5f}, {float3(0,0,-4), 0.5f}, {float3(-1,0,-4), 0.5f}, {float3(-2,0,-4), 0.5f}, {float3(-3,0,-4), 0.5f}, {float3(-4,0,-4), 0.5f},
|
||||
{float3(4,0,-5), 0.5f}, {float3(3,0,-5), 0.5f}, {float3(2,0,-5), 0.5f}, {float3(1,0,-5), 0.5f}, {float3(0,0,-5), 0.5f}, {float3(-1,0,-5), 0.5f}, {float3(-2,0,-5), 0.5f}, {float3(-3,0,-5), 0.5f}, {float3(-4,0,-5), 0.5f},
|
||||
{float3(4,0,-6), 0.5f}, {float3(3,0,-6), 0.5f}, {float3(2,0,-6), 0.5f}, {float3(1,0,-6), 0.5f}, {float3(0,0,-6), 0.5f}, {float3(-1,0,-6), 0.5f}, {float3(-2,0,-6), 0.5f}, {float3(-3,0,-6), 0.5f}, {float3(-4,0,-6), 0.5f},
|
||||
{float3(1.5f,1.5f,-2), 0.3f},
|
||||
#endif // #if DO_BIG_SCENE
|
||||
};
|
||||
const int kSphereCount = sizeof(s_Spheres) / sizeof(s_Spheres[0]);
|
||||
|
||||
static SpheresSoA s_SpheresSoA(kSphereCount);
|
||||
|
||||
struct Material
|
||||
{
|
||||
enum Type { Lambert, Metal, Dielectric };
|
||||
Type type;
|
||||
float3 albedo;
|
||||
float3 emissive;
|
||||
float roughness;
|
||||
float ri;
|
||||
};
|
||||
|
||||
// One material per sphere, in the same order as s_Spheres.
// Columns: type, albedo, emissive, roughness, ri.
static Material s_SphereMats[kSphereCount] =
{
    { Material::Lambert,    float3(0.8f, 0.8f, 0.8f), float3(0,0,0),    0,    0 },
    { Material::Lambert,    float3(0.8f, 0.4f, 0.4f), float3(0,0,0),    0,    0 },
    { Material::Lambert,    float3(0.4f, 0.8f, 0.4f), float3(0,0,0),    0,    0 },
    { Material::Metal,      float3(0.4f, 0.4f, 0.8f), float3(0,0,0),    0,    0 },
    { Material::Metal,      float3(0.4f, 0.8f, 0.4f), float3(0,0,0),    0,    0 },
    { Material::Metal,      float3(0.4f, 0.8f, 0.4f), float3(0,0,0),    0.2f, 0 },
    { Material::Metal,      float3(0.4f, 0.8f, 0.4f), float3(0,0,0),    0.6f, 0 },
    { Material::Dielectric, float3(0.4f, 0.4f, 0.4f), float3(0,0,0),    0,    1.5f },
    { Material::Lambert,    float3(0.8f, 0.6f, 0.2f), float3(30,25,15), 0,    0 },
#if DO_BIG_SCENE
    // grid row z = -3: grey Lambert ramp
    { Material::Lambert, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0 },
    // grid row z = -4: grey Metal ramp
    { Material::Metal, float3(0.1f, 0.1f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.2f, 0.2f, 0.2f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.3f, 0.3f, 0.3f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.4f, 0.4f, 0.4f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.5f, 0.5f, 0.5f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.6f, 0.6f, 0.6f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.7f, 0.7f, 0.7f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.8f, 0.8f, 0.8f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.9f, 0.9f, 0.9f), float3(0,0,0), 0, 0 },
    // grid row z = -5: coloured Metal
    { Material::Metal, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0 },
    { Material::Metal, float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0 },
    // grid row z = -6: coloured Lambert
    { Material::Lambert, float3(0.8f, 0.1f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.8f, 0.5f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.8f, 0.8f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.4f, 0.8f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.1f, 0.8f, 0.1f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.1f, 0.8f, 0.5f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.1f, 0.8f, 0.8f), float3(0,0,0), 0, 0 },
    { Material::Lambert, float3(0.1f, 0.1f, 0.8f), float3(0,0,0), 0, 0 },
    // NOTE(review): Metal in an otherwise Lambert row - confirm this is intentional.
    { Material::Metal,   float3(0.5f, 0.1f, 0.8f), float3(0,0,0), 0, 0 },
    // second light source
    { Material::Lambert, float3(0.1f, 0.2f, 0.5f), float3(3,10,20), 0, 0 },
#endif
};
|
||||
|
||||
static int s_EmissiveSpheres[kSphereCount];
|
||||
static int s_EmissiveSphereCount;
|
||||
|
||||
static Camera s_Cam;
|
||||
|
||||
const float kMinT = 0.001f;
|
||||
const float kMaxT = 1.0e7f;
|
||||
const int kMaxDepth = 10;
|
||||
|
||||
|
||||
bool HitWorld(const Ray& r, float tMin, float tMax, Hit& outHit, int& outID)
|
||||
{
|
||||
outID = HitSpheres(r, s_SpheresSoA, tMin, tMax, outHit);
|
||||
return outID != -1;
|
||||
}
|
||||
|
||||
|
||||
static bool Scatter(const Material& mat, const Ray& r_in, const Hit& rec, float3& attenuation, Ray& scattered, float3& outLightE, int& inoutRayCount, uint32_t& state)
|
||||
{
|
||||
ZoneScoped;
|
||||
outLightE = float3(0,0,0);
|
||||
if (mat.type == Material::Lambert)
|
||||
{
|
||||
// random point on unit sphere that is tangent to the hit point
|
||||
float3 target = rec.pos + rec.normal + RandomUnitVector(state);
|
||||
scattered = Ray(rec.pos, normalize(target - rec.pos));
|
||||
attenuation = mat.albedo;
|
||||
|
||||
// sample lights
|
||||
#if DO_LIGHT_SAMPLING
|
||||
for (int j = 0; j < s_EmissiveSphereCount; ++j)
|
||||
{
|
||||
int i = s_EmissiveSpheres[j];
|
||||
const Material& smat = s_SphereMats[i];
|
||||
if (&mat == &smat)
|
||||
continue; // skip self
|
||||
const Sphere& s = s_Spheres[i];
|
||||
|
||||
// create a random direction towards sphere
|
||||
// coord system for sampling: sw, su, sv
|
||||
float3 sw = normalize(s.center - rec.pos);
|
||||
float3 su = normalize(cross(fabs(sw.getX())>0.01f ? float3(0,1,0):float3(1,0,0), sw));
|
||||
float3 sv = cross(sw, su);
|
||||
// sample sphere by solid angle
|
||||
float cosAMax = sqrtf(1.0f - s.radius*s.radius / sqLength(rec.pos-s.center));
|
||||
float eps1 = RandomFloat01(state), eps2 = RandomFloat01(state);
|
||||
float cosA = 1.0f - eps1 + eps1 * cosAMax;
|
||||
float sinA = sqrtf(1.0f - cosA*cosA);
|
||||
float phi = 2 * kPI * eps2;
|
||||
float3 l = su * (cosf(phi) * sinA) + sv * (sinf(phi) * sinA) + sw * cosA;
|
||||
//l = normalize(l); // NOTE(fg): This is already normalized, by construction.
|
||||
|
||||
// shoot shadow ray
|
||||
Hit lightHit;
|
||||
int hitID;
|
||||
++inoutRayCount;
|
||||
if (HitWorld(Ray(rec.pos, l), kMinT, kMaxT, lightHit, hitID) && hitID == i)
|
||||
{
|
||||
float omega = 2 * kPI * (1-cosAMax);
|
||||
|
||||
float3 rdir = r_in.dir;
|
||||
AssertUnit(rdir);
|
||||
float3 nl = dot(rec.normal, rdir) < 0 ? rec.normal : -rec.normal;
|
||||
outLightE += (mat.albedo * smat.emissive) * (std::max(0.0f, dot(l, nl)) * omega / kPI);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
else if (mat.type == Material::Metal)
|
||||
{
|
||||
AssertUnit(r_in.dir); AssertUnit(rec.normal);
|
||||
float3 refl = reflect(r_in.dir, rec.normal);
|
||||
// reflected ray, and random inside of sphere based on roughness
|
||||
float roughness = mat.roughness;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
roughness = 0; // until we get better BRDF for metals
|
||||
#endif
|
||||
scattered = Ray(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
|
||||
attenuation = mat.albedo;
|
||||
return dot(scattered.dir, rec.normal) > 0;
|
||||
}
|
||||
else if (mat.type == Material::Dielectric)
|
||||
{
|
||||
AssertUnit(r_in.dir); AssertUnit(rec.normal);
|
||||
float3 outwardN;
|
||||
float3 rdir = r_in.dir;
|
||||
float3 refl = reflect(rdir, rec.normal);
|
||||
float nint;
|
||||
attenuation = float3(1,1,1);
|
||||
float3 refr;
|
||||
float reflProb;
|
||||
float cosine;
|
||||
if (dot(rdir, rec.normal) > 0)
|
||||
{
|
||||
outwardN = -rec.normal;
|
||||
nint = mat.ri;
|
||||
cosine = mat.ri * dot(rdir, rec.normal);
|
||||
}
|
||||
else
|
||||
{
|
||||
outwardN = rec.normal;
|
||||
nint = 1.0f / mat.ri;
|
||||
cosine = -dot(rdir, rec.normal);
|
||||
}
|
||||
if (refract(rdir, outwardN, nint, refr))
|
||||
{
|
||||
reflProb = schlick(cosine, mat.ri);
|
||||
}
|
||||
else
|
||||
{
|
||||
reflProb = 1;
|
||||
}
|
||||
if (RandomFloat01(state) < reflProb)
|
||||
scattered = Ray(rec.pos, normalize(refl));
|
||||
else
|
||||
scattered = Ray(rec.pos, normalize(refr));
|
||||
}
|
||||
else
|
||||
{
|
||||
attenuation = float3(1,0,1);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Recursively traces r through the scene and returns the light arriving along it.
//   depth          - current recursion depth; scattering stops at kMaxDepth
//   inoutRayCount  - incremented for every ray traced
//   state          - RNG state
//   doMaterialE    - when false (and DO_LIGHT_SAMPLING is on), the emission of the
//                    surface hit by this ray is not added
static float3 Trace(const Ray& r, int depth, int& inoutRayCount, uint32_t& state, bool doMaterialE = true)
{
    ZoneScoped;
    Hit rec;
    int id = 0;
    ++inoutRayCount;

    if (!HitWorld(r, kMinT, kMaxT, rec, id))
    {
        // sky
#if DO_MITSUBA_COMPARE
        return float3(0.15f,0.21f,0.3f); // easier compare with Mitsuba's constant environment light
#else
        const float3 unitDir = r.dir;
        const float t = 0.5f*(unitDir.getY() + 1.0f);
        return ((1.0f-t)*float3(1.0f, 1.0f, 1.0f) + t*float3(0.5f, 0.7f, 1.0f)) * 0.3f;
#endif
    }

    const Material& mat = s_SphereMats[id];
    float3 matE = mat.emissive;

    Ray scattered;
    float3 attenuation;
    float3 lightE;
    const bool bounced = depth < kMaxDepth && Scatter(mat, r, rec, attenuation, scattered, lightE, inoutRayCount, state);
    if (!bounced)
        return matE;

#if DO_LIGHT_SAMPLING
    if (!doMaterialE) matE = float3(0,0,0); // don't add material emission if told so
    // For Lambert materials we just did explicit light (emissive) sampling and
    // already accounted for the lights' contribution, so if the next ray bounce
    // hits a light again, don't add its emission.
    doMaterialE = (mat.type != Material::Lambert);
#endif
    return matE + lightE + attenuation * Trace(scattered, depth+1, inoutRayCount, state, doMaterialE);
}
|
||||
|
||||
#if CPU_CAN_DO_THREADS
|
||||
static enkiTaskScheduler* g_TS;
|
||||
#endif
|
||||
|
||||
void InitializeTest()
|
||||
{
|
||||
ZoneScoped;
|
||||
#if CPU_CAN_DO_THREADS
|
||||
g_TS = enkiNewTaskScheduler();
|
||||
enkiInitTaskSchedulerNumThreads(g_TS, std::max<int>( 2, std::thread::hardware_concurrency() - 2));
|
||||
#endif
|
||||
}
|
||||
|
||||
void ShutdownTest()
|
||||
{
|
||||
ZoneScoped;
|
||||
#if CPU_CAN_DO_THREADS
|
||||
enkiDeleteTaskScheduler(g_TS);
|
||||
#endif
|
||||
}
|
||||
|
||||
struct JobData
|
||||
{
|
||||
float time;
|
||||
int frameCount;
|
||||
int screenWidth, screenHeight;
|
||||
float* backbuffer;
|
||||
Camera* cam;
|
||||
std::atomic<int> rayCount;
|
||||
unsigned testFlags;
|
||||
};
|
||||
|
||||
static void TraceRowJob(uint32_t start, uint32_t end, uint32_t threadnum, void* data_)
|
||||
{
|
||||
ZoneScoped;
|
||||
JobData& data = *(JobData*)data_;
|
||||
float* backbuffer = data.backbuffer + start * data.screenWidth * 4;
|
||||
float invWidth = 1.0f / data.screenWidth;
|
||||
float invHeight = 1.0f / data.screenHeight;
|
||||
float lerpFac = float(data.frameCount) / float(data.frameCount+1);
|
||||
if (data.testFlags & kFlagAnimate)
|
||||
lerpFac *= DO_ANIMATE_SMOOTHING;
|
||||
if (!(data.testFlags & kFlagProgressive))
|
||||
lerpFac = 0;
|
||||
int rayCount = 0;
|
||||
for (uint32_t y = start; y < end; ++y)
|
||||
{
|
||||
uint32_t state = (y * 9781 + data.frameCount * 6271) | 1;
|
||||
for (int x = 0; x < data.screenWidth; ++x)
|
||||
{
|
||||
float3 col(0, 0, 0);
|
||||
for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
|
||||
{
|
||||
float u = float(x + RandomFloat01(state)) * invWidth;
|
||||
float v = float(y + RandomFloat01(state)) * invHeight;
|
||||
Ray r = data.cam->GetRay(u, v, state);
|
||||
col += Trace(r, 0, rayCount, state);
|
||||
}
|
||||
col *= 1.0f / float(DO_SAMPLES_PER_PIXEL);
|
||||
|
||||
float3 prev(backbuffer[0], backbuffer[1], backbuffer[2]);
|
||||
col = prev * lerpFac + col * (1-lerpFac);
|
||||
col.store(backbuffer);
|
||||
backbuffer += 4;
|
||||
}
|
||||
}
|
||||
data.rayCount += rayCount;
|
||||
}
|
||||
|
||||
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags)
|
||||
{
|
||||
ZoneScoped;
|
||||
if (testFlags & kFlagAnimate)
|
||||
{
|
||||
s_Spheres[1].center.setY(cosf(time) + 1.0f);
|
||||
s_Spheres[8].center.setZ(sinf(time)*0.3f);
|
||||
}
|
||||
float3 lookfrom(0, 2, 3);
|
||||
float3 lookat(0, 0, 0);
|
||||
float distToFocus = 3;
|
||||
#if DO_MITSUBA_COMPARE
|
||||
float aperture = 0.0f;
|
||||
#else
|
||||
float aperture = 0.1f;
|
||||
#endif
|
||||
#if DO_BIG_SCENE
|
||||
aperture *= 0.2f;
|
||||
#endif
|
||||
|
||||
s_EmissiveSphereCount = 0;
|
||||
for (int i = 0; i < kSphereCount; ++i)
|
||||
{
|
||||
Sphere& s = s_Spheres[i];
|
||||
s.UpdateDerivedData();
|
||||
s_SpheresSoA.centerX[i] = s.center.getX();
|
||||
s_SpheresSoA.centerY[i] = s.center.getY();
|
||||
s_SpheresSoA.centerZ[i] = s.center.getZ();
|
||||
s_SpheresSoA.sqRadius[i] = s.radius * s.radius;
|
||||
s_SpheresSoA.invRadius[i] = s.invRadius;
|
||||
|
||||
// Remember IDs of emissive spheres (light sources)
|
||||
const Material& smat = s_SphereMats[i];
|
||||
if (smat.emissive.getX() > 0 || smat.emissive.getY() > 0 || smat.emissive.getZ() > 0)
|
||||
{
|
||||
s_EmissiveSpheres[s_EmissiveSphereCount] = i;
|
||||
s_EmissiveSphereCount++;
|
||||
}
|
||||
}
|
||||
|
||||
s_Cam = Camera(lookfrom, lookat, float3(0, 1, 0), 60, float(screenWidth) / float(screenHeight), aperture, distToFocus);
|
||||
}
|
||||
|
||||
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags)
|
||||
{
|
||||
ZoneScoped;
|
||||
JobData args;
|
||||
args.time = time;
|
||||
args.frameCount = frameCount;
|
||||
args.screenWidth = screenWidth;
|
||||
args.screenHeight = screenHeight;
|
||||
args.backbuffer = backbuffer;
|
||||
args.cam = &s_Cam;
|
||||
args.testFlags = testFlags;
|
||||
args.rayCount = 0;
|
||||
|
||||
#if CPU_CAN_DO_THREADS
|
||||
enkiTaskSet* task = enkiCreateTaskSet(g_TS, TraceRowJob);
|
||||
bool threaded = true;
|
||||
enkiAddTaskSetToPipeMinRange(g_TS, task, &args, screenHeight, threaded ? 4 : screenHeight);
|
||||
enkiWaitForTaskSet(g_TS, task);
|
||||
enkiDeleteTaskSet(task);
|
||||
#else
|
||||
TraceRowJob(0, screenHeight, 0, &args);
|
||||
#endif
|
||||
|
||||
outRayCount = args.rayCount;
|
||||
}
|
||||
|
||||
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize)
|
||||
{
|
||||
ZoneScoped;
|
||||
outCount = kSphereCount;
|
||||
outObjectSize = sizeof(Sphere);
|
||||
outMaterialSize = sizeof(Material);
|
||||
outCamSize = sizeof(Camera);
|
||||
}
|
||||
|
||||
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount)
|
||||
{
|
||||
ZoneScoped;
|
||||
memcpy(outObjects, s_Spheres, kSphereCount * sizeof(s_Spheres[0]));
|
||||
memcpy(outMaterials, s_SphereMats, kSphereCount * sizeof(s_SphereMats[0]));
|
||||
memcpy(outCam, &s_Cam, sizeof(s_Cam));
|
||||
memcpy(outEmissives, s_EmissiveSpheres, s_EmissiveSphereCount * sizeof(s_EmissiveSpheres[0]));
|
||||
*outEmissiveCount = s_EmissiveSphereCount;
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
// Bit flags accepted by UpdateTest() and DrawTest(); combine with bitwise OR.
enum TestFlags
{
    // animate the scene
    kFlagAnimate     = (1 << 0),
    // blend each frame with the previous backbuffer contents
    kFlagProgressive = (1 << 1),
};
|
||||
|
||||
void InitializeTest();
|
||||
void ShutdownTest();
|
||||
|
||||
void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, unsigned testFlags);
|
||||
void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, float* backbuffer, int& outRayCount, unsigned testFlags);
|
||||
|
||||
void GetObjectCount(int& outCount, int& outObjectSize, int& outMaterialSize, int& outCamSize);
|
||||
void GetSceneDesc(void* outObjects, void* outMaterials, void* outCam, void* outEmissives, int* outEmissiveCount);
|
||||
@@ -1,79 +0,0 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <Windows.h>
|
||||
#undef GetObject
|
||||
#include <intrin.h>
|
||||
|
||||
extern "C" void _ReadWriteBarrier();
|
||||
#pragma intrinsic(_ReadWriteBarrier)
|
||||
#pragma intrinsic(_InterlockedCompareExchange)
|
||||
#pragma intrinsic(_InterlockedExchangeAdd)
|
||||
|
||||
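// Memory barriers. NOTE: _ReadWriteBarrier() here and the empty asm statement with a "memory" clobber in the #else branch only prevent compiler re-ordering; neither emits a CPU fence.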
|
||||
#define BASE_MEMORYBARRIER_ACQUIRE() _ReadWriteBarrier()
|
||||
#define BASE_MEMORYBARRIER_RELEASE() _ReadWriteBarrier()
|
||||
#define BASE_ALIGN(x) __declspec( align( x ) )
|
||||
|
||||
#else
|
||||
#define BASE_MEMORYBARRIER_ACQUIRE() __asm__ __volatile__("": : :"memory")
|
||||
#define BASE_MEMORYBARRIER_RELEASE() __asm__ __volatile__("": : :"memory")
|
||||
#define BASE_ALIGN(x) __attribute__ ((aligned( x )))
|
||||
#endif
|
||||
|
||||
namespace enki
{
    // 32-bit atomic compare-and-swap.
    // Atomically performs: if( *pDest == compareWith ) { *pDest = swapTo; }
    // Returns the value *pDest held before the call (so compareWith on success).
    inline uint32_t AtomicCompareAndSwap( volatile uint32_t* pDest, uint32_t swapTo, uint32_t compareWith )
    {
#ifdef _WIN32
        // assumes two's complement - unsigned / signed conversion leads to same bit pattern
        const uint32_t previous = _InterlockedCompareExchange( (volatile long*)pDest,swapTo, compareWith );
#else
        const uint32_t previous = __sync_val_compare_and_swap( pDest, compareWith, swapTo );
#endif
        return previous;
    }

    // 64-bit overload of the compare-and-swap above; same contract.
    inline uint64_t AtomicCompareAndSwap( volatile uint64_t* pDest, uint64_t swapTo, uint64_t compareWith )
    {
#ifdef _WIN32
        // assumes two's complement - unsigned / signed conversion leads to same bit pattern
        const uint64_t previous = _InterlockedCompareExchange64( (__int64 volatile*)pDest, swapTo, compareWith );
#else
        const uint64_t previous = __sync_val_compare_and_swap( pDest, compareWith, swapTo );
#endif
        return previous;
    }

    // Atomic fetch-and-add.
    // Atomically performs: tmp = *pDest; *pDest += value; return tmp;
    inline int32_t AtomicAdd( volatile int32_t* pDest, int32_t value )
    {
#ifdef _WIN32
        const int32_t previous = _InterlockedExchangeAdd( (long*)pDest, value );
#else
        const int32_t previous = __sync_fetch_and_add( pDest, value );
#endif
        return previous;
    }

}
|
||||
@@ -1,240 +0,0 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "Atomics.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
namespace enki
|
||||
{
|
||||
// LockLessMultiReadPipe - Single writer, multiple reader thread safe pipe using (semi) lockless programming
|
||||
// Readers can only read from the back of the pipe
|
||||
// The single writer can write to the front of the pipe, and read from both ends (a writer can be a reader)
|
||||
// for many of the principles used here, see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418650(v=vs.85).aspx
|
||||
// Note: using log2 sizes so we do not need to clamp (multi-operation)
|
||||
// T is the contained type
|
||||
// Note this is not true lockless as the use of flags as a form of lock state.
|
||||
template<uint8_t cSizeLog2, typename T> class LockLessMultiReadPipe
|
||||
{
|
||||
public:
|
||||
LockLessMultiReadPipe();
|
||||
~LockLessMultiReadPipe() {}
|
||||
|
||||
// ReaderTryReadBack returns false if we were unable to read
|
||||
// This is thread safe for both multiple readers and the writer
|
||||
bool ReaderTryReadBack( T* pOut );
|
||||
|
||||
// WriterTryReadFront returns false if we were unable to read
|
||||
// This is thread safe for the single writer, but should not be called by readers
|
||||
bool WriterTryReadFront( T* pOut );
|
||||
|
||||
// WriterTryWriteFront returns false if we were unable to write
|
||||
// This is thread safe for the single writer, but should not be called by readers
|
||||
bool WriterTryWriteFront( const T& in );
|
||||
|
||||
// IsPipeEmpty() is a utility function, not intended for general use
|
||||
// Should only be used very prudently.
|
||||
bool IsPipeEmpty() const
|
||||
{
|
||||
return 0 == m_WriteIndex - m_ReadCount;
|
||||
}
|
||||
|
||||
void Clear()
|
||||
{
|
||||
m_WriteIndex = 0;
|
||||
m_ReadIndex = 0;
|
||||
m_ReadCount = 0;
|
||||
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
|
||||
}
|
||||
|
||||
private:
|
||||
const static uint32_t ms_cSize = ( 1 << cSizeLog2 );
|
||||
const static uint32_t ms_cIndexMask = ms_cSize - 1;
|
||||
const static uint32_t FLAG_INVALID = 0xFFFFFFFF; // 32bit for CAS
|
||||
const static uint32_t FLAG_CAN_WRITE = 0x00000000; // 32bit for CAS
|
||||
const static uint32_t FLAG_CAN_READ = 0x11111111; // 32bit for CAS
|
||||
|
||||
T m_Buffer[ ms_cSize ];
|
||||
|
||||
// read and write indexes allow fast access to the pipe, but actual access
|
||||
// controlled by the access flags.
|
||||
volatile uint32_t BASE_ALIGN(4) m_WriteIndex;
|
||||
volatile uint32_t BASE_ALIGN(4) m_ReadCount;
|
||||
volatile uint32_t m_Flags[ ms_cSize ];
|
||||
volatile uint32_t BASE_ALIGN(4) m_ReadIndex;
|
||||
};
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
LockLessMultiReadPipe<cSizeLog2,T>::LockLessMultiReadPipe()
|
||||
: m_WriteIndex(0)
|
||||
, m_ReadIndex(0)
|
||||
, m_ReadCount(0)
|
||||
{
|
||||
assert( cSizeLog2 < 32 );
|
||||
memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
|
||||
}
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::ReaderTryReadBack( T* pOut )
|
||||
{
|
||||
|
||||
uint32_t actualReadIndex;
|
||||
|
||||
uint32_t readCount = m_ReadCount;
|
||||
|
||||
// We get hold of read index for consistency,
|
||||
// and do first pass starting at read count
|
||||
uint32_t readIndexToUse = readCount;
|
||||
|
||||
|
||||
while(true)
|
||||
{
|
||||
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
// power of two sizes ensures we can use a simple calc without modulus
|
||||
uint32_t numInPipe = writeIndex - readCount;
|
||||
if( 0 == numInPipe )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if( readIndexToUse >= writeIndex )
|
||||
{
|
||||
// move back to start
|
||||
readIndexToUse = m_ReadIndex;
|
||||
}
|
||||
|
||||
|
||||
// power of two sizes ensures we can perform AND for a modulus
|
||||
actualReadIndex = readIndexToUse & ms_cIndexMask;
|
||||
|
||||
// Multiple potential readers mean we should check if the data is valid,
|
||||
// using an atomic compare exchange
|
||||
uint32_t previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
|
||||
if( FLAG_CAN_READ == previous )
|
||||
{
|
||||
break;
|
||||
}
|
||||
++readIndexToUse;
|
||||
|
||||
//update known readcount
|
||||
readCount = m_ReadCount;
|
||||
}
|
||||
|
||||
// we update the read index using an atomic add, as we've only read one piece of data.
|
||||
// this ensure consistency of the read index, and the above loop ensures readers
|
||||
// only read from unread data
|
||||
AtomicAdd( (volatile int32_t*)&m_ReadCount, 1 );
|
||||
|
||||
BASE_MEMORYBARRIER_ACQUIRE();
|
||||
// now read data, ensuring we do so after above reads & CAS
|
||||
*pOut = m_Buffer[ actualReadIndex ];
|
||||
|
||||
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryReadFront( T* pOut )
|
||||
{
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
uint32_t frontReadIndex = writeIndex;
|
||||
|
||||
// Multiple potential readers mean we should check if the data is valid,
|
||||
// using an atomic compare exchange - which acts as a form of lock (so not quite lockless really).
|
||||
uint32_t previous = FLAG_INVALID;
|
||||
uint32_t actualReadIndex = 0;
|
||||
while( true )
|
||||
{
|
||||
// power of two sizes ensures we can use a simple calc without modulus
|
||||
uint32_t readCount = m_ReadCount;
|
||||
uint32_t numInPipe = writeIndex - readCount;
|
||||
if( 0 == numInPipe || 0 == frontReadIndex )
|
||||
{
|
||||
// frontReadIndex can get to 0 here if that item was just being read by another thread.
|
||||
m_ReadIndex = readCount;
|
||||
return false;
|
||||
}
|
||||
--frontReadIndex;
|
||||
actualReadIndex = frontReadIndex & ms_cIndexMask;
|
||||
previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
|
||||
if( FLAG_CAN_READ == previous )
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if( m_ReadIndex >= frontReadIndex )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// now read data, ensuring we do so after above reads & CAS
|
||||
*pOut = m_Buffer[ actualReadIndex ];
|
||||
|
||||
m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
|
||||
|
||||
BASE_MEMORYBARRIER_RELEASE();
|
||||
|
||||
// 32-bit aligned stores are atomic, and writer owns the write index
|
||||
// we only move one back as this is as many as we have read, not where we have read from.
|
||||
--m_WriteIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template<uint8_t cSizeLog2, typename T> inline
|
||||
bool LockLessMultiReadPipe<cSizeLog2,T>::WriterTryWriteFront( const T& in )
|
||||
{
|
||||
// The writer 'owns' the write index, and readers can only reduce
|
||||
// the amount of data in the pipe.
|
||||
// We get hold of both values for consistency and to reduce false sharing
|
||||
// impacting more than one access
|
||||
uint32_t writeIndex = m_WriteIndex;
|
||||
|
||||
|
||||
// power of two sizes ensures we can perform AND for a modulus
|
||||
uint32_t actualWriteIndex = writeIndex & ms_cIndexMask;
|
||||
|
||||
// a reader may still be reading this item, as there are multiple readers
|
||||
if( m_Flags[ actualWriteIndex ] != FLAG_CAN_WRITE )
|
||||
{
|
||||
return false; // still being read, so have caught up with tail.
|
||||
}
|
||||
|
||||
|
||||
// as we are the only writer we can update the data without atomics
|
||||
// whilst the write index has not been updated
|
||||
m_Buffer[ actualWriteIndex ] = in;
|
||||
m_Flags[ actualWriteIndex ] = FLAG_CAN_READ;
|
||||
|
||||
// We need to ensure the above writes occur prior to updating the write index,
|
||||
// otherwise another thread might read before it's finished
|
||||
BASE_MEMORYBARRIER_RELEASE();
|
||||
|
||||
// 32-bit aligned stores are atomic, and the writer controls the write index
|
||||
++writeIndex;
|
||||
m_WriteIndex = writeIndex;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,437 +0,0 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "TaskScheduler.h"
|
||||
#include "LockLessMultiReadPipe.h"
|
||||
|
||||
|
||||
|
||||
using namespace enki;
|
||||
|
||||
|
||||
static const uint32_t PIPESIZE_LOG2 = 8;
|
||||
static const uint32_t SPIN_COUNT = 100;
|
||||
static const uint32_t SPIN_BACKOFF_MULTIPLIER = 10;
|
||||
static const uint32_t MAX_NUM_INITIAL_PARTITIONS = 8;
|
||||
|
||||
// each software thread gets it's own copy of gtl_threadNum, so this is safe to use as a static variable
|
||||
static THREAD_LOCAL uint32_t gtl_threadNum = 0;
|
||||
|
||||
namespace enki
|
||||
{
|
||||
struct SubTaskSet
|
||||
{
|
||||
ITaskSet* pTask;
|
||||
TaskSetPartition partition;
|
||||
};
|
||||
|
||||
// we derive class TaskPipe rather than typedef to get forward declaration working easily
|
||||
class TaskPipe : public LockLessMultiReadPipe<PIPESIZE_LOG2,enki::SubTaskSet> {};
|
||||
|
||||
struct ThreadArgs
|
||||
{
|
||||
uint32_t threadNum;
|
||||
TaskScheduler* pTaskScheduler;
|
||||
};
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
// Removes up to rangeToSplit_ items from the start of subTask_ and returns
// them as a new SubTaskSet for the same task. subTask_ is advanced so that it
// begins where the returned partition ends; when fewer than rangeToSplit_
// items remain, the whole remainder is returned and subTask_ becomes empty.
SubTaskSet SplitTask( SubTaskSet& subTask_, uint32_t rangeToSplit_ )
{
    const uint32_t remaining = subTask_.partition.end - subTask_.partition.start;
    const uint32_t taken     = ( rangeToSplit_ > remaining ) ? remaining : rangeToSplit_;

    SubTaskSet head = subTask_;
    head.partition.end       = subTask_.partition.start + taken;
    subTask_.partition.start = head.partition.end;
    return head;
}
|
||||
|
||||
// Pause(): spin-wait hint used by the back-off loop in TaskingThreadFunction.
// - Windows on x86/x64 uses the _mm_pause intrinsic.
// - Other x86 toolchains use the `pause` instruction through inline asm.
// - Everything else (including Windows on non-x86 targets, which previously
//   had no definition at all and so failed to compile) gets a no-op.
#if defined _WIN32 && ( defined _M_IX86 || defined _M_X64 )
    #pragma intrinsic(_mm_pause)
    inline void Pause() { _mm_pause(); }
#elif defined __i386__ || defined __x86_64__
    inline void Pause() { __asm__ __volatile__("pause;"); }
#else
    inline void Pause() { ;} // may have NOP or yield equiv
#endif
|
||||
}
|
||||
|
||||
|
||||
// Calls func_ with threadnum_ when a callback has been set; a null callback is ignored.
static void SafeCallback(ProfilerCallbackFunc func_, uint32_t threadnum_)
{
    if( !func_ )
    {
        return;
    }
    func_(threadnum_);
}
|
||||
|
||||
// Returns a pointer to the scheduler's own callback table so that the caller
// can fill in the callbacks it is interested in.
ProfilerCallbacks* TaskScheduler::GetProfilerCallbacks()
{
    ProfilerCallbacks* const pCallbacks = &m_ProfilerCallbacks;
    return pCallbacks;
}
|
||||
|
||||
// Entry point of every worker thread. pArgs points at the ThreadArgs entry for
// this thread. The thread records its number in gtl_threadNum, then keeps
// running tasks until m_bRunning is cleared, backing off with Pause() between
// failed attempts and sleeping in WaitForTasks after SPIN_COUNT failures.
THREADFUNC_DECL TaskScheduler::TaskingThreadFunction( void* pArgs )
{
    const ThreadArgs args      = *static_cast<ThreadArgs*>( pArgs );
    const uint32_t   threadNum = args.threadNum;
    TaskScheduler*   pTS       = args.pTaskScheduler;
    gtl_threadNum = threadNum;

    SafeCallback( pTS->m_ProfilerCallbacks.threadStart, threadNum );

    uint32_t spinCount          = 0;
    uint32_t hintPipeToCheck_io = threadNum + 1;    // does not need to be clamped.
    while( pTS->m_bRunning )
    {
        if( pTS->TryRunTask( threadNum, hintPipeToCheck_io ) )
        {
            // ran something, so restart the back-off sequence
            spinCount = 0;
            continue;
        }

        // no tasks, will spin then wait
        ++spinCount;
        if( spinCount > SPIN_COUNT )
        {
            pTS->WaitForTasks( threadNum );
            spinCount = 0;
            continue;
        }

        // back-off grows linearly with the number of consecutive failures
        for( uint32_t pausesLeft = spinCount * SPIN_BACKOFF_MULTIPLIER; pausesLeft != 0; --pausesLeft )
        {
            Pause();
        }
    }

    AtomicAdd( &pTS->m_NumThreadsRunning, -1 );
    SafeCallback( pTS->m_ProfilerCallbacks.threadStop, threadNum );

    return 0;
}
|
||||
|
||||
|
||||
void TaskScheduler::StartThreads()
|
||||
{
|
||||
if( m_bHaveThreads )
|
||||
{
|
||||
return;
|
||||
}
|
||||
m_bRunning = true;
|
||||
|
||||
SemaphoreCreate( m_NewTaskSemaphore );
|
||||
|
||||
// we create one less thread than m_NumThreads as the main thread counts as one
|
||||
m_pThreadNumStore = new ThreadArgs[m_NumThreads];
|
||||
m_pThreadIDs = new threadid_t[m_NumThreads];
|
||||
m_pThreadNumStore[0].threadNum = 0;
|
||||
m_pThreadNumStore[0].pTaskScheduler = this;
|
||||
m_pThreadIDs[0] = 0;
|
||||
m_NumThreadsWaiting = 0;
|
||||
m_NumThreadsRunning = 1;// acount for main thread
|
||||
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
m_pThreadNumStore[thread].threadNum = thread;
|
||||
m_pThreadNumStore[thread].pTaskScheduler = this;
|
||||
ThreadCreate( &m_pThreadIDs[thread], TaskingThreadFunction, &m_pThreadNumStore[thread] );
|
||||
++m_NumThreadsRunning;
|
||||
}
|
||||
|
||||
// ensure we have sufficient tasks to equally fill either all threads including main
|
||||
// or just the threads we've launched, this is outside the firstinit as we want to be able
|
||||
// to runtime change it
|
||||
if( 1 == m_NumThreads )
|
||||
{
|
||||
m_NumPartitions = 1;
|
||||
m_NumInitialPartitions = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_NumPartitions = m_NumThreads * (m_NumThreads - 1);
|
||||
m_NumInitialPartitions = m_NumThreads - 1;
|
||||
if( m_NumInitialPartitions > MAX_NUM_INITIAL_PARTITIONS )
|
||||
{
|
||||
m_NumInitialPartitions = MAX_NUM_INITIAL_PARTITIONS;
|
||||
}
|
||||
}
|
||||
|
||||
m_bHaveThreads = true;
|
||||
}
|
||||
|
||||
void TaskScheduler::StopThreads( bool bWait_ )
|
||||
{
|
||||
if( m_bHaveThreads )
|
||||
{
|
||||
// wait for them threads quit before deleting data
|
||||
m_bRunning = false;
|
||||
while( bWait_ && m_NumThreadsRunning > 1 )
|
||||
{
|
||||
// keep firing event to ensure all threads pick up state of m_bRunning
|
||||
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsRunning );
|
||||
}
|
||||
|
||||
for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
ThreadTerminate( m_pThreadIDs[thread] );
|
||||
}
|
||||
|
||||
m_NumThreads = 0;
|
||||
delete[] m_pThreadNumStore;
|
||||
delete[] m_pThreadIDs;
|
||||
m_pThreadNumStore = 0;
|
||||
m_pThreadIDs = 0;
|
||||
SemaphoreClose( m_NewTaskSemaphore );
|
||||
|
||||
m_bHaveThreads = false;
|
||||
m_NumThreadsWaiting = 0;
|
||||
m_NumThreadsRunning = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool TaskScheduler::TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ )
|
||||
{
|
||||
// check for tasks
|
||||
SubTaskSet subTask;
|
||||
bool bHaveTask = m_pPipesPerThread[ threadNum ].WriterTryReadFront( &subTask );
|
||||
|
||||
uint32_t threadToCheck = hintPipeToCheck_io_;
|
||||
uint32_t checkCount = 0;
|
||||
while( !bHaveTask && checkCount < m_NumThreads )
|
||||
{
|
||||
threadToCheck = ( hintPipeToCheck_io_ + checkCount ) % m_NumThreads;
|
||||
if( threadToCheck != threadNum )
|
||||
{
|
||||
bHaveTask = m_pPipesPerThread[ threadToCheck ].ReaderTryReadBack( &subTask );
|
||||
}
|
||||
++checkCount;
|
||||
}
|
||||
|
||||
if( bHaveTask )
|
||||
{
|
||||
// update hint, will preserve value unless actually got task from another thread.
|
||||
hintPipeToCheck_io_ = threadToCheck;
|
||||
|
||||
uint32_t partitionSize = subTask.partition.end - subTask.partition.start;
|
||||
if( subTask.pTask->m_RangeToRun < partitionSize )
|
||||
{
|
||||
SubTaskSet taskToRun = SplitTask( subTask, subTask.pTask->m_RangeToRun );
|
||||
SplitAndAddTask( gtl_threadNum, subTask, subTask.pTask->m_RangeToRun, 0 );
|
||||
taskToRun.pTask->ExecuteRange( taskToRun.partition, threadNum );
|
||||
AtomicAdd( &taskToRun.pTask->m_RunningCount, -1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// the task has already been divided up by AddTaskSetToPipe, so just run it
|
||||
subTask.pTask->ExecuteRange( subTask.partition, threadNum );
|
||||
AtomicAdd( &subTask.pTask->m_RunningCount, -1 );
|
||||
}
|
||||
}
|
||||
|
||||
return bHaveTask;
|
||||
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitForTasks( uint32_t threadNum )
|
||||
{
|
||||
// We incrememt the number of threads waiting here in order
|
||||
// to ensure that the check for tasks occurs after the increment
|
||||
// to prevent a task being added after a check, then the thread waiting.
|
||||
// This will occasionally result in threads being mistakenly awoken,
|
||||
// but they will then go back to sleep.
|
||||
AtomicAdd( &m_NumThreadsWaiting, 1 );
|
||||
|
||||
bool bHaveTasks = false;
|
||||
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
|
||||
{
|
||||
bHaveTasks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !bHaveTasks )
|
||||
{
|
||||
SafeCallback( m_ProfilerCallbacks.waitStart, threadNum );
|
||||
SemaphoreWait( m_NewTaskSemaphore );
|
||||
SafeCallback( m_ProfilerCallbacks.waitStop, threadNum );
|
||||
}
|
||||
|
||||
int32_t prev = AtomicAdd( &m_NumThreadsWaiting, -1 );
|
||||
assert( prev != 0 );
|
||||
}
|
||||
|
||||
void TaskScheduler::WakeThreads()
|
||||
{
|
||||
SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsWaiting );
|
||||
}
|
||||
|
||||
void TaskScheduler::SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
|
||||
uint32_t rangeToSplit_, int32_t runningCountOffset_ )
|
||||
{
|
||||
int32_t numAdded = 0;
|
||||
while( subTask_.partition.start != subTask_.partition.end )
|
||||
{
|
||||
SubTaskSet taskToAdd = SplitTask( subTask_, rangeToSplit_ );
|
||||
|
||||
// add the partition to the pipe
|
||||
++numAdded;
|
||||
if( !m_pPipesPerThread[ gtl_threadNum ].WriterTryWriteFront( taskToAdd ) )
|
||||
{
|
||||
if( numAdded > 1 )
|
||||
{
|
||||
WakeThreads();
|
||||
}
|
||||
// alter range to run the appropriate fraction
|
||||
if( taskToAdd.pTask->m_RangeToRun < rangeToSplit_ )
|
||||
{
|
||||
taskToAdd.partition.end = taskToAdd.partition.start + taskToAdd.pTask->m_RangeToRun;
|
||||
subTask_.partition.start = taskToAdd.partition.end;
|
||||
}
|
||||
taskToAdd.pTask->ExecuteRange( taskToAdd.partition, threadNum_ );
|
||||
--numAdded;
|
||||
}
|
||||
}
|
||||
|
||||
// increment running count by number added
|
||||
AtomicAdd( &subTask_.pTask->m_RunningCount, numAdded + runningCountOffset_ );
|
||||
|
||||
WakeThreads();
|
||||
}
|
||||
|
||||
// Divides pTaskSet into partitions and queues them on the calling thread's
// pipe; partitions that do not fit are run inline by SplitAndAddTask.
// The scheduler must have been initialized first, as the partition counts
// used as divisors here are set up by StartThreads.
void TaskScheduler::AddTaskSetToPipe( ITaskSet* pTaskSet )
{
    // both values are divisors below; they are zero/unset before Initialize
    assert( m_NumPartitions && m_NumInitialPartitions );

    // set running count to -1 to guarantee it won't be found complete until all subtasks added
    pTaskSet->m_RunningCount = -1;

    // A grain size of zero would let the ranges below become zero, and a zero
    // range never advances the partition being split, so treat it as one.
    const uint32_t minRange = pTaskSet->m_MinRange ? pTaskSet->m_MinRange : 1;

    // divide task up and add to pipe
    pTaskSet->m_RangeToRun = pTaskSet->m_SetSize / m_NumPartitions;
    if( pTaskSet->m_RangeToRun < minRange ) { pTaskSet->m_RangeToRun = minRange; }

    uint32_t rangeToSplit = pTaskSet->m_SetSize / m_NumInitialPartitions;
    if( rangeToSplit < minRange ) { rangeToSplit = minRange; }

    SubTaskSet subTask;
    subTask.pTask           = pTaskSet;
    subTask.partition.start = 0;
    subTask.partition.end   = pTaskSet->m_SetSize;

    // the offset of 1 cancels the -1 set above once all partitions are queued
    SplitAndAddTask( gtl_threadNum, subTask, rangeToSplit, 1 );
}
|
||||
|
||||
void TaskScheduler::WaitforTaskSet( const ITaskSet* pTaskSet )
|
||||
{
|
||||
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
|
||||
if( pTaskSet )
|
||||
{
|
||||
while( pTaskSet->m_RunningCount )
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
// should add a spin then wait for task completion event.
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
}
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforAll()
|
||||
{
|
||||
bool bHaveTasks = true;
|
||||
uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
|
||||
int32_t threadsRunning = m_NumThreadsRunning - 1;
|
||||
while( bHaveTasks || m_NumThreadsWaiting < threadsRunning )
|
||||
{
|
||||
TryRunTask( gtl_threadNum, hintPipeToCheck_io );
|
||||
bHaveTasks = false;
|
||||
for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
|
||||
{
|
||||
if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
|
||||
{
|
||||
bHaveTasks = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TaskScheduler::WaitforAllAndShutdown()
|
||||
{
|
||||
WaitforAll();
|
||||
StopThreads(true);
|
||||
delete[] m_pPipesPerThread;
|
||||
m_pPipesPerThread = 0;
|
||||
}
|
||||
|
||||
uint32_t TaskScheduler::GetNumTaskThreads() const
|
||||
{
|
||||
return m_NumThreads;
|
||||
}
|
||||
|
||||
// Constructs an idle scheduler: no pipes, no threads, and all profiler
// callbacks cleared. Initialize() must be called before tasks are added.
TaskScheduler::TaskScheduler()
    : m_pPipesPerThread(NULL)
    , m_NumThreads(0)
    , m_pThreadNumStore(NULL)
    , m_pThreadIDs(NULL)
    , m_bRunning(false)
    , m_NumThreadsRunning(0)
    , m_NumThreadsWaiting(0)
    , m_NumPartitions(0)
    , m_NumInitialPartitions(0)     // previously left uninitialized until StartThreads
    , m_bHaveThreads(false)
{
    memset(&m_ProfilerCallbacks, 0, sizeof(m_ProfilerCallbacks));
}
|
||||
|
||||
// Stops the worker threads, waiting for them to finish, and then frees the
// per-thread pipes.
TaskScheduler::~TaskScheduler()
{
    const bool bWaitForThreads = true;
    StopThreads( bWaitForThreads );

    delete[] m_pPipesPerThread;
    m_pPipesPerThread = 0;
}
|
||||
|
||||
void TaskScheduler::Initialize( uint32_t numThreads_ )
|
||||
{
|
||||
assert( numThreads_ );
|
||||
StopThreads( true ); // Stops threads, waiting for them.
|
||||
delete[] m_pPipesPerThread;
|
||||
|
||||
m_NumThreads = numThreads_;
|
||||
|
||||
m_pPipesPerThread = new TaskPipe[ m_NumThreads ];
|
||||
|
||||
StartThreads();
|
||||
}
|
||||
|
||||
void TaskScheduler::Initialize()
|
||||
{
|
||||
Initialize( GetNumHardwareThreads() );
|
||||
}
|
||||
@@ -1,177 +0,0 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "Threads.h"
|
||||
|
||||
namespace enki
|
||||
{
|
||||
|
||||
// Half-open range [start, end) of item indices within a task set.
struct TaskSetPartition
{
    uint32_t start;
    uint32_t end;
};

class  TaskScheduler;
class  TaskPipe;
struct ThreadArgs;
struct SubTaskSet;

// Subclass ITaskSet to create tasks.
// TaskSets can be re-used, but check GetIsComplete() first: adding a task set
// to the scheduler resets its running count.
class ITaskSet
{
public:
    // A set of one item with a minimum range of one.
    ITaskSet()
        : m_SetSize(1)
        , m_MinRange(1)
        , m_RunningCount(0)
        , m_RangeToRun(1)
    {}

    // A set of setSize_ items with a minimum range of one.
    ITaskSet( uint32_t setSize_ )
        : m_SetSize( setSize_ )
        , m_MinRange(1)
        , m_RunningCount(0)
        , m_RangeToRun(1)
    {}

    // A set of setSize_ items split into ranges of at least minRange_ items.
    ITaskSet( uint32_t setSize_, uint32_t minRange_ )
        : m_SetSize( setSize_ )
        , m_MinRange( minRange_ )
        , m_RunningCount(0)
        , m_RangeToRun(minRange_)
    {}

    // Task sets are used polymorphically, so make deletion through an
    // ITaskSet pointer well defined.
    virtual ~ITaskSet() {}

    // Execute range should be overloaded to process tasks. It will be called with a
    // range where range.start < range.end and range.end <= m_SetSize.
    // The range values should be mapped so that linearly processing them in order is cache friendly
    // i.e. neighbouring values should be close together.
    // threadnum should not be used for changing processing of data, its intended purpose
    // is to allow per-thread data buckets for output.
    virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum ) = 0;

    // Size of set - usually the number of data items to be processed, see ExecuteRange. Defaults to 1
    uint32_t m_SetSize;

    // Minimum size of a TaskSetPartition range when splitting a task set into partitions.
    // This should be set to a value which results in computation effort of at least 10k
    // clock cycles to minimize task scheduler overhead.
    // NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
    // of m_MinRange.
    // Also known as grain size in literature.
    uint32_t m_MinRange;

    // True when no part of this task set is queued or running.
    bool GetIsComplete() const
    {
        return 0 == m_RunningCount;
    }

private:
    friend class TaskScheduler;
    volatile int32_t m_RunningCount;    // outstanding partitions, maintained by TaskScheduler
    uint32_t         m_RangeToRun;      // partition size run per call, set by TaskScheduler
};
|
||||
|
||||
// Profiler hooks. Each callback receives the number of the thread it is
// called on; callbacks left null are skipped.
typedef void (*ProfilerCallbackFunc)( uint32_t threadnum_ );

struct ProfilerCallbacks
{
    ProfilerCallbackFunc threadStart;   // called when a task thread starts running
    ProfilerCallbackFunc threadStop;    // called when a task thread is about to exit
    ProfilerCallbackFunc waitStart;     // called before a task thread blocks waiting for tasks
    ProfilerCallbackFunc waitStop;      // called after a task thread has been woken
};
|
||||
|
||||
class TaskScheduler
|
||||
{
|
||||
public:
|
||||
TaskScheduler();
|
||||
~TaskScheduler();
|
||||
|
||||
// Call either Initialize() or Initialize( numThreads_ ) before adding tasks.
|
||||
|
||||
// Initialize() will create GetNumHardwareThreads()-1 threads, which is
|
||||
// sufficient to fill the system when including the main thread.
|
||||
// Initialize can be called multiple times - it will wait for completion
|
||||
// before re-initializing.
|
||||
void Initialize();
|
||||
|
||||
// Initialize( numThreads_ ) - numThreads_ (must be > 0)
|
||||
// will create numThreads_-1 threads, as thread 0 is
|
||||
// the thread on which the initialize was called.
|
||||
void Initialize( uint32_t numThreads_ );
|
||||
|
||||
|
||||
// Adds the TaskSet to pipe and returns if the pipe is not full.
|
||||
// If the pipe is full, pTaskSet is run.
|
||||
// should only be called from main thread, or within a task
|
||||
void AddTaskSetToPipe( ITaskSet* pTaskSet );
|
||||
|
||||
// Runs the TaskSets in pipe until true == pTaskSet->GetIsComplete();
|
||||
// should only be called from thread which created the taskscheduler , or within a task
|
||||
// if called with 0 it will try to run tasks, and return if none available.
|
||||
void WaitforTaskSet( const ITaskSet* pTaskSet );
|
||||
|
||||
// Waits for all task sets to complete - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void WaitforAll();
|
||||
|
||||
// Waits for all task sets to complete and shutdown threads - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void WaitforAllAndShutdown();
|
||||
|
||||
// Returns the number of threads created for running tasks + 1
|
||||
// to account for the main thread.
|
||||
uint32_t GetNumTaskThreads() const;
|
||||
|
||||
// Returns the ProfilerCallbacks structure so that it can be modified to
|
||||
// set the callbacks.
|
||||
ProfilerCallbacks* GetProfilerCallbacks();
|
||||
|
||||
private:
|
||||
static THREADFUNC_DECL TaskingThreadFunction( void* pArgs );
|
||||
void WaitForTasks( uint32_t threadNum );
|
||||
bool TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ );
|
||||
void StartThreads();
|
||||
void StopThreads( bool bWait_ );
|
||||
void SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
|
||||
uint32_t rangeToSplit_, int32_t runningCountOffset_ );
|
||||
void WakeThreads();
|
||||
|
||||
TaskPipe* m_pPipesPerThread;
|
||||
|
||||
uint32_t m_NumThreads;
|
||||
ThreadArgs* m_pThreadNumStore;
|
||||
threadid_t* m_pThreadIDs;
|
||||
volatile bool m_bRunning;
|
||||
volatile int32_t m_NumThreadsRunning;
|
||||
volatile int32_t m_NumThreadsWaiting;
|
||||
uint32_t m_NumPartitions;
|
||||
uint32_t m_NumInitialPartitions;
|
||||
semaphoreid_t m_NewTaskSemaphore;
|
||||
bool m_bHaveThreads;
|
||||
ProfilerCallbacks m_ProfilerCallbacks;
|
||||
|
||||
TaskScheduler( const TaskScheduler& nocopy );
|
||||
TaskScheduler& operator=( const TaskScheduler& nocopy );
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,122 +0,0 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#include "TaskScheduler_c.h"
|
||||
#include "TaskScheduler.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
using namespace enki;
|
||||
|
||||
struct enkiTaskScheduler : TaskScheduler
|
||||
{
|
||||
};
|
||||
|
||||
struct enkiTaskSet : ITaskSet
|
||||
{
|
||||
enkiTaskSet( enkiTaskExecuteRange taskFun_ ) : taskFun(taskFun_), pArgs(NULL) {}
|
||||
|
||||
virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum )
|
||||
{
|
||||
taskFun( range.start, range.end, threadnum, pArgs );
|
||||
}
|
||||
|
||||
enkiTaskExecuteRange taskFun;
|
||||
void* pArgs;
|
||||
};
|
||||
|
||||
// Allocates a new, uninitialized task scheduler. Release it with
// enkiDeleteTaskScheduler.
enkiTaskScheduler* enkiNewTaskScheduler()
{
    return new enkiTaskScheduler();
}
|
||||
|
||||
// C wrapper for TaskScheduler::Initialize().
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ )
{
    enkiTaskScheduler& scheduler = *pETS_;
    scheduler.Initialize();
}
|
||||
|
||||
// C wrapper for TaskScheduler::Initialize( numThreads_ ).
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ )
{
    enkiTaskScheduler& scheduler = *pETS_;
    scheduler.Initialize( numThreads_ );
}
|
||||
|
||||
// Destroys a scheduler created by enkiNewTaskScheduler; the destructor stops
// its threads first.
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ )
{
    enkiTaskScheduler* const pScheduler = pETS_;
    delete pScheduler;
}
|
||||
|
||||
// Allocates a task set which runs taskFunc_. pETS_ is not used by the current
// implementation. Release the result with enkiDeleteTaskSet.
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ )
{
    enkiTaskSet* const pTaskSet = new enkiTaskSet( taskFunc_ );
    return pTaskSet;
}
|
||||
|
||||
// Destroys a task set created by enkiCreateTaskSet.
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ )
{
    enkiTaskSet* const pTaskSet = pTaskSet_;
    delete pTaskSet;
}
|
||||
|
||||
// Stores the set size and user pointer on the task set, then schedules it.
// The task set's minimum range is left as it was.
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_, void* pArgs_, uint32_t setSize_ )
{
    assert( pTaskSet_ );
    assert( pTaskSet_->taskFun );

    enkiTaskSet& taskSet = *pTaskSet_;
    taskSet.m_SetSize = setSize_;
    taskSet.pArgs     = pArgs_;
    pETS_->AddTaskSetToPipe( &taskSet );
}
|
||||
|
||||
// Stores the set size, minimum range and user pointer on the task set, then
// schedules it.
void enkiAddTaskSetToPipeMinRange(enkiTaskScheduler * pETS_, enkiTaskSet * pTaskSet_, void * pArgs_, uint32_t setSize_, uint32_t minRange_)
{
    assert( pTaskSet_ );
    assert( pTaskSet_->taskFun );

    enkiTaskSet& taskSet = *pTaskSet_;
    taskSet.m_SetSize  = setSize_;
    taskSet.m_MinRange = minRange_;
    taskSet.pArgs      = pArgs_;
    pETS_->AddTaskSetToPipe( &taskSet );
}
|
||||
|
||||
// Returns 1 if the task set reports completion, 0 otherwise. Does not wait.
// pETS_ is not used by the current implementation.
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
{
    assert( pTaskSet_ );
    if( pTaskSet_->GetIsComplete() )
    {
        return 1;
    }
    return 0;
}
|
||||
|
||||
// C wrapper for TaskScheduler::WaitforTaskSet.
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
{
    enkiTaskScheduler& scheduler = *pETS_;
    scheduler.WaitforTaskSet( pTaskSet_ );
}
|
||||
|
||||
// C wrapper for TaskScheduler::WaitforAll.
void enkiWaitForAll( enkiTaskScheduler* pETS_ )
{
    enkiTaskScheduler& scheduler = *pETS_;
    scheduler.WaitforAll();
}
|
||||
|
||||
|
||||
// C wrapper for TaskScheduler::GetNumTaskThreads.
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ )
{
    const uint32_t numThreads = pETS_->GetNumTaskThreads();
    return numThreads;
}
|
||||
|
||||
// Returns the scheduler's callback table viewed as the C struct. The assert
// checks that the C and C++ structs have the same size before the pointer is
// reinterpreted.
enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ )
{
    assert( sizeof(enkiProfilerCallbacks) == sizeof(enki::ProfilerCallbacks) );

    enki::ProfilerCallbacks* const pCallbacks = pETS_->GetProfilerCallbacks();
    return reinterpret_cast<enkiProfilerCallbacks*>( pCallbacks );
}
|
||||
|
||||
@@ -1,104 +0,0 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct enkiTaskScheduler enkiTaskScheduler;
|
||||
typedef struct enkiTaskSet enkiTaskSet;
|
||||
|
||||
typedef void (* enkiTaskExecuteRange)( uint32_t start_, uint32_t end, uint32_t threadnum_, void* pArgs_ );
|
||||
|
||||
|
||||
// Create a new task scheduler
|
||||
enkiTaskScheduler* enkiNewTaskScheduler();
|
||||
|
||||
// Initialize task scheduler - will create GetNumHardwareThreads()-1 threads, which is
|
||||
// sufficient to fill the system when including the main thread.
|
||||
// Initialize can be called multiple times - it will wait for completion
|
||||
// before re-initializing.
|
||||
void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// Initialize a task scheduler with numThreads_ (must be > 0)
|
||||
// will create numThreads_-1 threads, as thread 0 is
|
||||
// the thread on which the initialize was called.
|
||||
void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ );
|
||||
|
||||
|
||||
// Delete a task scheduler
|
||||
void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// Create a task set.
|
||||
enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ );
|
||||
|
||||
// Delete a task set.
|
||||
void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ );
|
||||
|
||||
// Schedule the task
|
||||
void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
|
||||
void* pArgs_, uint32_t setSize_ );
|
||||
|
||||
// Schedule the task with a minimum range.
|
||||
// This should be set to a value which results in computation effort of at least 10k
|
||||
// clock cycles to minimize tast scheduler overhead.
|
||||
// NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
|
||||
// of m_MinRange.
|
||||
// Also known as grain size in literature.
|
||||
void enkiAddTaskSetToPipeMinRange( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
|
||||
void* pArgs_, uint32_t setSize_, uint32_t minRange_ );
|
||||
|
||||
|
||||
// Check if TaskSet is complete. Doesn't wait. Returns 1 if complete, 0 if not.
|
||||
int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
|
||||
|
||||
|
||||
// Wait for a given task.
|
||||
// should only be called from thread which created the taskscheduler , or within a task
|
||||
// if called with 0 it will try to run tasks, and return if none available.
|
||||
void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
|
||||
|
||||
|
||||
// Waits for all task sets to complete - not guaranteed to work unless we know we
|
||||
// are in a situation where tasks aren't being continuosly added.
|
||||
void enkiWaitForAll( enkiTaskScheduler* pETS_ );
|
||||
|
||||
|
||||
// get number of threads
|
||||
uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ );
|
||||
|
||||
// TaskScheduler implements several callbacks intended for profilers
|
||||
typedef void (*enkiProfilerCallbackFunc)( uint32_t threadnum_ );
|
||||
struct enkiProfilerCallbacks
|
||||
{
|
||||
enkiProfilerCallbackFunc threadStart;
|
||||
enkiProfilerCallbackFunc threadStop;
|
||||
enkiProfilerCallbackFunc waitStart;
|
||||
enkiProfilerCallbackFunc waitStop;
|
||||
};
|
||||
|
||||
// Get the callback structure so it can be set
|
||||
struct enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -1,210 +0,0 @@
|
||||
// Copyright (c) 2013 Doug Binks
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied
|
||||
// warranty. In no event will the authors be held liable for any damages
|
||||
// arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it
|
||||
// freely, subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented; you must not
|
||||
// claim that you wrote the original software. If you use this software
|
||||
// in a product, an acknowledgement in the product documentation would be
|
||||
// appreciated but is not required.
|
||||
// 2. Altered source versions must be plainly marked as such, and must not be
|
||||
// misrepresented as being the original software.
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include "Atomics.h"
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <Windows.h>
|
||||
|
||||
#define THREADFUNC_DECL DWORD WINAPI
|
||||
#define THREAD_LOCAL __declspec( thread )
|
||||
|
||||
namespace enki
|
||||
{
|
||||
typedef HANDLE threadid_t;
|
||||
|
||||
// declare the thread start function as:
|
||||
// THREADFUNC_DECL MyThreadStart( void* pArg );
|
||||
inline bool ThreadCreate( threadid_t* returnid, DWORD ( WINAPI *StartFunc) (void* ), void* pArg )
|
||||
{
|
||||
// posix equiv pthread_create
|
||||
DWORD threadid;
|
||||
*returnid = CreateThread( 0, 0, StartFunc, pArg, 0, &threadid );
|
||||
return *returnid != NULL;
|
||||
}
|
||||
|
||||
inline bool ThreadTerminate( threadid_t threadid )
|
||||
{
|
||||
// posix equiv pthread_cancel
|
||||
return CloseHandle( threadid ) == 0;
|
||||
}
|
||||
|
||||
inline uint32_t GetNumHardwareThreads()
|
||||
{
|
||||
SYSTEM_INFO sysInfo;
|
||||
GetSystemInfo(&sysInfo);
|
||||
return sysInfo.dwNumberOfProcessors;
|
||||
}
|
||||
}
|
||||
|
||||
#else // posix
|
||||
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#define THREADFUNC_DECL void*
|
||||
#define THREAD_LOCAL __thread
|
||||
|
||||
namespace enki
{
    // On posix systems a thread is identified by its pthread_t.
    typedef pthread_t threadid_t;

    // Starts a new thread running StartFunc( pArg ) with default attributes.
    // Declare the thread start function as:
    //     THREADFUNC_DECL MyThreadStart( void* pArg );
    // Returns true when pthread_create reports success (0).
    inline bool ThreadCreate( threadid_t* returnid, void* ( *StartFunc) (void* ), void* pArg )
    {
        int32_t retval = pthread_create( returnid, NULL, StartFunc, pArg );
        return retval == 0;
    }

    // Requests cancellation of the thread via pthread_cancel.
    // Returns true when the request was accepted (pthread_cancel returned 0).
    inline bool ThreadTerminate( threadid_t threadid )
    {
        return pthread_cancel( threadid ) == 0;
    }

    // Returns the number of processors currently online, never less than 1.
    inline uint32_t GetNumHardwareThreads()
    {
        // sysconf returns -1 on error; casting that straight to uint32_t would
        // report ~4 billion threads, so fall back to a single thread instead.
        const long numOnline = sysconf( _SC_NPROCESSORS_ONLN );
        if( numOnline < 1 )
        {
            return 1;
        }
        return (uint32_t)numOnline;
    }
}
|
||||
|
||||
#endif // posix
|
||||
|
||||
|
||||
// Semaphore implementation
|
||||
#ifdef _WIN32
|
||||
|
||||
namespace enki
|
||||
{
|
||||
struct semaphoreid_t
|
||||
{
|
||||
HANDLE sem;
|
||||
};
|
||||
|
||||
inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphoreid.sem = CreateSemaphore(NULL, 0, MAXLONG, NULL );
|
||||
}
|
||||
|
||||
inline void SemaphoreClose( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
CloseHandle( semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreWait( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
DWORD retval = WaitForSingleObject( semaphoreid.sem, INFINITE );
|
||||
|
||||
assert( retval != WAIT_FAILED );
|
||||
}
|
||||
|
||||
inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
|
||||
{
|
||||
if( countWaiting )
|
||||
{
|
||||
ReleaseSemaphore( semaphoreid.sem, countWaiting, NULL );
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(__MACH__)
|
||||
|
||||
// OS X does not support unnamed POSIX semaphores (sem_init), so Mach semaphores are used instead
|
||||
// see https://developer.apple.com/library/content/documentation/Darwin/Conceptual/KernelProgramming/synchronization/synchronization.html
|
||||
#include <mach/mach.h>
|
||||
|
||||
namespace enki
|
||||
{
|
||||
|
||||
struct semaphoreid_t
|
||||
{
|
||||
semaphore_t sem;
|
||||
};
|
||||
|
||||
inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_create( mach_task_self(), &semaphoreid.sem, SYNC_POLICY_FIFO, 0 );
|
||||
}
|
||||
|
||||
inline void SemaphoreClose( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_destroy( mach_task_self(), semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreWait( semaphoreid_t& semaphoreid )
|
||||
{
|
||||
semaphore_wait( semaphoreid.sem );
|
||||
}
|
||||
|
||||
inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
|
||||
{
|
||||
while( countWaiting-- > 0 )
|
||||
{
|
||||
semaphore_signal( semaphoreid.sem );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else // POSIX
|
||||
|
||||
#include <semaphore.h>
|
||||
|
||||
namespace enki
{
    // Wraps an unnamed POSIX semaphore.
    struct semaphoreid_t
    {
        sem_t   sem;
    };

    // Initialises the semaphore as process-private with an initial value of 0.
    // Failure is only detected through the assert.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        int err = sem_init( &semaphoreid.sem, 0, 0 );
        assert( err == 0 );
        (void)err; // keeps builds with NDEBUG free of unused-variable warnings
    }

    // Destroys the semaphore.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        sem_destroy( &semaphoreid.sem );
    }

    // Blocks until the semaphore has been signalled.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid )
    {
        // sem_wait returns -1 with errno == EINTR when a signal handler
        // interrupts it; that is not a real wake-up, so wait again.
        int err;
        do
        {
            err = sem_wait( &semaphoreid.sem );
        } while( err == -1 && errno == EINTR );

        assert( err == 0 );
        (void)err; // keeps builds with NDEBUG free of unused-variable warnings
    }

    // Posts the semaphore countWaiting times; zero or negative counts do nothing.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        while( countWaiting-- > 0 )
        {
            sem_post( &semaphoreid.sem );
        }
    }
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,395 +0,0 @@
|
||||
#include "../Source/Config.h"
|
||||
|
||||
// Advances the 32-bit xorshift state in place (shifts 13, 17, 15) and
// returns the new state value.
inline uint RNG(inout uint state)
{
    uint bits = state;
    bits = bits ^ (bits << 13);
    bits = bits ^ (bits >> 17);
    bits = bits ^ (bits << 15);
    state = bits;
    return bits;
}
|
||||
|
||||
// Draws the next random number and maps its low 24 bits to a float in [0, 1).
float RandomFloat01(inout uint state)
{
    uint low24 = RNG(state) & 0xFFFFFF;
    return low24 / 16777216.0f; // 16777216 == 2^24
}
|
||||
|
||||
// Returns a random point inside the unit disk in the XY plane (z == 0).
// Consumes two random numbers: first the angle, then the radius.
float3 RandomInUnitDisk(inout uint state)
{
    float angle = RandomFloat01(state) * 2.0f * 3.1415926f;
    float2 onCircle = float2(cos(angle), sin(angle));
    float radius = sqrt(RandomFloat01(state));
    return float3(onCircle * radius, 0);
}
|
||||
// Returns a random point inside the unit sphere.
// Consumes three random numbers: z, the angle around z, then the radial scale.
float3 RandomInUnitSphere(inout uint state)
{
    float z = RandomFloat01(state) * 2.0f - 1.0f;
    float phi = RandomFloat01(state) * 2.0f * 3.1415926f;
    // max() guards the sqrt against a slightly negative argument
    float ringRadius = sqrt(max(0.0, 1.0f - z * z));
    float3 onSphere = float3(ringRadius * cos(phi), ringRadius * sin(phi), z);
    // cube root of a random number scales the surface point into the interior
    float scale = pow(RandomFloat01(state), 1.0 / 3.0);
    return onSphere * scale;
}
|
||||
// Returns a random direction on the unit sphere.
// Consumes two random numbers: z first, then the angle around the z axis.
float3 RandomUnitVector(inout uint state)
{
    float z = RandomFloat01(state) * 2.0f - 1.0f;
    float phi = RandomFloat01(state) * 2.0f * 3.1415926f;
    float ringRadius = sqrt(1.0f - z * z); // radius of the circle at height z
    return float3(ringRadius * cos(phi), ringRadius * sin(phi), z);
}
|
||||
|
||||
|
||||
|
||||
// A ray given by a start point and a direction.
struct Ray
{
    float3 orig;    // start point
    float3 dir;     // direction; RayPointAt multiplies this by t
};
|
||||
// Builds a Ray from the given origin and direction; both are stored as passed in.
Ray MakeRay(float3 orig_, float3 dir_)
{
    Ray result;
    result.orig = orig_;
    result.dir = dir_;
    return result;
}
|
||||
// Returns the point reached after travelling t along the ray: orig + dir * t.
float3 RayPointAt(Ray r, float t)
{
    float3 travelled = r.dir * t;
    return r.orig + travelled;
}
|
||||
|
||||
|
||||
// Refracts direction v at a surface with normal n, where nint is the ratio
// of refractive indices applied to the tangential part of v.
// Returns true and writes the refracted direction to outRefracted when the
// discriminant is positive. Returns false otherwise (no refracted ray); in
// that case outRefracted is zero, so the out parameter is defined on every
// path instead of being left unassigned.
inline bool refract(float3 v, float3 n, float nint, out float3 outRefracted)
{
    outRefracted = float3(0, 0, 0);
    float dt = dot(v, n);
    float discr = 1.0f - nint * nint*(1 - dt * dt);
    if (discr > 0)
    {
        outRefracted = nint * (v - n * dt) - n * sqrt(discr);
        return true;
    }
    return false;
}
|
||||
// Schlick-style reflectance estimate: r0 + (1 - r0) * (1 - cosine)^5,
// where r0 = ((1 - ri) / (1 + ri))^2.
inline float schlick(float cosine, float ri)
{
    float ratio = (1 - ri) / (1 + ri);
    float r0 = ratio * ratio;
    // saturate clamps (1 - cosine) to [0, 1], guarding against tiny negative values
    float falloff = pow(saturate(1 - cosine), 5);
    return r0 + (1 - r0)*falloff;
}
|
||||
|
||||
// Result of a ray/sphere intersection, filled in by HitSpheres.
struct Hit
{
    float3 pos;     // intersection point (RayPointAt of the hit distance)
    float3 normal;  // (pos - sphere center) * sphere invRadius
    float t;        // distance along the ray to the intersection
};
|
||||
|
||||
// Sphere primitive as read from the g_Spheres buffer.
struct Sphere
{
    float3 center;
    float radius;
    float invRadius;    // scales (pos - center) into the hit normal; presumably 1 / radius - confirm on the host side
};
|
||||
|
||||
// Values of Material::type, dispatched on in Scatter.
#define MatLambert 0
#define MatMetal 1
#define MatDielectric 2

// Surface description as read from the g_Materials buffer.
struct Material
{
    int type;           // one of the Mat* values above
    float3 albedo;      // attenuation applied by Lambert and metal scattering
    float3 emissive;    // emitted light, added by Trace and by light sampling
    float roughness;    // scales the random offset added to metal reflections
    float ri;           // refractive index used by the dielectric branch
};
|
||||
|
||||
// Group-shared copies of the scene data; main() fills them from the input
// buffers before any tracing starts.
groupshared Sphere s_GroupSpheres[kCSMaxObjects];
groupshared Material s_GroupMaterials[kCSMaxObjects];
// Each entry is an index into the two arrays above, naming a light-sampling target.
groupshared int s_GroupEmissives[kCSMaxObjects];
|
||||
|
||||
|
||||
// Camera description consumed by CameraGetRay.
struct Camera
{
    float3 origin;
    float3 lowerLeftCorner; // image-plane point reached with s == 0, t == 0
    float3 horizontal;      // image-plane extent scaled by s
    float3 vertical;        // image-plane extent scaled by t
    float3 u;               // axis scaled by the lens sample's x
    float3 v;               // axis scaled by the lens sample's y
    float3 w;
    float lensRadius;       // scales the random lens-disk sample
};
|
||||
|
||||
// Builds a camera ray for image-plane coordinates (s, t).
// The origin is jittered by a random point on the lens disk, and the
// direction is normalized and aimed at the matching image-plane point.
Ray CameraGetRay(Camera cam, float s, float t, inout uint state)
{
    float3 lensSample = cam.lensRadius * RandomInUnitDisk(state);
    float3 lensOffset = cam.u * lensSample.x + cam.v * lensSample.y;
    float3 rayOrigin = cam.origin + lensOffset;
    float3 toTarget = cam.lowerLeftCorner + s * cam.horizontal + t * cam.vertical - cam.origin - lensOffset;
    return MakeRay(rayOrigin, normalize(toTarget));
}
|
||||
|
||||
|
||||
// Finds the closest intersection of r with the first sphereCount spheres in
// group-shared memory, restricted to distances in (tMin, tMax).
// Returns the index of the sphere hit, or -1 when nothing was hit; outHit is
// only written on a hit.
// The quadratic omits the dot(dir, dir) term, so r.dir is expected to be unit length.
int HitSpheres(Ray r, int sphereCount, float tMin, float tMax, inout Hit outHit)
{
    int closestId = -1;
    float closestT = tMax;
    for (int i = 0; i < sphereCount; ++i)
    {
        Sphere sph = s_GroupSpheres[i];
        float3 toCenter = sph.center - r.orig;
        float proj = dot(toCenter, r.dir);
        float c = dot(toCenter, toCenter) - sph.radius*sph.radius;
        float discr = proj * proj - c;
        if (discr > 0)
        {
            float root = sqrt(discr);
            float tNear = proj - root;
            float tFar = proj + root;
            // prefer the nearer root; fall back to the farther one when the
            // nearer one is at or before tMin
            float t = (tNear <= tMin) ? tFar : tNear;

            if (t > tMin && t < closestT)
            {
                closestId = i;
                closestT = t;
            }
        }
    }

    if (closestId == -1)
        return closestId;

    Sphere hitSphere = s_GroupSpheres[closestId];
    outHit.pos = RayPointAt(r, closestT);
    outHit.normal = (outHit.pos - hitSphere.center) * hitSphere.invRadius;
    outHit.t = closestT;
    return closestId;
}
|
||||
|
||||
// Per-dispatch parameters; main() reads element 0 of g_Params.
struct Params
{
    Camera cam;
    int sphereCount;    // number of spheres/materials copied and traced
    int screenWidth;
    int screenHeight;
    int frames;         // mixed into the per-pixel RNG seed
    float invWidth;     // multiplies the jittered pixel x to get u
    float invHeight;    // multiplies the jittered pixel y to get v
    float lerpFac;      // blend factor between the new colour and the previous image
    int emissiveCount;  // number of valid entries in g_Emissives
};
|
||||
|
||||
|
||||
// Distance range accepted for ray hits, and the maximum number of iterations per path.
#define kMinT 0.001f
#define kMaxT 1.0e7f
#define kMaxDepth 10
|
||||
|
||||
|
||||
// Intersects r with the scene. The scene consists only of spheres, so this
// forwards to HitSpheres and returns its result (sphere index, or -1 on a miss).
static int HitWorld(int sphereCount, Ray r, float tMin, float tMax, inout Hit outHit)
{
    int hitIndex = HitSpheres(r, sphereCount, tMin, tMax, outHit);
    return hitIndex;
}
|
||||
|
||||
|
||||
// Computes how ray r_in scatters at hit rec on the object with index matID.
// Outputs:
//   attenuation   - colour multiplier for light carried along the new ray
//   scattered     - the outgoing ray
//   outLightE     - light gathered by explicit light sampling (Lambert only,
//                   and only when DO_LIGHT_SAMPLING is enabled); zero otherwise
//   inoutRayCount - incremented once per shadow ray
// Returns false when the path should stop: a metal reflection pointing into
// the surface, or an unknown material type.
static bool Scatter(int sphereCount, int emissiveCount, int matID, Ray r_in, Hit rec, out float3 attenuation, out Ray scattered, out float3 outLightE, inout int inoutRayCount, inout uint state)
{
    outLightE = float3(0, 0, 0);
    Material mat = s_GroupMaterials[matID];

    if (mat.type == MatLambert)
    {
        // random point on the unit sphere that is tangent to the hit point
        float3 target = rec.pos + rec.normal + RandomUnitVector(state);
        scattered = MakeRay(rec.pos, normalize(target - rec.pos));
        attenuation = mat.albedo;

        // sample lights
#if DO_LIGHT_SAMPLING
        for (int j = 0; j < emissiveCount; ++j)
        {
            int lightID = s_GroupEmissives[j];
            if (matID == lightID)
                continue; // skip self
            Material lightMat = s_GroupMaterials[lightID];
            Sphere lightSphere = s_GroupSpheres[lightID];

            // coordinate system for sampling (sw, su, sv), with sw pointing at the light
            float3 sw = normalize(lightSphere.center - rec.pos);
            float3 helperAxis = abs(sw.x)>0.01f ? float3(0, 1, 0) : float3(1, 0, 0);
            float3 su = normalize(cross(helperAxis, sw));
            float3 sv = cross(sw, su);

            // sample the sphere by solid angle
            float3 fromCenter = rec.pos - lightSphere.center;
            float cosAMax = sqrt(1.0f - lightSphere.radius*lightSphere.radius / dot(fromCenter, fromCenter));
            float eps1 = RandomFloat01(state);
            float eps2 = RandomFloat01(state);
            float cosA = 1.0f - eps1 + eps1 * cosAMax;
            float sinA = sqrt(1.0f - cosA * cosA);
            float phi = 2 * 3.1415926 * eps2;
            float3 l = su * cos(phi) * sinA + sv * sin(phi) * sinA + sw * cosA;

            // shoot a shadow ray; the light only contributes when it is the first thing hit
            Hit lightHit;
            ++inoutRayCount;
            int hitID = HitWorld(sphereCount, MakeRay(rec.pos, l), kMinT, kMaxT, lightHit);
            if (hitID == lightID)
            {
                float omega = 2 * 3.1415926 * (1 - cosAMax);

                // normal flipped to face against the incoming ray
                float3 nl = dot(rec.normal, r_in.dir) < 0 ? rec.normal : -rec.normal;
                outLightE += (mat.albedo * lightMat.emissive) * (max(0.0f, dot(l, nl)) * omega / 3.1415926);
            }
        }
#endif
        return true;
    }

    if (mat.type == MatMetal)
    {
        // reflected ray, perturbed by a random point inside a sphere scaled by roughness
        float3 refl = reflect(r_in.dir, rec.normal);
        float roughness = mat.roughness;
#if DO_MITSUBA_COMPARE
        roughness = 0; // until we get better BRDF for metals
#endif
        scattered = MakeRay(rec.pos, normalize(refl + roughness*RandomInUnitSphere(state)));
        attenuation = mat.albedo;
        return dot(scattered.dir, rec.normal) > 0;
    }

    if (mat.type == MatDielectric)
    {
        float3 rdir = r_in.dir;
        float3 refl = reflect(rdir, rec.normal);
        attenuation = float3(1, 1, 1);

        float rayDotN = dot(rdir, rec.normal);
        float3 outwardN;
        float nint;
        float cosine;
        if (rayDotN > 0)
        {
            // ray travels along the normal: flip the normal for refraction
            outwardN = -rec.normal;
            nint = mat.ri;
            cosine = mat.ri * rayDotN;
        }
        else
        {
            outwardN = rec.normal;
            nint = 1.0f / mat.ri;
            cosine = -rayDotN;
        }

        // reflection is certain unless a refracted direction exists
        float3 refr;
        float reflProb = 1;
        if (refract(rdir, outwardN, nint, refr))
        {
            reflProb = schlick(cosine, mat.ri);
        }

        if (RandomFloat01(state) < reflProb)
            scattered = MakeRay(rec.pos, normalize(refl));
        else
            scattered = MakeRay(rec.pos, normalize(refr));
        return true;
    }

    // unknown material type: magenta attenuation, fixed ray, stop the path
    attenuation = float3(1, 0, 1);
    scattered = MakeRay(float3(0,0,0), float3(0, 0, 1));
    return false;
}
|
||||
|
||||
// Traces one path starting with ray r and returns the light gathered along it.
// inoutRayCount is incremented for every ray cast, including the shadow rays
// counted inside Scatter.
static float3 Trace(int sphereCount, int emissiveCount, Ray r, inout int inoutRayCount, inout uint state)
{
    float3 radiance = 0;
    float3 throughput = 1;  // product of the attenuations collected so far
    bool doMaterialE = true;
    // GPUs don't support recursion, so iterate bounces in a loop up to max depth
    for (int depth = 0; depth < kMaxDepth; ++depth)
    {
        Hit rec;
        ++inoutRayCount;
        int id = HitWorld(sphereCount, r, kMinT, kMaxT, rec);
        if (id < 0)
        {
            // nothing hit: add the sky contribution and finish the path
#if DO_MITSUBA_COMPARE
            radiance += throughput * float3(0.15f, 0.21f, 0.3f); // easier compare with Mitsuba's constant environment light
#else
            float t = 0.5f*(r.dir.y + 1.0f);
            float3 skyCol = ((1.0f - t)*float3(1.0f, 1.0f, 1.0f) + t * float3(0.5f, 0.7f, 1.0f)) * 0.3f;
            radiance += throughput * skyCol;
#endif
            break;
        }

        Ray scattered;
        float3 attenuation;
        float3 lightE;
        Material mat = s_GroupMaterials[id];
        float3 matE = mat.emissive;
        if (!Scatter(sphereCount, emissiveCount, id, r, rec, attenuation, scattered, lightE, inoutRayCount, state))
        {
            // path ends here: only the surface's own emission is added
            radiance += throughput * matE;
            break;
        }

#if DO_LIGHT_SAMPLING
        // emission is dropped for a surface reached right after a Lambert
        // bounce (presumably because that bounce already sampled the lights
        // explicitly)
        if (!doMaterialE) matE = 0;
        doMaterialE = (mat.type != MatLambert);
#endif
        radiance += throughput * (matE + lightE);
        throughput *= attenuation;
        r = scattered;
    }
    return radiance;
}
|
||||
|
||||
// Previous image, blended with the newly traced colour in main().
Texture2D srcImage : register(t0);
// Output image.
RWTexture2D<float4> dstImage : register(u0);
// Scene data; main() copies these into group-shared memory.
StructuredBuffer<Sphere> g_Spheres : register(t1);
StructuredBuffer<Material> g_Materials : register(t2);
// Only element 0 is read.
StructuredBuffer<Params> g_Params : register(t3);
StructuredBuffer<int> g_Emissives : register(t4);
// Ray counter; every thread atomically adds its ray count at byte offset 0.
RWByteAddressBuffer g_OutRayCount : register(u1);
|
||||
|
||||
// Compute entry point: one thread per pixel.
//   gid - dispatch-wide thread id, used as the pixel coordinate
//   tid - thread id within the group, used to split the scene-data copy
[numthreads(kCSGroupSizeX, kCSGroupSizeY, 1)]
void main(uint3 gid : SV_DispatchThreadID, uint3 tid : SV_GroupThreadID)
{
    // Step 1: copy scene data (spheres, materials, emissive indices) into
    // group-shared memory. The work is split so every thread of the group
    // copies its own contiguous chunk.
    uint threadID = tid.y * kCSGroupSizeX + tid.x;
    uint groupSize = kCSGroupSizeX * kCSGroupSizeY;
    uint objCount = g_Params[0].sphereCount;
    uint chunkSize = (objCount + groupSize - 1) / groupSize; // rounded up
    uint chunkBegin = threadID * chunkSize;
    uint chunkEnd = chunkBegin + chunkSize;
    for (uint io = chunkBegin; io < chunkEnd; ++io)
    {
        if (io < objCount)
        {
            s_GroupSpheres[io] = g_Spheres[io];
            s_GroupMaterials[io] = g_Materials[io];
        }
        if (io < g_Params[0].emissiveCount)
        {
            s_GroupEmissives[io] = g_Emissives[io];
        }
    }
    // all copies must be complete before any thread starts tracing
    GroupMemoryBarrierWithGroupSync();

    // Step 2: trace DO_SAMPLES_PER_PIXEL jittered samples through this pixel.
    Params params = g_Params[0];
    int rayCount = 0;
    float3 accumulated = 0;
    // seed from pixel position and frame number; "| 1" keeps the state non-zero
    uint rngState = (gid.x * 1973 + gid.y * 9277 + params.frames * 26699) | 1;
    for (int s = 0; s < DO_SAMPLES_PER_PIXEL; s++)
    {
        float u = float(gid.x + RandomFloat01(rngState)) * params.invWidth;
        float v = float(gid.y + RandomFloat01(rngState)) * params.invHeight;
        Ray r = CameraGetRay(params.cam, u, v, rngState);
        accumulated += Trace(params.sphereCount, params.emissiveCount, r, rayCount, rngState);
    }
    float3 col = accumulated * (1.0f / float(DO_SAMPLES_PER_PIXEL));

    // Step 3: blend with the previous image and write the result.
    float3 prev = srcImage.Load(int3(gid.xy,0)).rgb;
    col = lerp(col, prev, params.lerpFac);
    dstImage[gid.xy] = float4(col, 1);

    // Step 4: add this thread's ray count to the shared counter.
    g_OutRayCount.InterlockedAdd(0, rayCount);
}
|
||||