Compare commits

...

33 Commits

Author SHA1 Message Date
Bartosz Taudul
d192badd23 Put atomics on separate cache lines. 2020-01-06 17:44:44 +01:00
Bartosz Taudul
6045199577 Add memory order information. 2020-01-06 17:40:57 +01:00
Bartosz Taudul
eb6c6a48f5 Free empty blocks on thread cleanup. 2020-01-06 17:04:11 +01:00
Bartosz Taudul
41fb476cb0 Yield thread in spin locks. 2020-01-06 17:01:03 +01:00
Bartosz Taudul
e93d72143b Move TracyYield.hpp to common. 2020-01-06 17:00:05 +01:00
Bartosz Taudul
f12c4f3e88 No need to specify inline, when it's explicit. 2020-01-06 16:58:16 +01:00
Bartosz Taudul
7b12fcdacf Store just one block in each producer. 2020-01-06 16:58:16 +01:00
Bartosz Taudul
ffdf5bbd95 Make QueueDataSize constexpr. 2020-01-06 15:16:27 +01:00
Bartosz Taudul
6db81069bf Broken dequeue directly from producers. 2020-01-05 22:41:02 +01:00
Bartosz Taudul
8aac5d49d0 Set blocks tail nullptr, if head is also nullptr. 2020-01-05 21:59:58 +01:00
Bartosz Taudul
d062e1699d Dequeue released blocks. 2020-01-03 17:24:06 +01:00
Bartosz Taudul
b3a9231808 Looking for tail is not needed in FreeBlocks() also. 2020-01-03 17:20:08 +01:00
Bartosz Taudul
a70ebef673 Make sure pointers are correct. 2020-01-03 17:19:50 +01:00
Bartosz Taudul
422229bf1a Use new dequeue interface. 2020-01-03 16:54:35 +01:00
Bartosz Taudul
c7944fda98 Blocks tail is already known. 2020-01-03 14:11:50 +01:00
Bartosz Taudul
235ac90b85 Set proper tail of blocks. 2020-01-03 01:24:11 +01:00
Bartosz Taudul
ab91480f6d Store tail atomic variable in register.
Also: use macros for lock free queue prepare and commit.
2020-01-03 01:11:38 +01:00
Bartosz Taudul
9fed0ef938 Flush data after queue delay is calibrated. 2020-01-03 00:38:12 +01:00
Bartosz Taudul
7d68b16341 Implement flushing data from producer. 2020-01-03 00:37:54 +01:00
Bartosz Taudul
89254ab353 Allow just freeing blocks. 2020-01-03 00:37:40 +01:00
Bartosz Taudul
8588b8b4a6 Don't lock memory on fast path. 2020-01-03 00:25:01 +01:00
Bartosz Taudul
a35e1e7a8c Keep lock free queue thread local data in one struct. 2020-01-02 23:55:56 +01:00
Bartosz Taudul
be5c94ee09 Prevent inlining next block preparation. 2020-01-02 23:52:03 +01:00
Bartosz Taudul
9b0044838e Enqueue can be a static operation. 2020-01-02 23:42:38 +01:00
Bartosz Taudul
4af26880dd Don't report CPU topology if delayed init is active.
Reporting topology requires producer to be available, which creates a
deadlock during delayed init data structures construction.

Calling GetProducer() results in a call to GetProfilerThreadData(),
which in turn calls GetProfilerData() to construct its thread local
variable. However, at this point we already are calling
GetProfilerData() (to construct the profiler itself). This would result
in an incorrect double construction of data, but the code already
prevents this by allowing init code to be entered only once. Hence the
deadlock.

Currently this is a non-issue, as no platform which can report CPU
topology needs to use delayed init.
2020-01-02 22:45:21 +01:00
Bartosz Taudul
e7cb1fe52b Remove concurrentqueue. 2020-01-02 22:45:21 +01:00
Bartosz Taudul
4355c686af Update memory requirements. 2020-01-02 22:45:21 +01:00
Bartosz Taudul
25a260dcd1 Missing header. 2020-01-02 22:45:21 +01:00
Bartosz Taudul
a298c4333e Use new lock-free queue.
Only enqueue is implemented, no way to dequeue items yet. Expect lots of
bugs and reduced performance.
2020-01-02 22:45:21 +01:00
Bartosz Taudul
6054a301c2 Direct enqueue of QueueItems. 2020-01-02 22:23:58 +01:00
Bartosz Taudul
40186956f6 Add inlines required to not duplicate symbols. 2020-01-02 22:23:58 +01:00
Bartosz Taudul
92fded825e Lock-free queue enqueue. 2020-01-02 22:23:58 +01:00
Bartosz Taudul
6b64fbc3be Producers and data blocks plumbing for lock-free queue. 2020-01-02 22:23:58 +01:00
17 changed files with 747 additions and 2067 deletions

View File

@@ -23,6 +23,7 @@
#include "common/TracySocket.cpp"
#include "client/tracy_rpmalloc.cpp"
#include "client/TracyDxt1.cpp"
#include "client/TracyLfq.cpp"
#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
# include "libbacktrace/alloc.cpp"

View File

@@ -179,14 +179,10 @@ static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth )
}
assert( dst - ptr == spaceNeeded + 4 );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::CallstackAlloc );
TracyLfqPrepare( QueueType::CallstackAlloc );
MemWrite( &item->callstackAlloc.ptr, (uint64_t)ptr );
MemWrite( &item->callstackAlloc.nativePtr, (uint64_t)Callstack( depth ) );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
static inline int LuaZoneBeginS( lua_State* L )
@@ -203,14 +199,10 @@ static inline int LuaZoneBeginS( lua_State* L )
lua_getinfo( L, "Snl", &dbg );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
#ifdef TRACY_CALLSTACK
const uint32_t depth = TRACY_CALLSTACK;
@@ -238,14 +230,10 @@ static inline int LuaZoneBeginNS( lua_State* L )
const auto name = lua_tolstring( L, 1, &nsz );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLocCallstack );
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
#ifdef TRACY_CALLSTACK
const uint32_t depth = TRACY_CALLSTACK;
@@ -275,14 +263,10 @@ static inline int LuaZoneBegin( lua_State* L )
lua_getinfo( L, "Snl", &dbg );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
return 0;
#endif
}
@@ -306,14 +290,10 @@ static inline int LuaZoneBeginN( lua_State* L )
const auto name = lua_tolstring( L, 1, &nsz );
const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, dbg.source, dbg.name ? dbg.name : dbg.short_src, name, nsz );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginAllocSrcLoc );
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
return 0;
#endif
}
@@ -331,13 +311,9 @@ static inline int LuaZoneEnd( lua_State* L )
}
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
TracyLfqPrepare( QueueType::ZoneEnd );
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
return 0;
}
@@ -355,16 +331,12 @@ static inline int LuaZoneText( lua_State* L )
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneText );
TracyLfqPrepare( QueueType::ZoneText );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
return 0;
}
@@ -382,16 +354,12 @@ static inline int LuaZoneName( lua_State* L )
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneName );
TracyLfqPrepare( QueueType::ZoneName );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
return 0;
}
@@ -404,17 +372,13 @@ static inline int LuaMessage( lua_State* L )
auto txt = lua_tostring( L, 1 );
const auto size = strlen( txt );
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::Message );
TracyLfqPrepare( QueueType::Message );
MemWrite( &item->message.time, Profiler::GetTime() );
MemWrite( &item->message.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
return 0;
}

View File

@@ -101,12 +101,8 @@ public:
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
const float period = 1.f;
Magic magic;
const auto thread = GetThreadHandle();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
TracyLfqPrepare( QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
MemWrite( &item->gpuNewContext.thread, thread );
@@ -118,7 +114,7 @@ public:
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
void Collect()
@@ -135,10 +131,6 @@ public:
}
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
while( m_tail != m_head )
{
GLint available;
@@ -148,12 +140,11 @@ public:
uint64_t time;
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuTime );
TracyLfqPrepare( QueueType::GpuTime );
MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
MemWrite( &item->gpuTime.context, m_context );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
m_tail = ( m_tail + 1 ) % QueryCount;
}
@@ -199,17 +190,13 @@ public:
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBegin );
TracyLfqPrepare( QueueType::GpuZoneBegin );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth )
@@ -223,18 +210,14 @@ public:
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
Magic magic;
const auto thread = GetThreadHandle();
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
MemWrite( &item->gpuZoneBegin.thread, thread );
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
GetProfiler().SendCallstack( depth );
}
@@ -247,16 +230,12 @@ public:
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
TracyLfqPrepare( QueueType::GpuZoneEnd );
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
private:

20
client/TracyLfq.cpp Normal file
View File

@@ -0,0 +1,20 @@
#include "TracyLfq.hpp"
namespace tracy
{
// Replaces the producer's current block with a fresh one taken from the queue.
//
// The calling thread's `lfq` cache is redirected to the fresh block, the fresh
// block is published through m_block, and the previously owned block is handed
// to LockFreeQueue::ReleaseBlock(). Returns the fresh block.
LfqBlock* LfqProducerImpl::NextBlock()
{
    LfqBlock* const fresh = m_queue->GetFreeBlock();
    assert( fresh );
    assert( fresh->next.load( std::memory_order_relaxed ) == nullptr );

    // Tag the block with the owning thread and point the thread-local fast path at it.
    fresh->thread = m_thread;
    lfq.dataEnd = fresh->dataEnd;
    lfq.tail = &fresh->tail;

    // Publish the fresh block first, only then give up the previous one.
    LfqBlock* const retired = m_block.load( std::memory_order_relaxed );
    m_block.store( fresh, std::memory_order_release );
    m_queue->ReleaseBlock( retired );

    return fresh;
}
}

482
client/TracyLfq.hpp Normal file
View File

@@ -0,0 +1,482 @@
#ifndef __TRACYLFQ_HPP__
#define __TRACYLFQ_HPP__
#include <atomic>
#include <assert.h>
#include <stdint.h>
#include <thread>
#include "../common/TracyApi.h"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
#include "../common/TracyForceInline.hpp"
#include "../common/TracyQueue.hpp"
#include "../common/TracySystem.hpp"
#include "../common/TracyYield.hpp"
// Reserves space for one queue item of the given QueueType in the calling
// thread's current block. Declares three locals in the enclosing scope:
// `item` (the QueueItem to fill in; its hdr.type is already written by
// LfqProducer::PrepareNext), plus `__nextPtr` and `__tail`, which are consumed
// by TracyLfqCommit. Must be followed by TracyLfqCommit in the same scope.
#define TracyLfqPrepare( type ) \
char* __nextPtr; \
QueueItem* item; \
auto& __tail = LfqProducer::PrepareNext( item, __nextPtr, type );
// Publishes the item prepared by TracyLfqPrepare by storing the new tail
// pointer (see LfqProducer::CommitNext).
#define TracyLfqCommit \
LfqProducer::CommitNext( __tail, __nextPtr );
// Same as TracyLfqPrepare, but with fully qualified tracy:: names and with
// the helper locals named `nextPtr` and `tail`.
#define TracyLfqPrepareC( type ) \
char* nextPtr; \
tracy::QueueItem* item; \
auto& tail = tracy::LfqProducer::PrepareNext( item, nextPtr, type );
// Counterpart of TracyLfqPrepareC; publishes the prepared item.
#define TracyLfqCommitC \
tracy::LfqProducer::CommitNext( tail, nextPtr );
namespace tracy
{
class LockFreeQueue;
class LfqProducer;
TRACY_API LfqProducer& GetProducer();
// Fixed-size data buffer used by the lock-free queue.
//
// `head` and `tail` delimit the not-yet-consumed bytes inside `data`
// (both start at the beginning of the buffer), `next` links blocks into the
// queue's lists and `thread` identifies the thread that filled the block.
class LfqBlock
{
public:
    enum { BlockSize = 64*1024 };

    tracy_force_inline LfqBlock()
        : head( nullptr )
        , tail( nullptr )
        , next( nullptr )
        , dataEnd( data + BlockSize )
        , thread( 0 )
    {
        // Point both cursors at the start of the buffer.
        Reset();
    }

    LfqBlock( const LfqBlock& ) = delete;
    LfqBlock( LfqBlock&& ) = delete;
    LfqBlock& operator=( const LfqBlock& ) = delete;
    LfqBlock& operator=( LfqBlock&& ) = delete;

    // Marks the block as empty by rewinding head and tail to the buffer start.
    tracy_force_inline void Reset()
    {
        head.store( data, std::memory_order_relaxed );
        tail.store( data, std::memory_order_release );
    }

    // Each hot member is placed on its own 64-byte aligned slot.
    alignas(64) std::atomic<char*> head;        // read cursor
    alignas(64) std::atomic<char*> tail;        // write cursor
    alignas(64) std::atomic<LfqBlock*> next;    // intrusive list link
    alignas(64) const char* dataEnd;            // one past the last byte of data
    uint64_t thread;                            // handle of the producing thread
    char data[BlockSize];
};
// Per-thread cache of the current block's write state, used by the
// LfqProducer::PrepareNext fast path. Both fields are refreshed whenever the
// thread's producer switches to a new block (PrepareThread / NextBlock).
struct LfqData
{
// One past the last usable byte of the current block (copy of LfqBlock::dataEnd).
const char* dataEnd;
// Address of the current block's tail (write cursor) atomic.
std::atomic<char*>* tail;
};
// Thread-local instance; only declared here, the definition lives elsewhere.
extern thread_local LfqData lfq;
// Per-thread producer state owned by a LockFreeQueue.
//
// Producers are linked into the queue's producer list through m_next and are
// recycled between threads: m_available tells whether the slot can be claimed,
// m_active whether a thread is currently writing through it, and m_block is
// the block the owning thread is filling at the moment.
class LfqProducerImpl
{
public:
    // All members are initialized explicitly, in declaration order. A
    // default-constructed std::atomic holds an indeterminate value before
    // C++20, so m_next must not be left out, and m_thread is given a defined
    // value until PrepareThread() assigns the real thread handle.
    tracy_force_inline LfqProducerImpl( LockFreeQueue* queue )
        : m_next( nullptr )
        , m_active( false )
        , m_available( true )
        , m_block( nullptr )
        , m_thread( 0 )
        , m_queue( queue )
    {
        assert( m_queue );
    }

    // Binds the producer to the calling thread and acquires its first block.
    tracy_force_inline void PrepareThread();
    // Gives up the current block (freed if empty, released to the queue otherwise).
    tracy_force_inline void CleanupThread();

    // Slow path of LfqProducer::PrepareNext: switches to a fresh block and
    // reserves `sz` bytes at its start. `ptr` receives the start of the
    // reservation, `nextPtr` the tail value to publish on commit. Returns the
    // tail atomic of the new block.
    tracy_force_inline std::atomic<char*>& PrepareNext( char*& ptr, char*& nextPtr, size_t sz )
    {
        auto blk = NextBlock();
        auto& tail = blk->tail;
        ptr = tail.load( std::memory_order_relaxed );
        nextPtr = ptr + sz;
        return tail;
    }

    // Kept out of line so that the block switch does not bloat every call site.
    tracy_no_inline LfqBlock* NextBlock();

    // Returns the current block to the free list and starts over with a new one.
    inline void FlushDataImpl();

    alignas(64) std::atomic<LfqProducerImpl*> m_next;
    alignas(64) std::atomic<bool> m_active;
    alignas(64) std::atomic<bool> m_available;
    alignas(64) std::atomic<LfqBlock*> m_block;

    LfqProducerImpl( const LfqProducerImpl& ) = delete;
    LfqProducerImpl( LfqProducerImpl&& ) = delete;
    LfqProducerImpl& operator=( const LfqProducerImpl& ) = delete;
    LfqProducerImpl& operator=( LfqProducerImpl&& ) = delete;

private:
    uint64_t m_thread;
    LockFreeQueue* m_queue;
};
// Thread-facing handle to a LfqProducerImpl slot of a LockFreeQueue.
//
// Construction claims an idle producer from the queue and activates it;
// destruction deactivates it and hands it back. The static members implement
// the enqueue path and operate on the calling thread's state (`lfq` and
// GetProducer()).
class LfqProducer
{
public:
    inline LfqProducer( LockFreeQueue& queue );
    inline ~LfqProducer();

    LfqProducer( const LfqProducer& ) = delete;
    LfqProducer( LfqProducer&& ) = delete;
    LfqProducer& operator=( const LfqProducer& ) = delete;
    inline LfqProducer& operator=( LfqProducer&& ) noexcept;

    // Reserves room for a queue item of the given type and writes the type
    // into its header. `item` receives the reserved item, `nextPtr` the tail
    // value to pass to CommitNext(). Returns the tail atomic to commit to.
    static tracy_force_inline std::atomic<char*>& PrepareNext( QueueItem*& item, char*& nextPtr, QueueType type )
    {
        char* raw;
        auto& tailRef = PrepareNext( raw, nextPtr, QueueDataSize[(uint8_t)type] );
        item = (QueueItem*)raw;
        MemWrite( &item->hdr.type, type );
        return tailRef;
    }

    // Reserves `sz` raw bytes. The fast path only touches the thread-local
    // `lfq` cache; when the current block cannot hold `sz` more bytes the
    // producer switches to a new block.
    static tracy_force_inline std::atomic<char*>& PrepareNext( char*& ptr, char*& nextPtr, size_t sz )
    {
        auto& tail = *lfq.tail;
        ptr = tail.load( std::memory_order_relaxed );
        const auto reservedEnd = ptr + sz;
        if( reservedEnd > lfq.dataEnd )
        {
            // Does not fit: let the producer fetch a fresh block.
            return GetProducer().m_prod->PrepareNext( ptr, nextPtr, sz );
        }
        nextPtr = reservedEnd;
        return tail;
    }

    // Publishes a previously prepared reservation by advancing the tail.
    static tracy_force_inline void CommitNext( std::atomic<char*>& tail, char* nextPtr )
    {
        tail.store( nextPtr, std::memory_order_release );
    }

    // Flushes the calling thread's producer (see LfqProducerImpl::FlushDataImpl).
    static tracy_force_inline void FlushData()
    {
        GetProducer().m_prod->FlushDataImpl();
    }

private:
    LfqProducerImpl* m_prod;
    LockFreeQueue* m_queue;
};
class LockFreeQueue
{
public:
LockFreeQueue()
: m_freeBlocks( nullptr )
, m_blocksHead( nullptr )
, m_blocksTail( nullptr )
, m_producers( nullptr )
, m_currentProducer( nullptr )
{
const auto numCpus = std::thread::hardware_concurrency();
LfqBlock* prev = nullptr;
for( unsigned int i=0; i<numCpus; i++ )
{
auto blk = AllocNewBlock();
blk->next.store( prev, std::memory_order_relaxed );
prev = blk;
}
m_freeBlocks.store( prev, std::memory_order_release );
LfqProducerImpl* prevProd = nullptr;
for( unsigned int i=0; i<numCpus; i++ )
{
auto prod = AllocNewProducer();
prod->m_next.store( prevProd, std::memory_order_relaxed );
prevProd = prod;
}
m_producers.store( prevProd, std::memory_order_release );
}
// Don't free anything, application is shutting down anyway
~LockFreeQueue()
{
}
LfqBlock* GetFreeBlock()
{
LfqBlock* ptr = m_freeBlocks.load( std::memory_order_acquire );
for(;;)
{
if( !ptr ) return AllocNewBlock();
auto next = ptr->next.load( std::memory_order_acquire );
if( m_freeBlocks.compare_exchange_strong( ptr, next, std::memory_order_release, std::memory_order_relaxed ) )
{
ptr->next.store( nullptr, std::memory_order_relaxed );
ptr->Reset();
return ptr;
}
}
}
void ReleaseBlock( LfqBlock* blk )
{
assert( blk );
assert( blk->next.load( std::memory_order_relaxed ) == nullptr );
auto tail = m_blocksTail.load( std::memory_order_acquire );
for(;;)
{
if( !tail )
{
auto head = m_blocksHead.load( std::memory_order_acquire );
if( !head )
{
if( m_blocksHead.compare_exchange_strong( head, blk, std::memory_order_release, std::memory_order_relaxed ) )
{
assert( m_blocksTail.load( std::memory_order_relaxed ) == nullptr );
m_blocksTail.store( blk, std::memory_order_release );
return;
}
}
}
else
{
auto next = tail->next.load( std::memory_order_acquire );
if( !next )
{
if( tail->next.compare_exchange_strong( next, blk, std::memory_order_release, std::memory_order_relaxed ) )
{
m_blocksTail.store( blk, std::memory_order_release );
return;
}
}
}
}
}
void FreeBlock( LfqBlock* blk )
{
assert( blk );
auto head = m_freeBlocks.load( std::memory_order_relaxed );
blk->next.store( head, std::memory_order_relaxed );
while( !m_freeBlocks.compare_exchange_weak( head, blk, std::memory_order_release, std::memory_order_relaxed ) ) { blk->next.store( head, std::memory_order_relaxed ); YieldThread(); }
}
LfqProducerImpl* GetIdleProducer()
{
LfqProducerImpl* prod = m_producers.load( std::memory_order_acquire );
assert( prod );
for(;;)
{
bool available = prod->m_available.load( std::memory_order_acquire );
if( available )
{
if( prod->m_available.compare_exchange_strong( available, false, std::memory_order_release, std::memory_order_relaxed ) ) return prod;
}
prod = prod->m_next.load( std::memory_order_acquire );
if( !prod )
{
prod = AllocNewProducer();
prod->m_available.store( false, std::memory_order_release );
auto head = m_producers.load( std::memory_order_relaxed );
prod->m_next.store( head, std::memory_order_relaxed );
while( !m_producers.compare_exchange_weak( head, prod, std::memory_order_release, std::memory_order_relaxed ) ) { prod->m_next.store( head, std::memory_order_relaxed ); YieldThread(); }
return prod;
}
}
}
void ReleaseProducer( LfqProducerImpl* prod )
{
assert( prod->m_available.load( std::memory_order_relaxed ) == false );
prod->m_available.store( true, std::memory_order_release );
}
size_t Dequeue( char* ptr, size_t sz, uint64_t& thread )
{
{
auto blk = m_blocksHead.load( std::memory_order_acquire );
if( blk != nullptr )
{
auto next = blk->next.load( std::memory_order_acquire );
if( m_blocksHead.compare_exchange_strong( blk, next, std::memory_order_release, std::memory_order_relaxed ) )
{
if( next == nullptr )
{
m_blocksTail.store( nullptr, std::memory_order_release );
}
auto head = blk->head.load( std::memory_order_relaxed );
auto tail = blk->tail.load( std::memory_order_acquire );
const auto datasz = tail - head;
if( datasz > 0 )
{
thread = blk->thread;
memcpy( ptr, head, datasz );
FreeBlock( blk );
return datasz;
}
FreeBlock( blk );
}
}
}
{
LfqBlock* blk = nullptr;
char* head;
char* tail;
auto prod = m_currentProducer;
if( !prod ) prod = m_producers.load( std::memory_order_acquire );
while( prod )
{
if( prod->m_active.load( std::memory_order_acquire ) == true )
{
blk = prod->m_block.load( std::memory_order_acquire );
head = blk->head.load( std::memory_order_relaxed );
tail = blk->tail.load( std::memory_order_acquire );
if( tail - head != 0 )
{
break;
}
}
prod = prod->m_next.load( std::memory_order_acquire );
}
m_currentProducer = prod;
if( prod )
{
const auto datasz = tail - head;
assert( datasz != 0 );
thread = blk->thread;
memcpy( ptr, head, datasz );
blk->head.store( tail, std::memory_order_release );
return datasz;
}
}
return 0;
}
LockFreeQueue( const LockFreeQueue& ) = delete;
LockFreeQueue( LockFreeQueue&& ) = delete;
LockFreeQueue& operator=( const LockFreeQueue& ) = delete;
LockFreeQueue& operator=( LockFreeQueue&& ) = delete;
private:
LfqBlock* AllocNewBlock()
{
auto blk = (LfqBlock*)tracy_malloc( sizeof( LfqBlock ) );
new(blk) LfqBlock();
return blk;
}
LfqProducerImpl* AllocNewProducer()
{
auto prod = (LfqProducerImpl*)tracy_malloc( sizeof( LfqProducerImpl ) );
new(prod) LfqProducerImpl( this );
return prod;
}
alignas(64) std::atomic<LfqBlock*> m_freeBlocks;
alignas(64) std::atomic<LfqBlock*> m_blocksHead;
alignas(64) std::atomic<LfqBlock*> m_blocksTail;
alignas(64) std::atomic<LfqProducerImpl*> m_producers;
alignas(64) LfqProducerImpl* m_currentProducer;
};
// Claims an idle producer slot from `queue`, binds it to the calling thread
// and only then flags it as active.
inline LfqProducer::LfqProducer( LockFreeQueue& queue )
    : m_prod( queue.GetIdleProducer() )
    , m_queue( &queue )
{
    assert( m_queue );

    // The producer must own a block before it is advertised as active.
    m_prod->PrepareThread();

    assert( !m_prod->m_active.load( std::memory_order_relaxed ) );
    m_prod->m_active.store( true, std::memory_order_release );
}
// Deactivates the producer, gives up its block and returns the slot to the
// queue. Does nothing when the producer was moved away (m_prod is null).
inline LfqProducer::~LfqProducer()
{
    if( !m_prod ) return;

    assert( m_prod->m_active.load( std::memory_order_relaxed ) );
    m_prod->m_active.store( false, std::memory_order_release );
    m_prod->CleanupThread();
    m_queue->ReleaseProducer( m_prod );
}
// Takes over the producer slot held by `other`, leaving `other` empty so that
// its destructor becomes a no-op.
//
// NOTE(review): a producer already held by *this is overwritten without being
// deactivated or returned to the queue. Whether it should be released here
// depends on how callers use this operator - confirm before changing.
inline LfqProducer& LfqProducer::operator=( LfqProducer&& other ) noexcept
{
    // Self-assignment would otherwise null out our own producer and leave the
    // slot permanently claimed.
    if( this == &other ) return *this;

    m_prod = other.m_prod;
    m_queue = other.m_queue;
    other.m_prod = nullptr;
    other.m_queue = nullptr;
    return *this;
}
// Binds the producer to the calling thread: records the thread handle, takes
// a free block from the queue, points the thread-local `lfq` cache at it and
// publishes it as the producer's current block.
tracy_force_inline void LfqProducerImpl::PrepareThread()
{
    m_thread = detail::GetThreadHandleImpl();

    LfqBlock* const fresh = m_queue->GetFreeBlock();
    assert( fresh );
    assert( fresh->next.load( std::memory_order_relaxed ) == nullptr );
    fresh->thread = m_thread;

    // Redirect the enqueue fast path of this thread to the new block.
    lfq.dataEnd = fresh->dataEnd;
    lfq.tail = &fresh->tail;

    m_block.store( fresh, std::memory_order_release );
}
// Detaches the current block from the producer. A block that still holds
// unread data is queued for the consumer, an empty one goes straight back to
// the free list.
tracy_force_inline void LfqProducerImpl::CleanupThread()
{
    LfqBlock* owned = m_block.load( std::memory_order_relaxed );
    assert( owned );

    // Clear m_block; `owned` is refreshed with the current value on failure.
    while( !m_block.compare_exchange_weak( owned, nullptr, std::memory_order_release, std::memory_order_relaxed ) )
    {
        YieldThread();
    }

    char* const readPos = owned->head.load( std::memory_order_relaxed );
    char* const writePos = owned->tail.load( std::memory_order_acquire );
    if( readPos != writePos )
    {
        m_queue->ReleaseBlock( owned );
    }
    else
    {
        m_queue->FreeBlock( owned );
    }
}
void LfqProducerImpl::FlushDataImpl()
{
LfqBlock* blk = m_block.load( std::memory_order_acquire );
m_block.store( nullptr, std::memory_order_release );
if( blk ) m_queue->FreeBlock( blk );
PrepareThread();
}
}
#endif

View File

@@ -23,11 +23,7 @@ public:
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
TracyLfqPrepare( QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
@@ -35,7 +31,7 @@ public:
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
LockableCtx( const LockableCtx& ) = delete;
@@ -43,18 +39,14 @@ public:
tracy_force_inline ~LockableCtx()
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockTerminate );
TracyLfqPrepare( QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
MemWrite( &item->lockTerminate.type, LockType::Lockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
tracy_force_inline bool BeforeLock()
@@ -225,11 +217,7 @@ public:
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
TracyLfqPrepare( QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
@@ -239,7 +227,7 @@ public:
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
SharedLockableCtx( const SharedLockableCtx& ) = delete;
@@ -247,11 +235,7 @@ public:
tracy_force_inline ~SharedLockableCtx()
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::LockTerminate );
TracyLfqPrepare( QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
MemWrite( &item->lockTerminate.type, LockType::SharedLockable );
@@ -260,7 +244,7 @@ public:
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
tracy_force_inline bool BeforeLock()

View File

@@ -43,6 +43,7 @@
#include <chrono>
#include <limits>
#include <new>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <thread>
@@ -149,11 +150,6 @@ struct InitTimeWrapper
int64_t val;
};
struct ProducerWrapper
{
tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
};
struct ThreadHandleWrapper
{
uint64_t val;
@@ -587,14 +583,10 @@ LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
}
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::CrashReport );
TracyLfqPrepare( QueueType::CrashReport );
item->crashReport.time = Profiler::GetTime();
item->crashReport.text = (uint64_t)s_crashText;
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" );
}
@@ -628,12 +620,8 @@ LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
CloseHandle( h );
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::Crash );
tail.store( magic + 1, std::memory_order_release );
TracyLfqPrepare( QueueType::Crash );
TracyLfqCommit;
}
std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) );
@@ -823,14 +811,10 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
}
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::CrashReport );
TracyLfqPrepare( QueueType::CrashReport );
item->crashReport.time = Profiler::GetTime();
item->crashReport.text = (uint64_t)s_crashText;
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" );
}
@@ -853,12 +837,8 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
closedir( dp );
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::Crash );
tail.store( magic + 1, std::memory_order_release );
TracyLfqPrepare( QueueType::Crash );
TracyLfqCommit;
}
std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) );
@@ -870,7 +850,7 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
#endif
enum { QueuePrealloc = 256 * 1024 };
thread_local LfqData lfq;
static Profiler* s_instance;
static Thread* s_thread;
@@ -882,7 +862,7 @@ static Thread* s_sysTraceThread = nullptr;
#ifdef TRACY_DELAYED_INIT
struct ThreadNameData;
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
TRACY_API LockFreeQueue& GetQueue();
struct RPMallocInit { RPMallocInit() { rpmalloc_initialize(); } };
@@ -896,25 +876,18 @@ struct ProfilerData
{
int64_t initTime = SetupHwTimer();
RPMallocInit rpmalloc_init;
moodycamel::ConcurrentQueue<QueueItem> queue;
LockFreeQueue queue;
Profiler profiler;
std::atomic<uint32_t> lockCounter { 0 };
std::atomic<uint8_t> gpuCtxCounter { 0 };
std::atomic<ThreadNameData*> threadNameData { nullptr };
};
struct ProducerWrapper
{
ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {}
moodycamel::ProducerToken detail;
tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
};
struct ProfilerThreadData
{
ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {}
ProfilerThreadData( ProfilerData& data ) : producer( data.queue ), gpuCtx( { nullptr } ) {}
RPMallocInit rpmalloc_init;
ProducerWrapper token;
LfqProducer producer;
GpuCtxWrapper gpuCtx;
# ifdef TRACY_ON_DEMAND
LuaZoneState luaZoneState;
@@ -949,9 +922,9 @@ static ProfilerThreadData& GetProfilerThreadData()
return data;
}
TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; }
TRACY_API LfqProducer& GetProducer() { return GetProfilerThreadData().producer; }
TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; }
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return GetProfilerData().queue; }
TRACY_API LockFreeQueue& GetQueue() { return GetProfilerData().queue; }
TRACY_API int64_t GetInitTime() { return GetProfilerData().initTime; }
TRACY_API std::atomic<uint32_t>& GetLockCounter() { return GetProfilerData().lockCounter; }
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; }
@@ -972,13 +945,12 @@ TRACY_API void InitRPMallocThread()
// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this.
// 1a. But s_queue is needed for initialization of variables in point 2.
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;
extern LockFreeQueue s_lfq;
thread_local RPMallocInit init_order(106) s_rpmalloc_thread_init;
// 2. If these variables were placed in the .CRT$XCB section, they would be initialized only in the main thread.
thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue );
thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) };
thread_local LfqProducer init_order(107) s_producer( s_lfq );
thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() };
# ifdef _MSC_VER
@@ -989,7 +961,7 @@ thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThr
static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() };
static RPMallocInit init_order(102) s_rpmalloc_init;
moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
LockFreeQueue init_order(103) s_lfq;
std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
std::atomic<uint8_t> init_order(104) s_gpuCtxCounter( 0 );
@@ -1005,9 +977,9 @@ thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false };
static Profiler init_order(105) s_profiler;
// Accessors over the file-level s_* objects.
// NOTE(review): this listing looks like a diff rendering in which GetToken()/GetProducer()
// and the two GetQueue() variants are before/after versions of the same accessor — confirm
// which one is live before relying on either.

// Returns the explicit-producer pointer held by s_token.
TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return s_token.ptr; }
// Returns s_producer.
TRACY_API LfqProducer& GetProducer() { return s_producer; }
// Returns the s_profiler instance.
TRACY_API Profiler& GetProfiler() { return s_profiler; }
// Returns the moodycamel queue s_queue.
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return s_queue; }
// Returns the LockFreeQueue s_lfq.
TRACY_API LockFreeQueue& GetQueue() { return s_lfq; }
// Returns the value wrapped by s_initTime.
TRACY_API int64_t GetInitTime() { return s_initTime.val; }
// Returns a reference to the atomic lock counter s_lockCounter.
TRACY_API std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; }
// Returns a reference to the atomic GPU context counter s_gpuCtxCounter.
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; }
@@ -1026,7 +998,6 @@ TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; }
# endif
#endif
enum { BulkSize = TargetFrameSize / QueueItemSize };
Profiler::Profiler()
: m_timeBegin( 0 )
@@ -1043,7 +1014,7 @@ Profiler::Profiler()
, m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) )
, m_bufferOffset( 0 )
, m_bufferStart( 0 )
, m_itemBuf( (QueueItem*)tracy_malloc( sizeof( QueueItem ) * BulkSize ) )
, m_itemBuf( (char*)tracy_malloc( TargetFrameSize ) )
, m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) )
, m_serialQueue( 1024*1024 )
, m_serialDequeue( 1024*1024 )
@@ -1063,8 +1034,7 @@ Profiler::Profiler()
#ifndef TRACY_DELAYED_INIT
# ifdef _MSC_VER
// 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here.
s_token_detail = moodycamel::ProducerToken( s_queue );
s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) };
s_producer = LfqProducer( s_lfq );
s_threadHandle = ThreadHandleWrapper { m_mainThread };
# endif
#endif
@@ -1219,8 +1189,6 @@ void Profiler::Worker()
memcpy( welcome.hostInfo, hostinfo, hisz );
memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz );
moodycamel::ConsumerToken token( GetQueue() );
ListenSocket listen;
if( !listen.Listen( port, 8 ) )
{
@@ -1232,7 +1200,7 @@ void Profiler::Worker()
return;
}
ClearQueues( token );
ClearQueues();
}
}
@@ -1320,7 +1288,7 @@ void Profiler::Worker()
#ifdef TRACY_ON_DEMAND
const auto currentTime = GetTime();
ClearQueues( token );
ClearQueues();
m_connectionId.fetch_add( 1, std::memory_order_release );
m_isConnected.store( true, std::memory_order_release );
#endif
@@ -1362,7 +1330,7 @@ void Profiler::Worker()
for(;;)
{
ProcessSysTime();
const auto status = Dequeue( token );
const auto status = Dequeue();
const auto serialStatus = DequeueSerial();
if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
{
@@ -1424,7 +1392,7 @@ void Profiler::Worker()
return;
}
ClearQueues( token );
ClearQueues();
m_sock = listen.Accept();
if( m_sock )
@@ -1462,7 +1430,7 @@ void Profiler::Worker()
// Client is exiting. Send items remaining in queues.
for(;;)
{
const auto status = Dequeue( token );
const auto status = Dequeue();
const auto serialStatus = DequeueSerial();
if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
{
@@ -1506,7 +1474,7 @@ void Profiler::Worker()
return;
}
}
while( Dequeue( token ) == DequeueStatus::DataDequeued ) {}
while( Dequeue() == DequeueStatus::DataDequeued ) {}
while( DequeueSerial() == DequeueStatus::DataDequeued ) {}
if( m_bufferOffset != m_bufferStart )
{
@@ -1565,18 +1533,14 @@ void Profiler::CompressWorker()
CompressImageDxt1( (const char*)fi->image, etc1buf, w, h );
tracy_free( fi->image );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::FrameImage );
TracyLfqPrepare( QueueType::FrameImage );
MemWrite( &item->frameImage.image, (uint64_t)etc1buf );
MemWrite( &item->frameImage.frame, fi->frame );
MemWrite( &item->frameImage.w, w );
MemWrite( &item->frameImage.h, h );
uint8_t flip = fi->flip;
MemWrite( &item->frameImage.flip, flip );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
fi++;
}
@@ -1649,13 +1613,31 @@ static void FreeAssociatedMemory( const QueueItem& item )
}
}
void Profiler::ClearQueues( moodycamel::ConsumerToken& token )
static void FreeBufferAssociatedMemory( const char* ptr, size_t sz )
{
const auto end = ptr + sz;
while( ptr < end )
{
const auto type = MemRead<uint8_t>( ptr );
const auto itemsz = QueueDataSize[type];
if( type < (int)QueueType::Terminate )
{
QueueItem item;
memcpy( &item, ptr, itemsz );
FreeAssociatedMemory( item );
}
ptr += itemsz;
}
}
void Profiler::ClearQueues()
{
for(;;)
{
const auto sz = GetQueue().try_dequeue_bulk( token, m_itemBuf, BulkSize );
uint64_t thread;
const auto sz = GetQueue().Dequeue( m_itemBuf, TargetFrameSize, thread );
if( sz == 0 ) break;
for( size_t i=0; i<sz; i++ ) FreeAssociatedMemory( m_itemBuf[i] );
FreeBufferAssociatedMemory( m_itemBuf, sz );
}
ClearSerial();
@@ -1683,10 +1665,10 @@ void Profiler::ClearSerial()
m_serialDequeue.clear();
}
Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
Profiler::DequeueStatus Profiler::Dequeue()
{
uint64_t threadId;
const auto sz = GetQueue().try_dequeue_bulk_single( token, m_itemBuf, BulkSize, threadId );
const auto sz = GetQueue().Dequeue( m_itemBuf, TargetFrameSize, threadId );
if( sz > 0 )
{
if( threadId != m_threadCtx )
@@ -1700,18 +1682,19 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
}
auto end = m_itemBuf + sz;
auto item = m_itemBuf;
while( item != end )
auto data = m_itemBuf;
while( data < end )
{
uint64_t ptr;
const auto idx = MemRead<uint8_t>( &item->hdr.idx );
const auto idx = MemRead<uint8_t>( data );
const auto itemsz = QueueDataSize[idx];
if( idx < (int)QueueType::Terminate )
{
switch( (QueueType)idx )
{
case QueueType::ZoneText:
case QueueType::ZoneName:
ptr = MemRead<uint64_t>( &item->zoneText.text );
ptr = MemRead<uint64_t>( data + offsetof( QueueItem, zoneText.text ) );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
tracy_free( (void*)ptr );
break;
@@ -1719,12 +1702,12 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
case QueueType::MessageColor:
case QueueType::MessageCallstack:
case QueueType::MessageColorCallstack:
ptr = MemRead<uint64_t>( &item->message.text );
ptr = MemRead<uint64_t>( data + offsetof( QueueItem, message.text ) );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
tracy_free( (void*)ptr );
break;
case QueueType::MessageAppInfo:
ptr = MemRead<uint64_t>( &item->message.text );
ptr = MemRead<uint64_t>( data + offsetof( QueueItem, message.text ) );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
#ifndef TRACY_ON_DEMAND
tracy_free( (void*)ptr );
@@ -1733,37 +1716,37 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
case QueueType::ZoneBeginAllocSrcLoc:
case QueueType::ZoneBeginAllocSrcLocCallstack:
{
int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, zoneBegin.time ) );
int64_t dt = t - m_refTimeThread;
m_refTimeThread = t;
MemWrite( &item->zoneBegin.time, dt );
ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
MemWrite( data + offsetof( QueueItem, zoneBegin.time ), dt );
ptr = MemRead<uint64_t>( data + offsetof( QueueItem, zoneBegin.srcloc ) );
SendSourceLocationPayload( ptr );
tracy_free( (void*)ptr );
break;
}
case QueueType::Callstack:
ptr = MemRead<uint64_t>( &item->callstack.ptr );
ptr = MemRead<uint64_t>( data + offsetof( QueueItem, callstack.ptr ) );
SendCallstackPayload( ptr );
tracy_free( (void*)ptr );
break;
case QueueType::CallstackAlloc:
ptr = MemRead<uint64_t>( &item->callstackAlloc.nativePtr );
ptr = MemRead<uint64_t>( data + offsetof( QueueItem, callstackAlloc.nativePtr ) );
if( ptr != 0 )
{
CutCallstack( (void*)ptr, "lua_pcall" );
SendCallstackPayload( ptr );
tracy_free( (void*)ptr );
}
ptr = MemRead<uint64_t>( &item->callstackAlloc.ptr );
ptr = MemRead<uint64_t>( data + offsetof( QueueItem, callstackAlloc.ptr ) );
SendCallstackAlloc( ptr );
tracy_free( (void*)ptr );
break;
case QueueType::FrameImage:
{
ptr = MemRead<uint64_t>( &item->frameImage.image );
const auto w = MemRead<uint16_t>( &item->frameImage.w );
const auto h = MemRead<uint16_t>( &item->frameImage.h );
ptr = MemRead<uint64_t>( data + offsetof( QueueItem, frameImage.image ) );
const auto w = MemRead<uint16_t>( data + offsetof( QueueItem, frameImage.w ) );
const auto h = MemRead<uint16_t>( data + offsetof( QueueItem, frameImage.h ) );
const auto csz = size_t( w * h / 2 );
SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData );
tracy_free( (void*)ptr );
@@ -1772,67 +1755,67 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
case QueueType::ZoneBegin:
case QueueType::ZoneBeginCallstack:
{
int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, zoneBegin.time ) );
int64_t dt = t - m_refTimeThread;
m_refTimeThread = t;
MemWrite( &item->zoneBegin.time, dt );
MemWrite( data + offsetof( QueueItem, zoneBegin.time ), dt );
break;
}
case QueueType::ZoneEnd:
{
int64_t t = MemRead<int64_t>( &item->zoneEnd.time );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, zoneEnd.time ) );
int64_t dt = t - m_refTimeThread;
m_refTimeThread = t;
MemWrite( &item->zoneEnd.time, dt );
MemWrite( data + offsetof( QueueItem, zoneEnd.time ), dt );
break;
}
case QueueType::GpuZoneBegin:
case QueueType::GpuZoneBeginCallstack:
{
int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, gpuZoneBegin.cpuTime ) );
int64_t dt = t - m_refTimeThread;
m_refTimeThread = t;
MemWrite( &item->gpuZoneBegin.cpuTime, dt );
MemWrite( data + offsetof( QueueItem, gpuZoneBegin.cpuTime ), dt );
break;
}
case QueueType::GpuZoneEnd:
{
int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, gpuZoneEnd.cpuTime ) );
int64_t dt = t - m_refTimeThread;
m_refTimeThread = t;
MemWrite( &item->gpuZoneEnd.cpuTime, dt );
MemWrite( data + offsetof( QueueItem, gpuZoneEnd.cpuTime ), dt );
break;
}
case QueueType::PlotData:
{
int64_t t = MemRead<int64_t>( &item->plotData.time );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, plotData.time ) );
int64_t dt = t - m_refTimeThread;
m_refTimeThread = t;
MemWrite( &item->plotData.time, dt );
MemWrite( data + offsetof( QueueItem, plotData.time ), dt );
break;
}
case QueueType::ContextSwitch:
{
int64_t t = MemRead<int64_t>( &item->contextSwitch.time );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, contextSwitch.time ) );
int64_t dt = t - m_refTimeCtx;
m_refTimeCtx = t;
MemWrite( &item->contextSwitch.time, dt );
MemWrite( data + offsetof( QueueItem, contextSwitch.time ), dt );
break;
}
case QueueType::ThreadWakeup:
{
int64_t t = MemRead<int64_t>( &item->threadWakeup.time );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, threadWakeup.time ) );
int64_t dt = t - m_refTimeCtx;
m_refTimeCtx = t;
MemWrite( &item->threadWakeup.time, dt );
MemWrite( data + offsetof( QueueItem, threadWakeup.time ), dt );
break;
}
case QueueType::GpuTime:
{
int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
int64_t t = MemRead<int64_t>( data + offsetof( QueueItem, gpuTime.gpuTime ) );
int64_t dt = t - m_refTimeGpu;
m_refTimeGpu = t;
MemWrite( &item->gpuTime.gpuTime, dt );
MemWrite( data + offsetof( QueueItem, gpuTime.gpuTime ), dt );
break;
}
default:
@@ -1840,8 +1823,8 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
break;
}
}
if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost;
item++;
if( !AppendData( data, itemsz ) ) return DequeueStatus::ConnectionLost;
data += itemsz;
}
}
else
@@ -1851,20 +1834,22 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
return DequeueStatus::DataDequeued;
}
Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop )
Profiler::DequeueStatus Profiler::DequeueContextSwitches( int64_t& timeStop )
{
const auto sz = GetQueue().try_dequeue_bulk( token, m_itemBuf, BulkSize );
uint64_t threadId;
const auto sz = GetQueue().Dequeue( m_itemBuf, TargetFrameSize, threadId );
if( sz > 0 )
{
auto end = m_itemBuf + sz;
auto item = m_itemBuf;
while( item != end )
auto data = m_itemBuf;
while( data < end )
{
FreeAssociatedMemory( *item );
const auto idx = MemRead<uint8_t>( &item->hdr.idx );
const auto idx = MemRead<uint8_t>( data );
const auto itemsz = QueueDataSize[idx];
FreeBufferAssociatedMemory( data, itemsz );
if( idx == (uint8_t)QueueType::ContextSwitch )
{
const auto csTime = MemRead<int64_t>( &item->contextSwitch.time );
const auto csTime = MemRead<int64_t>( data + offsetof( QueueItem, contextSwitch.time ) );
if( csTime > timeStop )
{
timeStop = -1;
@@ -1872,12 +1857,12 @@ Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::Con
}
int64_t dt = csTime - m_refTimeCtx;
m_refTimeCtx = csTime;
MemWrite( &item->contextSwitch.time, dt );
if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) ) return DequeueStatus::ConnectionLost;
MemWrite( data + offsetof( QueueItem, contextSwitch.time ), dt );
if( !AppendData( data, itemsz ) ) return DequeueStatus::ConnectionLost;
}
else if( idx == (uint8_t)QueueType::ThreadWakeup )
{
const auto csTime = MemRead<int64_t>( &item->threadWakeup.time );
const auto csTime = MemRead<int64_t>( data + offsetof( QueueItem, threadWakeup.time ) );
if( csTime > timeStop )
{
timeStop = -1;
@@ -1885,10 +1870,10 @@ Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::Con
}
int64_t dt = csTime - m_refTimeCtx;
m_refTimeCtx = csTime;
MemWrite( &item->threadWakeup.time, dt );
if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) ) return DequeueStatus::ConnectionLost;
MemWrite( data + offsetof( QueueItem, threadWakeup.time ), dt );
if( !AppendData( data, itemsz ) ) return DequeueStatus::ConnectionLost;
}
item++;
data += itemsz;
}
}
else
@@ -2151,7 +2136,7 @@ void Profiler::SendCallstackPayload( uint64_t _ptr )
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] );
AppendDataUnsafe( &l16, sizeof( l16 ) );
if( compile_time_condition<sizeof( uintptr_t ) == sizeof( uint64_t )>::value )
if( sizeof( uintptr_t ) == sizeof( uint64_t ) )
{
AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz );
}
@@ -2280,15 +2265,13 @@ bool Profiler::HandleServerQuery()
void Profiler::HandleDisconnect()
{
moodycamel::ConsumerToken token( GetQueue() );
#ifdef TRACY_HAS_SYSTEM_TRACING
if( s_sysTraceThread )
{
auto timestamp = GetTime();
for(;;)
{
const auto status = DequeueContextSwitches( token, timestamp );
const auto status = DequeueContextSwitches( timestamp );
if( status == DequeueStatus::ConnectionLost )
{
return;
@@ -2337,7 +2320,7 @@ void Profiler::HandleDisconnect()
if( !SendData( (const char*)&terminate, 1 ) ) return;
for(;;)
{
ClearQueues( token );
ClearQueues();
if( m_sock->HasData() )
{
while( m_sock->HasData() )
@@ -2388,7 +2371,7 @@ void Profiler::CalibrateTimer()
void Profiler::CalibrateDelay()
{
enum { Iterations = 50000 };
enum { Iterations = LfqBlock::BlockSize / ( QueueDataSize[(uint8_t)QueueType::ZoneBegin] + QueueDataSize[(uint8_t)QueueType::ZoneEnd] ) };
auto mindiff = std::numeric_limits<int64_t>::max();
for( int i=0; i<Iterations * 10; i++ )
@@ -2404,52 +2387,36 @@ void Profiler::CalibrateDelay()
m_delay = m_resolution;
#else
enum { Events = Iterations * 2 }; // start + end
static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
moodycamel::ProducerToken ptoken_detail( GetQueue() );
moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptoken = GetQueue().get_explicit_producer( ptoken_detail );
static const tracy::SourceLocationData __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
const auto t0 = GetTime();
for( int i=0; i<Iterations; i++ )
{
{
Magic magic;
auto& tail = ptoken->get_tail_index();
auto item = ptoken->enqueue_begin( magic );
TracyLfqPrepare( QueueType::ZoneBegin );
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
{
Magic magic;
auto& tail = ptoken->get_tail_index();
auto item = ptoken->enqueue_begin( magic );
TracyLfqPrepare( QueueType::ZoneEnd );
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
MemWrite( &item->zoneEnd.time, GetTime() );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
}
const auto t1 = GetTime();
const auto dt = t1 - t0;
m_delay = dt / Events;
enum { Bulk = 1000 };
moodycamel::ConsumerToken token( GetQueue() );
int left = Events;
QueueItem item[Bulk];
while( left != 0 )
{
const auto sz = GetQueue().try_dequeue_bulk( token, item, std::min( left, (int)Bulk ) );
assert( sz > 0 );
left -= (int)sz;
}
assert( GetQueue().size_approx() == 0 );
LfqProducer::FlushData();
#endif
}
void Profiler::ReportTopology()
{
#ifndef TRACY_DELAYED_INIT
struct CpuData
{
uint32_t package;
@@ -2521,15 +2488,11 @@ void Profiler::ReportTopology()
idx++;
}
Magic magic;
auto token = GetToken();
for( uint32_t i=0; i<numcpus; i++ )
{
auto& data = cpuData[i];
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::CpuTopology );
TracyLfqPrepare( QueueType::CpuTopology );
MemWrite( &item->cpuTopology.package, data.package );
MemWrite( &item->cpuTopology.core, data.core );
MemWrite( &item->cpuTopology.thread, data.thread );
@@ -2538,7 +2501,7 @@ void Profiler::ReportTopology()
DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
tracy_free( cpuData );
@@ -2570,15 +2533,11 @@ void Profiler::ReportTopology()
cpuData[i].core = uint32_t( atoi( buf ) );
}
Magic magic;
auto token = GetToken();
for( uint32_t i=0; i<numcpus; i++ )
{
auto& data = cpuData[i];
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::CpuTopology );
TracyLfqPrepare( QueueType::CpuTopology );
MemWrite( &item->cpuTopology.package, data.package );
MemWrite( &item->cpuTopology.core, data.core );
MemWrite( &item->cpuTopology.thread, data.thread );
@@ -2587,11 +2546,12 @@ void Profiler::ReportTopology()
DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
tracy_free( cpuData );
#endif
#endif
}
void Profiler::SendCallstack( int depth, const char* skipBefore )
@@ -2600,13 +2560,9 @@ void Profiler::SendCallstack( int depth, const char* skipBefore )
auto ptr = Callstack( depth );
CutCallstack( ptr, skipBefore );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::Callstack );
TracyLfqPrepare( QueueType::Callstack );
MemWrite( &item->callstack.ptr, ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
#endif
}
@@ -2647,14 +2603,10 @@ void Profiler::ProcessSysTime()
{
m_sysTimeLast = t;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::SysTimeReport );
TracyLfqPrepare( QueueType::SysTimeReport );
MemWrite( &item->sysTime.time, GetTime() );
MemWrite( &item->sysTime.sysTime, sysTime );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
}
}
@@ -2662,11 +2614,7 @@ void Profiler::ProcessSysTime()
void Profiler::ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val )
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ParamSetup );
TracyLfqPrepare( QueueType::ParamSetup );
tracy::MemWrite( &item->paramSetup.idx, idx );
tracy::MemWrite( &item->paramSetup.name, (uint64_t)name );
tracy::MemWrite( &item->paramSetup.isBool, (uint8_t)isBool );
@@ -2676,7 +2624,7 @@ void Profiler::ParameterSetup( uint32_t idx, const char* name, bool isBool, int3
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
void Profiler::HandleParameter( uint64_t payload )
@@ -2707,24 +2655,16 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_l
#ifndef TRACY_NO_VERIFY
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
tracy::MemWrite( &item->zoneValidation.id, id );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
#endif
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBegin );
TracyLfqPrepareC( tracy::QueueType::ZoneBegin );
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
return ctx;
}
@@ -2743,24 +2683,16 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___trac
#ifndef TRACY_NO_VERIFY
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
tracy::MemWrite( &item->zoneValidation.id, id );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
#endif
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBeginCallstack );
TracyLfqPrepareC( tracy::QueueType::ZoneBeginCallstack );
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
tracy::GetProfiler().SendCallstack( depth );
@@ -2785,24 +2717,16 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int act
#ifndef TRACY_NO_VERIFY
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
tracy::MemWrite( &item->zoneValidation.id, id );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
#endif
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBeginAllocSrcLoc );
TracyLfqPrepareC( tracy::QueueType::ZoneBeginAllocSrcLoc );
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
return ctx;
}
@@ -2825,24 +2749,16 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srclo
#ifndef TRACY_NO_VERIFY
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
tracy::MemWrite( &item->zoneValidation.id, id );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
#endif
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBeginAllocSrcLocCallstack );
TracyLfqPrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack );
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
tracy::GetProfiler().SendCallstack( depth );
@@ -2854,23 +2770,15 @@ TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx )
if( !ctx.active ) return;
#ifndef TRACY_NO_VERIFY
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
tracy::MemWrite( &item->zoneValidation.id, ctx.id );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
#endif
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneEnd );
TracyLfqPrepareC( tracy::QueueType::ZoneEnd );
tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
}
@@ -2882,23 +2790,15 @@ TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size
ptr[size] = '\0';
#ifndef TRACY_NO_VERIFY
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
tracy::MemWrite( &item->zoneValidation.id, ctx.id );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
#endif
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneText );
TracyLfqPrepareC( tracy::QueueType::ZoneText );
tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
}
@@ -2910,23 +2810,15 @@ TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size
ptr[size] = '\0';
#ifndef TRACY_NO_VERIFY
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
tracy::MemWrite( &item->zoneValidation.id, ctx.id );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
#endif
{
tracy::Magic magic;
auto token = tracy::GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneName );
TracyLfqPrepareC( tracy::QueueType::ZoneName );
tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommitC;
}
}

View File

@@ -6,10 +6,10 @@
#include <stdint.h>
#include <string.h>
#include "tracy_concurrentqueue.h"
#include "TracyCallstack.hpp"
#include "TracySysTime.hpp"
#include "TracyFastVector.hpp"
#include "TracyLfq.hpp"
#include "../common/TracyQueue.hpp"
#include "../common/TracyAlign.hpp"
#include "../common/TracyAlloc.hpp"
@@ -51,7 +51,7 @@ struct GpuCtxWrapper
GpuCtx* ptr;
};
TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken();
TRACY_API LfqProducer& GetProducer();
TRACY_API Profiler& GetProfiler();
TRACY_API std::atomic<uint32_t>& GetLockCounter();
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
@@ -77,8 +77,6 @@ struct LuaZoneState
};
#endif
using Magic = moodycamel::ConcurrentQueueDefaultTraits::index_t;
typedef void(*ParameterCallback)( uint32_t idx, int32_t val );
@@ -152,14 +150,10 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::FrameMarkMsg );
TracyLfqPrepare( QueueType::FrameMarkMsg );
MemWrite( &item->frameMark.time, GetTime() );
MemWrite( &item->frameMark.name, uint64_t( name ) );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
static tracy_force_inline void SendFrameMark( const char* name, QueueType type )
@@ -201,16 +195,12 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::PlotData );
TracyLfqPrepare( QueueType::PlotData );
MemWrite( &item->plotData.name, (uint64_t)name );
MemWrite( &item->plotData.time, GetTime() );
MemWrite( &item->plotData.type, PlotDataType::Int );
MemWrite( &item->plotData.data.i, val );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
static tracy_force_inline void PlotData( const char* name, float val )
@@ -218,16 +208,12 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::PlotData );
TracyLfqPrepare( QueueType::PlotData );
MemWrite( &item->plotData.name, (uint64_t)name );
MemWrite( &item->plotData.time, GetTime() );
MemWrite( &item->plotData.type, PlotDataType::Float );
MemWrite( &item->plotData.data.f, val );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
static tracy_force_inline void PlotData( const char* name, double val )
@@ -235,25 +221,17 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::PlotData );
TracyLfqPrepare( QueueType::PlotData );
MemWrite( &item->plotData.name, (uint64_t)name );
MemWrite( &item->plotData.time, GetTime() );
MemWrite( &item->plotData.type, PlotDataType::Double );
MemWrite( &item->plotData.data.d, val );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
static tracy_force_inline void ConfigurePlot( const char* name, PlotFormatType type )
{
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::PlotConfig );
TracyLfqPrepare( QueueType::PlotConfig );
MemWrite( &item->plotConfig.name, (uint64_t)name );
MemWrite( &item->plotConfig.type, (uint8_t)type );
@@ -261,7 +239,7 @@ public:
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
@@ -269,17 +247,13 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
}
@@ -289,14 +263,10 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)txt );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
}
@@ -306,20 +276,16 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
MemWrite( &item->messageColor.time, GetTime() );
MemWrite( &item->messageColor.text, (uint64_t)ptr );
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
}
@@ -329,31 +295,23 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
MemWrite( &item->messageColor.time, GetTime() );
MemWrite( &item->messageColor.text, (uint64_t)txt );
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
}
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
{
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::MessageAppInfo );
TracyLfqPrepare( QueueType::MessageAppInfo );
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)ptr );
@@ -361,7 +319,7 @@ public:
GetProfiler().DeferItem( *item );
#endif
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
static tracy_force_inline void MemAlloc( const void* ptr, size_t size )
@@ -434,13 +392,9 @@ public:
{
#ifdef TRACY_HAS_CALLSTACK
auto ptr = Callstack( depth );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::Callstack );
TracyLfqPrepare( QueueType::Callstack );
MemWrite( &item->callstack.ptr, ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
#endif
}
@@ -526,10 +480,10 @@ private:
static void LaunchCompressWorker( void* ptr ) { ((Profiler*)ptr)->CompressWorker(); }
void CompressWorker();
void ClearQueues( tracy::moodycamel::ConsumerToken& token );
void ClearQueues();
void ClearSerial();
DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token );
DequeueStatus DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop );
DequeueStatus Dequeue();
DequeueStatus DequeueContextSwitches( int64_t& timeStop );
DequeueStatus DequeueSerial();
bool AppendData( const void* data, size_t len );
bool CommitData();
@@ -576,7 +530,7 @@ private:
MemWrite( &item->memAlloc.time, GetTime() );
MemWrite( &item->memAlloc.thread, thread );
MemWrite( &item->memAlloc.ptr, (uint64_t)ptr );
if( compile_time_condition<sizeof( size ) == 4>::value )
if( sizeof( size ) == 4 )
{
memcpy( &item->memAlloc.size, &size, 4 );
memset( &item->memAlloc.size + 4, 0, 2 );
@@ -626,7 +580,7 @@ private:
int m_bufferOffset;
int m_bufferStart;
QueueItem* m_itemBuf;
char* m_itemBuf;
char* m_lz4Buf;
FastVector<QueueItem> m_serialQueue, m_serialDequeue;

View File

@@ -24,14 +24,10 @@ public:
#endif
{
if( !m_active ) return;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBegin );
TracyLfqPrepare( QueueType::ZoneBegin );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true )
@@ -43,14 +39,10 @@ public:
#endif
{
if( !m_active ) return;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneBeginCallstack );
TracyLfqPrepare( QueueType::ZoneBeginCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
GetProfiler().SendCallstack( depth );
}
@@ -61,13 +53,9 @@ public:
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneEnd );
TracyLfqPrepare( QueueType::ZoneEnd );
MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
tracy_force_inline void Text( const char* txt, size_t size )
@@ -76,16 +64,12 @@ public:
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneText );
TracyLfqPrepare( QueueType::ZoneText );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
tracy_force_inline void Name( const char* txt, size_t size )
@@ -94,16 +78,12 @@ public:
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
Magic magic;
auto token = GetToken();
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ZoneName );
TracyLfqPrepare( QueueType::ZoneName );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
private:

View File

@@ -9,6 +9,7 @@
# endif
# define INITGUID
# include <algorithm>
# include <assert.h>
# include <string.h>
# include <windows.h>
@@ -65,11 +66,7 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
{
const auto cswitch = (const CSwitch*)record->UserData;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ContextSwitch );
TracyLfqPrepare( QueueType::ContextSwitch );
MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart );
memcpy( &item->contextSwitch.oldThread, &cswitch->oldThreadId, sizeof( cswitch->oldThreadId ) );
memcpy( &item->contextSwitch.newThread, &cswitch->newThreadId, sizeof( cswitch->newThreadId ) );
@@ -78,21 +75,17 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason );
MemWrite( &item->contextSwitch.state, cswitch->oldThreadState );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
else if( hdr.EventDescriptor.Opcode == 50 )
{
const auto rt = (const ReadyThread*)record->UserData;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ThreadWakeup );
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
memcpy( &item->threadWakeup.thread, &rt->threadId, sizeof( rt->threadId ) );
memset( ((char*)&item->threadWakeup.thread)+4, 0, 4 );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
}
@@ -279,14 +272,10 @@ void SysTraceSendExternalName( uint64_t thread )
{
{
uint64_t _pid = pid;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::TidToPid );
TracyLfqPrepare( QueueType::TidToPid );
MemWrite( &item->tidToPid.tid, thread );
MemWrite( &item->tidToPid.pid, _pid );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
if( pid == 4 )
{
@@ -607,18 +596,14 @@ static void HandleTraceLine( const char* line )
uint8_t reason = 100;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ContextSwitch );
TracyLfqPrepare( QueueType::ContextSwitch );
MemWrite( &item->contextSwitch.time, time );
MemWrite( &item->contextSwitch.oldThread, oldPid );
MemWrite( &item->contextSwitch.newThread, newPid );
MemWrite( &item->contextSwitch.cpu, cpu );
MemWrite( &item->contextSwitch.reason, reason );
MemWrite( &item->contextSwitch.state, oldState );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
else if( memcmp( line, "sched_wakeup", 12 ) == 0 )
{
@@ -629,14 +614,10 @@ static void HandleTraceLine( const char* line )
const auto pid = ReadNumber( line );
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::ThreadWakeup );
TracyLfqPrepare( QueueType::ThreadWakeup );
MemWrite( &item->threadWakeup.time, time );
MemWrite( &item->threadWakeup.thread, pid );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
}
@@ -830,14 +811,10 @@ void SysTraceSendExternalName( uint64_t thread )
{
{
uint64_t _pid = pid;
Magic magic;
auto token = GetToken();
auto& tail = token->get_tail_index();
auto item = token->enqueue_begin( magic );
MemWrite( &item->hdr.type, QueueType::TidToPid );
TracyLfqPrepare( QueueType::TidToPid );
MemWrite( &item->tidToPid.tid, thread );
MemWrite( &item->tidToPid.pid, _pid );
tail.store( magic + 1, std::memory_order_release );
TracyLfqCommit;
}
sprintf( fn, "/proc/%i/comm", pid );
f = fopen( fn, "rb" );

File diff suppressed because it is too large Load Diff

View File

@@ -423,7 +423,7 @@ struct QueueItem
enum { QueueItemSize = sizeof( QueueItem ) };
static const size_t QueueDataSize[] = {
static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueZoneText ),
sizeof( QueueHeader ) + sizeof( QueueZoneText ), // zone name
sizeof( QueueHeader ) + sizeof( QueueMessage ),

View File

@@ -1,4 +1,4 @@
https://github.com/aras-p/ToyPathTracer
Modified to render only 10 frames. Client part requires 12 GB, server part
Modified to render only 10 frames. Client part requires 8 GB, server part
requires 7 GB.

View File

@@ -2438,7 +2438,6 @@ The following libraries are included with and used by the Tracy Profiler:
\item 2-clause BSD license
\begin{itemize}
\item concurrentqueue -- \url{https://github.com/cameron314/concurrentqueue}
\item LZ4 -- \url{https://github.com/lz4/lz4}
\item xxHash -- \url{https://github.com/Cyan4973/xxHash}
\end{itemize}

View File

@@ -10,9 +10,9 @@
#include <thread>
#include "TracyFileHeader.hpp"
#include "TracyYield.hpp"
#include "../common/tracy_lz4.hpp"
#include "../common/TracyForceInline.hpp"
#include "../common/TracyYield.hpp"
namespace tracy
{

View File

@@ -29,12 +29,12 @@
#include "../common/TracyProtocol.hpp"
#include "../common/TracySystem.hpp"
#include "../common/TracyYield.hpp"
#include "TracyFileRead.hpp"
#include "TracyFileWrite.hpp"
#include "TracyTaskDispatch.hpp"
#include "TracyVersion.hpp"
#include "TracyWorker.hpp"
#include "TracyYield.hpp"
#include "tracy_flat_hash_map.hpp"