move the symbol resolving functionality to the update application instead of creating a new one

This commit is contained in:
Tiago Rodrigues
2023-11-24 20:48:39 -05:00
parent a1ca8dd06a
commit de6e7d2fc3
16 changed files with 90 additions and 876 deletions

View File

@@ -60,7 +60,7 @@
<AdditionalIncludeDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include;$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\include\capstone;$(VcpkgManifestRoot)\vcpkg_installed\$(VcpkgTriplet)\$(VcpkgTriplet)\include\capstone;$(VcpkgRoot)\installed\$(VcpkgTriplet)\include\capstone</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ws2_32.lib;capstone.lib;dbghelp.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\debug\lib</AdditionalLibraryDirectories>
</Link>
@@ -82,7 +82,7 @@
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ws2_32.lib;capstone.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>ws2_32.lib;capstone.lib;dbghelp.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
<AdditionalLibraryDirectories>$(ProjectDir)..\..\..\vcpkg_installed\$(VcpkgTriplet)\lib</AdditionalLibraryDirectories>
</Link>
@@ -130,6 +130,9 @@
<ClCompile Include="..\..\..\zstd\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\fastcover.c" />
<ClCompile Include="..\..\..\zstd\dictBuilder\zdict.c" />
<ClCompile Include="..\..\src\OfflineSymbolResolver.cpp" />
<ClCompile Include="..\..\src\OfflineSymbolResolverAddr2Line.cpp" />
<ClCompile Include="..\..\src\OfflineSymbolResolverDbgHelper.cpp" />
<ClCompile Include="..\..\src\update.cpp" />
</ItemGroup>
<ItemGroup>
@@ -200,6 +203,7 @@
<ClInclude Include="..\..\..\zstd\zdict.h" />
<ClInclude Include="..\..\..\zstd\zstd.h" />
<ClInclude Include="..\..\..\zstd\zstd_errors.h" />
<ClInclude Include="..\..\src\OfflineSymbolResolver.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S" />

View File

@@ -159,6 +159,15 @@
<ClCompile Include="..\..\..\public\common\TracySystem.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\..\src\OfflineSymbolResolver.cpp">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\src\OfflineSymbolResolverAddr2Line.cpp">
<Filter>src</Filter>
</ClCompile>
<ClCompile Include="..\..\src\OfflineSymbolResolverDbgHelper.cpp">
<Filter>src</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\..\server\TracyCharUtil.hpp">
@@ -362,6 +371,9 @@
<ClInclude Include="..\..\..\public\common\TracyYield.hpp">
<Filter>common</Filter>
</ClInclude>
<ClInclude Include="..\..\src\OfflineSymbolResolver.h">
<Filter>src</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\..\..\zstd\decompress\huf_decompress_amd64.S">

View File

@@ -0,0 +1,171 @@
#include <fstream>
#include <iostream>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unordered_map>
#include "../../server/TracyWorker.hpp"
#include "../../zstd/zstd.h"
#include "OfflineSymbolResolver.h"
bool ApplyPathSubstitutions( std::string& path, const PathSubstitutionList& pathSubstitutionlist )
{
for( const auto& substitution : pathSubstitutionlist )
{
if( std::regex_match(path, substitution.first) )
{
path = std::regex_replace( path, substitution.first, substitution.second );
return true;
}
}
return false;
}
// Stores `str` in the worker's string storage and returns the index under which
// it was stored, wrapped as a tracy::StringIdx.
tracy::StringIdx AddSymbolString( tracy::Worker& worker, const std::string& str )
{
    // TODO: use string hash map to reduce potential string duplication?
    const auto stored = worker.StoreString( str.c_str(), str.length() );
    return tracy::StringIdx( stored.idx );
}
// Resolves every callstack frame whose name still starts with the "[unresolved]"
// marker and patches the frame's name (plus file/line when available) in `worker`.
//
// Frames are grouped by image name index so that ResolveSymbols() is invoked
// once per image. Before resolving, the image path is run through
// `pathSubstitutionlist`. When `verbose` is set, every patched frame is logged
// to stdout.
//
// Always returns true: an image whose entries cannot all be resolved is reported
// on stderr and skipped instead of failing the whole pass.
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstitutionlist, bool verbose )
{
    const uint64_t callstackFrameCount = worker.GetCallstackFrameCount();
    const std::string relativeSoNameMatch = "[unresolved]";

    std::cout << "Found " << callstackFrameCount << " callstack frames. Batching into image groups..." << std::endl;

    // batch the symbol queries by .so so we issue the least amount of requests
    using FrameEntriesPerImageIdx = std::unordered_map<uint32_t, FrameEntryList>;
    FrameEntriesPerImageIdx entriesPerImageIdx;

    auto& callstackFrameMap = worker.GetCallstackFrameMap();
    for( auto&& mapEntry : callstackFrameMap )
    {
        tracy::CallstackFrameData* frameDataPtr = mapEntry.second;
        if( !frameDataPtr ) continue;

        tracy::CallstackFrameData& frameData = *frameDataPtr;

        // Note: this creates a (possibly empty) group for every image seen, even
        // when none of its frames are unresolved; empty groups are skipped below.
        FrameEntryList& entries = entriesPerImageIdx[frameData.imageName.Idx()];

        for( uint8_t f = 0; f < frameData.size; f++ )
        {
            tracy::CallstackFrame& frame = frameData.data[f];

            // TODO: use a better way to identify symbols that are unresolved
            const char* nameStr = worker.GetString( frame.name );
            if( strncmp( nameStr, relativeSoNameMatch.c_str(), relativeSoNameMatch.length() ) == 0 )
            {
                // when doing offline resolving we pass the offset from the start of the shared library in the "symAddr"
                const uint64_t decodedOffset = frame.symAddr;
                entries.push_back( {&frame, decodedOffset} );
            }
        }
    }

    std::cout << "Batched into " << entriesPerImageIdx.size() << " unique image groups" << std::endl;

    // FIXME: the resolving of symbols here can be slow and could be done in parallel per "image"
    // - be careful with string allocation though as that would be not safe to do in parallel
    for( auto& imageGroup : entriesPerImageIdx )
    {
        FrameEntryList& entries = imageGroup.second;
        if( entries.empty() ) continue;

        tracy::StringIdx imageIdx( imageGroup.first );
        std::string imagePath = worker.GetString( imageIdx );

        std::cout << "Resolving " << entries.size() << " symbols for image: '"
                  << imagePath << "'" << std::endl;

        const bool substituted = ApplyPathSubstitutions( imagePath, pathSubstitutionlist );
        if( substituted )
        {
            std::cout << "\tPath substituted to: '" << imagePath << "'" << std::endl;
        }

        SymbolEntryList resolvedEntries;
        ResolveSymbols( imagePath, entries, resolvedEntries );

        // The resolved list is matched to the input list by position, so a size
        // mismatch means the results cannot be attributed to frames safely.
        if( resolvedEntries.size() != entries.size() )
        {
            std::cerr << " failed to resolve all entries! (got: "
                      << resolvedEntries.size() << ")" << std::endl;
            continue;
        }

        // finally patch the string with the resolved symbol data
        for( size_t i = 0; i < resolvedEntries.size(); ++i )
        {
            const SymbolEntry& symbolEntry = resolvedEntries[i];
            if( symbolEntry.name.empty() ) continue;

            tracy::CallstackFrame& frame = *entries[i].frame;

            if( verbose )
            {
                const char* nameStr = worker.GetString( frame.name );
                std::cout << "patching '" << nameStr << "' of '" << imagePath
                          << "' -> '" << symbolEntry.name << "'" << std::endl;
            }

            frame.name = AddSymbolString( worker, symbolEntry.name );

            // File and line are only overwritten when the resolver produced a file name.
            if( !symbolEntry.file.empty() )
            {
                frame.file = AddSymbolString( worker, symbolEntry.file );
                frame.line = symbolEntry.line;
            }
        }
    }

    return true;
}
// Parses the "REGEX;REPLACEMENT" path substitutions and then resolves and
// patches the unresolved symbols stored in `worker`.
//
// Each entry of `pathSubstitutionsStrings` is split at its first ';': the left
// part is compiled as a std::regex, the right part is the replacement text.
// Entries without a ';' or with an invalid regex are reported on stderr and
// ignored. `verbose` is forwarded to PatchSymbolsWithRegex().
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings, bool verbose )
{
    std::cout << "Resolving and patching symbols..." << std::endl;

    PathSubstitutionList pathSubstitutionList;
    for( const std::string& pathSubst : pathSubstitutionsStrings )
    {
        const std::size_t pos = pathSubst.find( ';' );
        if( pos == std::string::npos )
        {
            std::cerr << "Ignoring invalid path substitution: '" << pathSubst
                      << "' (please separate the regex of the string to replace with a ';')" << std::endl;
            continue;
        }

        try
        {
            // The std::regex constructor throws on a malformed pattern, in which
            // case nothing is appended to the list.
            pathSubstitutionList.emplace_back( std::regex( pathSubst.substr( 0, pos ) ),
                                               pathSubst.substr( pos + 1 ) );
        }
        catch( const std::exception& e )
        {
            std::cerr << "Ignoring invalid path substitution: '" << pathSubst
                      << "' (" << e.what() << ")" << std::endl;
            continue;
        }
    }

    if( !PatchSymbolsWithRegex( worker, pathSubstitutionList, verbose ) )
    {
        std::cerr << "Failed to patch symbols" << std::endl;
    }
}

View File

@@ -0,0 +1,40 @@
#ifndef __SYMBOLRESOLVER_HPP__
#define __SYMBOLRESOLVER_HPP__
#include <string>
#include <vector>
#include <regex>
#include <cstdint>
// Forward declarations of the Tracy types that this header refers to only by
// pointer or reference.
namespace tracy
{
struct CallstackFrame;
class Worker;
}
// One callstack frame queued for symbol resolution.
struct FrameEntry
{
    // Frame whose name/file/line get patched once the symbol is resolved.
    tracy::CallstackFrame* frame{ nullptr };
    // Offset that is handed to the resolver to look up the symbol in its image.
    uint64_t symbolOffset{ 0 };
};

// List of frames passed to ResolveSymbols() in a single call.
using FrameEntryList = std::vector<FrameEntry>;
// Result of resolving a single FrameEntry.
struct SymbolEntry
{
    // Resolved symbol name; left empty when nothing was produced.
    std::string name;
    // Source file of the symbol; left empty when unknown.
    std::string file;
    // Source line of the symbol; 0 when unknown.
    int line{ 0 };
};

// List of results produced by ResolveSymbols().
using SymbolEntryList = std::vector<SymbolEntry>;
// Resolves the symbols of `inputEntryList` against the image at `imagePath`,
// appending the results to `resolvedEntries`. The definition is supplied by a
// platform-specific source file (one guarded by `#ifdef _WIN32`, one by
// `#ifndef _WIN32`). Returns false when the resolver could not run at all.
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
SymbolEntryList& resolvedEntries );
// Parses "REGEX;REPLACEMENT" strings into a PathSubstitutionList and then
// patches the unresolved callstack frames of `worker`.
void PatchSymbols( tracy::Worker& worker, const std::vector<std::string>& pathSubstitutionsStrings, bool verbose = false );
// Ordered list of (pattern, replacement) pairs applied to image paths.
using PathSubstitutionList = std::vector<std::pair<std::regex, std::string> >;
// Same as PatchSymbols(), but takes already-compiled path substitutions.
bool PatchSymbolsWithRegex( tracy::Worker& worker, const PathSubstitutionList& pathSubstituionlist, bool verbose = false );
#endif // __SYMBOLRESOLVER_HPP__

View File

@@ -0,0 +1,114 @@
#ifndef _WIN32
#include "OfflineSymbolResolver.h"
#include <fstream>
#include <iostream>
#include <string>
#include <array>
#include <sstream>
#include <memory>
#include <stdio.h>
// Runs `cmd` through popen() and returns everything the command wrote to its
// standard output. Returns an empty string when the pipe cannot be opened.
std::string ExecShellCommand( const char* cmd )
{
    // The unique_ptr closes the pipe with pclose() on every exit path.
    std::unique_ptr<FILE, decltype(&pclose)> stream( popen( cmd, "r" ), pclose );

    std::string output;
    if( stream )
    {
        // Read the output in fixed-size chunks; fgets() NUL-terminates each one.
        std::array<char, 128> chunk;
        for(;;)
        {
            if( fgets( chunk.data(), chunk.size(), stream.get() ) == nullptr ) break;
            output += chunk.data();
        }
    }
    return output;
}
// Resolves symbols by shelling out to the system 'addr2line' tool.
class SymbolResolver
{
public:
    // Locates 'addr2line' through `which`. When it cannot be found the resolver
    // stays unusable and ResolveSymbols() always returns false.
    SymbolResolver()
    {
        std::stringstream result( ExecShellCommand("which addr2line") );
        std::getline(result, m_addr2LinePath);

        if( !m_addr2LinePath.length() )
        {
            std::cerr << "'addr2line' was not found in the system, please installed it" << std::endl;
        }
        else
        {
            std::cout << "Using 'addr2line' found at: '" << m_addr2LinePath.c_str() << "'" << std::endl;
        }
    }

    // Resolves all entries of `inputEntryList` against `imagePath` with a single
    // addr2line invocation and appends one SymbolEntry per input entry to
    // `resolvedEntries`, in the same order.
    // Returns false (appending nothing) when addr2line is not available.
    bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
                         SymbolEntryList& resolvedEntries )
    {
        if( m_addr2LinePath.empty() ) return false;

        // generate a single addr2line cmd line for all addresses in one invocation
        // Both paths are quoted so that spaces or shell metacharacters in them
        // cannot split or alter the command.
        std::stringstream ss;
        ss << ShellQuote( m_addr2LinePath ) << " -C -f -e " << ShellQuote( imagePath ) << " -a ";
        for( const FrameEntry& entry : inputEntryList )
        {
            ss << " 0x" << std::hex << entry.symbolOffset;
        }

        std::string resultStr = ExecShellCommand( ss.str().c_str() );
        std::stringstream result(resultStr);
        //printf("executing: '%s' got '%s'\n", ss.str().c_str(), result.str().c_str());

        // The output is 3 lines per entry with the following contents:
        // hex_address_of_symbol
        // symbol_name
        // file:line
        for( size_t i = 0; i < inputEntryList.size(); ++i )
        {
            const FrameEntry& inputEntry = inputEntryList[i];

            SymbolEntry newEntry;

            // The echoed address is read only to advance past it.
            std::string addr;
            std::getline( result, addr );

            std::getline( result, newEntry.name );
            if( newEntry.name == "??" )
            {
                newEntry.name = "[unknown] + " + std::to_string(inputEntry.symbolOffset);
            }

            std::string fileLine;
            std::getline( result, fileLine );
            if( fileLine != "??:?" )
            {
                // Split at the last ':' so that colons inside the file path are kept.
                const size_t pos = fileLine.find_last_of(':');
                if( pos != std::string::npos )
                {
                    newEntry.file = fileLine.substr( 0, pos );
                    const std::string lineStr = fileLine.substr( pos + 1 );
                    newEntry.line = strtol( lineStr.c_str(), nullptr, 10 );
                }
            }

            resolvedEntries.push_back( std::move(newEntry) );
        }

        return true;
    }

private:
    // Wraps `arg` in single quotes for use as one POSIX shell word; an embedded
    // single quote is emitted as '\'' (close quote, escaped quote, reopen quote).
    static std::string ShellQuote( const std::string& arg )
    {
        std::string quoted = "'";
        for( const char c : arg )
        {
            if( c == '\'' ) quoted += "'\\''";
            else quoted += c;
        }
        quoted += '\'';
        return quoted;
    }

    std::string m_addr2LinePath;
};
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
SymbolEntryList& resolvedEntries )
{
static SymbolResolver symbolResolver;
return symbolResolver.ResolveSymbols( imagePath, inputEntryList, resolvedEntries );
}
#endif // #ifndef _WIN32

View File

@@ -0,0 +1,130 @@
#ifdef _WIN32
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <dbghelp.h>
#include <cstdio>
#include <iostream>
#include <stdint.h>
#include <stdlib.h>
#include <windows.h>
#include <string>
#include "OfflineSymbolResolver.h"
class SymbolResolver
{
public:
SymbolResolver()
{
m_procHandle = GetCurrentProcess();
if( !SymInitialize(m_procHandle, NULL, FALSE) )
{
std::cerr << "SymInitialize() failed with: " << GetLastErrorString() << std::endl;
}
else
{
const DWORD symopts = SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES;
SymSetOptions( symopts );
m_dbgHelpInitialized = true;
}
}
~SymbolResolver()
{
SymCleanup( m_procHandle );
}
bool ResolveSymbolsForModule( const std::string& imagePath, const FrameEntryList& inputEntryList,
SymbolEntryList& resolvedEntries )
{
if( !m_dbgHelpInitialized ) return false;
ULONG64 moduleBase = SymLoadModuleEx( m_procHandle, NULL, imagePath.c_str(), NULL, 0, 0, NULL, 0 );
if( !moduleBase )
{
std::cerr << "SymLoadModuleEx() failed for module " << imagePath
<< ": " << GetLastErrorString() << std::endl;
return false;
}
for( size_t i = 0; i < inputEntryList.size(); ++i )
{
uint64_t offset = inputEntryList[i].symbolOffset;
DWORD64 address = moduleBase + offset;
SYMBOL_INFO* symbolInfo = (SYMBOL_INFO*)s_symbolResolutionBuffer;
symbolInfo->SizeOfStruct = sizeof(SYMBOL_INFO);
symbolInfo->MaxNameLen = MAX_SYM_NAME;
SymbolEntry newEntry;
if( SymFromAddr( m_procHandle, address, NULL, symbolInfo ) )
{
newEntry.name = symbolInfo->Name;
//std::cout << "Resolved symbol to: '" << newEntry.name << "'" << std::endl;
}
else
{
newEntry.name = "[unknown] + " + std::to_string(offset);
}
IMAGEHLP_LINE lineInfo = { 0 };
lineInfo.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
DWORD displaceMent = 0;
if ( SymGetLineFromAddr64( m_procHandle, address, &displaceMent, &lineInfo ) )
{
newEntry.file = lineInfo.FileName;
newEntry.line = int(lineInfo.LineNumber);
///std::cout << "\tline_file: " lineInfo.FileName << ":" << int(lineInfo.LineNumber) << std::endl;
}
resolvedEntries.push_back(std::move(newEntry));
}
SymUnloadModule64( m_procHandle, moduleBase );
return true;
}
private:
static const size_t symbolResolutionBufferSize = sizeof(SYMBOL_INFO) + MAX_SYM_NAME;
static char s_symbolResolutionBuffer[symbolResolutionBufferSize];
std::string GetLastErrorString()
{
DWORD error = GetLastError();
if (error == 0)
{
return "";
}
LPSTR messageBuffer = nullptr;
DWORD dwFlags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS;
size_t size = FormatMessageA( dwFlags, NULL, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPSTR)&messageBuffer, 0, NULL );
std::string message(messageBuffer, size);
LocalFree(messageBuffer);
return message;
}
bool m_dbgHelpInitialized = false;
HANDLE m_procHandle = nullptr;
};
char SymbolResolver::s_symbolResolutionBuffer[symbolResolutionBufferSize];
bool ResolveSymbols( const std::string& imagePath, const FrameEntryList& inputEntryList,
SymbolEntryList& resolvedEntries )
{
static SymbolResolver resolver;
return resolver.ResolveSymbolsForModule( imagePath, inputEntryList, resolvedEntries );
}
#endif // #ifdef _WIN32

View File

@@ -15,6 +15,8 @@
#include "../../zstd/zstd.h"
#include "../../getopt/getopt.h"
#include "OfflineSymbolResolver.h"
#ifdef __APPLE__
# define ftello64(x) ftello(x)
#elif defined _WIN32
@@ -32,6 +34,9 @@ void Usage()
printf( " l: locks, m: messages, p: plots, M: memory, i: frame images\n" );
printf( " c: context switches, s: sampling data, C: symbol code, S: source cache\n" );
printf( " -c: scan for source files missing in cache and add if found\n" );
printf( " -r resolve symbols and patch callstack frames\n");
printf( " -p: substitute symbol resolution path with an alternative: \"REGEX_MATCH;REPLACEMENT\"\n");
exit( 1 );
}
@@ -50,8 +55,11 @@ int main( int argc, char** argv )
int zstdLevel = 1;
bool buildDict = false;
bool cacheSource = false;
bool resolveSymbols = false;
std::vector<std::string> pathSubstitutions;
int c;
while( ( c = getopt( argc, argv, "hez:ds:c" ) ) != -1 )
while( ( c = getopt( argc, argv, "hez:ds:crp:" ) ) != -1 )
{
switch( c )
{
@@ -118,12 +126,19 @@ int main( int argc, char** argv )
case 'c':
cacheSource = true;
break;
case 'r':
resolveSymbols = true;
break;
case 'p':
pathSubstitutions.push_back(optarg);
break;
default:
Usage();
break;
}
}
if( argc - optind != 2 ) Usage();
if (argc != optind + 2) Usage();
const char* input = argv[optind];
const char* output = argv[optind+1];
@@ -144,7 +159,9 @@ int main( int argc, char** argv )
int inVer;
{
const auto t0 = std::chrono::high_resolution_clock::now();
tracy::Worker worker( *f, (tracy::EventType::Type)events, false );
const bool allowBgThreads = false;
const bool allowStringModification = resolveSymbols;
tracy::Worker worker( *f, (tracy::EventType::Type)events, allowBgThreads, allowStringModification);
#ifndef TRACY_NO_STATISTICS
while( !worker.AreSourceLocationZonesReady() ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
@@ -152,6 +169,8 @@ int main( int argc, char** argv )
if( cacheSource ) worker.CacheSourceFiles();
if ( resolveSymbols ) PatchSymbols( worker, pathSubstitutions );
auto w = std::unique_ptr<tracy::FileWrite>( tracy::FileWrite::Open( output, clev, zstdLevel ) );
if( !w )
{