Merge pull request #1390 from wolfpld/slomp/cuda-tests

Relocating existing CUDA tests
This commit is contained in:
Bartosz Taudul
2026-06-05 21:50:11 +02:00
committed by GitHub
6 changed files with 63 additions and 51 deletions

View File

@@ -1,49 +0,0 @@
# Build rules for the Tracy CUDA Graph reproducer and two standalone CUPTI probes.
#
# Targets:
#   all (default)  -> repro                 optimized reproducer
#   debug          -> repro_debug           unoptimized build with debug info
#   investigate    -> test_corr_reuse       CUPTI correlationId probe
#   investigate2   -> test_graphid_recycle  CUPTI graphId probe
#   clean                                   remove every produced binary/object

# Location of Tracy's public sources, relative to this directory.
TRACY_PUBLIC := ../../public

# Toolchain.
NVCC := nvcc
CXX := g++

# CUDA/CUPTI headers and libraries (default Linux install location).
CUPTI_INC := /usr/local/cuda/include
CUPTI_LIB := /usr/local/cuda/lib64

TRACY_SRCS := $(TRACY_PUBLIC)/TracyClient.cpp
INCLUDES := -I$(TRACY_PUBLIC) -I$(CUPTI_INC)
LIBS := -L$(CUPTI_LIB) -lcuda -lcupti -lpthread -ldl

# Host (g++) and device (nvcc) flag sets; every build defines TRACY_ENABLE.
CXXFLAGS_REL := -O2 -DTRACY_ENABLE
CXXFLAGS_DBG := -g -O0 -DTRACY_ENABLE
NVCCFLAGS_REL := -arch=native -O2 -DTRACY_ENABLE
NVCCFLAGS_DBG := -arch=native -g -O0 -DTRACY_ENABLE

.PHONY: all debug investigate investigate2 clean

all: repro

debug: repro_debug

investigate: test_corr_reuse

investigate2: test_graphid_recycle

# Release build
repro: repro.cu tracy_client.o
	$(NVCC) $(NVCCFLAGS_REL) $(INCLUDES) -o $@ $< tracy_client.o $(LIBS)

tracy_client.o: $(TRACY_SRCS)
	$(CXX) $(CXXFLAGS_REL) $(INCLUDES) -c -o $@ $<

# Debug build (asserts enabled, no NDEBUG)
repro_debug: repro.cu tracy_client_debug.o
	$(NVCC) $(NVCCFLAGS_DBG) $(INCLUDES) -o $@ $< tracy_client_debug.o $(LIBS)

tracy_client_debug.o: $(TRACY_SRCS)
	$(CXX) $(CXXFLAGS_DBG) $(INCLUDES) -c -o $@ $<

# Investigation: correlationId uniqueness per graph launch (no Tracy dependency)
test_corr_reuse: test_corr_reuse.cu
	$(NVCC) $(NVCCFLAGS_REL) $(INCLUDES) -o $@ $< $(LIBS)

# Investigation: does CUPTI recycle graphId values after cudaGraphExecDestroy?
test_graphid_recycle: test_graphid_recycle.cu
	$(NVCC) $(NVCCFLAGS_REL) $(INCLUDES) -o $@ $< $(LIBS)

clean:
	rm -f repro repro_debug test_corr_reuse test_graphid_recycle tracy_client.o tracy_client_debug.o

View File

@@ -0,0 +1,60 @@
cmake_minimum_required(VERSION 3.18)

# Default to compiling for the GPU(s) installed in the build machine, but only
# when the user has not picked architectures themselves, either with
# -DCMAKE_CUDA_ARCHITECTURES=... or through the CUDAARCHS environment variable.
# The "native" value is understood by CMake 3.24 and newer; older versions keep
# the compiler's default architecture.
# This has to happen before project(): once the CUDA language is enabled the
# variable is always defined, so a user choice could no longer be told apart
# from the default and would be silently overridden.
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24"
   AND NOT DEFINED CMAKE_CUDA_ARCHITECTURES
   AND NOT DEFINED ENV{CUDAARCHS})
  set(CMAKE_CUDA_ARCHITECTURES native)
endif()

project(CUDAGraphReproTests LANGUAGES CXX CUDA)

# Host and device code are both built as C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)

# Root of the Tracy checkout; override with -DTRACY_PATH=<dir> when this
# directory is built from somewhere other than its place inside the repository.
set(TRACY_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../.."
    CACHE PATH "Root of the Tracy repository")
set(TRACY_PUBLIC "${TRACY_PATH}/public")

# Fail early with an actionable message instead of a generic
# "cannot find source file" error at generate time.
if(NOT EXISTS "${TRACY_PUBLIC}/TracyClient.cpp")
  message(FATAL_ERROR
    "TracyClient.cpp not found in '${TRACY_PUBLIC}'. "
    "Set TRACY_PATH to the root of the Tracy repository.")
endif()

# When ON, TRACY_ENABLE is defined for the Tracy client and everything linking it.
option(TRACY_ENABLE "Enable profiling" ON)

# CUDAToolkit provides the CUDA::cupti and CUDA::cuda_driver imported targets;
# Threads provides Threads::Threads.
find_package(CUDAToolkit REQUIRED)
find_package(Threads REQUIRED)
# Tracy client, built as a static library from the single TracyClient.cpp
# source (CXX-only: no CUDA sources are part of this target).
# Everything is PUBLIC so that executables linking TracyClient inherit the
# include path, the thread/dl libraries and the TRACY_ENABLE definition.
add_library(TracyClient STATIC "${TRACY_PUBLIC}/TracyClient.cpp")
target_include_directories(TracyClient PUBLIC "${TRACY_PUBLIC}")
target_link_libraries(TracyClient
  PUBLIC
    Threads::Threads
    ${CMAKE_DL_LIBS}
)
# TRACY_ENABLE is defined only while the TRACY_ENABLE option is ON.
target_compile_definitions(TracyClient
  PUBLIC
    $<$<BOOL:${TRACY_ENABLE}>:TRACY_ENABLE>
)
# repro: Tracy-integrated CUDA Graph reproducer
add_executable(repro repro.cu)
target_link_libraries(repro
  PRIVATE
    TracyClient
    CUDA::cupti
    CUDA::cuda_driver
)

# Standalone CUPTI probes (no Tracy dependency); each one is a single .cu file
# named after its target.
set(_cupti_probes test_corr_reuse test_graphid_recycle)
foreach(_probe IN LISTS _cupti_probes)
  add_executable(${_probe} ${_probe}.cu)
  target_link_libraries(${_probe} PRIVATE CUDA::cupti CUDA::cuda_driver)
endforeach()

# Every executable defined above.
set(_all_targets repro ${_cupti_probes})

# ctest integration: each executable is registered as a test of the same name.
# To run one of the binaries via ctest:
#   ctest --test-dir <cmake-build-dir> -R <binary-name> -C <build-config>
enable_testing()
foreach(_exe IN LISTS _all_targets)
  add_test(NAME ${_exe} COMMAND ${_exe})
endforeach()
# On Windows, CUPTI's DLL must be on PATH at runtime.
# Propagate the DLL directory to both the VS debugger and ctest.
if(WIN32)
  # Directory of the CUPTI library file, resolved at generate time.
  set(_cupti_dir "$<TARGET_FILE_DIR:CUDA::cupti>")

  # The ENVIRONMENT test property is a ;-separated list of VAR=VALUE entries,
  # so the semicolons inside PATH have to be escaped. Otherwise the value is
  # split at every ';', PATH ends up holding only the CUPTI directory and the
  # remaining pieces become malformed entries.
  string(REPLACE ";" "\\;" _escaped_path "$ENV{PATH}")

  foreach(_target IN LISTS _all_targets)
    # VS_DEBUGGER_ENVIRONMENT is written to the project file as a single
    # string, so the plain ';' separators are kept here.
    set_target_properties(${_target} PROPERTIES
      VS_DEBUGGER_ENVIRONMENT "PATH=${_cupti_dir};$ENV{PATH}")
    set_tests_properties(${_target} PROPERTIES
      ENVIRONMENT "PATH=${_cupti_dir}\\;${_escaped_path}")
  endforeach()
endif()

View File

@@ -14,8 +14,9 @@ drops every GPU zone.
## Build and run
```bash
make
./repro
cmake -S . -B ./build
cmake --build ./build --parallel --config Release
ctest --test-dir ./build -C Release -R repro
```
## What to expect