From 77fb86155f63e87c6cf37cc2d0169b1389305c03 Mon Sep 17 00:00:00 2001
From: Marcos Slomp
Date: Fri, 5 Jun 2026 10:14:14 -0700
Subject: [PATCH 1/6] relocating CUDAGraphRepro "example" to the tests folder

---
NOTE(review): this series was re-flowed from a copy whose line breaks and
indentation had been collapsed. Indent widths (4 spaces, tabs in Makefile
recipes) are conventional choices, and the author e-mail address is missing
from every "From:" header - restore it before using `git am`.

 {examples/CUDAGraphRepro => tests/cuda/repro/graph}/Makefile | 3 ++-
 {examples/CUDAGraphRepro => tests/cuda/repro/graph}/README.md | 0
 {examples/CUDAGraphRepro => tests/cuda/repro/graph}/repro.cu | 0
 .../cuda/repro/graph}/test_corr_reuse.cu | 0
 .../cuda/repro/graph}/test_graphid_recycle.cu | 0
 5 files changed, 2 insertions(+), 1 deletion(-)
 rename {examples/CUDAGraphRepro => tests/cuda/repro/graph}/Makefile (95%)
 rename {examples/CUDAGraphRepro => tests/cuda/repro/graph}/README.md (100%)
 rename {examples/CUDAGraphRepro => tests/cuda/repro/graph}/repro.cu (100%)
 rename {examples/CUDAGraphRepro => tests/cuda/repro/graph}/test_corr_reuse.cu (100%)
 rename {examples/CUDAGraphRepro => tests/cuda/repro/graph}/test_graphid_recycle.cu (100%)

diff --git a/examples/CUDAGraphRepro/Makefile b/tests/cuda/repro/graph/Makefile
similarity index 95%
rename from examples/CUDAGraphRepro/Makefile
rename to tests/cuda/repro/graph/Makefile
index 07a06a19..85af04a9 100644
--- a/examples/CUDAGraphRepro/Makefile
+++ b/tests/cuda/repro/graph/Makefile
@@ -1,9 +1,10 @@
-TRACY_PUBLIC := ../../public
+TRACY_PATH := ../../../..
 NVCC := nvcc
 CXX := g++
 CUPTI_INC := /usr/local/cuda/include
 CUPTI_LIB := /usr/local/cuda/lib64
 
+TRACY_PUBLIC := $(TRACY_PATH)/public
 TRACY_SRCS := $(TRACY_PUBLIC)/TracyClient.cpp
 INCLUDES := -I$(TRACY_PUBLIC) -I$(CUPTI_INC)
 LIBS := -L$(CUPTI_LIB) -lcuda -lcupti -lpthread -ldl
diff --git a/examples/CUDAGraphRepro/README.md b/tests/cuda/repro/graph/README.md
similarity index 100%
rename from examples/CUDAGraphRepro/README.md
rename to tests/cuda/repro/graph/README.md
diff --git a/examples/CUDAGraphRepro/repro.cu b/tests/cuda/repro/graph/repro.cu
similarity index 100%
rename from examples/CUDAGraphRepro/repro.cu
rename to tests/cuda/repro/graph/repro.cu
diff --git a/examples/CUDAGraphRepro/test_corr_reuse.cu b/tests/cuda/repro/graph/test_corr_reuse.cu
similarity index 100%
rename from examples/CUDAGraphRepro/test_corr_reuse.cu
rename to tests/cuda/repro/graph/test_corr_reuse.cu
diff --git a/examples/CUDAGraphRepro/test_graphid_recycle.cu b/tests/cuda/repro/graph/test_graphid_recycle.cu
similarity index 100%
rename from examples/CUDAGraphRepro/test_graphid_recycle.cu
rename to tests/cuda/repro/graph/test_graphid_recycle.cu

From ae275f239d29928ac7de21aacf439e8c74c392a6 Mon Sep 17 00:00:00 2001
From: Marcos Slomp
Date: Fri, 5 Jun 2026 10:22:52 -0700
Subject: [PATCH 2/6] adding cmake recipe file

---
 tests/cuda/repro/graph/CMakeLists.txt | 34 +++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 tests/cuda/repro/graph/CMakeLists.txt

diff --git a/tests/cuda/repro/graph/CMakeLists.txt b/tests/cuda/repro/graph/CMakeLists.txt
new file mode 100644
index 00000000..9ed8a3c8
--- /dev/null
+++ b/tests/cuda/repro/graph/CMakeLists.txt
@@ -0,0 +1,34 @@
+cmake_minimum_required(VERSION 3.25)
+project(CUDAGraphReproTests LANGUAGES CXX CUDA)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CUDA_STANDARD 17)
+set(CMAKE_CUDA_ARCHITECTURES native)
+
+set(TRACY_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../.."
+    CACHE PATH "Root of the Tracy repository")
+set(TRACY_PUBLIC "${TRACY_PATH}/public")
+
+option(TRACY_ENABLE "Enable profiling" ON)
+
+find_package(CUDAToolkit REQUIRED)
+find_package(Threads REQUIRED)
+
+# Tracy client (CXX-only, matching the Makefile's g++ step for TracyClient.cpp)
+add_library(TracyClient STATIC ${TRACY_PUBLIC}/TracyClient.cpp)
+target_include_directories(TracyClient PUBLIC ${TRACY_PUBLIC})
+target_link_libraries(TracyClient PUBLIC Threads::Threads ${CMAKE_DL_LIBS})
+if(TRACY_ENABLE)
+    target_compile_definitions(TracyClient PUBLIC TRACY_ENABLE)
+endif()
+
+# repro: Tracy-integrated CUDA Graph reproducer
+add_executable(repro repro.cu)
+target_link_libraries(repro PRIVATE TracyClient CUDA::cupti CUDA::cuda_driver)
+
+# Standalone CUPTI probes (no Tracy dependency)
+add_executable(test_corr_reuse test_corr_reuse.cu)
+target_link_libraries(test_corr_reuse PRIVATE CUDA::cupti CUDA::cuda_driver)
+
+add_executable(test_graphid_recycle test_graphid_recycle.cu)
+target_link_libraries(test_graphid_recycle PRIVATE CUDA::cupti CUDA::cuda_driver)

From 79467b4b314981549a494fc264d556c96a200eb8 Mon Sep 17 00:00:00 2001
From: Marcos Slomp
Date: Fri, 5 Jun 2026 10:26:49 -0700
Subject: [PATCH 3/6] cmake version shenanigans

---
 tests/cuda/repro/graph/CMakeLists.txt | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/cuda/repro/graph/CMakeLists.txt b/tests/cuda/repro/graph/CMakeLists.txt
index 9ed8a3c8..6496d395 100644
--- a/tests/cuda/repro/graph/CMakeLists.txt
+++ b/tests/cuda/repro/graph/CMakeLists.txt
@@ -1,9 +1,12 @@
-cmake_minimum_required(VERSION 3.25)
+cmake_minimum_required(VERSION 3.18)
 project(CUDAGraphReproTests LANGUAGES CXX CUDA)
 
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CUDA_STANDARD 17)
-set(CMAKE_CUDA_ARCHITECTURES native)
+
+if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
+    set(CMAKE_CUDA_ARCHITECTURES native)
+endif()
 
 set(TRACY_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../.."
     CACHE PATH "Root of the Tracy repository")

From d89c956394c80e9883b2e6d614d42d829ac810e1 Mon Sep 17 00:00:00 2001
From: Marcos Slomp
Date: Fri, 5 Jun 2026 10:34:34 -0700
Subject: [PATCH 4/6] CUPTI DLL paths...

---
NOTE(review): in the collapsed copy the value of `_cupti_dir` was a bare "$"
(the generator expression had been stripped). `$<TARGET_FILE_DIR:CUDA::cupti>`
below is a reconstruction - confirm against the original commit.

 tests/cuda/repro/graph/CMakeLists.txt | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/cuda/repro/graph/CMakeLists.txt b/tests/cuda/repro/graph/CMakeLists.txt
index 6496d395..a2bb1624 100644
--- a/tests/cuda/repro/graph/CMakeLists.txt
+++ b/tests/cuda/repro/graph/CMakeLists.txt
@@ -35,3 +35,22 @@ target_link_libraries(test_corr_reuse PRIVATE CUDA::cupti CUDA::cuda_driver)
 
 add_executable(test_graphid_recycle test_graphid_recycle.cu)
 target_link_libraries(test_graphid_recycle PRIVATE CUDA::cupti CUDA::cuda_driver)
+
+set(_all_targets repro test_corr_reuse test_graphid_recycle)
+
+# On Windows, CUPTI's DLL must be on PATH at runtime.
+# Propagate the DLL directory to both the VS debugger and ctest.
+if(WIN32)
+    set(_cupti_dir "$<TARGET_FILE_DIR:CUDA::cupti>")
+    foreach(_target ${_all_targets})
+        set_target_properties(${_target} PROPERTIES
+            VS_DEBUGGER_ENVIRONMENT "PATH=${_cupti_dir};$ENV{PATH}")
+    endforeach()
+
+    enable_testing()
+    foreach(_target ${_all_targets})
+        add_test(NAME ${_target} COMMAND ${_target})
+        set_tests_properties(${_target} PROPERTIES
+            ENVIRONMENT "PATH=${_cupti_dir};$ENV{PATH}")
+    endforeach()
+endif()

From 10d64d69b57a590ee454fd06c31f554b04ad0256 Mon Sep 17 00:00:00 2001
From: Marcos Slomp
Date: Fri, 5 Jun 2026 10:37:27 -0700
Subject: [PATCH 5/6] better ctest integration across the board

---
NOTE(review): the ctest usage comment had lost its placeholders in the
collapsed copy ("ctest --test-dir -R -C"); the `<build-dir>`, `<regex>` and
`<config>` names below are reconstructed - confirm against the original commit.

 tests/cuda/repro/graph/CMakeLists.txt | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/cuda/repro/graph/CMakeLists.txt b/tests/cuda/repro/graph/CMakeLists.txt
index a2bb1624..fb7fd33c 100644
--- a/tests/cuda/repro/graph/CMakeLists.txt
+++ b/tests/cuda/repro/graph/CMakeLists.txt
@@ -38,6 +38,15 @@ target_link_libraries(test_graphid_recycle PRIVATE CUDA::cupti CUDA::cuda_driver
 
 set(_all_targets repro test_corr_reuse test_graphid_recycle)
 
+# ctest-related integration below
+# to run the binaries via ctest:
+#   ctest --test-dir <build-dir> -R <regex> -C <config>
+
+enable_testing()
+foreach(_target ${_all_targets})
+    add_test(NAME ${_target} COMMAND ${_target})
+endforeach()
+
 # On Windows, CUPTI's DLL must be on PATH at runtime.
 # Propagate the DLL directory to both the VS debugger and ctest.
 if(WIN32)
@@ -45,11 +54,6 @@ if(WIN32)
     foreach(_target ${_all_targets})
         set_target_properties(${_target} PROPERTIES
             VS_DEBUGGER_ENVIRONMENT "PATH=${_cupti_dir};$ENV{PATH}")
-    endforeach()
-
-    enable_testing()
-    foreach(_target ${_all_targets})
-        add_test(NAME ${_target} COMMAND ${_target})
         set_tests_properties(${_target} PROPERTIES
             ENVIRONMENT "PATH=${_cupti_dir};$ENV{PATH}")
     endforeach()

From 19549693a0d012b64d7d23688e189a501d491a7d Mon Sep 17 00:00:00 2001
From: Marcos Slomp
Date: Fri, 5 Jun 2026 10:45:07 -0700
Subject: [PATCH 6/6] removing Makefile

---
 tests/cuda/repro/graph/Makefile | 50 --------------------------------
 tests/cuda/repro/graph/README.md | 5 ++--
 2 files changed, 3 insertions(+), 52 deletions(-)
 delete mode 100644 tests/cuda/repro/graph/Makefile

diff --git a/tests/cuda/repro/graph/Makefile b/tests/cuda/repro/graph/Makefile
deleted file mode 100644
index 85af04a9..00000000
--- a/tests/cuda/repro/graph/Makefile
+++ /dev/null
@@ -1,50 +0,0 @@
-TRACY_PATH := ../../../..
-NVCC := nvcc
-CXX := g++
-CUPTI_INC := /usr/local/cuda/include
-CUPTI_LIB := /usr/local/cuda/lib64
-
-TRACY_PUBLIC := $(TRACY_PATH)/public
-TRACY_SRCS := $(TRACY_PUBLIC)/TracyClient.cpp
-INCLUDES := -I$(TRACY_PUBLIC) -I$(CUPTI_INC)
-LIBS := -L$(CUPTI_LIB) -lcuda -lcupti -lpthread -ldl
-
-CXXFLAGS_REL := -O2 -DTRACY_ENABLE
-CXXFLAGS_DBG := -g -O0 -DTRACY_ENABLE
-NVCCFLAGS_REL := -arch=native -O2 -DTRACY_ENABLE
-NVCCFLAGS_DBG := -arch=native -g -O0 -DTRACY_ENABLE
-
-.PHONY: all debug investigate investigate2 clean
-
-all: repro
-
-debug: repro_debug
-
-investigate: test_corr_reuse
-
-investigate2: test_graphid_recycle
-
-# Release build
-repro: repro.cu tracy_client.o
-	$(NVCC) $(NVCCFLAGS_REL) $(INCLUDES) -o $@ $< tracy_client.o $(LIBS)
-
-tracy_client.o: $(TRACY_SRCS)
-	$(CXX) $(CXXFLAGS_REL) $(INCLUDES) -c -o $@ $<
-
-# Debug build (asserts enabled, no NDEBUG)
-repro_debug: repro.cu tracy_client_debug.o
-	$(NVCC) $(NVCCFLAGS_DBG) $(INCLUDES) -o $@ $< tracy_client_debug.o $(LIBS)
-
-tracy_client_debug.o: $(TRACY_SRCS)
-	$(CXX) $(CXXFLAGS_DBG) $(INCLUDES) -c -o $@ $<
-
-# Investigation: correlationId uniqueness per graph launch (no Tracy dependency)
-test_corr_reuse: test_corr_reuse.cu
-	$(NVCC) $(NVCCFLAGS_REL) $(INCLUDES) -o $@ $< $(LIBS)
-
-# Investigation: does CUPTI recycle graphId values after cudaGraphExecDestroy?
-test_graphid_recycle: test_graphid_recycle.cu
-	$(NVCC) $(NVCCFLAGS_REL) $(INCLUDES) -o $@ $< $(LIBS)
-
-clean:
-	rm -f repro repro_debug test_corr_reuse test_graphid_recycle tracy_client.o tracy_client_debug.o
diff --git a/tests/cuda/repro/graph/README.md b/tests/cuda/repro/graph/README.md
index f45d3efe..5704fcd8 100644
--- a/tests/cuda/repro/graph/README.md
+++ b/tests/cuda/repro/graph/README.md
@@ -14,8 +14,9 @@ drops every GPU zone.
 ## Build and run
 
 ```bash
-make
-./repro
+cmake -S . -B ./build
+cmake --build ./build --parallel --config Release
+ctest --test-dir ./build -C Release -R repro
 ```
 
 ## What to expect