From 2aeac50277e9bb9861deb8074a71de0792ca0da7 Mon Sep 17 00:00:00 2001
From: Syoyo Fujita
Date: Fri, 20 Mar 2026 09:00:30 +0900
Subject: [PATCH] Add fast float parser and benchmark float-heavy scene
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace strtod() with Clinger's fast path in tinygltf_json.h for ~1.5x
faster JSON float parsing. The new parser accumulates all digits into a
uint64 mantissa and uses exact power-of-10 tables for conversion,
avoiding locale-dependent strtod for ~99% of JSON float values.

Add optional float32 parse mode (parse_float32 option) that parses JSON
floats at single precision — fewer significant digits needed, wider fast
path range. Breaks strict double-precision conformance but sufficient
for glTF data which is typically single-precision.

Benchmark additions:
- gen_synthetic: add float_heavy preset (~500MB ASCII float JSON)
- bench_v3: add --float32 flag for float32 parse mode benchmarking

Co-Authored-By: Claude Opus 4.6
---
 benchmark/Makefile          |  70 ++++
 benchmark/bench_v3.cpp      | 396 +++++++++++++++++++
 benchmark/gen_synthetic.cpp | 740 ++++++++++++++++++++++++++++++++++++
 tiny_gltf_v3.h              |  22 +-
 tinygltf_json.h             | 293 +++++++++++---
 5 files changed, 1463 insertions(+), 58 deletions(-)
 create mode 100644 benchmark/Makefile
 create mode 100644 benchmark/bench_v3.cpp
 create mode 100644 benchmark/gen_synthetic.cpp

diff --git a/benchmark/Makefile b/benchmark/Makefile
new file mode 100644
index 0000000..928cdc9
--- /dev/null
+++ b/benchmark/Makefile
@@ -0,0 +1,70 @@
+# benchmark/Makefile — Build and run tinygltf v3 benchmarks
+#
+# Targets:
+#   make              — build gen_synthetic + bench_v3
+#   make generate     — generate synthetic test scenes
+#   make run          — run benchmarks on all generated scenes
+#   make report       — run benchmarks and produce CSV report
+#   make clean        — remove binaries and generated scenes
+
+CXX      ?= g++
+CXXFLAGS ?= -O2 -std=c++17 -Wall -Wextra -Wno-unused-function
+CXXFLAGS += -fno-rtti -fno-exceptions
+INCLUDES  = -I..
+
+BINDIR    = .
+GEN       = $(BINDIR)/gen_synthetic
+BENCH_V3  = $(BINDIR)/bench_v3
+
+# Iteration counts
+ITERATIONS ?= 10
+WARMUP     ?= 2
+PREFIX     ?= synthetic
+
+.PHONY: all generate run report clean
+
+all: $(GEN) $(BENCH_V3)
+
+$(GEN): gen_synthetic.cpp
+	$(CXX) $(CXXFLAGS) -o $@ $<
+
+$(BENCH_V3): bench_v3.cpp ../tiny_gltf_v3.h ../tinygltf_json.h
+	$(CXX) $(CXXFLAGS) $(INCLUDES) -o $@ $<
+
+# Generate synthetic scenes ($(GEN) already carries $(BINDIR)/, so no extra ./)
+generate: $(GEN)
+	@echo "=== Generating synthetic scenes ==="
+	$(GEN) --prefix $(PREFIX)
+	@echo ""
+	@echo "Generated files (binary + GLB):"
+	@ls -lh $(PREFIX)_*.gltf $(PREFIX)_*.glb $(PREFIX)_*.bin 2>/dev/null || true
+
+# Run benchmarks on all generated scenes
+run: $(BENCH_V3) generate
+	@echo ""
+	@echo "================================================================="
+	@echo " tinygltf v3 Benchmark"
+	@echo "================================================================="
+	@echo ""
+	@for f in $(PREFIX)_*.glb $(PREFIX)_*.gltf; do \
+		if [ -f "$$f" ]; then \
+			$(BENCH_V3) "$$f" --iterations $(ITERATIONS) --warmup $(WARMUP); \
+			echo ""; \
+		fi; \
+	done
+
+# Run benchmarks and produce CSV report (falls back to cat if column is missing)
+report: $(BENCH_V3) generate
+	@echo "file,size_bytes,iterations,parse_min_ms,parse_max_ms,parse_avg_ms,parse_median_ms,throughput_mbs,arena_peak_bytes,meshes,nodes,accessors,materials,animations" > benchmark_report.csv
+	@for f in $(PREFIX)_*.glb $(PREFIX)_*.gltf; do \
+		if [ -f "$$f" ]; then \
+			$(BENCH_V3) "$$f" --iterations $(ITERATIONS) --warmup $(WARMUP) --csv | tail -n 1 >> benchmark_report.csv; \
+		fi; \
+	done
+	@echo "=== Report written to benchmark_report.csv ==="
+	@column -t -s, benchmark_report.csv 2>/dev/null || cat benchmark_report.csv
+
+clean:
+	rm -f $(GEN) $(BENCH_V3)
+	rm -f $(PREFIX)_*.gltf $(PREFIX)_*.glb $(PREFIX)_*.bin
+	rm -f benchmark_report.csv
diff --git a/benchmark/bench_v3.cpp b/benchmark/bench_v3.cpp
new file mode 100644
index 0000000..ab8117b
--- /dev/null
+++ b/benchmark/bench_v3.cpp
@@ -0,0 +1,396
@@
+/*
+ * bench_v3.cpp — Benchmark tinygltf v3 parser: parse speed & memory.
+ *
+ * Measures (the input file is read once up front and is not timed):
+ *   - JSON parse + model build time
+ *   - Peak bytes requested through the parser's allocator callbacks
+ *   - Process RSS before/after the timed runs (Linux only)
+ *   - Throughput (MB/s)
+ *
+ * Usage:
+ *   bench_v3 <file> [--iterations N] [--warmup N] [--csv] [--quiet] [--float32]
+ *   bench_v3 --batch <file1> [file2] ... [--iterations N] [--csv]
+ */
+
+#define TINYGLTF3_IMPLEMENTATION
+#define TINYGLTF3_ENABLE_FS
+#include "tiny_gltf_v3.h"
+
+#include <algorithm>
+#include <chrono>
+#include <climits>
+#include <cstdarg>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#if defined(__linux__)
+#include <unistd.h> /* sysconf(_SC_PAGESIZE) for get_rss_bytes() */
+#endif
+
+/* ------------------------------------------------------------------ */
+/* Timing helpers                                                      */
+/* ------------------------------------------------------------------ */
+
+/* steady_clock is monotonic; high_resolution_clock may alias the wall
+ * clock, which can jump while an interval is being measured. */
+using Clock = std::chrono::steady_clock;
+using TimePoint = Clock::time_point;
+
+/* Returns the interval [start, end] in (fractional) milliseconds. */
+static double elapsed_ms(TimePoint start, TimePoint end) {
+  return std::chrono::duration<double, std::milli>(end - start).count();
+}
+
+/* ------------------------------------------------------------------ */
+/* Memory tracking allocator                                           */
+/* ------------------------------------------------------------------ */
+
+/* Byte and call counters updated by the tracked_* callbacks below. */
+struct MemTracker {
+  size_t current;      /* bytes currently outstanding */
+  size_t peak;         /* high-water mark of `current` */
+  size_t total_allocs; /* successful allocations */
+  size_t total_frees;  /* frees of non-NULL pointers */
+};
+
+/* malloc() wrapper; `ud` must point at a MemTracker. */
+static void *tracked_alloc(size_t size, void *ud) {
+  MemTracker *mt = (MemTracker *)ud;
+  void *ptr = malloc(size);
+  if (ptr) {
+    mt->current += size;
+    if (mt->current > mt->peak) mt->peak = mt->current;
+    mt->total_allocs++;
+  }
+  return ptr;
+}
+
+/* realloc() wrapper; the caller supplies the previous size of `ptr`.
+ * Counters are only touched when the reallocation succeeds. */
+static void *tracked_realloc(void *ptr, size_t old_size, size_t new_size, void *ud) {
+  MemTracker *mt = (MemTracker *)ud;
+  void *new_ptr = realloc(ptr, new_size);
+  if (new_ptr) {
+    /* realloc(NULL, n) creates a new allocation, so count it as one to
+     * keep total_allocs/total_frees balanced. */
+    if (!ptr) mt->total_allocs++;
+    mt->current -= old_size;
+    mt->current += new_size;
+    if (mt->current > mt->peak) mt->peak = mt->current;
+  }
+  return new_ptr;
+}
+
+/* free() wrapper; NULL is ignored and not counted. */
+static void tracked_free(void *ptr, size_t size, void *ud) {
+  MemTracker *mt = (MemTracker *)ud;
+  if (ptr) {
+    mt->current -= size;
+    mt->total_frees++;
+    free(ptr);
+  }
+}
+
+/* ------------------------------------------------------------------ */
+/* RSS measurement (Linux)                                             */
+/* ------------------------------------------------------------------ */
+
+/* Returns the resident set size in bytes, or 0 when it is unavailable
+ * (non-Linux build, /proc not readable, or unparsable content). */
+static size_t get_rss_bytes() {
+#if defined(__linux__)
+  FILE *f = fopen("/proc/self/statm", "r");
+  if (!f) return 0;
+  long pages = 0;
+  /* Skip the first field and read the second one (resident pages). */
+  if (fscanf(f, "%*s %ld", &pages) != 1) pages = 0;
+  fclose(f);
+  /* statm counts pages; the page size is not 4096 on every kernel. */
+  const long page_size = sysconf(_SC_PAGESIZE);
+  if (pages <= 0 || page_size <= 0) return 0;
+  return (size_t)pages * (size_t)page_size;
+#else
+  return 0;
+#endif
+}
+
+/* ------------------------------------------------------------------ */
+/* Benchmark result                                                    */
+/* ------------------------------------------------------------------ */
+
+struct BenchResult {
+  std::string filename;
+  uint64_t file_size;
+  int iterations;
+
+  /* Parse timing (ms) */
+  double parse_min;
+  double parse_max;
+  double parse_avg;
+  double parse_median;
+
+  /* Memory */
+  size_t arena_peak; /* Peak bytes seen by the tracking allocator */
+  size_t rss_before;
+  size_t rss_after;
+
+  /* Model stats */
+  uint32_t meshes;
+  uint32_t nodes;
+  uint32_t accessors;
+  uint32_t materials;
+  uint32_t animations;
+  uint32_t buffers;
+  uint32_t buffer_views;
+  uint32_t images;
+  uint32_t textures;
+
+  /* Derived */
+  double throughput_mbs; /* MB/s based on median */
+};
+
+/* ------------------------------------------------------------------ */
+/* Run benchmark for a single file                                     */
+/* ------------------------------------------------------------------ */
+
+/*
+ * Parses `filename` `warmup` times untimed and `iterations` times timed,
+ * and returns timing, allocator and model statistics.
+ *
+ * iterations   : timed runs; values below 1 are treated as 1.
+ * warmup       : untimed runs; values below 1 mean no warm-up.
+ * quiet        : suppress per-iteration parse-error messages.
+ * float32_mode : forwarded to tg3_parse_options::parse_float32.
+ *
+ * If the file cannot be read, an error is printed to stderr and a result
+ * with only filename/iterations filled in is returned.
+ */
+static BenchResult bench_file(const char *filename, int iterations, int warmup,
+                              bool quiet, int float32_mode = 0) {
+  BenchResult r = {};
+  r.filename = filename;
+
+  /* The statistics below read times.front()/back(), so at least one
+   * timed run is required; clamp rather than index an empty vector. */
+  if (iterations < 1) iterations = 1;
+  r.iterations = iterations;
+
+  /* Read file into memory */
+  FILE *f = fopen(filename, "rb");
+  if (!f) {
+    fprintf(stderr, "ERROR: Cannot open '%s'\n", filename);
+    return r;
+  }
+  fseek(f, 0, SEEK_END);
+  long sz = ftell(f);
+  fseek(f, 0, SEEK_SET);
+  if (sz <= 0) {
+    fprintf(stderr, "ERROR: '%s' is empty or its size is unavailable\n", filename);
+    fclose(f);
+    return r;
+  }
+
+  /* NOTE(review): element type assumed to be uint8_t to match the data
+   * pointer taken by tg3_parse_auto() — confirm against tiny_gltf_v3.h. */
+  std::vector<uint8_t> data((size_t)sz);
+  size_t rd = fread(data.data(), 1, data.size(), f);
+  fclose(f);
+  if (rd != data.size()) {
+    fprintf(stderr, "ERROR: Short read on '%s'\n", filename);
+    return r;
+  }
+
+  r.file_size = (uint64_t)sz;
+
+  /* Extract base dir */
+  std::string path(filename);
+  std::string base_dir;
+  size_t sep = path.find_last_of("/\\");
+  if (sep != std::string::npos) base_dir = path.substr(0, sep);
+
+  /* Warmup iterations (not measured). They use the same float mode as the
+   * timed runs so the warmed code path is the one being measured.
+   * NOTE(review): assumes tg3_parse_options_init() yields the same defaults
+   * as passing NULL options — confirm against tiny_gltf_v3.h. */
+  for (int i = 0; i < warmup; ++i) {
+    tg3_parse_options warm_opts;
+    tg3_parse_options_init(&warm_opts);
+    warm_opts.parse_float32 = float32_mode;
+
+    tg3_model model;
+    tg3_error_stack errors;
+    tg3_error_stack_init(&errors);
+    tg3_parse_auto(&model, &errors, data.data(), data.size(),
+                   base_dir.c_str(), (uint32_t)base_dir.size(), &warm_opts);
+    tg3_model_free(&model);
+    tg3_error_stack_free(&errors);
+  }
+
+  /* Benchmark iterations */
+  std::vector<double> times;
+  times.reserve((size_t)iterations);
+
+  /* Tracker of the iteration with the highest peak. */
+  MemTracker tracker_peak;
+  memset(&tracker_peak, 0, sizeof(tracker_peak));
+
+  bool stats_captured = false;
+
+  r.rss_before = get_rss_bytes();
+
+  for (int i = 0; i < iterations; ++i) {
+    MemTracker tracker;
+    memset(&tracker, 0, sizeof(tracker));
+
+    tg3_parse_options opts;
+    tg3_parse_options_init(&opts);
+    opts.memory.allocator.alloc = tracked_alloc;
+    opts.memory.allocator.realloc = tracked_realloc;
+    opts.memory.allocator.free = tracked_free;
+    opts.memory.allocator.user_data = &tracker;
+    opts.parse_float32 = float32_mode;
+
+    tg3_model model;
+    tg3_error_stack errors;
+    tg3_error_stack_init(&errors);
+
+    TimePoint t0 = Clock::now();
+
+    tg3_error_code err = tg3_parse_auto(&model, &errors,
+                                        data.data(), data.size(),
+                                        base_dir.c_str(),
+                                        (uint32_t)base_dir.size(),
+                                        &opts);
+
+    TimePoint t1 = Clock::now();
+    times.push_back(elapsed_ms(t0, t1));
+
+    /* Capture model stats on the first successful iteration, which is
+     * not necessarily iteration 0. */
+    if (!stats_captured && err == TG3_OK) {
+      r.meshes = model.meshes_count;
+      r.nodes = model.nodes_count;
+      r.accessors = model.accessors_count;
+      r.materials = model.materials_count;
+      r.animations = model.animations_count;
+      r.buffers = model.buffers_count;
+      r.buffer_views = model.buffer_views_count;
+      r.images = model.images_count;
+      r.textures = model.textures_count;
+      stats_captured = true;
+    }
+
+    if (tracker.peak > tracker_peak.peak) {
+      tracker_peak = tracker;
+    }
+
+    tg3_model_free(&model);
+    tg3_error_stack_free(&errors);
+
+    if (err != TG3_OK && !quiet) {
+      fprintf(stderr, "  Parse error on iteration %d: code %d\n", i, (int)err);
+    }
+  }
+
+  r.rss_after = get_rss_bytes();
+  r.arena_peak = tracker_peak.peak;
+
+  /* Compute stats (times holds at least one sample, see the clamp above) */
+  std::sort(times.begin(), times.end());
+  r.parse_min = times.front();
+  r.parse_max = times.back();
+  double sum = 0;
+  for (double t : times) sum += t;
+  r.parse_avg = sum / (double)times.size();
+
+  /* Median: mean of the two middle samples when the count is even. */
+  const size_t mid = times.size() / 2;
+  r.parse_median = (times.size() % 2 == 0)
+                       ? 0.5 * (times[mid - 1] + times[mid])
+                       : times[mid];
+
+  /* Throughput: file_size / median_time */
+  if (r.parse_median > 0) {
+    r.throughput_mbs = ((double)r.file_size / (1024.0 * 1024.0)) /
+                       (r.parse_median / 1000.0);
+  }
+
+  return r;
+}
+
+/* ------------------------------------------------------------------ */
+/* Print results                                                       */
+/* ------------------------------------------------------------------ */
+
+/* Formats `bytes` with a B/KB/MB/GB unit (1024-based) into `buf` and
+ * returns `buf`. */
+static const char *human_bytes(size_t bytes, char *buf, size_t buf_sz) {
+  if (bytes >= 1024ULL * 1024 * 1024)
+    snprintf(buf, buf_sz, "%.2f GB", (double)bytes / (1024.0 * 1024 * 1024));
+  else if (bytes >= 1024 * 1024)
+    snprintf(buf, buf_sz, "%.2f MB", (double)bytes / (1024.0 * 1024));
+  else if (bytes >= 1024)
+    snprintf(buf, buf_sz, "%.2f KB", (double)bytes / 1024.0);
+  else
+    snprintf(buf, buf_sz, "%zu B", bytes);
+  return buf;
+}
+
+/* Prints one printf-formatted row of the result box, padded to the 63
+ * columns between the borders so the right border always lines up. */
+static void box_row(const char *fmt, ...) {
+  char text[128];
+  va_list ap;
+  va_start(ap, fmt);
+  vsnprintf(text, sizeof(text), fmt, ap);
+  va_end(ap);
+  printf("│ %-63s │\n", text);
+}
+
+/* Prints the human-readable report box for one benchmark result. */
+static void print_result(const BenchResult &r) {
+  char buf[64];
+
+  printf("┌─────────────────────────────────────────────────────────────────┐\n");
+  /* The filename is printed directly so long paths are not truncated. */
+  printf("│ %-63s │\n", r.filename.c_str());
+  printf("├─────────────────────────────────────────────────────────────────┤\n");
+  box_row("File size:   %s", human_bytes((size_t)r.file_size, buf, sizeof(buf)));
+  box_row("Iterations:  %d", r.iterations);
+  box_row("%s", "");
+  box_row("%s", "Parse time (ms):");
+  box_row("  min:    %10.3f", r.parse_min);
+  box_row("  max:    %10.3f", r.parse_max);
+  box_row("  avg:    %10.3f", r.parse_avg);
+  box_row("  median: %10.3f", r.parse_median);
+  box_row("%s", "");
+  box_row("Throughput:  %.2f MB/s", r.throughput_mbs);
+  box_row("Arena peak:  %s", human_bytes(r.arena_peak, buf, sizeof(buf)));
+  /* rss_before is 0 when RSS cannot be measured on this platform. */
+  if (r.rss_before > 0) {
+    box_row("RSS before:  %s", human_bytes(r.rss_before, buf, sizeof(buf)));
+    box_row("RSS after:   %s", human_bytes(r.rss_after, buf, sizeof(buf)));
+  }
+  box_row("%s", "");
+  box_row("Model: %u meshes, %u nodes, %u accessors, %u materials",
+          r.meshes, r.nodes, r.accessors, r.materials);
+  box_row("       %u animations, %u buffers, %u images",
+          r.animations, r.buffers, r.images);
+  printf("└─────────────────────────────────────────────────────────────────┘\n");
+}
+
+/* Prints the CSV column names; must stay in sync with print_csv_row(). */
+static void print_csv_header() {
+  printf("file,size_bytes,iterations,parse_min_ms,parse_max_ms,parse_avg_ms,"
+         "parse_median_ms,throughput_mbs,arena_peak_bytes,"
+         "meshes,nodes,accessors,materials,animations\n");
+}
+
+/* Prints one CSV record for `r` in the column order of print_csv_header(). */
+static void print_csv_row(const BenchResult &r) {
+  /* %llu: unsigned long is only 32 bits wide on LLP64 targets. */
+  printf("%s,%llu,%d,%.3f,%.3f,%.3f,%.3f,%.2f,%zu,%u,%u,%u,%u,%u\n",
+         r.filename.c_str(), (unsigned long long)r.file_size, r.iterations,
+         r.parse_min, r.parse_max, r.parse_avg, r.parse_median,
+         r.throughput_mbs, r.arena_peak,
+         r.meshes, r.nodes, r.accessors, r.materials, r.animations);
+}
+
+/* ------------------------------------------------------------------ */
+/* Main                                                                */
+/* ------------------------------------------------------------------ */
+
+static void usage() {
+  fprintf(stderr,
+          "Usage:\n"
+          "  bench_v3 <file> [--iterations N] [--warmup N] [--csv] [--quiet]\n"
+          "  bench_v3 --batch <file1> [file2] ... [--iterations N] [--csv]\n"
+          "\n"
+          "Options:\n"
+          "  --iterations N   Number of timed parse iterations (default: 10)\n"
+          "  --warmup N       Number of warmup iterations (default: 2)\n"
+          "  --csv            Output in CSV format\n"
+          "  --quiet          Suppress per-iteration error messages\n"
+          "  --batch          Benchmark multiple files\n"
+          "  --float32        Parse JSON floats as float32 (faster, less precise)\n");
+}
+
+/* Parses `text` as a base-10 int >= min_value. Returns false (leaving
+ * *out untouched) on empty input, trailing garbage or out-of-range. */
+static bool parse_int_arg(const char *text, int min_value, int *out) {
+  char *end = NULL;
+  const long v = strtol(text, &end, 10);
+  if (end == text || *end != '\0' || v < (long)min_value || v > (long)INT_MAX) {
+    return false;
+  }
+  *out = (int)v;
+  return true;
+}
+
+int main(int argc, char **argv) {
+  if (argc < 2) { usage(); return 1; }
+
+  int iterations = 10;
+  int warmup = 2;
+  bool csv = false;
+  bool quiet = false;
+  int float32_mode = 0;
+  std::vector<std::string> files;
+
+  for (int i = 1; i < argc; ++i) {
+    const char *arg = argv[i];
+    const bool is_iterations = strcmp(arg, "--iterations") == 0;
+    const bool is_warmup = strcmp(arg, "--warmup") == 0;
+
+    if (is_iterations || is_warmup) {
+      /* Both options take a value; reject a missing or malformed one
+       * instead of silently keeping the default. */
+      if (i + 1 >= argc) {
+        fprintf(stderr, "ERROR: %s requires a value\n", arg);
+        usage();
+        return 1;
+      }
+      const char *value = argv[++i];
+      const bool ok = is_iterations ? parse_int_arg(value, 1, &iterations)
+                                    : parse_int_arg(value, 0, &warmup);
+      if (!ok) {
+        fprintf(stderr, "ERROR: invalid value '%s' for %s\n", value, arg);
+        return 1;
+      }
+    } else if (strcmp(arg, "--csv") == 0) {
+      csv = true;
+    } else if (strcmp(arg, "--quiet") == 0) {
+      quiet = true;
+    } else if (strcmp(arg, "--float32") == 0) {
+      float32_mode = 1;
+    } else if (strcmp(arg, "--batch") == 0) {
+      /* batch mode: just collect files */
+    } else if (arg[0] != '-') {
+      files.push_back(arg);
+    } else {
+      fprintf(stderr, "WARNING: ignoring unknown option '%s'\n", arg);
+    }
+  }
+
+  if (files.empty()) { usage(); return 1; }
+
+  if (csv) print_csv_header();
+
+  for (const auto &file : files) {
+    if (!csv && !quiet) {
+      printf("Benchmarking: %s (%d iterations, %d warmup%s)\n",
+             file.c_str(), iterations, warmup,
+             float32_mode ? ", float32" : "");
+    }
+
+    BenchResult r = bench_file(file.c_str(), iterations, warmup, quiet, float32_mode);
+
+    if (csv) {
+      print_csv_row(r);
+    } else {
+      print_result(r);
+      printf("\n");
+    }
+  }
+
+  return 0;
+}
diff --git a/benchmark/gen_synthetic.cpp b/benchmark/gen_synthetic.cpp
new file mode 100644
index 0000000..b356221
--- /dev/null
+++ b/benchmark/gen_synthetic.cpp
@@ -0,0 +1,740 @@
+/*
+ * gen_synthetic.cpp — Generate synthetic glTF 2.0 scenes for benchmarking.
+ * + * Produces .gltf (ASCII) and .glb (binary) files with configurable: + * - Number of meshes, each with N vertices/triangles + * - Number of nodes (flat hierarchy) + * - Number of materials + * - Number of animations with M keyframes + * + * Usage: + * gen_synthetic [--meshes N] [--verts-per-mesh N] [--nodes N] + * [--materials N] [--animations N] [--keyframes N] + * [--prefix NAME] + * + * Outputs: _