From 2c7bf2c93263ec2b3a75b5f5e3786eca1c08eb38 Mon Sep 17 00:00:00 2001
From: Syoyo Fujita <syoyo@lighttransport.com>
Date: Sat, 21 Mar 2026 04:10:25 +0900
Subject: [PATCH] Fix fuzzer-found bugs, add libFuzzer harness for v3

Add tests/v3/fuzzer/ with libFuzzer harness covering all four parse
paths (auto-detect, JSON, GLB, float32 mode) with ASan+UBSan.

Fix two bugs found by 10+ hours of fuzzing (~23M iterations):

1. UB: (int64_t)inf in cj_parse_number when extreme exponents like
   22222222e222222 produce infinity. Add cj_dbl_to_i64() that clamps
   inf/NaN/out-of-range values before casting.

2. Null deref in tg3__parse_string when glTF array elements are not
   JSON objects (e.g. "scenes": [[3]]). Add is_object() validation
   in TG3__PARSE_ARRAY_SIMPLE and TG3__PARSE_ARRAY_IDX macros.

Verified clean: 5.8M additional runs with zero crashes after fixes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/v3/fuzzer/Makefile        |  67 +++++++++++++++++++
 tests/v3/fuzzer/fuzz_gltf_v3.cc | 110 ++++++++++++++++++++++++++++++++
 tiny_gltf_v3.h                  |  12 ++++
 tinygltf_json.h                 |  14 +++-
 4 files changed, 200 insertions(+), 3 deletions(-)
 create mode 100644 tests/v3/fuzzer/Makefile
 create mode 100644 tests/v3/fuzzer/fuzz_gltf_v3.cc

diff --git a/tests/v3/fuzzer/Makefile b/tests/v3/fuzzer/Makefile
new file mode 100644
index 0000000..086b428
--- /dev/null
+++ b/tests/v3/fuzzer/Makefile
@@ -0,0 +1,67 @@
+# tests/v3/fuzzer/Makefile — Build libFuzzer harness for tinygltf v3
+#
+# Requires: clang++ with libFuzzer support
+#
+# Targets:
+#   make             — build fuzzer with ASan + UBSan
+#   make run         — run fuzzer with default settings
+#   make seed        — generate seed corpus from test models
+#   make clean       — remove binaries and corpus
+
+CXX       = clang++
+CXXFLAGS  = -g -O1 -std=c++17 -fno-rtti -fno-exceptions
+SANITIZE  = -fsanitize=fuzzer,address,undefined
+INCLUDES  = -I../../..
+
+FUZZER    = fuzz_gltf_v3
+CORPUS    = corpus
+ARTIFACTS = artifacts
+
+# Fuzzer runtime options
+MAX_LEN   ?= 65536
+JOBS      ?= $(shell nproc 2>/dev/null || echo 4)
+MAX_TIME  ?= 0
+
+.PHONY: all run seed clean
+
+all: $(FUZZER)
+
+$(FUZZER): fuzz_gltf_v3.cc ../../../tiny_gltf_v3.h ../../../tinygltf_json.h
+	$(CXX) $(CXXFLAGS) $(SANITIZE) $(INCLUDES) -o $@ $<
+
+run: $(FUZZER) | $(CORPUS) $(ARTIFACTS)
+	./$(FUZZER) $(CORPUS) \
+		-artifact_prefix=$(ARTIFACTS)/ \
+		-max_len=$(MAX_LEN) \
+		-jobs=$(JOBS) \
+		-workers=$(JOBS) \
+		$(if $(filter-out 0,$(MAX_TIME)),-max_total_time=$(MAX_TIME))
+
+# Generate seed corpus from existing test models
+seed: | $(CORPUS)
+	@echo "Seeding corpus from test models..."
+	@for f in ../../../models/Cube/Cube.gltf \
+	          ../../../models/Cube/Cube.glb; do \
+		if [ -f "$$f" ]; then \
+			cp "$$f" $(CORPUS)/; \
+			echo "  Added: $$f"; \
+		fi; \
+	done
+	@# Add a minimal valid glTF JSON
+	@echo '{"asset":{"version":"2.0"},"scene":0,"scenes":[{"nodes":[0]}],"nodes":[{"name":"n"}]}' > $(CORPUS)/minimal.gltf
+	@# Add a minimal valid GLB (header + empty JSON chunk)
+	@printf 'glTF\x02\x00\x00\x00\x1c\x00\x00\x00\x04\x00\x00\x00JSON{}  ' > $(CORPUS)/minimal.glb
+	@# Add edge cases
+	@echo '{}' > $(CORPUS)/empty_object.gltf
+	@echo '{"asset":{"version":"2.0"}}' > $(CORPUS)/asset_only.gltf
+	@echo "Corpus: $$(ls $(CORPUS) | wc -l) files"
+
+$(CORPUS):
+	mkdir -p $(CORPUS)
+
+$(ARTIFACTS):
+	mkdir -p $(ARTIFACTS)
+
+clean:
+	rm -f $(FUZZER)
+	rm -rf $(CORPUS) $(ARTIFACTS)
diff --git a/tests/v3/fuzzer/fuzz_gltf_v3.cc b/tests/v3/fuzzer/fuzz_gltf_v3.cc
new file mode 100644
index 0000000..d0f769a
--- /dev/null
+++ b/tests/v3/fuzzer/fuzz_gltf_v3.cc
@@ -0,0 +1,110 @@
+/*
+ * fuzz_gltf_v3.cc — libFuzzer harness for tinygltf v3 parser.
+ *
+ * Fuzz targets:
+ *   - Auto-detect (GLB or JSON) parse from arbitrary bytes
+ *   - Exercises JSON parser, GLB header parsing, arena allocator,
+ *     error stack, and all glTF entity parsing paths.
+ *
+ * Build (clang with libFuzzer):
+ *   clang++ -g -O1 -fsanitize=fuzzer,address,undefined \
+ *       -std=c++17 -fno-rtti -fno-exceptions \
+ *       -I../../.. -o fuzz_gltf_v3 fuzz_gltf_v3.cc
+ *
+ * Run:
+ *   ./fuzz_gltf_v3 corpus/ -max_len=65536
+ *
+ * Seed corpus: place valid .gltf and .glb files in corpus/
+ */
+
+#define TINYGLTF3_IMPLEMENTATION
+#include "tiny_gltf_v3.h"
+
+#include <cstdint>
+#include <cstddef>
+
+/* Memory budget to prevent OOM during fuzzing */
+static const uint64_t FUZZ_MEMORY_BUDGET = 64ULL * 1024 * 1024; /* 64 MB */
+
+static void fuzz_parse_auto(const uint8_t *data, size_t size) {
+    tg3_model model;
+    tg3_error_stack errors;
+    tg3_error_stack_init(&errors);
+
+    tg3_parse_options opts;
+    tg3_parse_options_init(&opts);
+    opts.memory.memory_budget = FUZZ_MEMORY_BUDGET;
+
+    tg3_parse_auto(&model, &errors, data, (uint64_t)size,
+                   "", 0, &opts);
+
+    tg3_model_free(&model);
+    tg3_error_stack_free(&errors);
+}
+
+static void fuzz_parse_json(const uint8_t *data, size_t size) {
+    tg3_model model;
+    tg3_error_stack errors;
+    tg3_error_stack_init(&errors);
+
+    tg3_parse_options opts;
+    tg3_parse_options_init(&opts);
+    opts.memory.memory_budget = FUZZ_MEMORY_BUDGET;
+
+    tg3_parse(&model, &errors, data, (uint64_t)size,
+              "", 0, &opts);
+
+    tg3_model_free(&model);
+    tg3_error_stack_free(&errors);
+}
+
+static void fuzz_parse_glb(const uint8_t *data, size_t size) {
+    tg3_model model;
+    tg3_error_stack errors;
+    tg3_error_stack_init(&errors);
+
+    tg3_parse_options opts;
+    tg3_parse_options_init(&opts);
+    opts.memory.memory_budget = FUZZ_MEMORY_BUDGET;
+
+    tg3_parse_glb(&model, &errors, data, (uint64_t)size,
+                  "", 0, &opts);
+
+    tg3_model_free(&model);
+    tg3_error_stack_free(&errors);
+}
+
+static void fuzz_parse_float32(const uint8_t *data, size_t size) {
+    tg3_model model;
+    tg3_error_stack errors;
+    tg3_error_stack_init(&errors);
+
+    tg3_parse_options opts;
+    tg3_parse_options_init(&opts);
+    opts.memory.memory_budget = FUZZ_MEMORY_BUDGET;
+    opts.parse_float32 = 1;
+
+    tg3_parse_auto(&model, &errors, data, (uint64_t)size,
+                   "", 0, &opts);
+
+    tg3_model_free(&model);
+    tg3_error_stack_free(&errors);
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+    if (size == 0) return 0;
+
+    /* Use first byte to select parse path, rest is the payload */
+    uint8_t selector = data[0] % 4;
+    const uint8_t *payload = data + 1;
+    size_t payload_size = size - 1;
+
+    switch (selector) {
+    case 0: fuzz_parse_auto(payload, payload_size);    break;
+    case 1: fuzz_parse_json(payload, payload_size);    break;
+    case 2: fuzz_parse_glb(payload, payload_size);     break;
+    case 3: fuzz_parse_float32(payload, payload_size); break;
+    }
+
+    return 0;
+}
diff --git a/tiny_gltf_v3.h b/tiny_gltf_v3.h
index 95b8f4c..1d100a4 100644
--- a/tiny_gltf_v3.h
+++ b/tiny_gltf_v3.h
@@ -2944,6 +2944,12 @@ static int tg3__parse_audio_emitter(tg3__parse_ctx *ctx, const tg3__json &o,
                 if (_items) { \
                     uint32_t _i = 0; \
                     for (auto _it = _arr_it->begin(); _it != _arr_it->end(); ++_it, ++_i) { \
+                        if (!_it->is_object()) { \
+                            tg3__error_pushf((ctx)->errors, (ctx)->arena, \
+                                TG3_SEVERITY_ERROR, TG3_ERR_JSON_TYPE_MISMATCH, \
+                                json_key, "Element %u must be an object", _i); \
+                            continue; \
+                        } \
                         parse_fn((ctx), *_it, &_items[_i], (int32_t)_i); \
                     } \
                     (model_field) = _items; \
@@ -2965,6 +2971,12 @@ static int tg3__parse_audio_emitter(tg3__parse_ctx *ctx, const tg3__json &o,
                 if (_items) { \
                     uint32_t _i = 0; \
                     for (auto _it = _arr_it->begin(); _it != _arr_it->end(); ++_it, ++_i) { \
+                        if (!_it->is_object()) { \
+                            tg3__error_pushf((ctx)->errors, (ctx)->arena, \
+                                TG3_SEVERITY_ERROR, TG3_ERR_JSON_TYPE_MISMATCH, \
+                                json_key, "Element %u must be an object", _i); \
+                            continue; \
+                        } \
                         parse_fn((ctx), *_it, &_items[_i]); \
                     } \
                     (model_field) = _items; \
diff --git a/tinygltf_json.h b/tinygltf_json.h
index c428edc..2cf08b3 100644
--- a/tinygltf_json.h
+++ b/tinygltf_json.h
@@ -307,6 +307,14 @@ static const char *cj_scan_str(const char *p, const char *end) {
  *   Breaks strict JSON/IEEE-754-double conformance.
  * ====================================================================== */
 
+/* Safe double-to-int64 cast: clamp inf/NaN/out-of-range to 0. */
+static int64_t cj_dbl_to_i64(double d) {
+    if (d != d) return 0;                             /* NaN */
+    if (d >= (double)INT64_MAX)  return INT64_MAX;
+    if (d <= (double)INT64_MIN)  return INT64_MIN;
+    return (int64_t)d;
+}
+
 /* Exact powers of 10 that are representable as IEEE 754 double.
  * 10^0 through 10^22 are all exactly representable. */
 static const double cj_exact_pow10[23] = {
@@ -526,7 +534,7 @@ static const char *cj_parse_number(const char *p, const char *end,
             if (cj_fast_flt_convert(mantissa, exp10, neg, &f)) {
                 *is_int = 0;
                 *dval   = (double)f;
-                *ival   = (int64_t)f;
+                *ival   = cj_dbl_to_i64((double)f);
                 return p;
             }
         } else {
@@ -534,7 +542,7 @@ static const char *cj_parse_number(const char *p, const char *end,
             if (cj_fast_dbl_convert(mantissa, exp10, neg, &d)) {
                 *is_int = 0;
                 *dval   = d;
-                *ival   = (int64_t)d;
+                *ival   = cj_dbl_to_i64(d);
                 return p;
             }
         }
@@ -547,7 +555,7 @@ static const char *cj_parse_number(const char *p, const char *end,
     if (float32_mode) d = (double)(float)d;
     *is_int = 0;
     *dval   = d;
-    *ival   = (int64_t)d;
+    *ival   = cj_dbl_to_i64(d);
     return eptr;
 }