Add fast float parser and benchmark float-heavy scene

Replace strtod() with Clinger's fast path in tinygltf_json.h for ~1.5x
faster JSON float parsing. The new parser accumulates all digits into a
uint64 mantissa and uses exact power-of-10 tables for conversion,
avoiding locale-dependent strtod for ~99% of JSON float values.

Add optional float32 parse mode (parse_float32 option) that parses JSON
floats at single precision — fewer significant digits needed, wider fast
path range. Breaks strict double-precision conformance but sufficient
for glTF data which is typically single-precision.

Benchmark additions:
- gen_synthetic: add float_heavy preset (~500MB ASCII float JSON)
- bench_v3: add --float32 flag for float32 parse mode benchmarking

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Syoyo Fujita
2026-03-20 09:00:30 +09:00
parent 78f4a5cfe8
commit 2aeac50277
5 changed files with 1463 additions and 58 deletions

View File

@@ -295,90 +295,256 @@ static const char *cj_scan_str(const char *p, const char *end) {
/* ======================================================================
* FAST NUMBER PARSING (C-style)
*
* Uses Clinger's fast path for float conversion, avoiding strtod() for the
* vast majority of JSON numbers. This is locale-independent and typically
* 4-10x faster than strtod.
*
* Optional float32 mode (CJ_FLOAT32_MODE flag in cj_parse_number):
* Parses floating-point values to float (single) precision and stores
* the result as double. Faster because fewer significant digits are
* needed and the fast path covers a wider exponent range.
* Breaks strict JSON/IEEE-754-double conformance.
* ====================================================================== */
static const char *cj_parse_uint64(const char *p, const char *end,
uint64_t *result, int *overflow) {
uint64_t v = 0;
*overflow = 0;
while (p < end && (unsigned)(*p - '0') <= 9u) {
unsigned char d = (unsigned char)*p - '0';
/* Detect multiplication/addition overflow */
if (v > (UINT64_MAX - d) / 10u) {
*overflow = 1;
/* Consume remaining digits so caller sees the full token */
while (p < end && (unsigned)(*p - '0') <= 9u) ++p;
*result = UINT64_MAX;
return p;
}
v = v * 10u + (uint64_t)d;
++p;
/* Exact powers of 10 that are representable as IEEE 754 double.
* 10^0 through 10^22 are all exactly representable. */
static const double cj_exact_pow10[23] = {
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22
};
/* Clinger's fast path: mantissa * 10^exp10 → double.
* Requires mantissa <= 2^53 (exactly representable as double).
* Returns 1 on success, 0 if fallback needed. */
static int cj_fast_dbl_convert(uint64_t mantissa, int exp10, int neg, double *out) {
if (mantissa == 0) {
*out = neg ? -0.0 : 0.0;
return 1;
}
*result = v;
return p;
/* Primary: |exp10| <= 22, mantissa fits in double mantissa bits */
if (mantissa <= (1ULL << 53)) {
double d;
if (exp10 >= 0 && exp10 <= 22) {
d = (double)mantissa * cj_exact_pow10[exp10];
*out = neg ? -d : d;
return 1;
}
if (exp10 < 0 && exp10 >= -22) {
d = (double)mantissa / cj_exact_pow10[-exp10];
*out = neg ? -d : d;
return 1;
}
/* Extended: split exponent into two steps, each <= 22.
* Positive: exp10 = 22 + remainder, both halves exact.
* Negative: exp10 = -22 + remainder. */
if (exp10 > 22 && exp10 <= 22 + 22) {
d = (double)mantissa * cj_exact_pow10[exp10 - 22];
d *= cj_exact_pow10[22];
*out = neg ? -d : d;
return 1;
}
if (exp10 < -22 && exp10 >= -(22 + 22)) {
d = (double)mantissa / cj_exact_pow10[-exp10 - 22];
d /= cj_exact_pow10[22];
*out = neg ? -d : d;
return 1;
}
}
return 0;
}
/*
* Parse a JSON number starting at [p, end).
/* Fast path for float32: wider range because float mantissa is only 24 bits. */
static int cj_fast_flt_convert(uint64_t mantissa, int exp10, int neg, float *out) {
if (mantissa == 0) {
*out = neg ? -0.0f : 0.0f;
return 1;
}
/* Direct float path: mantissa fits in 24 bits, pow10 exact in float */
if (mantissa <= (1ULL << 24)) {
if (exp10 >= 0 && exp10 <= 10) {
float f = (float)mantissa * (float)cj_exact_pow10[exp10];
*out = neg ? -f : f;
return 1;
}
if (exp10 < 0 && exp10 >= -10) {
float f = (float)mantissa / (float)cj_exact_pow10[-exp10];
*out = neg ? -f : f;
return 1;
}
}
/* Wider path via double arithmetic (still float-precision result) */
if (mantissa <= (1ULL << 53)) {
double d;
if (exp10 >= 0 && exp10 <= 22) {
d = (double)mantissa * cj_exact_pow10[exp10];
*out = neg ? -(float)d : (float)d;
return 1;
}
if (exp10 < 0 && exp10 >= -22) {
d = (double)mantissa / cj_exact_pow10[-exp10];
*out = neg ? -(float)d : (float)d;
return 1;
}
if (exp10 > 22 && exp10 <= 44) {
d = (double)mantissa * cj_exact_pow10[exp10 - 22];
d *= cj_exact_pow10[22];
*out = neg ? -(float)d : (float)d;
return 1;
}
if (exp10 < -22 && exp10 >= -44) {
d = (double)mantissa / cj_exact_pow10[-exp10 - 22];
d /= cj_exact_pow10[22];
*out = neg ? -(float)d : (float)d;
return 1;
}
}
return 0;
}
/* Parse a JSON number starting at [p, end).
* Sets *is_int, *ival (integer result), *dval (floating-point result).
* Returns pointer past the last character consumed, or NULL on error.
*
* NOTE: strtod is locale-dependent on some platforms (decimal separator).
* JSON mandates '.' as decimal separator. Callers in environments where the
* C locale may be overridden should ensure the locale is reset to "C" before
* parsing floating-point JSON values.
*/
* float32_mode: when non-zero, floating-point values are parsed at float
* (single) precision — fewer digits are significant, and the result is
* stored as (double)(float)value. This is faster but not JSON-conformant
* for high-precision doubles.
*
* Uses Clinger's fast path (no strtod) for ~99% of JSON float values.
* Falls back to strtod only for extreme exponents or >19 significant digits. */
static const char *cj_parse_number(const char *p, const char *end,
int *is_int, int64_t *ival, double *dval) {
int *is_int, int64_t *ival, double *dval,
int float32_mode) {
const char *start = p;
int neg = 0;
if (p < end && *p == '-') { neg = 1; ++p; }
if (p >= end) return NULL;
uint64_t int_part = 0;
/* Accumulate ALL digits (integer + fraction) into a single mantissa.
* Track the decimal exponent adjustment from the '.' position. */
uint64_t mantissa = 0;
int ndigits = 0; /* total significant digits consumed */
int exp10 = 0; /* decimal exponent adjustment */
int mantissa_overflow = 0; /* set if >19 significant digits */
int has_frac = 0, has_exp = 0;
int int_overflow = 0;
/* Max significant digits we track: 19 for double, 9 for float32 */
int max_sig = float32_mode ? 9 : 19;
/* Integer part */
if (*p == '0') {
++p;
} else if ((unsigned)(*p - '1') <= 8u) {
p = cj_parse_uint64(p, end, &int_part, &int_overflow);
while (p < end && (unsigned)(*p - '0') <= 9u) {
unsigned d = (unsigned)(*p - '0');
if (ndigits < max_sig) {
mantissa = mantissa * 10 + d;
} else {
exp10++; /* excess digit: bump exponent instead */
if (ndigits >= 19) mantissa_overflow = 1;
}
ndigits++;
++p;
}
} else {
return NULL;
}
if (p < end && *p == '.') has_frac = 1;
if (p < end && (*p == 'e' || *p == 'E')) has_exp = 1;
if (!has_frac && !has_exp && !int_overflow) {
/* Guard signed overflow: -(int64_t)x is UB when x > INT64_MAX.
* Positive: x must fit in [0, INT64_MAX].
* Negative: magnitude must fit in [0, 2^63] i.e. <= INT64_MAX+1
* (the upper bound covers INT64_MIN = -2^63). */
int fits;
if (!neg) {
fits = (int_part <= (uint64_t)INT64_MAX);
} else {
fits = (int_part <= (uint64_t)INT64_MAX + 1u);
/* Fraction part */
if (p < end && *p == '.') {
has_frac = 1;
++p;
/* JSON requires at least one digit after '.' */
if (p >= end || (unsigned)(*p - '0') > 9u) return NULL;
while (p < end && (unsigned)(*p - '0') <= 9u) {
unsigned d = (unsigned)(*p - '0');
if (ndigits < max_sig) {
mantissa = mantissa * 10 + d;
exp10--;
}
/* else: ignore trailing fraction digits beyond precision */
ndigits++;
++p;
}
}
/* Exponent part */
if (p < end && (*p == 'e' || *p == 'E')) {
has_exp = 1;
++p;
int exp_neg = 0;
if (p < end && *p == '+') ++p;
else if (p < end && *p == '-') { exp_neg = 1; ++p; }
/* JSON requires at least one digit in exponent */
if (p >= end || (unsigned)(*p - '0') > 9u) return NULL;
int exp_val = 0;
while (p < end && (unsigned)(*p - '0') <= 9u) {
exp_val = exp_val * 10 + (*p - '0');
if (exp_val > 9999) {
/* Prevent overflow; will fall through to strtod */
while (p < end && (unsigned)(*p - '0') <= 9u) ++p;
break;
}
++p;
}
exp10 += exp_neg ? -exp_val : exp_val;
}
/* ---- Integer fast path (no fraction, no exponent, fits int64) ---- */
/* exp10 == 0 ensures all digits were accumulated (none truncated by max_sig) */
if (!has_frac && !has_exp && !mantissa_overflow && exp10 == 0) {
uint64_t mag = mantissa;
int fits;
if (!neg)
fits = (mag <= (uint64_t)INT64_MAX);
else
fits = (mag <= (uint64_t)INT64_MAX + 1u);
if (fits) {
int64_t sv;
if (neg && int_part == (uint64_t)INT64_MAX + 1u)
sv = INT64_MIN; /* special case: magnitude 2^63 → INT64_MIN */
if (neg && mag == (uint64_t)INT64_MAX + 1u)
sv = INT64_MIN;
else
sv = neg ? -(int64_t)int_part : (int64_t)int_part;
sv = neg ? -(int64_t)mag : (int64_t)mag;
*is_int = 1;
*ival = sv;
*dval = (double)sv;
return p;
}
/* Magnitude doesn't fit int64_t: fall through to strtod */
}
/* Floating-point, integer overflow, or out-of-int64-range: use strtod */
/* ---- Float fast path (Clinger's algorithm) ---- */
if (!mantissa_overflow) {
if (float32_mode) {
float f;
if (cj_fast_flt_convert(mantissa, exp10, neg, &f)) {
*is_int = 0;
*dval = (double)f;
*ival = (int64_t)f;
return p;
}
} else {
double d;
if (cj_fast_dbl_convert(mantissa, exp10, neg, &d)) {
*is_int = 0;
*dval = d;
*ival = (int64_t)d;
return p;
}
}
}
/* ---- Fallback: strtod (handles extreme exponents, >19 digits) ---- */
char *eptr = NULL;
double d = strtod(start, &eptr);
if (eptr == start) return NULL;
if (float32_mode) d = (double)(float)d;
*is_int = 0;
*dval = d;
*ival = (int64_t)d;
@@ -686,6 +852,12 @@ public:
static tinygltf_json parse(const char *first, const char *last,
std::nullptr_t = nullptr,
bool allow_exceptions = false);
/* Parse with float32 mode: floating-point values are parsed at single
* precision for speed. Breaks strict JSON double-precision conformance
* but sufficient for glTF (which stores geometry/animation data as
* single-precision floats in buffers anyway). */
static tinygltf_json parse_float32(const char *first, const char *last);
};
/* ======================================================================
@@ -1245,6 +1417,7 @@ struct cj_parse_ctx {
const char *cur;
const char *end;
int err;
int float32_mode; /* 0 = double (default), 1 = float32 */
char errmsg[256];
};
@@ -1367,7 +1540,7 @@ static void cj_parse_scalar(cj_parse_ctx *ctx, tinygltf_json *slot) {
} else { cj_ctx_error(ctx, "invalid literal 'null'"); }
} else if (c == '-' || (c >= '0' && c <= '9')) {
int is_int = 0; int64_t ival = 0; double dval = 0.0;
const char *next = cj_parse_number(ctx->cur, ctx->end, &is_int, &ival, &dval);
const char *next = cj_parse_number(ctx->cur, ctx->end, &is_int, &ival, &dval, ctx->float32_mode);
if (!next) { cj_ctx_error(ctx, "invalid number"); return; }
ctx->cur = next;
slot->destroy_(); slot->init_null_();
@@ -1757,10 +1930,11 @@ inline tinygltf_json tinygltf_json::parse(const char *first, const char *last,
std::nullptr_t,
bool allow_exceptions) {
cj_parse_ctx ctx;
ctx.cur = first;
ctx.end = last;
ctx.err = 0;
ctx.errmsg[0] = '\0';
ctx.cur = first;
ctx.end = last;
ctx.err = 0;
ctx.float32_mode = 0;
ctx.errmsg[0] = '\0';
tinygltf_json result;
cj_parse_json(&ctx, &result);
@@ -1779,6 +1953,21 @@ inline tinygltf_json tinygltf_json::parse(const char *first, const char *last,
return result;
}
inline tinygltf_json tinygltf_json::parse_float32(const char *first, const char *last) {
cj_parse_ctx ctx;
ctx.cur = first;
ctx.end = last;
ctx.err = 0;
ctx.float32_mode = 1;
ctx.errmsg[0] = '\0';
tinygltf_json result;
cj_parse_json(&ctx, &result);
if (ctx.err) return tinygltf_json();
return result;
}
/* ======================================================================
* TINYGLTF DETAIL NAMESPACE COMPATIBILITY
*