From 0e3043f3e9e4de478e1152e78d0addb6442cf12d Mon Sep 17 00:00:00 2001 From: Syoyo Fujita Date: Sun, 31 May 2026 22:20:46 +0900 Subject: [PATCH] Harden and optimize v3 C parser --- README.md | 4 +- benchmark/bench_v3.cpp | 26 +++- tests/tester_v3_c.c | 208 ++++++++++++++++++++++++++++++ tiny_gltf_v3.c | 279 ++++++++++++++++++++++++++++++++++++++--- tiny_gltf_v3.h | 5 + tinygltf_json_c.h | 171 +++++++++++++++++++++---- 6 files changed, 646 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 1bfdd61..69e5b45 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,10 @@ The v3 C runtime is built for processing **untrusted glTF/GLB input** (server-si - **URI sanitization** — external buffer/image URIs are rejected before any filesystem call if they are empty, contain NUL bytes, begin with `/` or `\`, look like a Windows drive prefix (`X:`), or contain a `..` segment. Production callers SHOULD still provide a custom `tg3_fs_callbacks.read_file` that confines reads to a known directory (e.g. via `openat` plus a `realpath` prefix check) when the input is attacker-controlled. - **Index bounds validation** — every `int32_t` index field populated from JSON (accessor.bufferView, primitive.indices/material/attributes, scene.nodes[], skin.joints[], animation channel/sampler refs, KHR_audio + MSFT_lod refs, …) is checked after the structural parse. Out-of-range indices produce `TG3_ERR_INVALID_INDEX`. Default `tg3_parse_options.validate_indices = 1`; set to `0` only when you need raw round-trip and have your own validator. +- **Buffer/accessor range validation** — declared buffer lengths, bufferView ranges, accessor element spans, sparse accessor spans, component types, and overflow-prone size math are checked before returning a model. - **Strict numeric range checks** — JSON numbers feeding integer fields go through finite/round-trip-validated coercion (`tg3__json_number_to_int32` / `_uint64`). `byteStride` is restricted to 0 or [4, 252]. -- **Memory budget** — the arena is capped at `TINYGLTF3_MAX_MEMORY_BYTES` (1 GB by default; configurable via `tg3_memory_config`). +- **Memory budget** — the arena and C JSON parser enforce `TINYGLTF3_MAX_MEMORY_BYTES` by default; `max_single_alloc` and `TINYGLTF3_MAX_STRING_LENGTH` bound individual allocation and string size. +- **Opt-in fast paths** — `skip_extras_values` avoids materializing `extras` and unknown extension value trees, and `borrow_input_buffers` lets GLB buffer spans reference caller-owned input bytes instead of copying the BIN chunk. - **Image decoding off by default** — the parser does not decode image bytes; use `tg3_parse_options.images_as_is = 1` to skip any decoder entirely when handling untrusted input. - **Error message lifetime** — error strings on `tg3_error_stack` are arena-allocated and remain valid until `tg3_model_free()`. Read or copy them BEFORE freeing the model. diff --git a/benchmark/bench_v3.cpp b/benchmark/bench_v3.cpp index ab8117b..5b4db2c 100644 --- a/benchmark/bench_v3.cpp +++ b/benchmark/bench_v3.cpp @@ -139,7 +139,9 @@ struct BenchResult { /* ------------------------------------------------------------------ */ static BenchResult bench_file(const char *filename, int iterations, int warmup, - bool quiet, int float32_mode = 0) { + bool quiet, int float32_mode = 0, + int skip_extras_values = 0, + int borrow_input_buffers = 0) { BenchResult r = {}; r.filename = filename; r.iterations = iterations; @@ -199,6 +201,8 @@ static BenchResult bench_file(const char *filename, int iterations, int warmup, opts.memory.allocator.free = tracked_free; opts.memory.allocator.user_data = &tracker; opts.parse_float32 = float32_mode; + opts.skip_extras_values = skip_extras_values; + opts.borrow_input_buffers = borrow_input_buffers; tg3_model model; tg3_error_stack errors; @@ -340,7 +344,11 @@ static void usage() { " --csv Output in CSV format\n" " --quiet Suppress per-iteration error messages\n" " --batch Benchmark multiple files\n" - " --float32 Parse JSON floats as float32 (faster, less precise)\n"); + " --float32 Parse JSON floats as float32 (faster, less precise)\n" + " --skip-extras-values\n" + " Skip materializing extras/unknown extension values\n" + " --borrow-input-buffers\n" + " Let GLB buffers reference caller-owned input bytes\n"); } int main(int argc, char **argv) { @@ -351,6 +359,8 @@ int main(int argc, char **argv) { bool csv = false; bool quiet = false; int float32_mode = 0; + int skip_extras_values = 0; + int borrow_input_buffers = 0; std::vector files; for (int i = 1; i < argc; ++i) { @@ -364,6 +374,10 @@ int main(int argc, char **argv) { quiet = true; } else if (strcmp(argv[i], "--float32") == 0) { float32_mode = 1; + } else if (strcmp(argv[i], "--skip-extras-values") == 0) { + skip_extras_values = 1; + } else if (strcmp(argv[i], "--borrow-input-buffers") == 0) { + borrow_input_buffers = 1; } else if (strcmp(argv[i], "--batch") == 0) { /* batch mode: just collect files */ } else if (argv[i][0] != '-') { @@ -379,10 +393,14 @@ int main(int argc, char **argv) { if (!csv && !quiet) { printf("Benchmarking: %s (%d iterations, %d warmup%s)\n", file.c_str(), iterations, warmup, - float32_mode ? ", float32" : ""); + float32_mode ? ", float32" : + skip_extras_values ? ", skip extras" : + borrow_input_buffers ? ", borrow buffers" : ""); } - BenchResult r = bench_file(file.c_str(), iterations, warmup, quiet, float32_mode); + BenchResult r = bench_file(file.c_str(), iterations, warmup, quiet, + float32_mode, skip_extras_values, + borrow_input_buffers); if (csv) { print_csv_row(r); diff --git a/tests/tester_v3_c.c b/tests/tester_v3_c.c index fe477e5..b565c1b 100644 --- a/tests/tester_v3_c.c +++ b/tests/tester_v3_c.c @@ -649,6 +649,196 @@ fail: return 0; } +static int check_buffer_view_range_rejected(void) { + static const uint8_t json[] = + "{\"asset\":{\"version\":\"2.0\"}," + "\"buffers\":[{\"uri\":\"data:application/octet-stream;base64,AAAA\",\"byteLength\":4}]," + "\"bufferViews\":[{\"buffer\":0,\"byteOffset\":2,\"byteLength\":4}]}"; + tg3_model model; + tg3_error_stack errors; + tg3_parse_options opts; + tg3_error_code err; + + tg3_error_stack_init(&errors); + tg3_parse_options_init(&opts); + err = tg3_parse(&model, &errors, json, (uint64_t)(sizeof(json) - 1), "", 0, &opts); + if (err != TG3_ERR_INVALID_ACCESSOR) { + fprintf(stderr, "bufferView OOB expected TG3_ERR_INVALID_ACCESSOR, got %d\n", (int)err); + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 0; + } + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 1; +} + +static int check_accessor_range_rejected(void) { + static const uint8_t json[] = + "{\"asset\":{\"version\":\"2.0\"}," + "\"buffers\":[{\"uri\":\"data:application/octet-stream;base64,AAAA\",\"byteLength\":4}]," + "\"bufferViews\":[{\"buffer\":0,\"byteLength\":4}]," + "\"accessors\":[{\"bufferView\":0,\"componentType\":5126,\"count\":2,\"type\":\"SCALAR\"}]}"; + tg3_model model; + tg3_error_stack errors; + tg3_parse_options opts; + tg3_error_code err; + + tg3_error_stack_init(&errors); + tg3_parse_options_init(&opts); + err = tg3_parse(&model, &errors, json, (uint64_t)(sizeof(json) - 1), "", 0, &opts); + if (err != TG3_ERR_INVALID_ACCESSOR) { + fprintf(stderr, "accessor OOB expected TG3_ERR_INVALID_ACCESSOR, got %d\n", (int)err); + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 0; + } + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 1; +} + +static int check_sparse_accessor_range_rejected(void) { + static const uint8_t json[] = + "{\"asset\":{\"version\":\"2.0\"}," + "\"buffers\":[{\"uri\":\"data:application/octet-stream;base64,AAAAAAAA\",\"byteLength\":6}]," + "\"bufferViews\":[{\"buffer\":0,\"byteOffset\":0,\"byteLength\":2}," + "{\"buffer\":0,\"byteOffset\":2,\"byteLength\":4}]," + "\"accessors\":[{\"componentType\":5126,\"count\":2,\"type\":\"SCALAR\"," + "\"sparse\":{\"count\":2,\"indices\":{\"bufferView\":0,\"componentType\":5123}," + "\"values\":{\"bufferView\":1}}}]}"; + tg3_model model; + tg3_error_stack errors; + tg3_parse_options opts; + tg3_error_code err; + + tg3_error_stack_init(&errors); + tg3_parse_options_init(&opts); + err = tg3_parse(&model, &errors, json, (uint64_t)(sizeof(json) - 1), "", 0, &opts); + if (err != TG3_ERR_INVALID_ACCESSOR) { + fprintf(stderr, "sparse accessor OOB expected TG3_ERR_INVALID_ACCESSOR, got %d\n", (int)err); + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 0; + } + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 1; +} + +static int check_skip_extras_values_opt_in(void) { + static const uint8_t json[] = + "{\"asset\":{\"version\":\"2.0\"}," + "\"extras\":{\"large\":[1,2,3]}," + "\"nodes\":[{\"extensions\":{\"VENDOR_test\":{\"x\":1}}}]}"; + tg3_model model; + tg3_error_stack errors; + tg3_parse_options opts; + tg3_error_code err; + + tg3_error_stack_init(&errors); + tg3_parse_options_init(&opts); + err = tg3_parse(&model, &errors, json, (uint64_t)(sizeof(json) - 1), "", 0, &opts); + if (err != TG3_OK || !model.ext.extras || model.ext.extras->type != TG3_VALUE_OBJECT) { + fprintf(stderr, "default extras materialization failed\n"); + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 0; + } + tg3_model_free(&model); + tg3_error_stack_free(&errors); + + tg3_error_stack_init(&errors); + tg3_parse_options_init(&opts); + opts.skip_extras_values = 1; + err = tg3_parse(&model, &errors, json, (uint64_t)(sizeof(json) - 1), "", 0, &opts); + if (err != TG3_OK || model.ext.extras != NULL || + model.nodes_count != 1 || model.nodes[0].ext.extensions_count != 1 || + model.nodes[0].ext.extensions[0].value.type != TG3_VALUE_NULL) { + fprintf(stderr, "skip_extras_values did not skip value trees as expected\n"); + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 0; + } + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 1; +} + +static int check_json_limits_rejected(void) { + static const uint8_t json[] = + "{\"asset\":{\"version\":\"2.0\"},\"nodes\":[{\"name\":\"abcdef\"}]}"; + tg3_model model; + tg3_error_stack errors; + tg3_parse_options opts; + tg3_error_code err; + + tg3_error_stack_init(&errors); + tg3_parse_options_init(&opts); + opts.memory.max_single_alloc = 4; + err = tg3_parse(&model, &errors, json, (uint64_t)(sizeof(json) - 1), "", 0, &opts); + if (err != TG3_ERR_JSON_PARSE) { + fprintf(stderr, "small max_single_alloc expected JSON parse failure, got %d\n", (int)err); + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 0; + } + tg3_model_free(&model); + tg3_error_stack_free(&errors); + return 1; +} + +static int check_borrow_input_buffers(void) { + static const char json[] = + "{\"asset\":{\"version\":\"2.0\"},\"buffers\":[{\"byteLength\":4}]}"; + tg3_model model; + tg3_error_stack errors; + tg3_parse_options opts; + tg3_error_code err; + uint32_t json_len = (uint32_t)(sizeof(json) - 1); + uint32_t json_padded = (json_len + 3u) & ~3u; + uint32_t bin_len = 4; + uint32_t total = 12u + 8u + json_padded + 8u + bin_len; + uint32_t version = 2; + uint32_t json_type = 0x4E4F534Au; + uint32_t bin_type = 0x004E4942u; + uint8_t *glb = (uint8_t *)malloc(total); + uint32_t bin_off = 12u + 8u + json_padded + 8u; + int ok = 0; + + if (!glb) return 0; + memset(glb, ' ', total); + memcpy(glb, "glTF", 4); + memcpy(glb + 4, &version, 4); + memcpy(glb + 8, &total, 4); + memcpy(glb + 12, &json_padded, 4); + memcpy(glb + 16, &json_type, 4); + memcpy(glb + 20, json, json_len); + memcpy(glb + 20 + json_padded, &bin_len, 4); + memcpy(glb + 24 + json_padded, &bin_type, 4); + glb[bin_off + 0] = 1; + glb[bin_off + 1] = 2; + glb[bin_off + 2] = 3; + glb[bin_off + 3] = 4; + + tg3_error_stack_init(&errors); + tg3_parse_options_init(&opts); + opts.borrow_input_buffers = 1; + err = tg3_parse_glb(&model, &errors, glb, (uint64_t)total, "", 0, &opts); + if (err == TG3_OK && model.buffers_count == 1 && + model.buffers[0].data.data == glb + bin_off && + model.buffers[0].data.count == 4) { + ok = 1; + } else { + fprintf(stderr, "borrow_input_buffers failed: err=%d buffers=%u\n", + (int)err, model.buffers_count); + } + tg3_model_free(&model); + tg3_error_stack_free(&errors); + free(glb); + return ok; +} + static int parse_file_arg(const char *path) { FILE *fp = fopen(path, "rb"); uint8_t *buf; @@ -768,5 +958,23 @@ int main(int argc, char **argv) { if (!check_error_messages_survive_parse_failure()) { return 1; } + if (!check_buffer_view_range_rejected()) { + return 1; + } + if (!check_accessor_range_rejected()) { + return 1; + } + if (!check_sparse_accessor_range_rejected()) { + return 1; + } + if (!check_skip_extras_values_opt_in()) { + return 1; + } + if (!check_json_limits_rejected()) { + return 1; + } + if (!check_borrow_input_buffers()) { + return 1; + } return 0; } diff --git a/tiny_gltf_v3.c b/tiny_gltf_v3.c index f7a0d50..5120890 100644 --- a/tiny_gltf_v3.c +++ b/tiny_gltf_v3.c @@ -504,6 +504,18 @@ static double tg3__json_number_to_double(const tg3json_value *v) { static int tg3__json_is_object(const tg3json_value *v) { return v && v->type == TG3JSON_OBJECT; } static int tg3__json_is_array(const tg3json_value *v) { return v && v->type == TG3JSON_ARRAY; } +static int tg3__u64_add_overflow(uint64_t a, uint64_t b, uint64_t *out) { + if (a > UINT64_MAX - b) return 1; + *out = a + b; + return 0; +} + +static int tg3__u64_mul_overflow(uint64_t a, uint64_t b, uint64_t *out) { + if (a != 0 && b > UINT64_MAX / a) return 1; + *out = a * b; + return 0; +} + static int tg3__json_has(const tg3json_value *o, const char *key) { return tg3json_object_get(o, key) ? 1 : 0; } @@ -830,10 +842,12 @@ static void tg3__parse_extras_and_extensions(tg3__parse_ctx *ctx, const tg3json_ memset(ee, 0, sizeof(*ee)); extras_it = tg3__json_get(o, "extras"); if (extras_it) { - tg3_value *ev = (tg3_value *)tg3__arena_alloc(ctx->arena, sizeof(tg3_value)); - if (ev) { - *ev = tg3__json_to_value(ctx, extras_it); - ee->extras = ev; + if (!ctx->opts.skip_extras_values) { + tg3_value *ev = (tg3_value *)tg3__arena_alloc(ctx->arena, sizeof(tg3_value)); + if (ev) { + *ev = tg3__json_to_value(ctx, extras_it); + ee->extras = ev; + } } if (ctx->opts.store_original_json) { size_t raw_len = 0; @@ -853,8 +867,13 @@ static void tg3__parse_extras_and_extensions(tg3__parse_ctx *ctx, const tg3json_ if (exts) { for (i = 0; i < count; ++i) { const tg3json_object_entry *entry = tg3json_object_at(ext_it, i); + memset(&exts[i], 0, sizeof(exts[i])); exts[i].name = tg3__arena_str(ctx->arena, entry->key, (uint32_t)entry->key_len); - exts[i].value = tg3__json_to_value(ctx, entry->value); + if (!ctx->opts.skip_extras_values) { + exts[i].value = tg3__json_to_value(ctx, entry->value); + } else { + exts[i].value.type = TG3_VALUE_NULL; + } } ee->extensions = exts; ee->extensions_count = (uint32_t)count; @@ -1096,6 +1115,7 @@ static int tg3__parse_buffer(tg3__parse_ctx *ctx, const tg3json_value *o, tg3__parse_string(ctx, o, "name", &buf->name, 0, "/buffer"); tg3__parse_string(ctx, o, "uri", &buf->uri, 0, "/buffer"); tg3__parse_uint64(ctx, o, "byteLength", &byte_length, 1, "/buffer"); + buf->byte_length = byte_length; if (ctx->is_binary && buf_idx == 0 && buf->uri.len == 0) { uint8_t *data; if (!ctx->bin_data || ctx->bin_size < byte_length) { @@ -1103,13 +1123,24 @@ static int tg3__parse_buffer(tg3__parse_ctx *ctx, const tg3json_value *o, "GLB BIN chunk missing or smaller than buffer.byteLength", NULL, -1); return 0; } + if (ctx->opts.borrow_input_buffers) { + buf->data.data = ctx->bin_data; + buf->data.count = byte_length; + tg3__parse_extras_and_extensions(ctx, o, &buf->ext); + return 1; + } + if (byte_length > (uint64_t)((size_t)-1)) { + tg3__error_push(ctx->errors, TG3_SEVERITY_ERROR, TG3_ERR_OUT_OF_MEMORY, + "buffer.byteLength exceeds addressable size", NULL, -1); + return 0; + } data = (uint8_t *)tg3__arena_alloc(ctx->arena, (size_t)byte_length); - if (!data) { + if (!data && byte_length > 0) { tg3__error_push(ctx->errors, TG3_SEVERITY_ERROR, TG3_ERR_OUT_OF_MEMORY, "OOM for buffer data", NULL, -1); return 0; } - memcpy(data, ctx->bin_data, (size_t)byte_length); + if (byte_length > 0) memcpy(data, ctx->bin_data, (size_t)byte_length); buf->data.data = data; buf->data.count = byte_length; } else if (buf->uri.len > 0) { @@ -1130,11 +1161,22 @@ static int tg3__parse_buffer(tg3__parse_ctx *ctx, const tg3json_value *o, uint8_t *file_data = NULL; uint64_t file_size = 0; if (tg3__load_external_file(ctx, &file_data, &file_size, buf->uri.data, buf->uri.len)) { - uint8_t *data = (uint8_t *)tg3__arena_alloc(ctx->arena, (size_t)file_size); + uint8_t *data = NULL; + if (file_size > (uint64_t)((size_t)-1)) { + tg3__error_push(ctx->errors, TG3_SEVERITY_ERROR, TG3_ERR_OUT_OF_MEMORY, + "external buffer exceeds addressable size", NULL, -1); + if (ctx->opts.fs.free_file) ctx->opts.fs.free_file(file_data, file_size, ctx->opts.fs.user_data); + tg3__parse_extras_and_extensions(ctx, o, &buf->ext); + return 0; + } + data = (uint8_t *)tg3__arena_alloc(ctx->arena, (size_t)file_size); if (data) { memcpy(data, file_data, (size_t)file_size); buf->data.data = data; buf->data.count = file_size; + } else if (file_size > 0) { + tg3__error_push(ctx->errors, TG3_SEVERITY_ERROR, TG3_ERR_OUT_OF_MEMORY, + "OOM for external buffer data", NULL, -1); } if (ctx->opts.fs.free_file) ctx->opts.fs.free_file(file_data, file_size, ctx->opts.fs.user_data); } @@ -1808,6 +1850,176 @@ static int tg3__validate_indices(tg3__parse_ctx *ctx, const tg3_model *m) { return errs == 0; } +static int tg3__valid_accessor_component_type(int32_t component_type) { + switch (component_type) { + case TG3_COMPONENT_TYPE_BYTE: + case TG3_COMPONENT_TYPE_UNSIGNED_BYTE: + case TG3_COMPONENT_TYPE_SHORT: + case TG3_COMPONENT_TYPE_UNSIGNED_SHORT: + case TG3_COMPONENT_TYPE_UNSIGNED_INT: + case TG3_COMPONENT_TYPE_FLOAT: + return 1; + default: + return 0; + } +} + +static int tg3__valid_sparse_index_component_type(int32_t component_type) { + return component_type == TG3_COMPONENT_TYPE_UNSIGNED_BYTE || + component_type == TG3_COMPONENT_TYPE_UNSIGNED_SHORT || + component_type == TG3_COMPONENT_TYPE_UNSIGNED_INT; +} + +static int tg3__validate_range_in_buffer_view(tg3__parse_ctx *ctx, + const tg3_buffer_view *bv, + uint64_t byte_offset, + uint64_t byte_length, + const char *what) { + uint64_t end; + int overflow = tg3__u64_add_overflow(byte_offset, byte_length, &end); + if (overflow || end > bv->byte_length) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_INVALID_ACCESSOR, "/accessors", + "%s byte range [%llu,%llu) exceeds bufferView length %llu", + what, + (unsigned long long)byte_offset, + (unsigned long long)(overflow ? UINT64_MAX : end), + (unsigned long long)bv->byte_length); + return 0; + } + return 1; +} + +static int tg3__validate_resources(tg3__parse_ctx *ctx, const tg3_model *m) { + uint32_t i; + int ok = 1; + + for (i = 0; i < m->buffers_count; ++i) { + const tg3_buffer *b = &m->buffers[i]; + if (b->data.count > 0 && b->byte_length > b->data.count) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_BUFFER_SIZE_MISMATCH, "/buffers", + "buffers[%u].byteLength %llu exceeds loaded data size %llu", + i, (unsigned long long)b->byte_length, + (unsigned long long)b->data.count); + ok = 0; + } + } + + for (i = 0; i < m->buffer_views_count; ++i) { + const tg3_buffer_view *bv = &m->buffer_views[i]; + const tg3_buffer *b; + uint64_t buffer_size; + uint64_t end; + if (bv->buffer < 0 || (uint32_t)bv->buffer >= m->buffers_count) { + continue; + } + b = &m->buffers[bv->buffer]; + buffer_size = b->byte_length ? b->byte_length : b->data.count; + if (tg3__u64_add_overflow(bv->byte_offset, bv->byte_length, &end) || + end > buffer_size) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_INVALID_BUFFER_VIEW, "/bufferViews", + "bufferViews[%u] byte range exceeds buffers[%d].byteLength", + i, bv->buffer); + ok = 0; + } + } + + for (i = 0; i < m->accessors_count; ++i) { + const tg3_accessor *a = &m->accessors[i]; + int32_t comp_size; + int32_t num_comp; + uint64_t elem_size; + if (!tg3__valid_accessor_component_type(a->component_type)) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_INVALID_ACCESSOR, "/accessors", + "accessors[%u].componentType %d is invalid", + i, a->component_type); + ok = 0; + continue; + } + comp_size = tg3_component_size(a->component_type); + num_comp = tg3_num_components(a->type); + if (comp_size <= 0 || num_comp <= 0) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_INVALID_ACCESSOR, "/accessors", + "accessors[%u].type %d is invalid", i, a->type); + ok = 0; + continue; + } + if (tg3__u64_mul_overflow((uint64_t)comp_size, (uint64_t)num_comp, &elem_size)) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_INVALID_ACCESSOR, "/accessors", + "accessors[%u] element size overflows", i); + ok = 0; + continue; + } + if (a->buffer_view >= 0 && (uint32_t)a->buffer_view < m->buffer_views_count) { + const tg3_buffer_view *bv = &m->buffer_views[a->buffer_view]; + uint64_t stride = bv->byte_stride ? (uint64_t)bv->byte_stride : elem_size; + uint64_t last_offset = 0; + uint64_t span = 0; + if (stride < elem_size || + (a->count > 0 && + (tg3__u64_mul_overflow(a->count - 1u, stride, &last_offset) || + tg3__u64_add_overflow(last_offset, elem_size, &span)))) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_INVALID_ACCESSOR, "/accessors", + "accessors[%u] byte stride/count overflows", i); + ok = 0; + } else if (!tg3__validate_range_in_buffer_view(ctx, bv, a->byte_offset, + a->count > 0 ? span : 0, + "accessor")) { + ok = 0; + } + } + if (a->sparse.is_sparse) { + uint64_t sparse_count; + if (a->sparse.count < 0 || (uint64_t)a->sparse.count > a->count) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_INVALID_ACCESSOR, "/accessors", + "accessors[%u].sparse.count %d is invalid", + i, a->sparse.count); + ok = 0; + continue; + } + sparse_count = (uint64_t)a->sparse.count; + if (!tg3__valid_sparse_index_component_type(a->sparse.indices.component_type)) { + tg3__error_pushf(ctx->errors, ctx->arena, TG3_SEVERITY_ERROR, + TG3_ERR_INVALID_ACCESSOR, "/accessors", + "accessors[%u].sparse.indices.componentType %d is invalid", + i, a->sparse.indices.component_type); + ok = 0; + } + if (a->sparse.indices.buffer_view >= 0 && + (uint32_t)a->sparse.indices.buffer_view < m->buffer_views_count) { + const tg3_buffer_view *bv = &m->buffer_views[a->sparse.indices.buffer_view]; + uint64_t bytes = 0; + int32_t idx_size_i = tg3_component_size(a->sparse.indices.component_type); + if (idx_size_i <= 0 || + tg3__u64_mul_overflow(sparse_count, (uint64_t)idx_size_i, &bytes) || + !tg3__validate_range_in_buffer_view(ctx, bv, a->sparse.indices.byte_offset, + bytes, "sparse indices")) { + ok = 0; + } + } + if (a->sparse.values.buffer_view >= 0 && + (uint32_t)a->sparse.values.buffer_view < m->buffer_views_count) { + const tg3_buffer_view *bv = &m->buffer_views[a->sparse.values.buffer_view]; + uint64_t bytes = 0; + if (tg3__u64_mul_overflow(sparse_count, elem_size, &bytes) || + !tg3__validate_range_in_buffer_view(ctx, bv, a->sparse.values.byte_offset, + bytes, "sparse values")) { + ok = 0; + } + } + } + } + + return ok; +} + static tg3_error_code tg3__parse_from_json(tg3__parse_ctx *ctx, const tg3json_value *json_doc, tg3_model *model) { const tg3json_value *asset_it = tg3json_object_get(json_doc, "asset"); const tg3json_value *ext_it; @@ -1873,9 +2085,22 @@ static tg3_error_code tg3__parse_from_json(tg3__parse_ctx *ctx, const tg3json_va return TG3_ERR_INVALID_INDEX; } } + if (!tg3__validate_resources(ctx, model)) { + return TG3_ERR_INVALID_ACCESSOR; + } return (ctx->errors && ctx->errors->has_error) ? TG3_ERR_JSON_PARSE : TG3_OK; } +static void tg3__json_parse_options_from_tg3(const tg3_parse_options *options, + tg3json_parse_options *json_options) { + memset(json_options, 0, sizeof(*json_options)); + json_options->depth_limit = TINYGLTF3_MAX_NESTING_DEPTH; + json_options->memory_budget = options ? (size_t)options->memory.memory_budget : 0; + json_options->max_single_alloc = options ? (size_t)options->memory.max_single_alloc : 0; + json_options->max_string_length = TINYGLTF3_MAX_STRING_LENGTH; + json_options->parse_float32 = options ? options->parse_float32 : 0; +} + static tg3_error_code tg3__parse_glb_header(const uint8_t *data, uint64_t size, const uint8_t **json_out, uint64_t *json_size_out, const uint8_t **bin_out, uint64_t *bin_size_out, @@ -1901,17 +2126,17 @@ static tg3_error_code tg3__parse_glb_header(const uint8_t *data, uint64_t size, return TG3_ERR_GLB_INVALID_VERSION; } memcpy(&total_length, data + 8, 4); - if ((uint64_t)total_length > size) { + if ((uint64_t)total_length != size) { tg3__error_push(errors, TG3_SEVERITY_ERROR, TG3_ERR_GLB_SIZE_MISMATCH, - "GLB total length exceeds data size", NULL, -1); + "GLB total length does not match data size", NULL, -1); return TG3_ERR_GLB_SIZE_MISMATCH; } - while (offset + 8 <= size) { + while (offset + 8 <= (uint64_t)total_length) { uint32_t chunk_length; uint32_t chunk_type; memcpy(&chunk_length, data + offset, 4); offset += 4; memcpy(&chunk_type, data + offset, 4); offset += 4; - if (offset + chunk_length > size) { + if (offset + chunk_length > (uint64_t)total_length) { tg3__error_push(errors, TG3_SEVERITY_ERROR, TG3_ERR_GLB_CHUNK_ERROR, "GLB chunk exceeds data size", NULL, -1); return TG3_ERR_GLB_CHUNK_ERROR; @@ -1942,19 +2167,23 @@ TINYGLTF3_API tg3_error_code tg3_parse(tg3_model *model, tg3_error_stack *errors tg3_arena *arena; tg3__parse_ctx ctx; tg3json_value json_doc; + tg3json_parse_options json_options; const char *error_pos = NULL; tg3_error_code ret; int parsed_ok; - if (!options) { tg3_parse_options_init(&default_opts); options = &default_opts; } + if (!model) return TG3_ERR_JSON_PARSE; tg3__model_init(model); + if (!json_data) return TG3_ERR_JSON_PARSE; + if (!options) { tg3_parse_options_init(&default_opts); options = &default_opts; } arena = tg3__arena_create(&options->memory); if (!arena) { tg3__error_push(errors, TG3_SEVERITY_ERROR, TG3_ERR_OUT_OF_MEMORY, "Failed to create arena", NULL, -1); return TG3_ERR_OUT_OF_MEMORY; } model->arena_ = arena; - parsed_ok = tg3json_parse_n((const char *)json_data, (size_t)json_size, - TINYGLTF3_MAX_NESTING_DEPTH, &json_doc, &error_pos); + tg3__json_parse_options_from_tg3(options, &json_options); + parsed_ok = tg3json_parse_n_opts((const char *)json_data, (size_t)json_size, + &json_options, &json_doc, &error_pos); if (!parsed_ok || json_doc.type != TG3JSON_OBJECT) { tg3__error_push(errors, TG3_SEVERITY_ERROR, TG3_ERR_JSON_PARSE, "Failed to parse JSON", NULL, error_pos ? (int64_t)(error_pos - (const char *)json_data) : -1); @@ -1992,13 +2221,16 @@ TINYGLTF3_API tg3_error_code tg3_parse_glb(tg3_model *model, tg3_error_stack *er tg3_arena *arena; tg3__parse_ctx ctx; tg3json_value json_doc; + tg3json_parse_options json_options; const char *error_pos = NULL; tg3_error_code err; int parsed_ok; /* Initialize model before any failure-return so callers can safely call * tg3_model_free() on the error path; the GLB header parse must not run * against a model whose arena_ field is uninitialized garbage. */ + if (!model) return TG3_ERR_GLB_INVALID_HEADER; tg3__model_init(model); + if (!glb_data) return TG3_ERR_GLB_INVALID_HEADER; err = tg3__parse_glb_header(glb_data, glb_size, &json_chunk, &json_chunk_size, &bin_chunk, &bin_chunk_size, errors); if (err != TG3_OK) return err; if (!options) { tg3_parse_options_init(&default_opts); options = &default_opts; } @@ -2008,8 +2240,9 @@ TINYGLTF3_API tg3_error_code tg3_parse_glb(tg3_model *model, tg3_error_stack *er return TG3_ERR_OUT_OF_MEMORY; } model->arena_ = arena; - parsed_ok = tg3json_parse_n((const char *)json_chunk, (size_t)json_chunk_size, - TINYGLTF3_MAX_NESTING_DEPTH, &json_doc, &error_pos); + tg3__json_parse_options_from_tg3(options, &json_options); + parsed_ok = tg3json_parse_n_opts((const char *)json_chunk, (size_t)json_chunk_size, + &json_options, &json_doc, &error_pos); if (!parsed_ok || json_doc.type != TG3JSON_OBJECT) { tg3__error_push(errors, TG3_SEVERITY_ERROR, TG3_ERR_JSON_PARSE, "Failed to parse GLB JSON chunk", NULL, error_pos ? (int64_t)(error_pos - (const char *)json_chunk) : -1); @@ -2039,6 +2272,11 @@ TINYGLTF3_API tg3_error_code tg3_parse_auto(tg3_model *model, tg3_error_stack *e const uint8_t *data, uint64_t size, const char *base_dir, uint32_t base_dir_len, const tg3_parse_options *options) { + if (!model) return TG3_ERR_JSON_PARSE; + if (!data && size > 0) { + tg3__model_init(model); + return TG3_ERR_JSON_PARSE; + } if (size >= 4 && data[0] == 'g' && data[1] == 'l' && data[2] == 'T' && data[3] == 'F') { return tg3_parse_glb(model, errors, data, size, base_dir, base_dir_len, options); } @@ -2055,9 +2293,11 @@ TINYGLTF3_API tg3_error_code tg3_parse_file(tg3_model *model, tg3_error_stack *e uint32_t base_dir_len = 0; tg3_error_code result; uint32_t i; + if (!model) return TG3_ERR_FILE_NOT_FOUND; + tg3__model_init(model); + if (!filename) return TG3_ERR_FILE_NOT_FOUND; if (options) opts = *options; else tg3_parse_options_init(&opts); - tg3__model_init(model); #ifdef TINYGLTF3_ENABLE_FS tg3__set_default_fs(&opts.fs); #endif @@ -2464,7 +2704,8 @@ static int tg3__serialize_buffer(const tg3_buffer *b, int wd, int embed, tg3json (void)wd; tg3json_value_init_object(out); if (!tg3__serialize_str(out, "name", b->name) || - !tg3__json_set_int(out, "byteLength", (int64_t)b->data.count)) { + !tg3__json_set_int(out, "byteLength", + (int64_t)(b->byte_length ? b->byte_length : b->data.count))) { tg3json_value_free(out); return 0; } diff --git a/tiny_gltf_v3.h b/tiny_gltf_v3.h index 7bbdadb..3e4afe0 100644 --- a/tiny_gltf_v3.h +++ b/tiny_gltf_v3.h @@ -418,6 +418,7 @@ typedef struct tg3_asset { /* --- Buffer --- */ typedef struct tg3_buffer { tg3_str name; + uint64_t byte_length; /* Declared buffer.byteLength */ tg3_span_u8 data; tg3_str uri; tg3_extras_ext ext; @@ -941,6 +942,10 @@ typedef struct tg3_parse_options { int32_t images_as_is; /* 1 = don't decode images */ int32_t preserve_image_channels; /* 1 = keep original channels */ int32_t store_original_json; /* 1 = store raw JSON strings */ + int32_t skip_extras_values; /* 1 = skip materializing extras and + * unknown extension value trees */ + int32_t borrow_input_buffers; /* 1 = GLB BIN buffer spans may point + * into caller-owned input data */ int32_t parse_float32; /* 1 = parse JSON floats as float32 for speed * (breaks strict double-precision conformance * but sufficient for glTF data which is diff --git a/tinygltf_json_c.h b/tinygltf_json_c.h index 13bf284..380a01d 100644 --- a/tinygltf_json_c.h +++ b/tinygltf_json_c.h @@ -58,6 +58,17 @@ int tg3json_parse_n(const char *data, size_t len, size_t depth_limit, tg3json_value *out_value, const char **out_error_pos); int tg3json_parse(const char *begin, const char *end, size_t depth_limit, tg3json_value *out_value, const char **out_error_pos); +typedef struct tg3json_parse_options { + size_t depth_limit; /* 0 = default */ + size_t memory_budget; /* 0 = unlimited */ + size_t max_single_alloc; /* 0 = unlimited */ + size_t max_string_length; /* 0 = unlimited */ + int parse_float32; /* 1 = round JSON reals to float */ +} tg3json_parse_options; +int tg3json_parse_n_opts(const char *data, size_t len, + const tg3json_parse_options *options, + tg3json_value *out_value, + const char **out_error_pos); void tg3json_value_free(tg3json_value *value); void tg3json_value_init_null(tg3json_value *value); void tg3json_value_init_bool(tg3json_value *value, int boolean_value); @@ -109,9 +120,15 @@ typedef struct tg3json__parser { const char *end; const char *error; size_t depth_limit; + size_t memory_budget; + size_t max_single_alloc; + size_t max_string_length; + size_t allocated; + int parse_float32; } tg3json__parser; typedef struct tg3json__buffer { + tg3json__parser *parser; char *data; size_t len; size_t cap; @@ -131,6 +148,20 @@ static char *tg3json__strndup_local(const char *src, size_t len) { return dst; } +static void *tg3json__parser_alloc(tg3json__parser *parser, size_t size) { + void *ptr; + if (!parser) return malloc(size); + if (parser->max_single_alloc && size > parser->max_single_alloc) return NULL; + if (parser->memory_budget && + (size > parser->memory_budget || parser->allocated > parser->memory_budget - size)) { + return NULL; + } + ptr = malloc(size); + if (!ptr) return NULL; + parser->allocated += size; + return ptr; +} + static int tg3json__reserve_bytes(void **ptr, size_t elem_size, size_t needed, size_t *capacity) { void *new_ptr; @@ -154,6 +185,44 @@ static int tg3json__reserve_bytes(void **ptr, size_t elem_size, return 1; } +static int tg3json__reserve_bytes_parser(tg3json__parser *parser, void **ptr, + size_t elem_size, size_t needed, + size_t *capacity) { + void *new_ptr; + size_t new_cap; + size_t old_bytes; + size_t new_bytes; + + if (needed <= *capacity) return 1; + new_cap = (*capacity > 0) ? *capacity : 8; + while (new_cap < needed) { + if (new_cap > ((size_t)-1) / 2) { + new_cap = needed; + break; + } + new_cap *= 2; + } + + if (elem_size != 0 && new_cap > ((size_t)-1) / elem_size) return 0; + old_bytes = elem_size * (*capacity); + new_bytes = elem_size * new_cap; + if (parser) { + size_t delta = (new_bytes > old_bytes) ? (new_bytes - old_bytes) : 0; + if (parser->max_single_alloc && new_bytes > parser->max_single_alloc) return 0; + if (parser->memory_budget && delta > 0 && + (delta > parser->memory_budget || + parser->allocated > parser->memory_budget - delta)) { + return 0; + } + } + new_ptr = realloc(*ptr, new_bytes); + if (!new_ptr) return 0; + if (parser && new_bytes > old_bytes) parser->allocated += new_bytes - old_bytes; + *ptr = new_ptr; + *capacity = new_cap; + return 1; +} + static const char *tg3json__skip_ws(const char *p, const char *end) { while (p < end) { unsigned char c = (unsigned char)*p; @@ -169,7 +238,8 @@ static void tg3json__set_error(tg3json__parser *parser, const char *pos) { static int tg3json__buf_append(tg3json__buffer *buf, const char *src, size_t len) { if (len == 0) return 1; - if (!tg3json__reserve_bytes((void **)&buf->data, 1, buf->len + len + 1, &buf->cap)) { + if (!tg3json__reserve_bytes_parser(buf->parser, (void **)&buf->data, 1, + buf->len + len + 1, &buf->cap)) { return 0; } memcpy(buf->data + buf->len, src, len); @@ -179,7 +249,8 @@ static int tg3json__buf_append(tg3json__buffer *buf, const char *src, size_t len } static int tg3json__buf_putc(tg3json__buffer *buf, char c) { - if (!tg3json__reserve_bytes((void **)&buf->data, 1, buf->len + 2, &buf->cap)) { + if (!tg3json__reserve_bytes_parser(buf->parser, (void **)&buf->data, 1, + buf->len + 2, &buf->cap)) { return 0; } buf->data[buf->len++] = c; @@ -230,6 +301,7 @@ static int tg3json__parse_string_raw(tg3json__parser *parser, tg3json__buffer buf; const char *start; memset(&buf, 0, sizeof(buf)); + buf.parser = parser; if (parser->cur >= parser->end || *parser->cur != '"') { tg3json__set_error(parser, parser->cur); @@ -241,6 +313,8 @@ static int tg3json__parse_string_raw(tg3json__parser *parser, while (parser->cur < parser->end) { unsigned char c = (unsigned char)*parser->cur; if (c == '"') { + size_t final_len = buf.len + (size_t)(parser->cur - start); + if (parser->max_string_length && final_len > parser->max_string_length) goto oom; if (!tg3json__buf_append(&buf, start, (size_t)(parser->cur - start))) goto oom; ++parser->cur; *out_str = buf.data; @@ -249,6 +323,8 @@ static int tg3json__parse_string_raw(tg3json__parser *parser, } if (c == '\\') { uint32_t codepoint; + size_t pending_len = (size_t)(parser->cur - start); + if (parser->max_string_length && buf.len + pending_len > parser->max_string_length) goto oom; if (!tg3json__buf_append(&buf, start, (size_t)(parser->cur - start))) goto oom; ++parser->cur; if (parser->cur >= parser->end) break; @@ -304,6 +380,37 @@ oom: static int tg3json__parse_value(tg3json__parser *parser, size_t depth, tg3json_value *out_value); +static int tg3json__parse_int64_span(const char *start, const char *end, + int64_t *out) { + const char *p = start; + uint64_t value = 0; + uint64_t limit = (uint64_t)INT64_MAX; + int neg = 0; + if (p < end && *p == '-') { + neg = 1; + limit += 1u; + ++p; + } + if (p >= end) return 0; + while (p < end) { + unsigned digit = (unsigned)(*p - '0'); + if (digit > 9u) return 0; + if (value > (limit - digit) / 10u) return 0; + value = value * 10u + (uint64_t)digit; + ++p; + } + if (neg) { + if (value == ((uint64_t)INT64_MAX + 1u)) { + *out = INT64_MIN; + } else { + *out = -(int64_t)value; + } + } else { + *out = (int64_t)value; + } + return 1; +} + static int tg3json__parse_array(tg3json__parser *parser, size_t depth, tg3json_value *out_value) { tg3json_value *items = NULL; @@ -323,7 +430,8 @@ static int tg3json__parse_array(tg3json__parser *parser, size_t depth, while (parser->cur < parser->end) { tg3json_value value; tg3json__init_value(&value); - if (!tg3json__reserve_bytes((void **)&items, sizeof(*items), count + 1, &cap)) goto oom; + if (!tg3json__reserve_bytes_parser(parser, (void **)&items, + sizeof(*items), count + 1, &cap)) goto oom; if (!tg3json__parse_value(parser, depth + 1, &value)) goto fail; items[count++] = value; parser->cur = tg3json__skip_ws(parser->cur, parser->end); @@ -395,14 +503,15 @@ static int tg3json__parse_object(tg3json__parser *parser, size_t depth, free(key); goto fail; } - if (!tg3json__reserve_bytes((void **)&items, sizeof(*items), count + 1, &cap)) { + if (!tg3json__reserve_bytes_parser(parser, (void **)&items, + sizeof(*items), count + 1, &cap)) { free(key); tg3json_value_free(&value); goto oom; } items[count].key = key; items[count].key_len = key_len; - items[count].value = (tg3json_value *)malloc(sizeof(tg3json_value)); + items[count].value = (tg3json_value *)tg3json__parser_alloc(parser, sizeof(tg3json_value)); if (!items[count].value) { free(key); tg3json_value_free(&value); @@ -485,29 +594,25 @@ static int tg3json__parse_number(tg3json__parser *parser, tg3json_value *out_val do { ++p; } while (p < parser->end && *p >= '0' && *p <= '9'); } - len = (size_t)(p - start); - if (len + 1 > sizeof(stack_buf)) { - num_buf = (char *)malloc(len + 1); - if (!num_buf) goto oom; - } - memcpy(num_buf, start, len); - num_buf[len] = '\0'; - if (!is_real) { - char *endptr = NULL; - long long v; - errno = 0; - v = strtoll(num_buf, &endptr, 10); - if (errno == 0 && endptr == num_buf + len) { + int64_t v; + if (tg3json__parse_int64_span(start, p, &v)) { out_value->type = TG3JSON_INT; - out_value->u.integer = (int64_t)v; + out_value->u.integer = v; parser->cur = p; - if (num_buf != stack_buf) free(num_buf); return 1; } is_real = 1; } + len = (size_t)(p - start); + if (len + 1 > sizeof(stack_buf)) { + num_buf = (char *)tg3json__parser_alloc(parser, len + 1); + if (!num_buf) goto oom; + } + memcpy(num_buf, start, len); + num_buf[len] = '\0'; + { char *endptr = NULL; errno = 0; @@ -516,6 +621,7 @@ static int tg3json__parse_number(tg3json__parser *parser, tg3json_value *out_val if (num_buf != stack_buf) free(num_buf); goto fail; } + if (parser->parse_float32) out_value->u.real = (double)(float)out_value->u.real; out_value->type = TG3JSON_REAL; parser->cur = p; if (num_buf != stack_buf) free(num_buf); @@ -584,14 +690,25 @@ static int tg3json__parse_value(tg3json__parser *parser, size_t depth, return 0; } -int tg3json_parse_n(const char *data, size_t len, size_t depth_limit, - tg3json_value *out_value, const char **out_error_pos) { +int tg3json_parse_n_opts(const char *data, size_t len, + const tg3json_parse_options *options, + tg3json_value *out_value, + const char **out_error_pos) { tg3json__parser parser; + if (!data || !out_value) { + if (out_error_pos) *out_error_pos = data; + return 0; + } tg3json__init_value(out_value); parser.cur = data; parser.end = data + len; parser.error = NULL; - parser.depth_limit = depth_limit ? depth_limit : 512; + parser.depth_limit = (options && options->depth_limit) ? options->depth_limit : 512; + parser.memory_budget = options ? options->memory_budget : 0; + parser.max_single_alloc = options ? options->max_single_alloc : 0; + parser.max_string_length = options ? options->max_string_length : 0; + parser.allocated = 0; + parser.parse_float32 = options ? options->parse_float32 : 0; if (!tg3json__parse_value(&parser, 0, out_value)) { if (out_error_pos) *out_error_pos = parser.error; @@ -608,6 +725,14 @@ int tg3json_parse_n(const char *data, size_t len, size_t depth_limit, return 1; } +int tg3json_parse_n(const char *data, size_t len, size_t depth_limit, + tg3json_value *out_value, const char **out_error_pos) { + tg3json_parse_options options; + memset(&options, 0, sizeof(options)); + options.depth_limit = depth_limit; + return tg3json_parse_n_opts(data, len, &options, out_value, out_error_pos); +} + int tg3json_parse(const char *begin, const char *end, size_t depth_limit, tg3json_value *out_value, const char **out_error_pos) { if (!begin || !end || end < begin) {