diff --git a/CMakeLists.txt b/CMakeLists.txt index a91f23b..23ccb14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,7 +247,7 @@ set(ENCODER_LIB_SRC_LIST encoder/basisu_astc_hdr_common.cpp encoder/basisu_astc_ldr_common.cpp encoder/basisu_astc_ldr_encode.cpp - encoder/3rdparty/tinyexr.cpp + encoder/basisu_tinyexr.cpp transcoder/basisu_transcoder.cpp encoder/basisu_astc_hdr_6x6_enc.h encoder/basisu_astc_hdr_common.h diff --git a/encoder/3rdparty/tinyexr.h b/encoder/3rdparty/tinyexr.h index 2b759ee..dfd7164 100644 --- a/encoder/3rdparty/tinyexr.h +++ b/encoder/3rdparty/tinyexr.h @@ -1,5 +1,3 @@ -// rg 8/23/2024: I fixed some minor undefined behavior in this module (signed 32-bit left shifts). - #ifndef TINYEXR_H_ #define TINYEXR_H_ /* @@ -109,9 +107,6 @@ extern "C" { // required if this flag is 0 and TINYEXR_USE_STB_ZLIB is 0. #ifndef TINYEXR_USE_MINIZ #define TINYEXR_USE_MINIZ (1) -#ifndef MINIZ_HEADER_FILE_ONLY -#define MINIZ_HEADER_FILE_ONLY (1) -#endif #endif // Use the ZIP implementation of stb_image.h and stb_image_write.h. @@ -136,7 +131,11 @@ extern "C" { #ifndef TINYEXR_USE_THREAD #define TINYEXR_USE_THREAD (0) // No threaded loading. 
-// http://computation.llnl.gov/projects/floating-point-compression +#else +// When using threading a reduced custom upperbound can be specified by setting TINYEXR_MAX_THREADS +#ifndef TINYEXR_MAX_THREADS // if not defined define it as 0 meaning upper limit is taken from hardware_concurrency() +#define TINYEXR_MAX_THREADS (0) +#endif #endif #ifndef TINYEXR_USE_OPENMP @@ -147,6 +146,41 @@ extern "C" { #endif #endif +#ifndef TINYEXR_USE_COMPILER_FP16 +#define TINYEXR_USE_COMPILER_FP16 (0) +#endif + +#if TINYEXR_USE_COMPILER_FP16 +#ifndef _MSC_VER +#if defined( __GNUC__ ) || defined( __clang__ ) +#if defined( __SSE2__ ) +#if ( __GNUC__ > 11 ) || ( __clang_major__ > 14 ) +#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__ +#define __STDC_WANT_IEC_60559_TYPES_EXT__ +#endif +#include +#include +#define TINYEXR_FP16_COMPILER_TYPE _Float16 +#endif +#endif +#if defined( __ARM_NEON__ ) || defined( __ARM_NEON ) +#define TINYEXR_FP16_COMPILER_TYPE __fp16 +#endif +#endif +#else +#if (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) +#include +#define TINYEXR_FP16_COMPILER_TYPE uint16_t +#endif +#endif +#endif + +#ifdef TINYEXR_FP16_COMPILER_TYPE +#define TINYEXR_HAS_FP16_COMPILER_TYPE (1) +#else +#define TINYEXR_HAS_FP16_COMPILER_TYPE (0) +#endif + #define TINYEXR_SUCCESS (0) #define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1) #define TINYEXR_ERROR_INVALID_EXR_VERSION (-2) @@ -178,6 +212,11 @@ extern "C" { #define TINYEXR_COMPRESSIONTYPE_ZIPS (2) #define TINYEXR_COMPRESSIONTYPE_ZIP (3) #define TINYEXR_COMPRESSIONTYPE_PIZ (4) +#define TINYEXR_COMPRESSIONTYPE_PXR24 (5) +#define TINYEXR_COMPRESSIONTYPE_B44 (6) +#define TINYEXR_COMPRESSIONTYPE_B44A (7) +#define TINYEXR_COMPRESSIONTYPE_DWAA (8) // Not yet supported +#define TINYEXR_COMPRESSIONTYPE_DWAB (9) // Not yet supported #define TINYEXR_COMPRESSIONTYPE_ZFP (128) // TinyEXR extension #define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0) @@ -191,6 +230,12 @@ extern "C" { #define TINYEXR_TILE_ROUND_DOWN (0) #define TINYEXR_TILE_ROUND_UP (1) 
+// Spectral EXR types (based on JCGT 2021 paper and spectral-exr format) +// https://jcgt.org/published/0010/03/01/ +#define TINYEXR_SPECTRUM_REFLECTIVE (0) // T.{wavelength}nm channels +#define TINYEXR_SPECTRUM_EMISSIVE (1) // S0.{wavelength}nm channels +#define TINYEXR_SPECTRUM_POLARISED (2) // S0-S3.{wavelength}nm channels + typedef struct TEXRVersion { int version; // this must be 2 // tile format image; @@ -346,7 +391,7 @@ extern int LoadEXR(float **out_rgba, int *width, int *height, // the function will return `TINYEXR_ERROR_LAYER_NOT_FOUND`. extern int LoadEXRWithLayer(float **out_rgba, int *width, int *height, const char *filename, const char *layer_name, - const char **err, int *num_chans = NULL); + const char **err); // // Get layer infos from EXR file. @@ -391,7 +436,7 @@ extern int IsEXRFromMemory(const unsigned char *memory, size_t size); // error extern int SaveEXRToMemory(const float *data, const int width, const int height, const int components, const int save_as_fp16, - const unsigned char **buffer, const char **err); + unsigned char **buffer, const char **err); // @deprecated { Not recommended, but handy to use. } // Saves single-frame OpenEXR image to a buffer. Assume EXR image contains RGB(A) channels. 
@@ -599,6 +644,61 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, const unsigned char *memory, size_t size, const char **err); +// Spectral EXR API (based on JCGT 2021 paper and spectral-exr format) +// https://jcgt.org/published/0010/03/01/ +// https://github.com/afichet/spectral-exr + +// Check if an EXR file contains spectral data (has spectralLayoutVersion attribute) +// Returns TINYEXR_SUCCESS if spectral, TINYEXR_ERROR_INVALID_DATA if not +extern int IsSpectralEXR(const char *filename); +extern int IsSpectralEXRFromMemory(const unsigned char *memory, size_t size); + +// Get spectrum type from EXR header (TINYEXR_SPECTRUM_REFLECTIVE/EMISSIVE/POLARISED) +// Returns spectrum type, or -1 if not a spectral EXR +extern int EXRGetSpectrumType(const EXRHeader *exr_header); + +// Format wavelength with European decimal convention (comma as separator) +// Output format: "550,000000" for 550.0nm +// buffer must be at least 32 bytes +extern void EXRFormatWavelength(char *buffer, size_t buffer_size, float wavelength_nm); + +// Create spectral channel name +// For emissive: "S{stokes}.{wavelength}nm" (e.g., "S0.550,000000nm") +// For reflective: "T.{wavelength}nm" (e.g., "T.550,000000nm") +// buffer must be at least 64 bytes +extern void EXRSpectralChannelName(char *buffer, size_t buffer_size, + float wavelength_nm, int stokes_component); +extern void EXRReflectiveChannelName(char *buffer, size_t buffer_size, + float wavelength_nm); + +// Parse wavelength from spectral channel name +// Returns wavelength in nm, or -1.0 if not a valid spectral channel name +extern float EXRParseSpectralChannelWavelength(const char *channel_name); + +// Get Stokes component from channel name (0-3, or -1 if not polarised/invalid) +extern int EXRGetStokesComponent(const char *channel_name); + +// Check if channel name is a spectral channel (S{n}.{wavelength}nm or T.{wavelength}nm) +extern int EXRIsSpectralChannel(const char *channel_name); + +// Get wavelengths from 
EXR header channels +// Returns number of unique wavelengths found +// wavelengths array must be pre-allocated, max_wavelengths is its size +extern int EXRGetWavelengths(const EXRHeader *exr_header, + float *wavelengths, int max_wavelengths); + +// Helper to add spectral attributes to an EXRHeader +// This sets spectralLayoutVersion and ROOT/units (or emissiveUnits for emissive) +// spectrum_type: TINYEXR_SPECTRUM_REFLECTIVE, EMISSIVE, or POLARISED +// units: unit string (e.g., "W.m^-2.sr^-1.nm^-1" for radiance) +extern int EXRSetSpectralAttributes(EXRHeader *exr_header, + int spectrum_type, + const char *units); + +// Get spectral units from EXR header +// Returns NULL if not found, otherwise pointer to units string (valid until header is freed) +extern const char* EXRGetSpectralUnits(const EXRHeader *exr_header); + #ifdef __cplusplus } #endif @@ -632,6 +732,7 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, #endif #include +#include #include #include #include @@ -644,6 +745,9 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, #include #include +// Include Reader class with error stack for safer memory reading +#include "exr_reader.hh" + // https://stackoverflow.com/questions/5047971/how-do-i-check-for-c11-support #if __cplusplus > 199711L || (defined(_MSC_VER) && _MSC_VER >= 1900) #define TINYEXR_HAS_CXX11 (1) @@ -664,7 +768,7 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, #endif #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) -#include "../basisu_miniz.h" +#include #else // Issue #46. 
Please include your own zlib-compatible API header before // including `tinyexr.h` @@ -776,7 +880,7 @@ static void SetWarningMessage(const std::string &msg, const char **warn) { static const int kEXRVersionSize = 8; -static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { +static void inline cpy2(unsigned short *dst_val, const unsigned short *src_val) { unsigned char *dst = reinterpret_cast(dst_val); const unsigned char *src = reinterpret_cast(src_val); @@ -784,7 +888,7 @@ static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { dst[1] = src[1]; } -static void swap2(unsigned short *val) { +static void inline swap2(unsigned short *val) { #if TINYEXR_LITTLE_ENDIAN (void)val; #else @@ -806,7 +910,7 @@ static void swap2(unsigned short *val) { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-function" #endif -static void cpy4(int *dst_val, const int *src_val) { +static void inline cpy4(int *dst_val, const int *src_val) { unsigned char *dst = reinterpret_cast(dst_val); const unsigned char *src = reinterpret_cast(src_val); @@ -816,7 +920,7 @@ static void cpy4(int *dst_val, const int *src_val) { dst[3] = src[3]; } -static void cpy4(unsigned int *dst_val, const unsigned int *src_val) { +static void inline cpy4(unsigned int *dst_val, const unsigned int *src_val) { unsigned char *dst = reinterpret_cast(dst_val); const unsigned char *src = reinterpret_cast(src_val); @@ -826,7 +930,7 @@ static void cpy4(unsigned int *dst_val, const unsigned int *src_val) { dst[3] = src[3]; } -static void cpy4(float *dst_val, const float *src_val) { +static void inline cpy4(float *dst_val, const float *src_val) { unsigned char *dst = reinterpret_cast(dst_val); const unsigned char *src = reinterpret_cast(src_val); @@ -843,7 +947,7 @@ static void cpy4(float *dst_val, const float *src_val) { #pragma GCC diagnostic pop #endif -static void swap4(unsigned int *val) { +static void inline swap4(unsigned int *val) { #if TINYEXR_LITTLE_ENDIAN 
(void)val; #else @@ -858,7 +962,7 @@ static void swap4(unsigned int *val) { #endif } -static void swap4(int *val) { +static void inline swap4(int *val) { #if TINYEXR_LITTLE_ENDIAN (void)val; #else @@ -873,7 +977,7 @@ static void swap4(int *val) { #endif } -static void swap4(float *val) { +static void inline swap4(float *val) { #if TINYEXR_LITTLE_ENDIAN (void)val; #else @@ -889,7 +993,7 @@ static void swap4(float *val) { } #if 0 -static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) { +static void inline cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) { unsigned char *dst = reinterpret_cast(dst_val); const unsigned char *src = reinterpret_cast(src_val); @@ -904,7 +1008,7 @@ static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 } #endif -static void swap8(tinyexr::tinyexr_uint64 *val) { +static void inline swap8(tinyexr::tinyexr_uint64 *val) { #if TINYEXR_LITTLE_ENDIAN (void)val; #else @@ -924,6 +1028,11 @@ static void swap8(tinyexr::tinyexr_uint64 *val) { } // https://gist.github.com/rygorous/2156668 +#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0) +union FP32 { + float f; +}; +#else union FP32 { unsigned int u; float f; @@ -939,12 +1048,21 @@ union FP32 { #endif } s; }; +#endif #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpadded" #endif +#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0) +union FP16 { + TINYEXR_FP16_COMPILER_TYPE f; + unsigned short u; +}; + +#else + union FP16 { unsigned short u; struct { @@ -959,11 +1077,32 @@ union FP16 { #endif } s; }; +#endif #ifdef __clang__ #pragma clang diagnostic pop #endif +#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0) +static inline FP32 half_to_float(FP16 h) { + FP32 o; +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) + o.f =_mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(static_cast (h.u)))); 
+#else + o.f = static_cast (h.f); +#endif + return o; +} +static inline FP16 float_to_half_full(FP32 f) { + FP16 o; +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) + o.f = static_cast (_mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f.f), _MM_FROUND_CUR_DIRECTION))); +#else + o.f = static_cast (f.f); +#endif + return o; +} +#else static FP32 half_to_float(FP16 h) { static const FP32 magic = {113 << 23}; static const unsigned int shifted_exp = 0x7c00 @@ -1023,7 +1162,7 @@ static FP16 float_to_half_full(FP32 f) { o.s.Sign = f.s.Sign; return o; } - +#endif // NOTE: From OpenEXR code // #define IMF_INCREASING_Y 0 // #define IMF_DECREASING_Y 1 @@ -1367,11 +1506,11 @@ static bool CompressZip(unsigned char *dst, // Compress the data using miniz // - buminiz::mz_ulong outSize = buminiz::mz_compressBound(src_size); - int ret = buminiz::mz_compress( + mz_ulong outSize = mz_compressBound(src_size); + int ret = mz_compress( dst, &outSize, static_cast(&tmpBuf.at(0)), src_size); - if (ret != buminiz::MZ_OK) { + if (ret != MZ_OK) { return false; } @@ -1396,7 +1535,7 @@ static bool CompressZip(unsigned char *dst, memcpy(dst, ret, outSize); free(ret); - + compressedSize = outSize; #else uLong outSize = compressBound(static_cast(src_size)); @@ -1431,8 +1570,8 @@ static bool DecompressZip(unsigned char *dst, #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) int ret = - buminiz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); - if (buminiz::MZ_OK != ret) { + mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); + if (MZ_OK != ret) { return false; } #elif TINYEXR_USE_STB_ZLIB @@ -2144,7 +2283,7 @@ inline void outputBits(int nBits, long long bits, long long &c, int &lc, inline long long getBits(int nBits, long long &c, int &lc, const char *&in) { while (lc < nBits) { - c = (long long)((unsigned long long)c << 8) | *(reinterpret_cast(in++)); + c = (c << 8) | *(reinterpret_cast(in++)); lc += 8; } @@ -2720,7 +2859,7 @@ static 
int hufEncode // return: output size (in bits) #define getChar(c, lc, in) \ { \ - c = ((unsigned long long)c << 8) | *(unsigned char *)(in++); \ + c = (c << 8) | *(unsigned char *)(in++); \ lc += 8; \ } @@ -2882,7 +3021,7 @@ static bool hufDecode(const long long *hcode, // i : encoding table lc -= i; while (lc > 0) { - const HufDec pl = hdecod[((unsigned long long)c << (HUF_DECBITS - lc)) & HUF_DECMASK]; + const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK]; if (pl.len) { lc -= pl.len; @@ -3357,6 +3496,1112 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, } #endif // TINYEXR_USE_PIZ +// ============================================================================ +// PXR24 decompression +// ============================================================================ + +// PXR24 stores 32-bit floats as 24-bit values (truncates 8 mantissa bits) +// HALF and UINT are stored without modification +static bool DecompressPxr24(unsigned char *outPtr, size_t outBufSize, + const unsigned char *inPtr, size_t inLen, + int data_width, int num_lines, + size_t num_channels, + const EXRChannelInfo *channels) { + // Calculate the PXR24 data size after zlib decompression + // PXR24 stores HALF as 2 bytes, UINT as 4 bytes, FLOAT as 3 bytes + // Data is stored with byte plane separation and delta encoding + size_t pxr24_size = 0; + for (size_t c = 0; c < num_channels; c++) { + int ch_width = data_width; // V1 doesn't handle subsampling in decompression + int ch_pixels = ch_width * num_lines; + + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + pxr24_size += static_cast(ch_pixels) * 4; + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + pxr24_size += static_cast(ch_pixels) * 2; + } else { // FLOAT + pxr24_size += static_cast(ch_pixels) * 3; + } + } + + // Allocate buffer for zlib-decompressed PXR24 data + std::vector pxr24_buf(pxr24_size); + size_t uncomp_size = pxr24_size; + + // PXR24 uses raw zlib compression + if 
(pxr24_size == inLen) { + // Uncompressed - copy directly + memcpy(pxr24_buf.data(), inPtr, inLen); + } else { +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + mz_ulong dest_len = static_cast(pxr24_size); + int ret = mz_uncompress(pxr24_buf.data(), &dest_len, inPtr, static_cast(inLen)); + if (ret != MZ_OK) { + return false; + } + uncomp_size = static_cast(dest_len); +#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1) + int outLen = stbi_zlib_decode_buffer(reinterpret_cast(pxr24_buf.data()), + static_cast(pxr24_size), reinterpret_cast(inPtr), static_cast(inLen)); + if (outLen < 0) { + return false; + } + uncomp_size = static_cast(outLen); +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) + uint64_t outLen = 0; + nanoz_status_t ret = nanoz_uncompress(inPtr, inLen, pxr24_size, pxr24_buf.data(), &outLen); + if (ret != NANOZ_SUCCESS) { + return false; + } + uncomp_size = static_cast(outLen); +#else + uLongf dest_len = static_cast(pxr24_size); + int ret = uncompress(pxr24_buf.data(), &dest_len, inPtr, static_cast(inLen)); + if (ret != Z_OK) { + return false; + } + uncomp_size = static_cast(dest_len); +#endif + } + + if (uncomp_size != pxr24_size) { + return false; + } + + // Convert PXR24 format to standard EXR format + // PXR24 uses: + // 1. Byte plane separation: bytes are stored by plane (all high bytes, then next-high, etc.) + // 2. 
Delta encoding: each pixel is stored as difference from previous pixel + const unsigned char* in_p = pxr24_buf.data(); + unsigned char* out_p = outPtr; + + for (int line = 0; line < num_lines; line++) { + for (size_t c = 0; c < num_channels; c++) { + int w = data_width; + + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + // UINT: 4 byte planes with delta encoding + const unsigned char* ptr0 = in_p; + const unsigned char* ptr1 = in_p + w; + const unsigned char* ptr2 = in_p + w * 2; + const unsigned char* ptr3 = in_p + w * 3; + in_p += w * 4; + + unsigned int pixel = 0; + for (int x = 0; x < w; x++) { + unsigned int diff = (static_cast(ptr0[x]) << 24) | + (static_cast(ptr1[x]) << 16) | + (static_cast(ptr2[x]) << 8) | + (static_cast(ptr3[x])); + pixel += diff; + memcpy(out_p, &pixel, 4); + out_p += 4; + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + // HALF: 2 byte planes with delta encoding + const unsigned char* ptr0 = in_p; + const unsigned char* ptr1 = in_p + w; + in_p += w * 2; + + unsigned int pixel = 0; + for (int x = 0; x < w; x++) { + unsigned int diff = (static_cast(ptr0[x]) << 8) | + (static_cast(ptr1[x])); + pixel += diff; + unsigned short h = static_cast(pixel); + memcpy(out_p, &h, 2); + out_p += 2; + } + } else { // FLOAT + // FLOAT: 3 byte planes with delta encoding, expand to 32-bit + const unsigned char* ptr0 = in_p; + const unsigned char* ptr1 = in_p + w; + const unsigned char* ptr2 = in_p + w * 2; + in_p += w * 3; + + unsigned int pixel = 0; + for (int x = 0; x < w; x++) { + // PXR24 stores 24-bit floats with delta encoding + // The diff is in the upper 24 bits + unsigned int diff = (static_cast(ptr0[x]) << 24) | + (static_cast(ptr1[x]) << 16) | + (static_cast(ptr2[x]) << 8); + pixel += diff; + memcpy(out_p, &pixel, 4); + out_p += 4; + } + } + } + } + + return true; +} + +// ============================================================================ +// B44/B44A decompression +// 
// ============================================================================

// B44 compresses 4x4 blocks of HALF values to 14 bytes
// B44A can compress flat regions to 3 bytes

// B44 lookup tables, indexed by the 16-bit pattern of a half-float:
//   g_b44_exp_table[h] = half(exp(h / 8))    (used by B44ConvertFromLinear)
//   g_b44_log_table[h] = half(8 * log(h))    (used by B44ConvertToLinear)
// They are only consulted for channels flagged p_linear.
// NOTE(review): the tables are filled lazily by InitB44Tables() behind a
// plain bool flag with no synchronization visible here; confirm callers do
// not race on first use when threaded loading is enabled.
static unsigned short g_b44_exp_table[65536];
static unsigned short g_b44_log_table[65536];
static bool g_b44_tables_initialized = false;

// Expands a half-float bit pattern to a float.
// Zero, subnormals, normals, infinities and NaNs are all handled; the NaN
// payload is moved into the top bits of the float mantissa.
static inline float B44HalfToFloat(unsigned short h) {
  const unsigned int sign = (static_cast<unsigned int>(h) >> 15) & 0x1u;
  const unsigned int exponent = (static_cast<unsigned int>(h) >> 10) & 0x1fu;
  const unsigned int mantissa = static_cast<unsigned int>(h) & 0x3ffu;

  if (exponent == 0 && mantissa != 0) {
    // Subnormal half: value = mantissa / 1024 * 2^-14 (exact in float).
    float f = static_cast<float>(mantissa) / 1024.0f;
    f = f * (1.0f / 16384.0f);
    return sign ? -f : f;
  }

  unsigned int bits = sign << 31;
  if (exponent == 31) {
    bits |= 0x7f800000u | (mantissa << 13);  // Inf or NaN
  } else if (exponent != 0) {
    bits |= ((exponent + 112u) << 23) | (mantissa << 13);  // rebias 15 -> 127
  }
  // exponent == 0 && mantissa == 0: signed zero, only the sign bit is set.

  float out;
  memcpy(&out, &bits, sizeof(out));
  return out;
}

// Narrows a float to a half-float bit pattern using round-to-nearest-even.
//
// Fixes relative to the previous version:
//  * subnormal results were shifted one bit too far (2^-15 produced 0x0100
//    instead of 0x0200, and 2^-24 flushed to zero);
//  * values were truncated instead of rounded;
//  * a NaN whose payload lived only in the low 13 mantissa bits collapsed to
//    infinity.
// Values that round beyond the largest finite half (65504) become infinity.
static inline unsigned short B44FloatToHalf(float f) {
  unsigned int bits;
  memcpy(&bits, &f, sizeof(bits));

  const unsigned int sign = (bits >> 16) & 0x8000u;
  const unsigned int exponent = (bits >> 23) & 0xffu;
  unsigned int mantissa = bits & 0x007fffffu;

  if (exponent == 255) {
    if (mantissa == 0) {
      return static_cast<unsigned short>(sign | 0x7c00u);  // Infinity
    }
    unsigned int payload = mantissa >> 13;
    if (payload == 0) {
      payload = 1;  // keep a NaN a NaN
    }
    return static_cast<unsigned short>(sign | 0x7c00u | payload);
  }

  if (exponent > 142) {
    // |f| >= 2^16: too large for half.
    return static_cast<unsigned short>(sign | 0x7c00u);
  }

  if (exponent >= 113) {
    // Normal half. A rounding carry may ripple into the exponent field,
    // which correctly yields the next binade (or infinity at the top).
    unsigned int half = ((exponent - 112u) << 10) | (mantissa >> 13);
    const unsigned int remainder = mantissa & 0x1fffu;
    if (remainder > 0x1000u || (remainder == 0x1000u && (half & 1u))) {
      ++half;
    }
    return static_cast<unsigned short>(sign | half);
  }

  if (exponent < 102) {
    // |f| < 2^-25 (including float zero/subnormals): rounds to signed zero.
    return static_cast<unsigned short>(sign);
  }

  // Subnormal half: value = half_mantissa * 2^-24.
  mantissa |= 0x00800000u;                      // make the implicit bit explicit
  const unsigned int shift = 126u - exponent;   // 14 .. 24
  unsigned int half = mantissa >> shift;
  const unsigned int halfway = 1u << (shift - 1u);
  const unsigned int remainder = mantissa & ((1u << shift) - 1u);
  if (remainder > halfway || (remainder == halfway && (half & 1u))) {
    ++half;  // may reach 0x0400, the smallest normal half
  }
  return static_cast<unsigned short>(sign | half);
}

// Fills g_b44_exp_table / g_b44_log_table on first call; later calls return
// immediately.
static void InitB44Tables() {
  if (g_b44_tables_initialized) return;

  for (unsigned int i = 0; i < 65536u; i++) {
    const unsigned short x = static_cast<unsigned short>(i);
    const bool inf_or_nan = (x & 0x7c00) == 0x7c00;

    // expTable: exp(half / 8)
    if (inf_or_nan) {
      g_b44_exp_table[i] = 0;
    } else if (x >= 0x558c && x < 0x8000) {
      // >= 8 * log(HALF_MAX) -> HALF_MAX
      g_b44_exp_table[i] = 0x7bff;
    } else {
      const double v = static_cast<double>(B44HalfToFloat(x));
      g_b44_exp_table[i] = B44FloatToHalf(static_cast<float>(std::exp(v / 8.0)));
    }

    // logTable: 8 * log(half); anything non-positive, infinite or NaN -> 0
    unsigned short log_entry = 0;
    if (!inf_or_nan && x <= 0x8000) {
      const float v = B44HalfToFloat(x);
      if (v > 0.0f) {
        log_entry = B44FloatToHalf(
            static_cast<float>(8.0 * std::log(static_cast<double>(v))));
      }
    }
    g_b44_log_table[i] = log_entry;
  }

  g_b44_tables_initialized = true;
}

// Looks `h` up in the exp table. InitB44Tables() must have run first.
static inline unsigned short B44ConvertFromLinear(unsigned short h) {
  return g_b44_exp_table[h];
}

// Looks `h` up in the log table. InitB44Tables() must have run first.
static inline unsigned short B44ConvertToLinear(unsigned short h) {
  return g_b44_log_table[h];
}

// Adds one 6-bit, shift-scaled, biased difference to `prev` (mod 2^16).
static inline unsigned short B44ApplyDelta(unsigned short prev,
                                           unsigned int field,
                                           unsigned int shift,
                                           unsigned int bias) {
  return static_cast<unsigned short>(static_cast<unsigned int>(prev) +
                                     (field << shift) - bias);
}

// Unpacks one regular 14-byte B44 block into 16 half-float bit patterns,
// stored row-major (dst[row * 4 + col]).
//
// Layout of `src`: bytes 0-1 hold the first value in ordered-magnitude form,
// the top 6 bits of byte 2 hold the shift, and the remaining bits are fifteen
// 6-bit differences. Column 0 is reconstructed top-to-bottom from the first
// value; every other column is reconstructed from the column to its left.
//
// Precondition: the shift field (src[2] >> 2) is below 13. The caller treats
// larger values as 3-byte flat blocks and never passes them here.
static void UnpackB44Block(unsigned short dst[16], const unsigned char src[14]) {
  unsigned short t[16];
  t[0] = static_cast<unsigned short>(
      (static_cast<unsigned int>(src[0]) << 8) | src[1]);

  const unsigned int shift = static_cast<unsigned int>(src[2]) >> 2;
  // Kept at 16 bits like the reference code; the arithmetic is mod 2^16.
  const unsigned int bias = static_cast<unsigned short>(0x20u << shift);

  for (int col = 0; col < 4; col++) {
    // Each column owns three bytes = four 6-bit fields (one per row). For
    // column 0 the first field is the shift itself and is skipped.
    const unsigned char *g = src + 2 + 3 * col;
    const unsigned int field[4] = {
        static_cast<unsigned int>(g[0]) >> 2,
        ((static_cast<unsigned int>(g[0]) << 4) |
         (static_cast<unsigned int>(g[1]) >> 4)) & 0x3fu,
        ((static_cast<unsigned int>(g[1]) << 2) |
         (static_cast<unsigned int>(g[2]) >> 6)) & 0x3fu,
        static_cast<unsigned int>(g[2]) & 0x3fu};

    if (col == 0) {
      for (int row = 1; row < 4; row++) {
        t[row * 4] = B44ApplyDelta(t[(row - 1) * 4], field[row], shift, bias);
      }
    } else {
      for (int row = 0; row < 4; row++) {
        t[row * 4 + col] =
            B44ApplyDelta(t[row * 4 + col - 1], field[row], shift, bias);
      }
    }
  }

  // Convert from ordered-magnitude to half-float.
  for (int i = 0; i < 16; i++) {
    if (t[i] & 0x8000) {
      dst[i] = static_cast<unsigned short>(t[i] & 0x7fff);  // positive
    } else {
      dst[i] = static_cast<unsigned short>(~t[i]);          // negative
    }
  }
}

// Unpacks a 3-byte flat block: all 16 pixels take the single value stored in
// bytes 0-1 (ordered-magnitude form). Byte 2 is not read here.
static void UnpackB44FlatBlock(unsigned short dst[16], const unsigned char src[3]) {
  const unsigned short t = static_cast<unsigned short>(
      (static_cast<unsigned int>(src[0]) << 8) | src[1]);

  const unsigned short h = (t & 0x8000)
                               ? static_cast<unsigned short>(t & 0x7fff)
                               : static_cast<unsigned short>(~t);

  for (int i = 0; i < 16; i++) {
    dst[i] = h;
  }
}
2u : 4u; + // Check cw * ch overflow + if (cw != 0 && ch > (SIZE_MAX / cw)) return false; + size_t pixels = cw * ch; + // Check pixels * bpp overflow + if (pixels > (SIZE_MAX / bpp)) return false; + size_t ch_total = pixels * bpp; + // Check accumulation overflow + if (expected_out > SIZE_MAX - ch_total) return false; + expected_out += ch_total; + } + if (expected_out > outBufSize) return false; + } + + const unsigned char* in_p = inPtr; + const unsigned char* in_end = inPtr + inLen; + + // First pass: decompress all channels into scratch buffers. + // For non-HALF channels, save the pointer and byte count so we can copy + // the raw data to the output buffer in the second pass. + std::vector> scratch_buffers(num_channels); + std::vector nonhalf_ptrs(num_channels, nullptr); + std::vector nonhalf_sizes(num_channels, 0); + + for (size_t c = 0; c < num_channels; c++) { + // Compute per-channel dimensions based on sampling + int x_sampling = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1; + int y_sampling = channels[c].y_sampling > 0 ? 
channels[c].y_sampling : 1; + int ch_width = (data_width + x_sampling - 1) / x_sampling; + int ch_height = (num_lines + y_sampling - 1) / y_sampling; + + // B44 only works with HALF pixel types + if (channels[c].pixel_type != TINYEXR_PIXELTYPE_HALF) { + // For non-HALF channels, data is stored uncompressed; record position + size_t ch_bytes = static_cast(ch_width) * ch_height; + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT || + channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + ch_bytes *= 4; + } else { + ch_bytes *= 2; + } + if (in_p + ch_bytes > in_end) return false; + nonhalf_ptrs[c] = in_p; + nonhalf_sizes[c] = ch_bytes; + in_p += ch_bytes; + continue; + } + + // Calculate block dimensions (rounded up to multiple of 4) + int padded_width = ((ch_width + 3) / 4) * 4; + int padded_height = ((ch_height + 3) / 4) * 4; + int num_blocks_x = padded_width / 4; + int num_blocks_y = padded_height / 4; + + // Allocate scratch buffer for this channel + scratch_buffers[c].resize(static_cast(padded_width) * padded_height); + + // Process blocks + for (int by = 0; by < num_blocks_y; by++) { + for (int bx = 0; bx < num_blocks_x; bx++) { + unsigned short block[16]; + + if (in_p + 3 > in_end) return false; + + // Check for flat block (shift >= 13) + if (in_p[2] >= (13 << 2)) { + // 3-byte flat block + UnpackB44FlatBlock(block, in_p); + in_p += 3; + } else { + // Regular 14-byte block + if (in_p + 14 > in_end) return false; + UnpackB44Block(block, in_p); + in_p += 14; + } + + // Apply p_linear conversion (log table) if needed + if (channels[c].p_linear) { + for (int i = 0; i < 16; i++) { + block[i] = g_b44_log_table[block[i]]; + } + } + + // Store block in scratch buffer + for (int dy = 0; dy < 4; dy++) { + int y = by * 4 + dy; + for (int dx = 0; dx < 4; dx++) { + int x = bx * 4 + dx; + scratch_buffers[c][static_cast(y) * padded_width + x] = block[dy * 4 + dx]; + } + } + } + } + } + + // Second pass: copy from scratch buffers to output in per-channel format. 
+ // Output format: all data for channel 0, then all data for channel 1, etc. + // ch_offset in DecodePixelData is accumulated per preceding channel sizes. + unsigned char* out_p = outPtr; + for (size_t c = 0; c < num_channels; c++) { + int x_sampling = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1; + int y_sampling = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1; + int ch_width = (data_width + x_sampling - 1) / x_sampling; + int ch_height = (num_lines + y_sampling - 1) / y_sampling; + int padded_width = ((ch_width + 3) / 4) * 4; + + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (int y = 0; y < ch_height; y++) { + for (int x = 0; x < ch_width; x++) { + unsigned short val = scratch_buffers[c][static_cast(y) * padded_width + x]; + // Write as little-endian bytes so DecodePixelData's swap2 (LE->host) + // works correctly on both little- and big-endian platforms. + tinyexr::swap2(&val); + memcpy(out_p, &val, sizeof(val)); + out_p += sizeof(val); + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT || + channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + // Non-HALF data is stored uncompressed; copy from saved pointer + if (nonhalf_sizes[c] > 0) { + memcpy(out_p, nonhalf_ptrs[c], nonhalf_sizes[c]); + out_p += nonhalf_sizes[c]; + } + } + } + + return true; +} + +// ============================================================================ +// PXR24 compression (encoding) +// ============================================================================ + +// Convert float32 to float24 (PXR24 format) +static inline unsigned int float_to_float24(float f) { + union { float f; unsigned int i; } u; + u.f = f; + + unsigned int s = u.i & 0x80000000; + unsigned int e = u.i & 0x7f800000; + unsigned int m = u.i & 0x007fffff; + + if (e == 0x7f800000) { + if (m) { + // NaN - preserve sign and 15 leftmost mantissa bits + m >>= 8; + return (s >> 8) | (e >> 8) | m | (m == 0 ? 
1 : 0); + } else { + // Infinity + return (s >> 8) | (e >> 8); + } + } + + // Finite - round mantissa to 15 bits + unsigned int i = ((e | m) + (m & 0x00000080)) >> 8; + + if (i >= 0x7f8000) { + // Overflow - truncate instead of round + i = (e | m) >> 8; + } + + return (s >> 8) | i; +} + +static bool CompressPxr24(std::vector& outBuf, + const unsigned char *inPtr, size_t inLen, + int data_width, int num_lines, + size_t num_channels, + const EXRChannelInfo *channels) { + // PXR24 stores HALF as 2 bytes, UINT as 4 bytes, FLOAT as 3 bytes (truncated) + // Data is stored with byte plane separation and delta encoding + size_t pxr24_size = 0; + for (size_t c = 0; c < num_channels; c++) { + int ch_pixels = data_width * num_lines; + + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + pxr24_size += static_cast(ch_pixels) * 4; + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + pxr24_size += static_cast(ch_pixels) * 2; + } else { // FLOAT + pxr24_size += static_cast(ch_pixels) * 3; + } + } + + // Create PXR24 format data + std::vector pxr24_buf(pxr24_size); + const unsigned char* in_p = inPtr; + unsigned char* out_p = pxr24_buf.data(); + + // PXR24 uses: + // 1. Byte plane separation: bytes are stored by plane (all high bytes, then next-high, etc.) + // 2. 
Delta encoding: each pixel is stored as difference from previous pixel + for (int line = 0; line < num_lines; line++) { + for (size_t c = 0; c < num_channels; c++) { + int w = data_width; + + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + // UINT: 4 byte planes with delta encoding + unsigned char* ptr0 = out_p; + unsigned char* ptr1 = out_p + w; + unsigned char* ptr2 = out_p + w * 2; + unsigned char* ptr3 = out_p + w * 3; + out_p += w * 4; + + unsigned int prevPixel = 0; + for (int x = 0; x < w; x++) { + unsigned int pixel; + memcpy(&pixel, in_p, 4); + in_p += 4; + unsigned int diff = pixel - prevPixel; + prevPixel = pixel; + + ptr0[x] = static_cast(diff >> 24); + ptr1[x] = static_cast(diff >> 16); + ptr2[x] = static_cast(diff >> 8); + ptr3[x] = static_cast(diff); + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + // HALF: 2 byte planes with delta encoding + unsigned char* ptr0 = out_p; + unsigned char* ptr1 = out_p + w; + out_p += w * 2; + + unsigned int prevPixel = 0; + for (int x = 0; x < w; x++) { + unsigned short h; + memcpy(&h, in_p, 2); + in_p += 2; + unsigned int pixel = h; + unsigned int diff = pixel - prevPixel; + prevPixel = pixel; + + ptr0[x] = static_cast(diff >> 8); + ptr1[x] = static_cast(diff); + } + } else { // FLOAT + // FLOAT: convert to 24-bit, then 3 byte planes with delta encoding + unsigned char* ptr0 = out_p; + unsigned char* ptr1 = out_p + w; + unsigned char* ptr2 = out_p + w * 2; + out_p += w * 3; + + unsigned int prevPixel = 0; + for (int x = 0; x < w; x++) { + float f; + memcpy(&f, in_p, 4); + in_p += 4; + unsigned int pixel24 = float_to_float24(f); + unsigned int diff = pixel24 - prevPixel; + prevPixel = pixel24; + + // Store as 24-bit diff (shifted to upper bits for proper reconstruction) + ptr0[x] = static_cast(diff >> 16); + ptr1[x] = static_cast(diff >> 8); + ptr2[x] = static_cast(diff); + } + } + } + } + + // Compress with zlib +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + mz_ulong 
dest_len = mz_compressBound(static_cast(pxr24_size)); + outBuf.resize(dest_len); + int ret = mz_compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast(pxr24_size)); + if (ret != MZ_OK) { + return false; + } + outBuf.resize(static_cast(dest_len)); +#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1) + int outSize; + unsigned char* ret = stbi_zlib_compress(pxr24_buf.data(), static_cast(pxr24_size), &outSize, 8); + if (!ret) { + return false; + } + outBuf.assign(ret, ret + outSize); + free(ret); +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) + int outSize; + unsigned char* ret = nanoz_compress(pxr24_buf.data(), pxr24_size, &outSize, 8); + if (!ret) { + return false; + } + outBuf.assign(ret, ret + outSize); + free(ret); +#else + uLongf dest_len = compressBound(static_cast(pxr24_size)); + outBuf.resize(static_cast(dest_len)); + int ret = compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast(pxr24_size)); + if (ret != Z_OK) { + return false; + } + outBuf.resize(static_cast(dest_len)); +#endif + + return true; +} + +// Overloaded version that takes ChannelInfo instead of EXRChannelInfo +// Note: Uses requested_pixel_type which is the file format, not the input type +static bool CompressPxr24(std::vector& outBuf, + const unsigned char *inPtr, size_t inLen, + int data_width, int num_lines, + size_t num_channels, + const std::vector& channels) { + // PXR24 stores HALF as 2 bytes, UINT as 4 bytes, FLOAT as 3 bytes (truncated) + // Data is stored with byte plane separation and delta encoding + // Use requested_pixel_type which is the actual format in the file + size_t pxr24_size = 0; + for (size_t c = 0; c < num_channels; c++) { + int ch_pixels = data_width * num_lines; + int file_type = channels[c].requested_pixel_type; + + if (file_type == TINYEXR_PIXELTYPE_UINT) { + pxr24_size += static_cast(ch_pixels) * 4; + } else if (file_type == TINYEXR_PIXELTYPE_HALF) { + pxr24_size += static_cast(ch_pixels) * 2; + } else { // FLOAT 
+ pxr24_size += static_cast(ch_pixels) * 3; + } + } + + // Create PXR24 format data + std::vector pxr24_buf(pxr24_size); + const unsigned char* in_p = inPtr; + unsigned char* out_p = pxr24_buf.data(); + + // PXR24 uses: + // 1. Byte plane separation: bytes are stored by plane (all high bytes, then next-high, etc.) + // 2. Delta encoding: each pixel is stored as difference from previous pixel + for (int line = 0; line < num_lines; line++) { + for (size_t c = 0; c < num_channels; c++) { + int w = data_width; + int file_type = channels[c].requested_pixel_type; + + if (file_type == TINYEXR_PIXELTYPE_UINT) { + // UINT: 4 byte planes with delta encoding + unsigned char* ptr0 = out_p; + unsigned char* ptr1 = out_p + w; + unsigned char* ptr2 = out_p + w * 2; + unsigned char* ptr3 = out_p + w * 3; + out_p += w * 4; + + unsigned int prevPixel = 0; + for (int x = 0; x < w; x++) { + unsigned int pixel; + memcpy(&pixel, in_p, 4); + in_p += 4; + unsigned int diff = pixel - prevPixel; + prevPixel = pixel; + + ptr0[x] = static_cast(diff >> 24); + ptr1[x] = static_cast(diff >> 16); + ptr2[x] = static_cast(diff >> 8); + ptr3[x] = static_cast(diff); + } + } else if (file_type == TINYEXR_PIXELTYPE_HALF) { + // HALF: 2 byte planes with delta encoding + unsigned char* ptr0 = out_p; + unsigned char* ptr1 = out_p + w; + out_p += w * 2; + + unsigned int prevPixel = 0; + for (int x = 0; x < w; x++) { + unsigned short h; + memcpy(&h, in_p, 2); + in_p += 2; + unsigned int pixel = h; + unsigned int diff = pixel - prevPixel; + prevPixel = pixel; + + ptr0[x] = static_cast(diff >> 8); + ptr1[x] = static_cast(diff); + } + } else { // FLOAT + // FLOAT: convert to 24-bit, then 3 byte planes with delta encoding + unsigned char* ptr0 = out_p; + unsigned char* ptr1 = out_p + w; + unsigned char* ptr2 = out_p + w * 2; + out_p += w * 3; + + unsigned int prevPixel = 0; + for (int x = 0; x < w; x++) { + float f; + memcpy(&f, in_p, 4); + in_p += 4; + unsigned int pixel24 = float_to_float24(f); + unsigned int 
diff = pixel24 - prevPixel; + prevPixel = pixel24; + + // Store as 24-bit diff (shifted to upper bits for proper reconstruction) + ptr0[x] = static_cast(diff >> 16); + ptr1[x] = static_cast(diff >> 8); + ptr2[x] = static_cast(diff); + } + } + } + } + + // Compress with zlib +#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) + mz_ulong dest_len = mz_compressBound(static_cast(pxr24_size)); + outBuf.resize(dest_len); + int ret = mz_compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast(pxr24_size)); + if (ret != MZ_OK) { + return false; + } + outBuf.resize(static_cast(dest_len)); +#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1) + int outSize; + unsigned char* ret = stbi_zlib_compress(pxr24_buf.data(), static_cast(pxr24_size), &outSize, 8); + if (!ret) { + return false; + } + outBuf.assign(ret, ret + outSize); + free(ret); +#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1) + int outSize; + unsigned char* ret = nanoz_compress(pxr24_buf.data(), pxr24_size, &outSize, 8); + if (!ret) { + return false; + } + outBuf.assign(ret, ret + outSize); + free(ret); +#else + uLongf dest_len = compressBound(static_cast(pxr24_size)); + outBuf.resize(static_cast(dest_len)); + int ret = compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast(pxr24_size)); + if (ret != Z_OK) { + return false; + } + outBuf.resize(static_cast(dest_len)); +#endif + + return true; +} + +// ============================================================================ +// B44/B44A compression (encoding) +// ============================================================================ + +// Shift and round for B44 pack (matches OpenEXR's shiftAndRound) +static inline int B44ShiftAndRound(int x, int shift) { + // Compute y = x * pow(2, -shift), rounded to nearest integer + // In case of a tie, round to the even one + x <<= 1; + int a = (1 << shift) - 1; + shift += 1; + int b = (x >> shift) & 1; + return (x + a + b) >> shift; +} + +// Pack a 4x4 block of HALF 
values into 14 bytes (matches OpenEXR's pack()) +// Returns the number of bytes written (14 for normal, 3 for flat if flatfields=true) +static int PackB44Block(unsigned char* out, const unsigned short* block, bool flatfields, bool exactmax) { + int d[16]; + int r[15]; + int rMin, rMax; + unsigned short t[16]; + unsigned short tMax; + int shift = -1; + + const int bias = 0x20; + + // Convert half-float values to ordered-magnitude representation + // This ensures that if t[i] > t[j], then half[i] > half[j] as floats + for (int i = 0; i < 16; ++i) { + if ((block[i] & 0x7c00) == 0x7c00) { + t[i] = 0x8000; // NaN/Inf -> neutral value + } else if (block[i] & 0x8000) { + t[i] = ~block[i]; // Negative: invert all bits + } else { + t[i] = block[i] | 0x8000; // Positive: set sign bit + } + } + + // Find maximum t value + tMax = 0; + for (int i = 0; i < 16; ++i) { + if (tMax < t[i]) tMax = t[i]; + } + + // Compute running differences and find valid shift + do { + shift += 1; + + // Compute absolute differences from tMax, shifted and rounded + for (int i = 0; i < 16; ++i) { + d[i] = B44ShiftAndRound(tMax - t[i], shift); + } + + // Convert to running differences (specific pattern for B44) + r[0] = d[0] - d[4] + bias; + r[1] = d[4] - d[8] + bias; + r[2] = d[8] - d[12] + bias; + + r[3] = d[0] - d[1] + bias; + r[4] = d[4] - d[5] + bias; + r[5] = d[8] - d[9] + bias; + r[6] = d[12] - d[13] + bias; + + r[7] = d[1] - d[2] + bias; + r[8] = d[5] - d[6] + bias; + r[9] = d[9] - d[10] + bias; + r[10] = d[13] - d[14] + bias; + + r[11] = d[2] - d[3] + bias; + r[12] = d[6] - d[7] + bias; + r[13] = d[10] - d[11] + bias; + r[14] = d[14] - d[15] + bias; + + rMin = r[0]; + rMax = r[0]; + for (int i = 1; i < 15; ++i) { + if (rMin > r[i]) rMin = r[i]; + if (rMax < r[i]) rMax = r[i]; + } + } while (rMin < 0 || rMax > 0x3f); + + // Check for flat block (all pixels same value) + if (rMin == bias && rMax == bias && flatfields) { + // Encode as 3 bytes: t[0] and marker 0xfc + out[0] = static_cast(t[0] 
>> 8); + out[1] = static_cast(t[0]); + out[2] = 0xfc; // Flat block marker (shift >= 13) + return 3; + } + + if (exactmax) { + // Adjust t[0] so the max pixel is represented accurately + t[0] = tMax - static_cast(d[0] << shift); + } + + // Pack t[0], shift, and r[0]..r[14] into 14 bytes + out[0] = static_cast(t[0] >> 8); + out[1] = static_cast(t[0]); + out[2] = static_cast((shift << 2) | (r[0] >> 4)); + out[3] = static_cast((r[0] << 4) | (r[1] >> 2)); + out[4] = static_cast((r[1] << 6) | r[2]); + out[5] = static_cast((r[3] << 2) | (r[4] >> 4)); + out[6] = static_cast((r[4] << 4) | (r[5] >> 2)); + out[7] = static_cast((r[5] << 6) | r[6]); + out[8] = static_cast((r[7] << 2) | (r[8] >> 4)); + out[9] = static_cast((r[8] << 4) | (r[9] >> 2)); + out[10] = static_cast((r[9] << 6) | r[10]); + out[11] = static_cast((r[11] << 2) | (r[12] >> 4)); + out[12] = static_cast((r[12] << 4) | (r[13] >> 2)); + out[13] = static_cast((r[13] << 6) | r[14]); + + return 14; +} + +static bool CompressB44(std::vector& outBuf, + const unsigned char *inPtr, size_t inLen, + int data_width, int num_lines, + size_t num_channels, + const EXRChannelInfo *channels, + bool is_b44a) { + // Calculate number of 4x4 blocks + int num_blocks_x = (data_width + 3) / 4; + int num_blocks_y = (num_lines + 3) / 4; + + // Estimate output size (14 bytes per block per HALF channel) + size_t max_size = 0; + for (size_t c = 0; c < num_channels; c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + max_size += static_cast(num_blocks_x) * num_blocks_y * 14; + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT || + channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + max_size += static_cast(data_width) * num_lines * 4; + } else { + max_size += static_cast(data_width) * num_lines * 2; + } + } + + outBuf.resize(max_size); + unsigned char* out_p = outBuf.data(); + + // Process each channel + size_t in_offset = 0; + for (size_t c = 0; c < num_channels; c++) { + if (channels[c].pixel_type != 
TINYEXR_PIXELTYPE_HALF) { + // Non-HALF channels are stored uncompressed + size_t ch_bytes = static_cast(data_width) * num_lines; + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT || + channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + ch_bytes *= 4; + } else { + ch_bytes *= 2; + } + memcpy(out_p, inPtr + in_offset, ch_bytes); + out_p += ch_bytes; + in_offset += ch_bytes; + continue; + } + + // Process HALF channel in 4x4 blocks + const unsigned short* ch_ptr = reinterpret_cast(inPtr + in_offset); + + for (int by = 0; by < num_blocks_y; by++) { + for (int bx = 0; bx < num_blocks_x; bx++) { + unsigned short block[16]; + + // Gather block pixels with edge replication for padding + for (int dy = 0; dy < 4; dy++) { + int y = by * 4 + dy; + int src_y = (y >= num_lines) ? (num_lines - 1) : y; + + for (int dx = 0; dx < 4; dx++) { + int x = bx * 4 + dx; + int src_x = (x >= data_width) ? (data_width - 1) : x; + + block[dy * 4 + dx] = ch_ptr[src_y * data_width + src_x]; + } + } + + // Pack block - PackB44Block handles flat block detection internally + // flatfields = is_b44a, exactmax = true (for better accuracy) + int bytes_written = PackB44Block(out_p, block, is_b44a, true); + out_p += bytes_written; + } + } + + in_offset += static_cast(data_width) * num_lines * 2; + } + + // Resize to actual size + outBuf.resize(static_cast(out_p - outBuf.data())); + + return true; +} + +// Overloaded version that takes ChannelInfo instead of EXRChannelInfo +// Note: Uses requested_pixel_type which is the file format, not the input type +static bool CompressB44(std::vector& outBuf, + const unsigned char *inPtr, size_t inLen, + int data_width, int num_lines, + size_t num_channels, + const std::vector& channels, + bool is_b44a) { + // Calculate number of 4x4 blocks + int num_blocks_x = (data_width + 3) / 4; + int num_blocks_y = (num_lines + 3) / 4; + + // Estimate output size - use requested_pixel_type (file format) + size_t max_size = 0; + for (size_t c = 0; c < num_channels; c++) 
{ + int file_type = channels[c].requested_pixel_type; + if (file_type == TINYEXR_PIXELTYPE_HALF) { + max_size += static_cast(num_blocks_x) * num_blocks_y * 14; + } else if (file_type == TINYEXR_PIXELTYPE_UINT || + file_type == TINYEXR_PIXELTYPE_FLOAT) { + max_size += static_cast(data_width) * num_lines * 4; + } else { + max_size += static_cast(data_width) * num_lines * 2; + } + } + + outBuf.resize(max_size); + unsigned char* out_p = outBuf.data(); + + size_t in_offset = 0; + for (size_t c = 0; c < num_channels; c++) { + int file_type = channels[c].requested_pixel_type; + if (file_type != TINYEXR_PIXELTYPE_HALF) { + size_t ch_bytes = static_cast(data_width) * num_lines; + if (file_type == TINYEXR_PIXELTYPE_UINT || + file_type == TINYEXR_PIXELTYPE_FLOAT) { + ch_bytes *= 4; + } else { + ch_bytes *= 2; + } + memcpy(out_p, inPtr + in_offset, ch_bytes); + out_p += ch_bytes; + in_offset += ch_bytes; + continue; + } + + const unsigned short* ch_ptr = reinterpret_cast(inPtr + in_offset); + + for (int by = 0; by < num_blocks_y; by++) { + for (int bx = 0; bx < num_blocks_x; bx++) { + unsigned short block[16]; + + // Gather block pixels with edge replication for padding + for (int dy = 0; dy < 4; dy++) { + int y = by * 4 + dy; + int src_y = (y >= num_lines) ? (num_lines - 1) : y; + + for (int dx = 0; dx < 4; dx++) { + int x = bx * 4 + dx; + int src_x = (x >= data_width) ? 
(data_width - 1) : x; + + block[dy * 4 + dx] = ch_ptr[src_y * data_width + src_x]; + } + } + + // Pack block - PackB44Block handles flat block detection internally + int bytes_written = PackB44Block(out_p, block, is_b44a, true); + out_p += bytes_written; + } + } + + in_offset += static_cast(data_width) * num_lines * 2; + } + + outBuf.resize(static_cast(out_p - outBuf.data())); + return true; +} + #if TINYEXR_USE_ZFP struct ZFPCompressionParam { @@ -4090,6 +5335,257 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, (void)num_channels; return false; #endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) { + // PXR24 compression: Use true PXR24 decompression + // PXR24 truncates FLOAT to 24-bits, HALF/UINT pass through unchanged + std::vector outBuf(static_cast(width) * + static_cast(num_lines) * + pixel_data_size); + + if (!tinyexr::DecompressPxr24( + reinterpret_cast(&outBuf.at(0)), outBuf.size(), + data_ptr, static_cast(data_len), + width, num_lines, static_cast(num_channels), channels)) { + return false; + } + + // Process decompressed data (same as ZIP path) + for (size_t c = 0; c < static_cast(num_channels); c++) { + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(v * static_cast(pixel_data_size) * + static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + tinyexr::FP16 hf; + tinyexr::cpy2(&(hf.u), line_ptr + u); + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + 
tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false); + + for (size_t v = 0; v < static_cast(num_lines); v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + unsigned int val; + tinyexr::cpy4(&val, line_ptr + u); + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < static_cast(num_lines); v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(v * pixel_data_size * static_cast(width) + + channel_offset_list[c] * static_cast(width))); + for (size_t u = 0; u < static_cast(width); u++) { + float val; + tinyexr::cpy4(&val, line_ptr + u); + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + } + } else if (compression_type == 
TINYEXR_COMPRESSIONTYPE_B44 || + compression_type == TINYEXR_COMPRESSIONTYPE_B44A) { + // B44/B44A compression: Use true B44 block decompression + // B44 is a lossy block compression for HALF data (4x4 blocks -> 14 bytes) + bool is_b44a = (compression_type == TINYEXR_COMPRESSIONTYPE_B44A); + + // Compute outBuf size matching DecompressB44's output layout: per-channel + // sequential data using subsampled dimensions for each channel. + size_t b44_out_size = 0; + for (size_t c = 0; c < static_cast(num_channels); c++) { + int xs = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1; + int ys = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1; + size_t cw = static_cast((width + xs - 1) / xs); + size_t ch = static_cast((num_lines + ys - 1) / ys); + size_t bpp = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) ? 2u : 4u; + b44_out_size += cw * ch * bpp; + } + + std::vector outBuf(b44_out_size); + + if (!tinyexr::DecompressB44( + reinterpret_cast(&outBuf.at(0)), outBuf.size(), + data_ptr, static_cast(data_len), + width, num_lines, static_cast(num_channels), channels, + is_b44a)) { + return false; + } + + // Process decompressed data - B44 returns data organized per channel, + // using subsampled dimensions (ch_width/ch_height based on x/y_sampling). + // Accumulate ch_offset based on actual subsampled sizes of preceding + // channels to handle mixed channel types and subsampling correctly. + size_t ch_offset = 0; + for (size_t c = 0; c < static_cast(num_channels); c++) { + int xs = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1; + int ys = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1; + size_t ch_width = static_cast((width + xs - 1) / xs); + size_t ch_height = static_cast((num_lines + ys - 1) / ys); + size_t ch_bytes = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) ? 
2 : 4; + + if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { + for (size_t v = 0; v < ch_height; v++) { + const unsigned short *line_ptr = reinterpret_cast( + &outBuf.at(ch_offset + v * ch_width * 2)); + for (size_t u = 0; u < ch_width; u++) { + tinyexr::FP16 hf; + tinyexr::cpy2(&(hf.u), line_ptr + u); + // B44 stream stores data in little-endian order (same as the + // encoder's buf); reverse the byte swap the encoder applied. + tinyexr::swap2(reinterpret_cast(&hf.u)); + + if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { + unsigned short *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = hf.u; + } else { // HALF -> FLOAT + tinyexr::FP32 f32 = half_to_float(hf); + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = f32.f; + } + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { + TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false); + + for (size_t v = 0; v < ch_height; v++) { + const unsigned int *line_ptr = reinterpret_cast( + &outBuf.at(ch_offset + v * ch_width * 4)); + for (size_t u = 0; u < ch_width; u++) { + unsigned int val; + tinyexr::cpy4(&val, line_ptr + u); + tinyexr::swap4(&val); + + unsigned int *image = + reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { + 
TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false); + for (size_t v = 0; v < ch_height; v++) { + const float *line_ptr = reinterpret_cast( + &outBuf.at(ch_offset + v * ch_width * 4)); + for (size_t u = 0; u < ch_width; u++) { + float val; + tinyexr::cpy4(&val, line_ptr + u); + tinyexr::swap4(reinterpret_cast(&val)); + + float *image = reinterpret_cast(out_images)[c]; + if (line_order == 0) { + image += (static_cast(line_no) + v) * + static_cast(x_stride) + + u; + } else { + image += (static_cast(height) - 1U - + (static_cast(line_no) + v)) * + static_cast(x_stride) + + u; + } + *image = val; + } + } + } else { + return false; + } + ch_offset += ch_width * ch_height * ch_bytes; + } } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { for (size_t c = 0; c < num_channels; c++) { for (size_t v = 0; v < static_cast(num_lines); v++) { @@ -4217,7 +5713,7 @@ static bool DecodePixelData(/* out */ unsigned char **out_images, static bool DecodeTiledPixelData( unsigned char **out_images, int *width, int *height, const int *requested_pixel_types, const unsigned char *data_ptr, - size_t data_len, int compression_type, int line_order, int data_width, + size_t data_len, int compression_type, int data_width, int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x, int tile_size_y, size_t pixel_data_size, size_t num_attributes, const EXRAttribute *attributes, size_t num_channels, @@ -4243,8 +5739,9 @@ static bool DecodeTiledPixelData( } // Image size = tile size. + // Line order within tiles is always increasing. 
return DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len, - compression_type, line_order, (*width), tile_size_y, + compression_type, /* line_order*/ 0, (*width), tile_size_y, /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0, (*height), pixel_data_size, num_attributes, attributes, num_channels, channels, channel_offset_list); @@ -4514,6 +6011,13 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, #endif } + // PXR24, B44, B44A compression types + if (data[0] == TINYEXR_COMPRESSIONTYPE_PXR24 || + data[0] == TINYEXR_COMPRESSIONTYPE_B44 || + data[0] == TINYEXR_COMPRESSIONTYPE_B44A) { + ok = true; + } + if (!ok) { if (err) { (*err) = "Unknown compression type."; @@ -4866,7 +6370,7 @@ static int LevelIndex(int lx, int ly, int tile_level_mode, int num_x_levels) { default: return -1; } -// return 0; + return 0; } static int LevelSize(int toplevel_size, int level, int tile_rounding_mode) { @@ -4935,10 +6439,12 @@ static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header, std::atomic tile_count(0); int num_threads = std::max(1, int(std::thread::hardware_concurrency())); +#if (TINYEXR_MAX_THREADS > 0) + num_threads = std::min(num_threads,TINYEXR_MAX_THREADS); +#endif if (num_threads > int(num_tiles)) { num_threads = int(num_tiles); } - for (int t = 0; t < num_threads; t++) { workers.emplace_back(std::thread([&]() { @@ -5017,7 +6523,6 @@ static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header, &(exr_image->tiles[tile_idx].height), exr_header->requested_pixel_types, data_ptr, static_cast(data_len), exr_header->compression_type, - exr_header->line_order, exr_image->width, exr_image->height, tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x, exr_header->tile_size_y, static_cast(pixel_data_size), @@ -5078,16 +6583,23 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, num_scanline_blocks = 32; } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) 
{ num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44 || + exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44A) { + num_scanline_blocks = 32; + } #if TINYEXR_USE_ZFP + if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { tinyexr::ZFPCompressionParam zfp_compression_param; if (!FindZFPCompressionParam(&zfp_compression_param, exr_header->custom_attributes, int(exr_header->num_custom_attributes), err)) { return TINYEXR_ERROR_INVALID_HEADER; } -#endif } +#endif if (exr_header->data_window.max_x < exr_header->data_window.min_x || exr_header->data_window.max_y < exr_header->data_window.min_y) { @@ -5291,10 +6803,12 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, std::atomic y_count(0); int num_threads = std::max(1, int(std::thread::hardware_concurrency())); +#if (TINYEXR_MAX_THREADS > 0) + num_threads = std::min(num_threads,TINYEXR_MAX_THREADS); +#endif if (num_threads > int(num_blocks)) { num_threads = int(num_blocks); } - for (int t = 0; t < num_threads; t++) { workers.emplace_back(std::thread([&]() { int y = 0; @@ -5369,10 +6883,11 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, if (line_no < 0) { invalid_data = true; } else { + // Line order is increasing because we read in line offset table order. 
if (!tinyexr::DecodePixelData( exr_image->images, exr_header->requested_pixel_types, data_ptr, static_cast(data_len), - exr_header->compression_type, exr_header->line_order, + exr_header->compression_type, /* line_order*/ 0, int(data_width), int(data_height), int(data_width), y, line_no, num_lines, static_cast(pixel_data_size), static_cast( @@ -5860,7 +7375,7 @@ static bool ReconstructTileOffsets(OffsetData& offset_data, if (size_t(tileX) >= offset_data.offsets[size_t(level_idx)][size_t(tileY)].size()) { return false; } - + offset_data.offsets[size_t(level_idx)][size_t(tileY)][size_t(tileX)] = tileOffset; } } @@ -5914,6 +7429,11 @@ static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, num_scanline_blocks = 32; } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) { + num_scanline_blocks = 16; + } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44 || + exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44A) { + num_scanline_blocks = 32; } if (exr_header->data_window.max_x < exr_header->data_window.min_x || @@ -6187,18 +7707,15 @@ int EXRLayers(const char *filename, const char **layer_names[], int *num_layers, } int LoadEXR(float **out_rgba, int *width, int *height, const char *filename, - const char **err, int *num_chans) { + const char **err) { return LoadEXRWithLayer(out_rgba, width, height, filename, - /* layername */ NULL, err, num_chans); + /* layername */ NULL, err); } int LoadEXRWithLayer(float **out_rgba, int *width, int *height, const char *filename, const char *layername, - const char **err, int *num_chans) { - if (num_chans) - *num_chans = 0; - - if (out_rgba == NULL) { + const char **err) { + if (out_rgba == NULL) { tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err); return TINYEXR_ERROR_INVALID_ARGUMENT; } @@ -6293,9 +7810,6 @@ int LoadEXRWithLayer(float **out_rgba, int 
*width, int *height, } if (channels.size() == 1) { - if (num_chans) - *num_chans = 1; - int chIdx = int(channels.front().index); // Grayscale channel only. @@ -6374,9 +7888,6 @@ int LoadEXRWithLayer(float **out_rgba, int *width, int *height, return TINYEXR_ERROR_INVALID_DATA; } - if (num_chans) - *num_chans = (idxA != -1) ? 4 : 3; - (*out_rgba) = reinterpret_cast( malloc(4 * sizeof(float) * static_cast(exr_image.width) * static_cast(exr_image.height))); @@ -6555,6 +8066,7 @@ int LoadEXRFromMemory(float **out_rgba, int *width, int *height, ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err); if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); return ret; } @@ -6568,6 +8080,8 @@ int LoadEXRFromMemory(float **out_rgba, int *width, int *height, InitEXRImage(&exr_image); ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err); if (ret != TINYEXR_SUCCESS) { + FreeEXRHeader(&exr_header); + FreeEXRImage(&exr_image); return ret; } @@ -6871,7 +8385,7 @@ struct MemoryMappedFile { if (read_bytes != size) { // TODO: Try to read data until reading `size` bytes. 
fclose(fp); - size = 0; + size = 0; data = nullptr; return; } @@ -7133,7 +8647,7 @@ static bool EncodePixelData(/* out */ std::vector& out_data, } else if ((compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1) - std::vector block(buminiz::mz_compressBound( + std::vector block(mz_compressBound( static_cast(buf.size()))); #elif TINYEXR_USE_STB_ZLIB // there is no compressBound() function, so we use a value that @@ -7238,6 +8752,70 @@ static bool EncodePixelData(/* out */ std::vector& out_data, (void)compression_param; return false; #endif + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) { + // PXR24 compression: True PXR24 (truncates FLOAT to 24-bits + zlib) + std::vector block; + + if (!tinyexr::CompressPxr24(block, + reinterpret_cast(&buf.at(0)), + buf.size(), width, num_lines, + channels.size(), channels)) { + if (err) { + (*err) += "PXR24 compression failed.\n"; + } + return false; + } + + out_data.insert(out_data.end(), block.begin(), block.end()); + + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 || + compression_type == TINYEXR_COMPRESSIONTYPE_B44A) { + // B44/B44A compression: True B44 block compression for HALF data + bool is_b44a = (compression_type == TINYEXR_COMPRESSIONTYPE_B44A); + std::vector block; + + // CompressB44 expects per-channel sequential layout, but buf is + // scanline-interleaved: within each row, channels are stored contiguously + // (channel_offset_list[c] * width bytes into the row), and rows are + // stacked. Convert to per-channel sequential before compressing. + std::vector seq_buf(buf_size); + unsigned char *seq_p = seq_buf.data(); + for (size_t c = 0; c < channels.size(); c++) { + int file_type = channels[c].requested_pixel_type; + // HALF is 2 bytes; FLOAT and UINT are both 4 bytes + size_t ch_size = (file_type == TINYEXR_PIXELTYPE_HALF) ? 
2 : 4; + for (int y = 0; y < num_lines; y++) { + const unsigned char *src = + &buf[y * pixel_data_size * width + channel_offset_list[c] * width]; + size_t row_bytes = static_cast(width) * ch_size; + memcpy(seq_p, src, row_bytes); +#if !TINYEXR_LITTLE_ENDIAN + // buf has already been byte-swapped to little-endian for file output. + // CompressB44 reads HALF values as host-endian unsigned shorts, so + // un-swap the bytes back to host-endian for correct B44 encoding. + if (file_type == TINYEXR_PIXELTYPE_HALF) { + unsigned short *p = reinterpret_cast(seq_p); + for (int x = 0; x < width; x++) { + tinyexr::swap2(p + x); + } + } +#endif + seq_p += row_bytes; + } + } + + if (!tinyexr::CompressB44(block, + reinterpret_cast(seq_buf.data()), + seq_buf.size(), width, num_lines, + channels.size(), channels, is_b44a)) { + if (err) { + (*err) += "B44 compression failed.\n"; + } + return false; + } + + out_data.insert(out_data.end(), block.begin(), block.end()); + } else { return false; } @@ -7282,6 +8860,9 @@ static int EncodeTiledLevel(const EXRImage* level_image, const EXRHeader* exr_he std::atomic tile_count(0); int num_threads = std::max(1, int(std::thread::hardware_concurrency())); +#if (TINYEXR_MAX_THREADS > 0) + num_threads = std::min(num_threads,TINYEXR_MAX_THREADS); +#endif if (num_threads > int(num_tiles)) { num_threads = int(num_tiles); } @@ -7378,6 +8959,11 @@ static int NumScanlines(int compression_type) { num_scanlines = 32; } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { num_scanlines = 16; + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) { + num_scanlines = 16; // PXR24 uses 16 scanlines per block (same as ZIP) + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 || + compression_type == TINYEXR_COMPRESSIONTYPE_B44A) { + num_scanlines = 32; // B44/B44A uses 32 scanlines per block } return num_scanlines; } @@ -7531,7 +9117,9 @@ static int EncodeChunk(const EXRImage* exr_image, const EXRHeader* exr_header, std::atomic 
block_count(0); int num_threads = std::min(std::max(1, int(std::thread::hardware_concurrency())), num_blocks); - +#if (TINYEXR_MAX_THREADS > 0) + num_threads = std::min(num_threads,TINYEXR_MAX_THREADS); +#endif for (int t = 0; t < num_threads; t++) { workers.emplace_back(std::thread([&]() { int i = 0; @@ -7708,7 +9296,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) { SetErrorMessage("Failed to precalculate Tile info", err); - return (size_t)TINYEXR_ERROR_INVALID_DATA; + return TINYEXR_ERROR_INVALID_DATA; } int ntiles = InitTileOffsets(offset_data[i], exr_headers[i], num_x_tiles, num_y_tiles); if (ntiles > 0) { @@ -7716,8 +9304,8 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, } else { SetErrorMessage("Failed to compute Tile offsets", err); - return (size_t)TINYEXR_ERROR_INVALID_DATA; - + return TINYEXR_ERROR_INVALID_DATA; + } total_chunk_count += chunk_count[i]; } @@ -7918,7 +9506,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, // Allocating required memory if (total_size == 0) { // something went wrong tinyexr::SetErrorMessage("Output memory size is zero", err); - return (size_t)TINYEXR_ERROR_INVALID_DATA; + return TINYEXR_ERROR_INVALID_DATA; } (*memory_out) = static_cast(malloc(size_t(total_size))); @@ -7939,7 +9527,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, sum += num_bytes; if (sum > total_size) { tinyexr::SetErrorMessage("Invalid offset bytes in Tiled Part image.", err); - return (size_t)TINYEXR_ERROR_INVALID_DATA; + return TINYEXR_ERROR_INVALID_DATA; } memcpy(memory_ptr, @@ -7954,7 +9542,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, sum += num_bytes; if (sum > total_size) { tinyexr::SetErrorMessage("Invalid offset bytes in Part image.", err); - return (size_t)TINYEXR_ERROR_INVALID_DATA; + return TINYEXR_ERROR_INVALID_DATA; } std::vector& offsets = 
offset_data[i].offsets[0][0]; memcpy(memory_ptr, reinterpret_cast(&offsets[0]), num_bytes); @@ -7969,7 +9557,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, sum += 4; if (sum > total_size) { tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err); - return (size_t)TINYEXR_ERROR_INVALID_DATA; + return TINYEXR_ERROR_INVALID_DATA; } unsigned int part_number = i; swap4(&part_number); @@ -7979,7 +9567,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, sum += data_lists[i][j].size(); if (sum > total_size) { tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err); - return (size_t)TINYEXR_ERROR_INVALID_DATA; + return TINYEXR_ERROR_INVALID_DATA; } memcpy(memory_ptr, &data_lists[i][j][0], data_lists[i][j].size()); memory_ptr += data_lists[i][j].size(); @@ -7988,7 +9576,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images, if (sum != total_size) { tinyexr::SetErrorMessage("Corrupted Part image chunk data.", err); - return (size_t)TINYEXR_ERROR_INVALID_DATA; + return TINYEXR_ERROR_INVALID_DATA; } return size_t(total_size); // OK @@ -8080,7 +9668,7 @@ size_t SaveEXRMultipartImageToMemory(const EXRImage* exr_images, const EXRHeader** exr_headers, unsigned int num_parts, unsigned char** memory_out, const char** err) { - if (exr_images == NULL || exr_headers == NULL || num_parts < 2 || + if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || memory_out == NULL) { tinyexr::SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory", err); @@ -8094,7 +9682,7 @@ int SaveEXRMultipartImageToFile(const EXRImage* exr_images, unsigned int num_parts, const char* filename, const char** err) { - if (exr_images == NULL || exr_headers == NULL || num_parts < 2) { + if (exr_images == NULL || exr_headers == NULL || num_parts == 0) { tinyexr::SetErrorMessage("Invalid argument for SaveEXRMultipartImageToFile", err); return TINYEXR_ERROR_INVALID_ARGUMENT; @@ -8226,7 
+9814,7 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { if (attr_name.compare("compression") == 0) { compression_type = data[0]; - if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) { + if (compression_type > TINYEXR_COMPRESSIONTYPE_B44A) { std::stringstream ss; ss << "Unsupported compression type : " << compression_type; tinyexr::SetErrorMessage(ss.str(), err); @@ -8235,6 +9823,9 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { num_scanline_blocks = 16; + } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 || + compression_type == TINYEXR_COMPRESSIONTYPE_B44A) { + num_scanline_blocks = 32; } } else if (attr_name.compare("channels") == 0) { @@ -8583,20 +10174,7 @@ int EXRNumLevels(const EXRImage* exr_image) { if(exr_image->images) return 1; // scanlines int levels = 1; const EXRImage* level_image = exr_image; - -#if 0 - while ((level_image = level_image->next_level)) - ++levels; -#else - for (; ;) - { - level_image = level_image->next_level; - if (!level_image) - break; - ++levels; - } -#endif - + while((level_image = level_image->next_level)) ++levels; return levels; } @@ -8608,16 +10186,19 @@ int FreeEXRImage(EXRImage *exr_image) { if (exr_image->next_level) { FreeEXRImage(exr_image->next_level); delete exr_image->next_level; + exr_image->next_level = NULL; } for (int i = 0; i < exr_image->num_channels; i++) { if (exr_image->images && exr_image->images[i]) { free(exr_image->images[i]); + exr_image->images[i] = NULL; } } if (exr_image->images) { free(exr_image->images); + exr_image->images = NULL; } if (exr_image->tiles) { @@ -8625,15 +10206,21 @@ int FreeEXRImage(EXRImage *exr_image) { for (int i = 0; i < exr_image->num_channels; i++) { if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) { free(exr_image->tiles[tid].images[i]); + exr_image->tiles[tid].images[i] = NULL; } } if (exr_image->tiles[tid].images) { 
free(exr_image->tiles[tid].images); + exr_image->tiles[tid].images = NULL; } } free(exr_image->tiles); + exr_image->tiles = NULL; } + exr_image->num_channels = 0; + exr_image->num_tiles = 0; + return TINYEXR_SUCCESS; } @@ -8787,6 +10374,71 @@ int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, exr_headers, num_headers, exr_version, file.data, file.size, err); } +// ======================================================================== +// Refactored loader functions using Reader class for safer memory access +// ======================================================================== + +namespace { // anonymous namespace for internal helpers + +// Parse EXR version header using Reader class +static int ParseEXRVersionWithReader(EXRVersion *version, tinyexr::Reader& reader) { + if (version == NULL) { + return TINYEXR_ERROR_INVALID_ARGUMENT; + } + + if (reader.length() < tinyexr::kEXRVersionSize) { + reader.add_error("Insufficient data size for EXR version header"); + return TINYEXR_ERROR_INVALID_DATA; + } + + // Check magic number: 0x76, 0x2f, 0x31, 0x01 + uint8_t header[4]; + if (!reader.read(4, header)) { + return TINYEXR_ERROR_INVALID_DATA; + } + + const uint8_t expected_header[] = {0x76, 0x2f, 0x31, 0x01}; + if (header[0] != expected_header[0] || header[1] != expected_header[1] || + header[2] != expected_header[2] || header[3] != expected_header[3]) { + reader.add_error("Invalid EXR magic number"); + return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; + } + + // Parse version byte (must be 2) + uint8_t version_byte; + if (!reader.read1(&version_byte)) { + return TINYEXR_ERROR_INVALID_DATA; + } + + if (version_byte != 2) { + reader.add_error("Invalid EXR version"); + return TINYEXR_ERROR_INVALID_EXR_VERSION; + } + + version->version = 2; + + // Parse flags byte + uint8_t flags; + if (!reader.read1(&flags)) { + return TINYEXR_ERROR_INVALID_DATA; + } + + version->tiled = (flags & 0x2) ? 
true : false; // 9th bit + version->long_name = (flags & 0x4) ? true : false; // 10th bit + version->non_image = (flags & 0x8) ? true : false; // 11th bit (deep image) + version->multipart = (flags & 0x10) ? true : false; // 12th bit + + // Skip remaining 2 bytes to complete the 8-byte version header + uint8_t dummy[2]; + if (!reader.read(2, dummy)) { + return TINYEXR_ERROR_INVALID_DATA; + } + + return TINYEXR_SUCCESS; +} + +} // anonymous namespace + int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory, size_t size) { if (version == NULL || memory == NULL) { @@ -8797,51 +10449,13 @@ int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory, return TINYEXR_ERROR_INVALID_DATA; } - const unsigned char *marker = memory; + // Use Reader class for safer memory access + tinyexr::Reader reader(memory, size, tinyexr::Endian::Little); + int ret = ParseEXRVersionWithReader(version, reader); - // Header check. - { - const char header[] = {0x76, 0x2f, 0x31, 0x01}; - - if (memcmp(marker, header, 4) != 0) { - return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; - } - marker += 4; - } - - version->tiled = false; - version->long_name = false; - version->non_image = false; - version->multipart = false; - - // Parse version header. 
- { - // must be 2 - if (marker[0] != 2) { - return TINYEXR_ERROR_INVALID_EXR_VERSION; - } - - if (version == NULL) { - return TINYEXR_SUCCESS; // May OK - } - - version->version = 2; - - if (marker[1] & 0x2) { // 9th bit - version->tiled = true; - } - if (marker[1] & 0x4) { // 10th bit - version->long_name = true; - } - if (marker[1] & 0x8) { // 11th bit - version->non_image = true; // (deep image) - } - if (marker[1] & 0x10) { // 12th bit - version->multipart = true; - } - } - - return TINYEXR_SUCCESS; + // Note: errors are accumulated in reader.errors() but not propagated + // to maintain compatibility with existing API + return ret; } int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) { @@ -9035,7 +10649,7 @@ int LoadEXRMultipartImageFromFile(EXRImage *exr_images, } int SaveEXRToMemory(const float *data, int width, int height, int components, - const int save_as_fp16, const unsigned char **outbuf, const char **err) { + const int save_as_fp16, unsigned char **outbuf, const char **err) { if ((components == 1) || components == 3 || components == 4) { // OK @@ -9074,13 +10688,19 @@ int SaveEXRToMemory(const float *data, int width, int height, int components, images[3].resize(static_cast(width * height)); // Split RGB(A)RGB(A)RGB(A)... 
into R, G and B(and A) layers - for (size_t i = 0; i < static_cast(width * height); i++) { - images[0][i] = data[static_cast(components) * i + 0]; - images[1][i] = data[static_cast(components) * i + 1]; - images[2][i] = data[static_cast(components) * i + 2]; - if (components == 4) { - images[3][i] = data[static_cast(components) * i + 3]; - } + if (components == 4) { + for (size_t i = 0; i < static_cast(width * height); i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + images[3][i] = data[static_cast(components) * i + 3]; + } + } else { + for (size_t i = 0; i < static_cast(width * height); i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + } } } @@ -9225,13 +10845,19 @@ int SaveEXR(const float *data, int width, int height, int components, images[3].resize(pixel_count); // Split RGB(A)RGB(A)RGB(A)... 
into R, G and B(and A) layers - for (size_t i = 0; i < pixel_count; i++) { - images[0][i] = data[static_cast(components) * i + 0]; - images[1][i] = data[static_cast(components) * i + 1]; - images[2][i] = data[static_cast(components) * i + 2]; - if (components == 4) { - images[3][i] = data[static_cast(components) * i + 3]; - } + if (components == 4) { + for (size_t i = 0; i < pixel_count; i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + images[3][i] = data[static_cast(components) * i + 3]; + } + } else { + for (size_t i = 0; i < pixel_count; i++) { + images[0][i] = data[static_cast(components) * i + 0]; + images[1][i] = data[static_cast(components) * i + 1]; + images[2][i] = data[static_cast(components) * i + 2]; + } } } @@ -9314,9 +10940,6 @@ int SaveEXR(const float *data, int width, int height, int components, } int ret = SaveEXRImageToFile(&image, &header, outfilename, err); - if (ret != TINYEXR_SUCCESS) { - return ret; - } free(header.channels); free(header.pixel_types); @@ -9325,6 +10948,394 @@ int SaveEXR(const float *data, int width, int height, int components, return ret; } +// ---------------------------------------------------------------- +// Spectral EXR API implementations +// ---------------------------------------------------------------- + +// Helper to format wavelength with comma as decimal separator (European convention) +void EXRFormatWavelength(char *buffer, size_t buffer_size, float wavelength_nm) { + if (!buffer || buffer_size < 16) return; + + // Format with 6 decimal places + int whole = static_cast(wavelength_nm); + int frac = static_cast((wavelength_nm - whole) * 1000000.0f + 0.5f); + +#ifdef _MSC_VER + sprintf_s(buffer, buffer_size, "%d,%06d", whole, frac); +#else + snprintf(buffer, buffer_size, "%d,%06d", whole, frac); +#endif +} + +// Create spectral channel name for emissive spectrum +void 
EXRSpectralChannelName(char *buffer, size_t buffer_size, + float wavelength_nm, int stokes_component) { + if (!buffer || buffer_size < 32) return; + + char wavelength_str[32]; + EXRFormatWavelength(wavelength_str, sizeof(wavelength_str), wavelength_nm); + +#ifdef _MSC_VER + sprintf_s(buffer, buffer_size, "S%d.%snm", stokes_component, wavelength_str); +#else + snprintf(buffer, buffer_size, "S%d.%snm", stokes_component, wavelength_str); +#endif +} + +// Create spectral channel name for reflective spectrum +void EXRReflectiveChannelName(char *buffer, size_t buffer_size, + float wavelength_nm) { + if (!buffer || buffer_size < 32) return; + + char wavelength_str[32]; + EXRFormatWavelength(wavelength_str, sizeof(wavelength_str), wavelength_nm); + +#ifdef _MSC_VER + sprintf_s(buffer, buffer_size, "T.%snm", wavelength_str); +#else + snprintf(buffer, buffer_size, "T.%snm", wavelength_str); +#endif +} + +// Parse wavelength from spectral channel name +float EXRParseSpectralChannelWavelength(const char *channel_name) { + if (!channel_name) return -1.0f; + + const char *p = channel_name; + + // Skip prefix: "S{n}." or "T." + if (*p == 'S' && p[1] >= '0' && p[1] <= '3' && p[2] == '.') { + p += 3; + } else if (*p == 'T' && p[1] == '.') { + p += 2; + } else { + return -1.0f; + } + + // Parse wavelength with comma as decimal separator + // Format: "550,000000nm" + char wavelength_str[64]; + size_t len = 0; + while (*p && *p != 'n' && len < sizeof(wavelength_str) - 1) { + wavelength_str[len++] = (*p == ',') ? '.' 
: *p; + p++; + } + wavelength_str[len] = '\0'; + + // Check for "nm" suffix + if (*p != 'n' || p[1] != 'm') { + return -1.0f; + } + + return static_cast(atof(wavelength_str)); +} + +// Get Stokes component from channel name +int EXRGetStokesComponent(const char *channel_name) { + if (!channel_name) return -1; + + if (channel_name[0] == 'S' && + channel_name[1] >= '0' && channel_name[1] <= '3' && + channel_name[2] == '.') { + return channel_name[1] - '0'; + } + + return -1; +} + +// Check if channel name is a spectral channel +int EXRIsSpectralChannel(const char *channel_name) { + if (!channel_name) return 0; + + // Check for "S{n}.{wavelength}nm" pattern + if (channel_name[0] == 'S' && + channel_name[1] >= '0' && channel_name[1] <= '3' && + channel_name[2] == '.') { + return EXRParseSpectralChannelWavelength(channel_name) > 0.0f ? 1 : 0; + } + + // Check for "T.{wavelength}nm" pattern + if (channel_name[0] == 'T' && channel_name[1] == '.') { + return EXRParseSpectralChannelWavelength(channel_name) > 0.0f ? 
1 : 0; + } + + return 0; +} + +// Helper to find custom attribute by name +static const EXRAttribute* FindCustomAttribute(const EXRHeader *exr_header, + const char *name) { + if (!exr_header || !name) return NULL; + + for (int i = 0; i < exr_header->num_custom_attributes; i++) { + if (strcmp(exr_header->custom_attributes[i].name, name) == 0) { + return &exr_header->custom_attributes[i]; + } + } + return NULL; +} + +// Get spectrum type from EXR header +int EXRGetSpectrumType(const EXRHeader *exr_header) { + if (!exr_header) return -1; + + // Check for spectralLayoutVersion attribute + const EXRAttribute *layout_attr = FindCustomAttribute(exr_header, "spectralLayoutVersion"); + if (!layout_attr) return -1; + + // Check channel names to determine type + int has_stokes = 0; + int has_reflective = 0; + int has_emissive = 0; + + for (int i = 0; i < exr_header->num_channels; i++) { + const char *name = exr_header->channels[i].name; + if (name[0] == 'T' && name[1] == '.') { + has_reflective = 1; + } else if (name[0] == 'S' && name[1] >= '0' && name[1] <= '3' && name[2] == '.') { + has_emissive = 1; + if (name[1] != '0') { + has_stokes = 1; + } + } + } + + if (has_reflective) return TINYEXR_SPECTRUM_REFLECTIVE; + if (has_stokes) return TINYEXR_SPECTRUM_POLARISED; + if (has_emissive) return TINYEXR_SPECTRUM_EMISSIVE; + + return -1; +} + +// Get wavelengths from EXR header channels +int EXRGetWavelengths(const EXRHeader *exr_header, + float *wavelengths, int max_wavelengths) { + if (!exr_header || !wavelengths || max_wavelengths <= 0) return 0; + + int count = 0; + + for (int i = 0; i < exr_header->num_channels && count < max_wavelengths; i++) { + float wl = EXRParseSpectralChannelWavelength(exr_header->channels[i].name); + if (wl > 0.0f) { + // Check if wavelength already in list + int found = 0; + for (int j = 0; j < count; j++) { + if (std::fabs(static_cast(wavelengths[j] - wl)) < 0.01) { + found = 1; + break; + } + } + if (!found) { + wavelengths[count++] = wl; + } + } + 
} + + // Sort wavelengths + for (int i = 0; i < count - 1; i++) { + for (int j = i + 1; j < count; j++) { + if (wavelengths[i] > wavelengths[j]) { + float tmp = wavelengths[i]; + wavelengths[i] = wavelengths[j]; + wavelengths[j] = tmp; + } + } + } + + return count; +} + +// Get spectral units from EXR header +const char* EXRGetSpectralUnits(const EXRHeader *exr_header) { + if (!exr_header) return NULL; + + // Try ROOT/units first (spectral-exr format) + const EXRAttribute *attr = FindCustomAttribute(exr_header, "ROOT/units"); + if (attr && attr->value && attr->size > 0) { + return reinterpret_cast(attr->value); + } + + // Try emissiveUnits + attr = FindCustomAttribute(exr_header, "emissiveUnits"); + if (attr && attr->value && attr->size > 0) { + return reinterpret_cast(attr->value); + } + + return NULL; +} + +// Helper to add a string attribute +static int AddStringAttribute(EXRHeader *exr_header, const char *name, const char *value) { + if (!exr_header || !name || !value) return TINYEXR_ERROR_INVALID_ARGUMENT; + + int new_count = exr_header->num_custom_attributes + 1; + if (new_count > TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + return TINYEXR_ERROR_DATA_TOO_LARGE; + } + + // Reallocate attributes array + EXRAttribute *new_attrs = static_cast( + realloc(exr_header->custom_attributes, + sizeof(EXRAttribute) * static_cast(new_count))); + if (!new_attrs) { + return TINYEXR_ERROR_INVALID_DATA; + } + + exr_header->custom_attributes = new_attrs; + EXRAttribute *attr = &exr_header->custom_attributes[exr_header->num_custom_attributes]; + + // Initialize the new attribute + memset(attr, 0, sizeof(EXRAttribute)); + +#ifdef _MSC_VER + strncpy_s(attr->name, sizeof(attr->name), name, 255); + strncpy_s(attr->type, sizeof(attr->type), "string", 255); +#else + strncpy(attr->name, name, 255); + attr->name[255] = '\0'; + strncpy(attr->type, "string", 255); + attr->type[255] = '\0'; +#endif + + size_t value_len = strlen(value) + 1; // Include null terminator + attr->value = 
static_cast(malloc(value_len)); + if (!attr->value) { + return TINYEXR_ERROR_INVALID_DATA; + } + memcpy(attr->value, value, value_len); + attr->size = static_cast(value_len); + + exr_header->num_custom_attributes = new_count; + + return TINYEXR_SUCCESS; +} + +// Helper to add an int attribute +static int AddIntAttribute(EXRHeader *exr_header, const char *name, int value) { + if (!exr_header || !name) return TINYEXR_ERROR_INVALID_ARGUMENT; + + int new_count = exr_header->num_custom_attributes + 1; + if (new_count > TINYEXR_MAX_CUSTOM_ATTRIBUTES) { + return TINYEXR_ERROR_DATA_TOO_LARGE; + } + + // Reallocate attributes array + EXRAttribute *new_attrs = static_cast( + realloc(exr_header->custom_attributes, + sizeof(EXRAttribute) * static_cast(new_count))); + if (!new_attrs) { + return TINYEXR_ERROR_INVALID_DATA; + } + + exr_header->custom_attributes = new_attrs; + EXRAttribute *attr = &exr_header->custom_attributes[exr_header->num_custom_attributes]; + + // Initialize the new attribute + memset(attr, 0, sizeof(EXRAttribute)); + +#ifdef _MSC_VER + strncpy_s(attr->name, sizeof(attr->name), name, 255); + strncpy_s(attr->type, sizeof(attr->type), "int", 255); +#else + strncpy(attr->name, name, 255); + attr->name[255] = '\0'; + strncpy(attr->type, "int", 255); + attr->type[255] = '\0'; +#endif + + attr->value = static_cast(malloc(sizeof(int))); + if (!attr->value) { + return TINYEXR_ERROR_INVALID_DATA; + } + memcpy(attr->value, &value, sizeof(int)); + attr->size = sizeof(int); + + exr_header->num_custom_attributes = new_count; + + return TINYEXR_SUCCESS; +} + +// Set spectral attributes on EXR header +int EXRSetSpectralAttributes(EXRHeader *exr_header, + int spectrum_type, + const char *units) { + if (!exr_header) return TINYEXR_ERROR_INVALID_ARGUMENT; + + int ret; + + // Add spectralLayoutVersion (always "1.0") + ret = AddStringAttribute(exr_header, "spectralLayoutVersion", "1.0"); + if (ret != TINYEXR_SUCCESS) return ret; + + // Add units attribute based on spectrum type 
+ if (units && strlen(units) > 0) { + if (spectrum_type == TINYEXR_SPECTRUM_REFLECTIVE) { + ret = AddStringAttribute(exr_header, "ROOT/units", units); + } else { + ret = AddStringAttribute(exr_header, "emissiveUnits", units); + } + if (ret != TINYEXR_SUCCESS) return ret; + } + + // Add polarisation handedness for polarised images + if (spectrum_type == TINYEXR_SPECTRUM_POLARISED) { + ret = AddStringAttribute(exr_header, "polarisationHandedness", "left"); + if (ret != TINYEXR_SUCCESS) return ret; + } + + return TINYEXR_SUCCESS; +} + +// Check if file contains spectral data +int IsSpectralEXR(const char *filename) { + EXRVersion version; + int ret = ParseEXRVersionFromFile(&version, filename); + if (ret != TINYEXR_SUCCESS) return ret; + + const char *err = NULL; + EXRHeader header; + InitEXRHeader(&header); + + ret = ParseEXRHeaderFromFile(&header, &version, filename, &err); + if (ret != TINYEXR_SUCCESS) { + if (err) FreeEXRErrorMessage(err); + return ret; + } + + // Check for spectralLayoutVersion attribute + int is_spectral = (FindCustomAttribute(&header, "spectralLayoutVersion") != NULL); + + FreeEXRHeader(&header); + + return is_spectral ? TINYEXR_SUCCESS : TINYEXR_ERROR_INVALID_DATA; +} + +// Check if memory contains spectral EXR data +int IsSpectralEXRFromMemory(const unsigned char *memory, size_t size) { + if (!memory || size < 8) return TINYEXR_ERROR_INVALID_DATA; + + EXRVersion version; + int ret = ParseEXRVersionFromMemory(&version, memory, size); + if (ret != TINYEXR_SUCCESS) return ret; + + const char *err = NULL; + EXRHeader header; + InitEXRHeader(&header); + + ret = ParseEXRHeaderFromMemory(&header, &version, memory, size, &err); + if (ret != TINYEXR_SUCCESS) { + if (err) FreeEXRErrorMessage(err); + return ret; + } + + // Check for spectralLayoutVersion attribute + int is_spectral = (FindCustomAttribute(&header, "spectralLayoutVersion") != NULL); + + FreeEXRHeader(&header); + + return is_spectral ? 
TINYEXR_SUCCESS : TINYEXR_ERROR_INVALID_DATA; +} + #ifdef __clang__ // zero-as-null-pointer-constant #pragma clang diagnostic pop diff --git a/encoder/basisu_enc.cpp b/encoder/basisu_enc.cpp index 83631b9..e92b4b8 100644 --- a/encoder/basisu_enc.cpp +++ b/encoder/basisu_enc.cpp @@ -28,9 +28,7 @@ #include -#ifndef TINYEXR_USE_ZFP -#define TINYEXR_USE_ZFP (1) -#endif +#define TINYEXR_USE_MINIZ (0) #include "3rdparty/tinyexr.h" #ifndef MINIZ_HEADER_FILE_ONLY @@ -3439,7 +3437,7 @@ namespace basisu return false; return write_vec_to_file(pFilename, file_data); } - + bool read_exr(const char* pFilename, imagef& img, int& n_chans) { n_chans = 0; @@ -3447,8 +3445,8 @@ namespace basisu int width = 0, height = 0; float* out_rgba = nullptr; const char* err = nullptr; - - int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err, &n_chans); + + int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err); if (status != 0) { error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? 
err : "?"); @@ -3457,7 +3455,7 @@ namespace basisu return false; } - const uint32_t MAX_SUPPORTED_DIM = 65536; + const uint32_t MAX_SUPPORTED_DIM = 32768; if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM)) { error_printf("Invalid dimensions of .EXR image \"%s\"!\n", pFilename); @@ -3466,32 +3464,60 @@ namespace basisu } img.resize(width, height); + + memcpy((void*)img.get_ptr(), out_rgba, static_cast(sizeof(float) * 4 * img.get_total_pixels())); - if (n_chans == 1) + free(out_rgba); + out_rgba = nullptr; + + uint32_t total_all_same_rgba = 0, total_all_same_rgb = 0, total_has_alpha = 0; + + for (int y = 0; y < height; y++) { - const float* pSrc = out_rgba; - vec4F* pDst = img.get_ptr(); - - for (int y = 0; y < height; y++) + for (int x = 0; x < width; x++) { - for (int x = 0; x < width; x++) - { - (*pDst)[0] = pSrc[0]; - (*pDst)[1] = pSrc[1]; - (*pDst)[2] = pSrc[2]; - (*pDst)[3] = 1.0f; + const vec4F& p = img(x, y); - pSrc += 4; - ++pDst; - } - } + if ((p[0] == p[1]) && (p[0] == p[2])) + total_all_same_rgb++; + + const float a = p[3]; + + if ((a == p[0]) && (a == p[1]) && (a == p[2])) + total_all_same_rgba++; + + if (a != 1.0f) + total_has_alpha++; + + } // x + } // y + + const uint32_t total_pixels = width * height; + if (total_all_same_rgba == total_pixels) + { + // TinyEXR loads single channel EXR images into all output channels (including alpha) - assume they are luminance and fix our alpha. + // Odds are this is an opaque luminance-only image, not a true alpha channel image. (As of early 2026 we don't support any HDR format with alpha, anyway.) 
+ for (int y = 0; y < height; y++) + for (int x = 0; x < width; x++) + img(x, y)[3] = 1.0f; + + n_chans = 1; + } + else if (total_has_alpha) + { + n_chans = 4; + } + else if (total_all_same_rgb == total_pixels) + { + n_chans = 1; } else { - memcpy((void *)img.get_ptr(), out_rgba, static_cast(sizeof(float) * 4 * img.get_total_pixels())); + n_chans = 3; } - free(out_rgba); + //fmt_printf("Number of detected EXR channels: {}\n", n_chans); + return true; } @@ -3513,6 +3539,8 @@ namespace basisu memcpy((void *)img.get_ptr(), out_rgba, width * height * sizeof(float) * 4); free(out_rgba); + // TODO: detect luminance-only etc. + return true; } diff --git a/encoder/basisu_tinyexr.cpp b/encoder/basisu_tinyexr.cpp index 9606914..6d3828a 100644 --- a/encoder/basisu_tinyexr.cpp +++ b/encoder/basisu_tinyexr.cpp @@ -5,34 +5,27 @@ #endif #endif +// Pull in our local fork of the miniz library. (Binomial wrote the original miniz library. Basisu was tested with this specific version.) #define MINIZ_HEADER_FILE_ONLY #define MINIZ_NO_ZLIB_COMPATIBLE_NAMES #include "basisu_miniz.h" -// Force tinyexr to use zlib-style compression API's, then we'll direct them to our own customized copy of miniz. (Binomial wrote the original miniz library.) -// This allows us to use tinyexr.h without modify it at all, or relying on zlib. +// A bit of a hack to force tinyexr to use plain zlib-style compression API's, then we'll direct them to our own customized copy of miniz with #define's. +// This allows us to use tinyexr.h without modifying it at all, or relying on zlib, or pulling in a system-wide miniz dependency. +// This assumes tinyexr.h doesn't include zlib.h (it doesn't: "Please include your own zlib-compatible API header before...") +// (Time will tell how fragile this is in reality.) 
#define TINYEXR_USE_MINIZ (0) -enum { Z_OK = 0, Z_STREAM_END = 1, Z_NEED_DICT = 2, Z_ERRNO = -1, Z_STREAM_ERROR = -2, Z_DATA_ERROR = -3, Z_MEM_ERROR = -4, Z_BUF_ERROR = -5, Z_VERSION_ERROR = -6, Z_PARAM_ERROR = -10000 }; -typedef unsigned long uLongf; -typedef unsigned long uLong; +#define Z_OK buminiz::MZ_OK +#define uLong buminiz::mz_ulong +#define uLongf buminiz::mz_ulong + typedef unsigned char Byte; typedef Byte Bytef; -uLong compressBound(uLong src_size) -{ - return buminiz::mz_compressBound(src_size); -} - -int compress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLen) -{ - return buminiz::mz_compress(dest, destLen, source, sourceLen); -} - -int uncompress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLen) -{ - return buminiz::mz_uncompress(dest, destLen, source, sourceLen); -} +#define compressBound buminiz::mz_compressBound +#define compress buminiz::mz_compress +#define uncompress buminiz::mz_uncompress #ifdef _MSC_VER #pragma warning (disable: 4060) @@ -40,6 +33,7 @@ int uncompress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLe #pragma warning (disable: 4245) #pragma warning (disable: 4505) #pragma warning (disable: 4702) +#pragma warning (disable: 4530) // warning C4530: C++ exception handler used, but unwind semantics are not enabled. 
Specify /EHsc #endif #define TINYEXR_IMPLEMENTATION diff --git a/encoder_lib/encoder_lib.vcxproj b/encoder_lib/encoder_lib.vcxproj index 6318f21..9d4c1b9 100644 --- a/encoder_lib/encoder_lib.vcxproj +++ b/encoder_lib/encoder_lib.vcxproj @@ -28,11 +28,11 @@ - + diff --git a/encoder_lib/encoder_lib.vcxproj.filters b/encoder_lib/encoder_lib.vcxproj.filters index 80e8fd0..8d0ad48 100644 --- a/encoder_lib/encoder_lib.vcxproj.filters +++ b/encoder_lib/encoder_lib.vcxproj.filters @@ -84,9 +84,6 @@ Source Files\encoder - - Source Files\encoder\3rdparty - Source Files\encoder\3rdparty @@ -105,6 +102,9 @@ Source Files\encoder + + Source Files\encoder + diff --git a/webgl/encoder/CMakeLists.txt b/webgl/encoder/CMakeLists.txt index 91dcd3d..8027b8e 100644 --- a/webgl/encoder/CMakeLists.txt +++ b/webgl/encoder/CMakeLists.txt @@ -67,7 +67,7 @@ if(EMSCRIPTEN) ../../encoder/basisu_astc_hdr_common.cpp ../../encoder/basisu_astc_ldr_common.cpp ../../encoder/basisu_astc_ldr_encode.cpp - ../../encoder/3rdparty/tinyexr.cpp + ../../encoder/basisu_tinyexr.cpp ) if(KTX2_ZSTANDARD) list(APPEND SRC_LIST ../../zstd/zstd.c)