From 542ed6555dffc31c2ed2b77733f9603a3346cb18 Mon Sep 17 00:00:00 2001
From: Richard Geldreich <richgel99@protonmail.com>
Date: Tue, 7 Apr 2026 16:30:57 -0400
Subject: [PATCH] tinyexr: upgrading to latest library version. We no longer
 modify tinyexr, i.e. the tinyexr.h header we're using is now completely
 standard. We direct tinyexr's zlib calls to our fork of miniz internally.

---
 CMakeLists.txt                          |    2 +-
 encoder/3rdparty/tinyexr.h              | 2295 +++++++++++++++++++++--
 encoder/basisu_enc.cpp                  |   76 +-
 encoder/basisu_tinyexr.cpp              |   32 +-
 encoder_lib/encoder_lib.vcxproj         |    2 +-
 encoder_lib/encoder_lib.vcxproj.filters |    6 +-
 webgl/encoder/CMakeLists.txt            |    2 +-
 7 files changed, 2224 insertions(+), 191 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a91f23b..23ccb14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -247,7 +247,7 @@ set(ENCODER_LIB_SRC_LIST
     encoder/basisu_astc_hdr_common.cpp
     encoder/basisu_astc_ldr_common.cpp
     encoder/basisu_astc_ldr_encode.cpp
-    encoder/3rdparty/tinyexr.cpp
+    encoder/basisu_tinyexr.cpp
     transcoder/basisu_transcoder.cpp
     encoder/basisu_astc_hdr_6x6_enc.h
     encoder/basisu_astc_hdr_common.h
diff --git a/encoder/3rdparty/tinyexr.h b/encoder/3rdparty/tinyexr.h
index 2b759ee..dfd7164 100644
--- a/encoder/3rdparty/tinyexr.h
+++ b/encoder/3rdparty/tinyexr.h
@@ -1,5 +1,3 @@
-// rg 8/23/2024: I fixed some minor undefined behavior in this module (signed 32-bit left shifts).
-
 #ifndef TINYEXR_H_
 #define TINYEXR_H_
 /*
@@ -109,9 +107,6 @@ extern "C" {
 // required if this flag is 0 and TINYEXR_USE_STB_ZLIB is 0.
 #ifndef TINYEXR_USE_MINIZ
 #define TINYEXR_USE_MINIZ (1)
-#ifndef MINIZ_HEADER_FILE_ONLY
-#define MINIZ_HEADER_FILE_ONLY (1)
-#endif
 #endif
 
 // Use the ZIP implementation of stb_image.h and stb_image_write.h.
@@ -136,7 +131,11 @@ extern "C" {
 
 #ifndef TINYEXR_USE_THREAD
 #define TINYEXR_USE_THREAD (0)  // No threaded loading.
-// http://computation.llnl.gov/projects/floating-point-compression
+#else
+// When using threading a reduced custom upperbound can be specified by setting TINYEXR_MAX_THREADS
+#ifndef TINYEXR_MAX_THREADS // if not defined define it as 0 meaning upper limit is taken from hardware_concurrency()
+#define TINYEXR_MAX_THREADS (0)
+#endif
 #endif
 
 #ifndef TINYEXR_USE_OPENMP
@@ -147,6 +146,41 @@ extern "C" {
 #endif
 #endif
 
+#ifndef TINYEXR_USE_COMPILER_FP16
+#define TINYEXR_USE_COMPILER_FP16 (0)
+#endif
+
+#if TINYEXR_USE_COMPILER_FP16
+#ifndef _MSC_VER
+#if defined( __GNUC__ ) || defined( __clang__ )
+#if defined( __SSE2__ )
+#if ( __GNUC__ > 11 ) || ( __clang_major__ > 14 )
+#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
+#define __STDC_WANT_IEC_60559_TYPES_EXT__
+#endif
+#include <float.h>
+#include <math.h>
+#define TINYEXR_FP16_COMPILER_TYPE _Float16
+#endif
+#endif
+#if defined( __ARM_NEON__ ) || defined( __ARM_NEON )
+#define TINYEXR_FP16_COMPILER_TYPE __fp16
+#endif
+#endif
+#else
+#if (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
+#include <intrin.h>
+#define TINYEXR_FP16_COMPILER_TYPE uint16_t
+#endif
+#endif
+#endif
+
+#ifdef TINYEXR_FP16_COMPILER_TYPE
+#define TINYEXR_HAS_FP16_COMPILER_TYPE (1)
+#else
+#define TINYEXR_HAS_FP16_COMPILER_TYPE (0)
+#endif
+
 #define TINYEXR_SUCCESS (0)
 #define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1)
 #define TINYEXR_ERROR_INVALID_EXR_VERSION (-2)
@@ -178,6 +212,11 @@ extern "C" {
 #define TINYEXR_COMPRESSIONTYPE_ZIPS (2)
 #define TINYEXR_COMPRESSIONTYPE_ZIP (3)
 #define TINYEXR_COMPRESSIONTYPE_PIZ (4)
+#define TINYEXR_COMPRESSIONTYPE_PXR24 (5)
+#define TINYEXR_COMPRESSIONTYPE_B44 (6)
+#define TINYEXR_COMPRESSIONTYPE_B44A (7)
+#define TINYEXR_COMPRESSIONTYPE_DWAA (8)   // Not yet supported
+#define TINYEXR_COMPRESSIONTYPE_DWAB (9)   // Not yet supported
 #define TINYEXR_COMPRESSIONTYPE_ZFP (128)  // TinyEXR extension
 
 #define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0)
@@ -191,6 +230,12 @@ extern "C" {
 #define TINYEXR_TILE_ROUND_DOWN (0)
 #define TINYEXR_TILE_ROUND_UP (1)
 
+// Spectral EXR types (based on JCGT 2021 paper and spectral-exr format)
+// https://jcgt.org/published/0010/03/01/
+#define TINYEXR_SPECTRUM_REFLECTIVE (0)   // T.{wavelength}nm channels
+#define TINYEXR_SPECTRUM_EMISSIVE (1)     // S0.{wavelength}nm channels
+#define TINYEXR_SPECTRUM_POLARISED (2)    // S0-S3.{wavelength}nm channels
+
 typedef struct TEXRVersion {
   int version;    // this must be 2
   // tile format image;
@@ -346,7 +391,7 @@ extern int LoadEXR(float **out_rgba, int *width, int *height,
 // the function will return `TINYEXR_ERROR_LAYER_NOT_FOUND`.
 extern int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
                             const char *filename, const char *layer_name,
-                            const char **err, int *num_chans = NULL);
+                            const char **err);
 
 //
 // Get layer infos from EXR file.
@@ -391,7 +436,7 @@ extern int IsEXRFromMemory(const unsigned char *memory, size_t size);
 // error
 extern int SaveEXRToMemory(const float *data, const int width, const int height,
                    const int components, const int save_as_fp16,
-                   const unsigned char **buffer, const char **err);
+                   unsigned char **buffer, const char **err);
 
 // @deprecated { Not recommended, but handy to use. }
 // Saves single-frame OpenEXR image to a buffer. Assume EXR image contains RGB(A) channels.
@@ -599,6 +644,61 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
                              const unsigned char *memory, size_t size,
                              const char **err);
 
+// Spectral EXR API (based on JCGT 2021 paper and spectral-exr format)
+// https://jcgt.org/published/0010/03/01/
+// https://github.com/afichet/spectral-exr
+
+// Check if an EXR file contains spectral data (has spectralLayoutVersion attribute)
+// Returns TINYEXR_SUCCESS if spectral, TINYEXR_ERROR_INVALID_DATA if not
+extern int IsSpectralEXR(const char *filename);
+extern int IsSpectralEXRFromMemory(const unsigned char *memory, size_t size);
+
+// Get spectrum type from EXR header (TINYEXR_SPECTRUM_REFLECTIVE/EMISSIVE/POLARISED)
+// Returns spectrum type, or -1 if not a spectral EXR
+extern int EXRGetSpectrumType(const EXRHeader *exr_header);
+
+// Format wavelength with European decimal convention (comma as separator)
+// Output format: "550,000000" for 550.0nm
+// buffer must be at least 32 bytes
+extern void EXRFormatWavelength(char *buffer, size_t buffer_size, float wavelength_nm);
+
+// Create spectral channel name
+// For emissive: "S{stokes}.{wavelength}nm" (e.g., "S0.550,000000nm")
+// For reflective: "T.{wavelength}nm" (e.g., "T.550,000000nm")
+// buffer must be at least 64 bytes
+extern void EXRSpectralChannelName(char *buffer, size_t buffer_size,
+                                   float wavelength_nm, int stokes_component);
+extern void EXRReflectiveChannelName(char *buffer, size_t buffer_size,
+                                     float wavelength_nm);
+
+// Parse wavelength from spectral channel name
+// Returns wavelength in nm, or -1.0 if not a valid spectral channel name
+extern float EXRParseSpectralChannelWavelength(const char *channel_name);
+
+// Get Stokes component from channel name (0-3, or -1 if not polarised/invalid)
+extern int EXRGetStokesComponent(const char *channel_name);
+
+// Check if channel name is a spectral channel (S{n}.{wavelength}nm or T.{wavelength}nm)
+extern int EXRIsSpectralChannel(const char *channel_name);
+
+// Get wavelengths from EXR header channels
+// Returns number of unique wavelengths found
+// wavelengths array must be pre-allocated, max_wavelengths is its size
+extern int EXRGetWavelengths(const EXRHeader *exr_header,
+                             float *wavelengths, int max_wavelengths);
+
+// Helper to add spectral attributes to an EXRHeader
+// This sets spectralLayoutVersion and ROOT/units (or emissiveUnits for emissive)
+// spectrum_type: TINYEXR_SPECTRUM_REFLECTIVE, EMISSIVE, or POLARISED
+// units: unit string (e.g., "W.m^-2.sr^-1.nm^-1" for radiance)
+extern int EXRSetSpectralAttributes(EXRHeader *exr_header,
+                                    int spectrum_type,
+                                    const char *units);
+
+// Get spectral units from EXR header
+// Returns NULL if not found, otherwise pointer to units string (valid until header is freed)
+extern const char* EXRGetSpectralUnits(const EXRHeader *exr_header);
+
 #ifdef __cplusplus
 }
 #endif
@@ -632,6 +732,7 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
 #endif
 
 #include <algorithm>
+#include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -644,6 +745,9 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
 #include <vector>
 #include <set>
 
+// Include Reader class with error stack for safer memory reading
+#include "exr_reader.hh"
+
 // https://stackoverflow.com/questions/5047971/how-do-i-check-for-c11-support
 #if __cplusplus > 199711L || (defined(_MSC_VER) && _MSC_VER >= 1900)
 #define TINYEXR_HAS_CXX11 (1)
@@ -664,7 +768,7 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
 #endif
 
 #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
-#include "../basisu_miniz.h"
+#include <miniz.h>
 #else
 //  Issue #46. Please include your own zlib-compatible API header before
 //  including `tinyexr.h`
@@ -776,7 +880,7 @@ static void SetWarningMessage(const std::string &msg, const char **warn) {
 
 static const int kEXRVersionSize = 8;
 
-static void cpy2(unsigned short *dst_val, const unsigned short *src_val) {
+static void inline cpy2(unsigned short *dst_val, const unsigned short *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -784,7 +888,7 @@ static void cpy2(unsigned short *dst_val, const unsigned short *src_val) {
   dst[1] = src[1];
 }
 
-static void swap2(unsigned short *val) {
+static void inline swap2(unsigned short *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -806,7 +910,7 @@ static void swap2(unsigned short *val) {
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
 #endif
-static void cpy4(int *dst_val, const int *src_val) {
+static void inline cpy4(int *dst_val, const int *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -816,7 +920,7 @@ static void cpy4(int *dst_val, const int *src_val) {
   dst[3] = src[3];
 }
 
-static void cpy4(unsigned int *dst_val, const unsigned int *src_val) {
+static void inline cpy4(unsigned int *dst_val, const unsigned int *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -826,7 +930,7 @@ static void cpy4(unsigned int *dst_val, const unsigned int *src_val) {
   dst[3] = src[3];
 }
 
-static void cpy4(float *dst_val, const float *src_val) {
+static void inline cpy4(float *dst_val, const float *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -843,7 +947,7 @@ static void cpy4(float *dst_val, const float *src_val) {
 #pragma GCC diagnostic pop
 #endif
 
-static void swap4(unsigned int *val) {
+static void inline swap4(unsigned int *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -858,7 +962,7 @@ static void swap4(unsigned int *val) {
 #endif
 }
 
-static void swap4(int *val) {
+static void inline swap4(int *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -873,7 +977,7 @@ static void swap4(int *val) {
 #endif
 }
 
-static void swap4(float *val) {
+static void inline swap4(float *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -889,7 +993,7 @@ static void swap4(float *val) {
 }
 
 #if 0
-static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) {
+static void inline cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -904,7 +1008,7 @@ static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64
 }
 #endif
 
-static void swap8(tinyexr::tinyexr_uint64 *val) {
+static void inline swap8(tinyexr::tinyexr_uint64 *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -924,6 +1028,11 @@ static void swap8(tinyexr::tinyexr_uint64 *val) {
 }
 
 // https://gist.github.com/rygorous/2156668
+#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0)
+union FP32 {
+  float f;
+};
+#else
 union FP32 {
   unsigned int u;
   float f;
@@ -939,12 +1048,21 @@ union FP32 {
 #endif
   } s;
 };
+#endif
 
 #ifdef __clang__
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wpadded"
 #endif
 
+#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0)
+union FP16 {
+  TINYEXR_FP16_COMPILER_TYPE f;
+  unsigned short u;
+};
+
+#else
+
 union FP16 {
   unsigned short u;
   struct {
@@ -959,11 +1077,32 @@ union FP16 {
 #endif
   } s;
 };
+#endif
 
 #ifdef __clang__
 #pragma clang diagnostic pop
 #endif
 
+#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0)
+static inline FP32 half_to_float(FP16 h) {
+  FP32 o;
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
+   o.f =_mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(static_cast<int> (h.u))));
+#else
+   o.f = static_cast<float> (h.f);
+#endif
+  return o;
+}
+static inline FP16 float_to_half_full(FP32 f) {
+  FP16 o;
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
+  o.f  = static_cast<TINYEXR_FP16_COMPILER_TYPE> (_mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f.f), _MM_FROUND_CUR_DIRECTION)));
+#else
+  o.f = static_cast<TINYEXR_FP16_COMPILER_TYPE> (f.f);
+#endif
+  return o;
+}
+#else
 static FP32 half_to_float(FP16 h) {
   static const FP32 magic = {113 << 23};
   static const unsigned int shifted_exp = 0x7c00
@@ -1023,7 +1162,7 @@ static FP16 float_to_half_full(FP32 f) {
   o.s.Sign = f.s.Sign;
   return o;
 }
-
+#endif
 // NOTE: From OpenEXR code
 // #define IMF_INCREASING_Y  0
 // #define IMF_DECREASING_Y  1
@@ -1367,11 +1506,11 @@ static bool CompressZip(unsigned char *dst,
   // Compress the data using miniz
   //
 
-  buminiz::mz_ulong outSize = buminiz::mz_compressBound(src_size);
-  int ret = buminiz::mz_compress(
+  mz_ulong outSize = mz_compressBound(src_size);
+  int ret = mz_compress(
       dst, &outSize, static_cast<const unsigned char *>(&tmpBuf.at(0)),
       src_size);
-  if (ret != buminiz::MZ_OK) {
+  if (ret != MZ_OK) {
     return false;
   }
 
@@ -1396,7 +1535,7 @@ static bool CompressZip(unsigned char *dst,
 
   memcpy(dst, ret, outSize);
   free(ret);
-
+  
   compressedSize = outSize;
 #else
   uLong outSize = compressBound(static_cast<uLong>(src_size));
@@ -1431,8 +1570,8 @@ static bool DecompressZip(unsigned char *dst,
 
 #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
   int ret =
-      buminiz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
-  if (buminiz::MZ_OK != ret) {
+      mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
+  if (MZ_OK != ret) {
     return false;
   }
 #elif TINYEXR_USE_STB_ZLIB
@@ -2144,7 +2283,7 @@ inline void outputBits(int nBits, long long bits, long long &c, int &lc,
 
 inline long long getBits(int nBits, long long &c, int &lc, const char *&in) {
   while (lc < nBits) {
-    c = (long long)((unsigned long long)c << 8) | *(reinterpret_cast<const unsigned char *>(in++));
+    c = (c << 8) | *(reinterpret_cast<const unsigned char *>(in++));
     lc += 8;
   }
 
@@ -2720,7 +2859,7 @@ static int hufEncode            // return: output size (in bits)
 
 #define getChar(c, lc, in)                   \
   {                                          \
-    c = ((unsigned long long)c << 8) | *(unsigned char *)(in++); \
+    c = (c << 8) | *(unsigned char *)(in++); \
     lc += 8;                                 \
   }
 
@@ -2882,7 +3021,7 @@ static bool hufDecode(const long long *hcode,  // i : encoding table
   lc -= i;
 
   while (lc > 0) {
-    const HufDec pl = hdecod[((unsigned long long)c << (HUF_DECBITS - lc)) & HUF_DECMASK];
+    const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK];
 
     if (pl.len) {
       lc -= pl.len;
@@ -3357,6 +3496,1112 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr,
 }
 #endif  // TINYEXR_USE_PIZ
 
+// ============================================================================
+// PXR24 decompression
+// ============================================================================
+
+// PXR24 stores 32-bit floats as 24-bit values (truncates 8 mantissa bits)
+// HALF and UINT are stored without modification
+static bool DecompressPxr24(unsigned char *outPtr, size_t outBufSize,
+                            const unsigned char *inPtr, size_t inLen,
+                            int data_width, int num_lines,
+                            size_t num_channels,
+                            const EXRChannelInfo *channels) {
+  // Calculate the PXR24 data size after zlib decompression
+  // PXR24 stores HALF as 2 bytes, UINT as 4 bytes, FLOAT as 3 bytes
+  // Data is stored with byte plane separation and delta encoding
+  size_t pxr24_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    int ch_width = data_width;  // V1 doesn't handle subsampling in decompression
+    int ch_pixels = ch_width * num_lines;
+
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+      pxr24_size += static_cast<size_t>(ch_pixels) * 4;
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      pxr24_size += static_cast<size_t>(ch_pixels) * 2;
+    } else {  // FLOAT
+      pxr24_size += static_cast<size_t>(ch_pixels) * 3;
+    }
+  }
+
+  // Allocate buffer for zlib-decompressed PXR24 data
+  std::vector<unsigned char> pxr24_buf(pxr24_size);
+  size_t uncomp_size = pxr24_size;
+
+  // PXR24 uses raw zlib compression
+  if (pxr24_size == inLen) {
+    // Uncompressed - copy directly
+    memcpy(pxr24_buf.data(), inPtr, inLen);
+  } else {
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+    mz_ulong dest_len = static_cast<mz_ulong>(pxr24_size);
+    int ret = mz_uncompress(pxr24_buf.data(), &dest_len, inPtr, static_cast<mz_ulong>(inLen));
+    if (ret != MZ_OK) {
+      return false;
+    }
+    uncomp_size = static_cast<size_t>(dest_len);
+#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1)
+    int outLen = stbi_zlib_decode_buffer(reinterpret_cast<char*>(pxr24_buf.data()),
+        static_cast<int>(pxr24_size), reinterpret_cast<const char*>(inPtr), static_cast<int>(inLen));
+    if (outLen < 0) {
+      return false;
+    }
+    uncomp_size = static_cast<size_t>(outLen);
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+    uint64_t outLen = 0;
+    nanoz_status_t ret = nanoz_uncompress(inPtr, inLen, pxr24_size, pxr24_buf.data(), &outLen);
+    if (ret != NANOZ_SUCCESS) {
+      return false;
+    }
+    uncomp_size = static_cast<size_t>(outLen);
+#else
+    uLongf dest_len = static_cast<uLongf>(pxr24_size);
+    int ret = uncompress(pxr24_buf.data(), &dest_len, inPtr, static_cast<uLong>(inLen));
+    if (ret != Z_OK) {
+      return false;
+    }
+    uncomp_size = static_cast<size_t>(dest_len);
+#endif
+  }
+
+  if (uncomp_size != pxr24_size) {
+    return false;
+  }
+
+  // Convert PXR24 format to standard EXR format
+  // PXR24 uses:
+  // 1. Byte plane separation: bytes are stored by plane (all high bytes, then next-high, etc.)
+  // 2. Delta encoding: each pixel is stored as difference from previous pixel
+  const unsigned char* in_p = pxr24_buf.data();
+  unsigned char* out_p = outPtr;
+
+  for (int line = 0; line < num_lines; line++) {
+    for (size_t c = 0; c < num_channels; c++) {
+      int w = data_width;
+
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        // UINT: 4 byte planes with delta encoding
+        const unsigned char* ptr0 = in_p;
+        const unsigned char* ptr1 = in_p + w;
+        const unsigned char* ptr2 = in_p + w * 2;
+        const unsigned char* ptr3 = in_p + w * 3;
+        in_p += w * 4;
+
+        unsigned int pixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned int diff = (static_cast<unsigned int>(ptr0[x]) << 24) |
+                              (static_cast<unsigned int>(ptr1[x]) << 16) |
+                              (static_cast<unsigned int>(ptr2[x]) << 8) |
+                              (static_cast<unsigned int>(ptr3[x]));
+          pixel += diff;
+          memcpy(out_p, &pixel, 4);
+          out_p += 4;
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        // HALF: 2 byte planes with delta encoding
+        const unsigned char* ptr0 = in_p;
+        const unsigned char* ptr1 = in_p + w;
+        in_p += w * 2;
+
+        unsigned int pixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned int diff = (static_cast<unsigned int>(ptr0[x]) << 8) |
+                              (static_cast<unsigned int>(ptr1[x]));
+          pixel += diff;
+          unsigned short h = static_cast<unsigned short>(pixel);
+          memcpy(out_p, &h, 2);
+          out_p += 2;
+        }
+      } else {  // FLOAT
+        // FLOAT: 3 byte planes with delta encoding, expand to 32-bit
+        const unsigned char* ptr0 = in_p;
+        const unsigned char* ptr1 = in_p + w;
+        const unsigned char* ptr2 = in_p + w * 2;
+        in_p += w * 3;
+
+        unsigned int pixel = 0;
+        for (int x = 0; x < w; x++) {
+          // PXR24 stores 24-bit floats with delta encoding
+          // The diff is in the upper 24 bits
+          unsigned int diff = (static_cast<unsigned int>(ptr0[x]) << 24) |
+                              (static_cast<unsigned int>(ptr1[x]) << 16) |
+                              (static_cast<unsigned int>(ptr2[x]) << 8);
+          pixel += diff;
+          memcpy(out_p, &pixel, 4);
+          out_p += 4;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+// ============================================================================
+// B44/B44A decompression
+// ============================================================================
+
+// B44 compresses 4x4 blocks of HALF values to 14 bytes
+// B44A can compress flat regions to 3 bytes
+
+// B44 lookup tables
+// expTable: converts half-float value to exp(half/8) for p_linear channels
+// logTable: converts half-float value to 8*log(half) for p_linear channels
+// Note: These tables are primarily for luminance channels with p_linear attribute
+static unsigned short g_b44_exp_table[65536];
+static unsigned short g_b44_log_table[65536];
+static bool g_b44_tables_initialized = false;
+
+// Half-float conversion helpers for B44 table initialization
+static inline float B44HalfToFloat(unsigned short h) {
+  union { unsigned int i; float f; } u;
+
+  int s = (h >> 15) & 0x1;
+  int e = (h >> 10) & 0x1f;
+  int m = h & 0x3ff;
+
+  if (e == 0) {
+    if (m == 0) {
+      // Zero
+      u.i = s << 31;
+      return u.f;
+    }
+    // Denormal
+    float f = (float)m / 1024.0f;
+    f = f * (1.0f / 16384.0f);  // 2^-14
+    return s ? -f : f;
+  } else if (e == 31) {
+    // Inf or NaN
+    u.i = (s << 31) | 0x7f800000 | (m << 13);
+    return u.f;
+  }
+
+  // Normal
+  u.i = (s << 31) | ((e + 112) << 23) | (m << 13);
+  return u.f;
+}
+
+static inline unsigned short B44FloatToHalf(float f) {
+  union { unsigned int i; float f; } u;
+  u.f = f;
+
+  int s = (u.i >> 31) & 0x1;
+  int e = (u.i >> 23) & 0xff;
+  int m = u.i & 0x7fffff;
+
+  if (e == 0) {
+    return static_cast<unsigned short>(s << 15);  // Zero
+  } else if (e == 255) {
+    // Inf or NaN
+    return static_cast<unsigned short>((s << 15) | 0x7c00 | (m >> 13));
+  } else if (e < 113) {
+    // Too small - denormal or zero
+    if (e < 103) return static_cast<unsigned short>(s << 15);
+    m = (m | 0x800000) >> (114 - e);
+    return static_cast<unsigned short>((s << 15) | (m >> 13));
+  } else if (e > 142) {
+    // Too large - infinity
+    return static_cast<unsigned short>((s << 15) | 0x7c00);
+  }
+
+  return static_cast<unsigned short>((s << 15) | ((e - 112) << 10) | (m >> 13));
+}
+
+// Initialize B44 exp/log lookup tables (matches OpenEXR algorithm)
+static void InitB44Tables() {
+  if (g_b44_tables_initialized) return;
+
+  // Generate tables per OpenEXR's b44_table_init.c
+  for (int i = 0; i < 65536; i++) {
+    unsigned short x = static_cast<unsigned short>(i);
+
+    // expTable: convertFromLinear - exp(half / 8)
+    if ((x & 0x7c00) == 0x7c00) {
+      // infinity/nan -> 0
+      g_b44_exp_table[i] = 0;
+    } else if (x >= 0x558c && x < 0x8000) {
+      // >= 8 * log(HALF_MAX) -> HALF_MAX
+      g_b44_exp_table[i] = 0x7bff;
+    } else {
+      float f = B44HalfToFloat(x);
+      f = static_cast<float>(std::exp(static_cast<double>(f) / 8.0));
+      g_b44_exp_table[i] = B44FloatToHalf(f);
+    }
+
+    // logTable: convertToLinear - 8 * log(half)
+    if ((x & 0x7c00) == 0x7c00) {
+      // infinity/nan -> 0
+      g_b44_log_table[i] = 0;
+    } else if (x > 0x8000) {
+      // negative (excluding -0.0) -> 0
+      g_b44_log_table[i] = 0;
+    } else {
+      float f = B44HalfToFloat(x);
+      if (f <= 0.0f) {
+        g_b44_log_table[i] = 0;
+      } else {
+        f = static_cast<float>(8.0 * std::log(static_cast<double>(f)));
+        g_b44_log_table[i] = B44FloatToHalf(f);
+      }
+    }
+  }
+
+  g_b44_tables_initialized = true;
+}
+
+// Convert half to linear-log space (for p_linear channels)
+static inline unsigned short B44ConvertFromLinear(unsigned short h) {
+  return g_b44_exp_table[h];
+}
+
+// Convert linear-log back to half (for p_linear channels)
+static inline unsigned short B44ConvertToLinear(unsigned short h) {
+  return g_b44_log_table[h];
+}
+
+// Unpack one 4x4 block from B44 compressed 14 bytes (matches OpenEXR unpack14)
+static void UnpackB44Block(unsigned short dst[16], const unsigned char src[14]) {
+  // Extract t[0] (stored as ordered-magnitude value)
+  unsigned short s0 = (static_cast<unsigned short>(src[0]) << 8) | src[1];
+
+  // Extract shift and compute bias
+  unsigned short shift = src[2] >> 2;
+  unsigned short bias = static_cast<unsigned short>(0x20u << shift);
+
+  // Reconstruct t values using running differences
+  // Pattern: s[0]->s[4]->s[8]->s[12], then s[0]->s[1], s[4]->s[5], etc.
+
+  unsigned short s4 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s0) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[2]) << 4) |
+                               (static_cast<unsigned int>(src[3]) >> 4)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s8 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s4) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[3]) << 2) |
+                               (static_cast<unsigned int>(src[4]) >> 6)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s12 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s8) +
+    static_cast<unsigned int>(src[4] & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s1 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s0) +
+    static_cast<unsigned int>(src[5] >> 2) * (1u << shift) - bias);
+
+  unsigned short s5 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s4) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[5]) << 4) |
+                               (static_cast<unsigned int>(src[6]) >> 4)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s9 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s8) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[6]) << 2) |
+                               (static_cast<unsigned int>(src[7]) >> 6)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s13 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s12) +
+    static_cast<unsigned int>(src[7] & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s2 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s1) +
+    static_cast<unsigned int>(src[8] >> 2) * (1u << shift) - bias);
+
+  unsigned short s6 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s5) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[8]) << 4) |
+                               (static_cast<unsigned int>(src[9]) >> 4)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s10 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s9) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[9]) << 2) |
+                               (static_cast<unsigned int>(src[10]) >> 6)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s14 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s13) +
+    static_cast<unsigned int>(src[10] & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s3 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s2) +
+    static_cast<unsigned int>(src[11] >> 2) * (1u << shift) - bias);
+
+  unsigned short s7 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s6) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[11]) << 4) |
+                               (static_cast<unsigned int>(src[12]) >> 4)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s11 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s10) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[12]) << 2) |
+                               (static_cast<unsigned int>(src[13]) >> 6)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s15 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s14) +
+    static_cast<unsigned int>(src[13] & 0x3fu) * (1u << shift) - bias);
+
+  // Store t values
+  dst[0] = s0;   dst[1] = s1;   dst[2] = s2;   dst[3] = s3;
+  dst[4] = s4;   dst[5] = s5;   dst[6] = s6;   dst[7] = s7;
+  dst[8] = s8;   dst[9] = s9;   dst[10] = s10; dst[11] = s11;
+  dst[12] = s12; dst[13] = s13; dst[14] = s14; dst[15] = s15;
+
+  // Convert from ordered-magnitude to half-float
+  for (int i = 0; i < 16; i++) {
+    if (dst[i] & 0x8000) {
+      dst[i] &= 0x7fff;  // Positive: clear sign bit
+    } else {
+      dst[i] = ~dst[i];  // Negative: invert all bits
+    }
+  }
+}
+
+// Unpack a 3-byte flat block (all pixels same value)
+static void UnpackB44FlatBlock(unsigned short dst[16], const unsigned char src[3]) {
+  unsigned short t = (static_cast<unsigned short>(src[0]) << 8) | src[1];
+
+  // Convert from ordered-magnitude to half-float
+  unsigned short h;
+  if (t & 0x8000) {
+    h = t & 0x7fff;
+  } else {
+    h = ~t;
+  }
+
+  for (int i = 0; i < 16; i++) {
+    dst[i] = h;
+  }
+}
+
+static bool DecompressB44(unsigned char *outPtr, size_t outBufSize,
+                          const unsigned char *inPtr, size_t inLen,
+                          int data_width, int num_lines,
+                          size_t num_channels,
+                          const EXRChannelInfo *channels,
+                          bool is_b44a) {
+  (void)is_b44a;  // Flat block detection doesn't depend on B44/B44A for decoding
+  InitB44Tables();
+
+  // Validate that the output buffer is large enough for the decoded data.
+  // Use overflow-safe arithmetic: overflow in any multiplication or accumulation
+  // means the size cannot fit in memory, which is definitely > outBufSize.
+  {
+    size_t expected_out = 0;
+    for (size_t c = 0; c < num_channels; c++) {
+      int xs = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+      int ys = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+      size_t cw = static_cast<size_t>((data_width  + xs - 1) / xs);
+      size_t ch = static_cast<size_t>((num_lines   + ys - 1) / ys);
+      size_t bpp = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) ? 2u : 4u;
+      // Check cw * ch overflow
+      if (cw != 0 && ch > (SIZE_MAX / cw)) return false;
+      size_t pixels = cw * ch;
+      // Check pixels * bpp overflow
+      if (pixels > (SIZE_MAX / bpp)) return false;
+      size_t ch_total = pixels * bpp;
+      // Check accumulation overflow
+      if (expected_out > SIZE_MAX - ch_total) return false;
+      expected_out += ch_total;
+    }
+    if (expected_out > outBufSize) return false;
+  }
+
+  const unsigned char* in_p = inPtr;
+  const unsigned char* in_end = inPtr + inLen;
+
+  // First pass: decompress all channels into scratch buffers.
+  // For non-HALF channels, save the pointer and byte count so we can copy
+  // the raw data to the output buffer in the second pass.
+  std::vector<std::vector<unsigned short>> scratch_buffers(num_channels);
+  std::vector<const unsigned char *> nonhalf_ptrs(num_channels, nullptr);
+  std::vector<size_t> nonhalf_sizes(num_channels, 0);
+
+  for (size_t c = 0; c < num_channels; c++) {
+    // Compute per-channel dimensions based on sampling
+    int x_sampling = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+    int y_sampling = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+    int ch_width = (data_width + x_sampling - 1) / x_sampling;
+    int ch_height = (num_lines + y_sampling - 1) / y_sampling;
+
+    // B44 only works with HALF pixel types
+    if (channels[c].pixel_type != TINYEXR_PIXELTYPE_HALF) {
+      // For non-HALF channels, data is stored uncompressed; record position
+      size_t ch_bytes = static_cast<size_t>(ch_width) * ch_height;
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT ||
+          channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        ch_bytes *= 4;
+      } else {
+        ch_bytes *= 2;
+      }
+      if (in_p + ch_bytes > in_end) return false;
+      nonhalf_ptrs[c] = in_p;
+      nonhalf_sizes[c] = ch_bytes;
+      in_p += ch_bytes;
+      continue;
+    }
+
+    // Calculate block dimensions (rounded up to multiple of 4)
+    int padded_width = ((ch_width + 3) / 4) * 4;
+    int padded_height = ((ch_height + 3) / 4) * 4;
+    int num_blocks_x = padded_width / 4;
+    int num_blocks_y = padded_height / 4;
+
+    // Allocate scratch buffer for this channel
+    scratch_buffers[c].resize(static_cast<size_t>(padded_width) * padded_height);
+
+    // Process blocks
+    for (int by = 0; by < num_blocks_y; by++) {
+      for (int bx = 0; bx < num_blocks_x; bx++) {
+        unsigned short block[16];
+
+        if (in_p + 3 > in_end) return false;
+
+        // Check for flat block (shift >= 13)
+        if (in_p[2] >= (13 << 2)) {
+          // 3-byte flat block
+          UnpackB44FlatBlock(block, in_p);
+          in_p += 3;
+        } else {
+          // Regular 14-byte block
+          if (in_p + 14 > in_end) return false;
+          UnpackB44Block(block, in_p);
+          in_p += 14;
+        }
+
+        // Apply p_linear conversion (log table) if needed
+        if (channels[c].p_linear) {
+          for (int i = 0; i < 16; i++) {
+            block[i] = g_b44_log_table[block[i]];
+          }
+        }
+
+        // Store block in scratch buffer
+        for (int dy = 0; dy < 4; dy++) {
+          int y = by * 4 + dy;
+          for (int dx = 0; dx < 4; dx++) {
+            int x = bx * 4 + dx;
+            scratch_buffers[c][static_cast<size_t>(y) * padded_width + x] = block[dy * 4 + dx];
+          }
+        }
+      }
+    }
+  }
+
+  // Second pass: copy from scratch buffers to output in per-channel format.
+  // Output format: all data for channel 0, then all data for channel 1, etc.
+  // ch_offset in DecodePixelData is accumulated per preceding channel sizes.
+  unsigned char* out_p = outPtr;
+  for (size_t c = 0; c < num_channels; c++) {
+    int x_sampling = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+    int y_sampling = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+    int ch_width = (data_width + x_sampling - 1) / x_sampling;
+    int ch_height = (num_lines + y_sampling - 1) / y_sampling;
+    int padded_width = ((ch_width + 3) / 4) * 4;
+
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      for (int y = 0; y < ch_height; y++) {
+        for (int x = 0; x < ch_width; x++) {
+          unsigned short val = scratch_buffers[c][static_cast<size_t>(y) * padded_width + x];
+          // Write as little-endian bytes so DecodePixelData's swap2 (LE->host)
+          // works correctly on both little- and big-endian platforms.
+          tinyexr::swap2(&val);
+          memcpy(out_p, &val, sizeof(val));
+          out_p += sizeof(val);
+        }
+      }
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT ||
+               channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+      // Non-HALF data is stored uncompressed; copy from saved pointer
+      if (nonhalf_sizes[c] > 0) {
+        memcpy(out_p, nonhalf_ptrs[c], nonhalf_sizes[c]);
+        out_p += nonhalf_sizes[c];
+      }
+    }
+  }
+
+  return true;
+}
+
+// ============================================================================
+// PXR24 compression (encoding)
+// ============================================================================
+
+// Convert float32 to float24 (PXR24 format)
+static inline unsigned int float_to_float24(float f) {
+  union { float f; unsigned int i; } u;
+  u.f = f;
+
+  unsigned int s = u.i & 0x80000000;
+  unsigned int e = u.i & 0x7f800000;
+  unsigned int m = u.i & 0x007fffff;
+
+  if (e == 0x7f800000) {
+    if (m) {
+      // NaN - preserve sign and 15 leftmost mantissa bits
+      m >>= 8;
+      return (s >> 8) | (e >> 8) | m | (m == 0 ? 1 : 0);
+    } else {
+      // Infinity
+      return (s >> 8) | (e >> 8);
+    }
+  }
+
+  // Finite - round mantissa to 15 bits
+  unsigned int i = ((e | m) + (m & 0x00000080)) >> 8;
+
+  if (i >= 0x7f8000) {
+    // Overflow - truncate instead of round
+    i = (e | m) >> 8;
+  }
+
+  return (s >> 8) | i;
+}
+
+static bool CompressPxr24(std::vector<unsigned char>& outBuf,
+                          const unsigned char *inPtr, size_t inLen,
+                          int data_width, int num_lines,
+                          size_t num_channels,
+                          const EXRChannelInfo *channels) {
+  // PXR24 stores HALF as 2 bytes, UINT as 4 bytes, FLOAT as 3 bytes (truncated)
+  // Data is stored with byte plane separation and delta encoding
+  size_t pxr24_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    int ch_pixels = data_width * num_lines;
+
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+      pxr24_size += static_cast<size_t>(ch_pixels) * 4;
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      pxr24_size += static_cast<size_t>(ch_pixels) * 2;
+    } else {  // FLOAT
+      pxr24_size += static_cast<size_t>(ch_pixels) * 3;
+    }
+  }
+
+  // Create PXR24 format data
+  std::vector<unsigned char> pxr24_buf(pxr24_size);
+  const unsigned char* in_p = inPtr;
+  unsigned char* out_p = pxr24_buf.data();
+
+  // PXR24 uses:
+  // 1. Byte plane separation: bytes are stored by plane (all high bytes, then next-high, etc.)
+  // 2. Delta encoding: each pixel is stored as difference from previous pixel
+  for (int line = 0; line < num_lines; line++) {
+    for (size_t c = 0; c < num_channels; c++) {
+      int w = data_width;
+
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        // UINT: 4 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        unsigned char* ptr2 = out_p + w * 2;
+        unsigned char* ptr3 = out_p + w * 3;
+        out_p += w * 4;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned int pixel;
+          memcpy(&pixel, in_p, 4);
+          in_p += 4;
+          unsigned int diff = pixel - prevPixel;
+          prevPixel = pixel;
+
+          ptr0[x] = static_cast<unsigned char>(diff >> 24);
+          ptr1[x] = static_cast<unsigned char>(diff >> 16);
+          ptr2[x] = static_cast<unsigned char>(diff >> 8);
+          ptr3[x] = static_cast<unsigned char>(diff);
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        // HALF: 2 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        out_p += w * 2;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned short h;
+          memcpy(&h, in_p, 2);
+          in_p += 2;
+          unsigned int pixel = h;
+          unsigned int diff = pixel - prevPixel;
+          prevPixel = pixel;
+
+          ptr0[x] = static_cast<unsigned char>(diff >> 8);
+          ptr1[x] = static_cast<unsigned char>(diff);
+        }
+      } else {  // FLOAT
+        // FLOAT: convert to 24-bit, then 3 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        unsigned char* ptr2 = out_p + w * 2;
+        out_p += w * 3;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          float f;
+          memcpy(&f, in_p, 4);
+          in_p += 4;
+          unsigned int pixel24 = float_to_float24(f);
+          unsigned int diff = pixel24 - prevPixel;
+          prevPixel = pixel24;
+
+          // Store as 24-bit diff (shifted to upper bits for proper reconstruction)
+          ptr0[x] = static_cast<unsigned char>(diff >> 16);
+          ptr1[x] = static_cast<unsigned char>(diff >> 8);
+          ptr2[x] = static_cast<unsigned char>(diff);
+        }
+      }
+    }
+  }
+
+  // Compress with zlib
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+  mz_ulong dest_len = mz_compressBound(static_cast<mz_ulong>(pxr24_size));
+  outBuf.resize(dest_len);
+  int ret = mz_compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast<mz_ulong>(pxr24_size));
+  if (ret != MZ_OK) {
+    return false;
+  }
+  outBuf.resize(static_cast<size_t>(dest_len));
+#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1)
+  int outSize;
+  unsigned char* ret = stbi_zlib_compress(pxr24_buf.data(), static_cast<int>(pxr24_size), &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  outBuf.assign(ret, ret + outSize);
+  free(ret);
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+  int outSize;
+  unsigned char* ret = nanoz_compress(pxr24_buf.data(), pxr24_size, &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  outBuf.assign(ret, ret + outSize);
+  free(ret);
+#else
+  uLongf dest_len = compressBound(static_cast<uLong>(pxr24_size));
+  outBuf.resize(static_cast<size_t>(dest_len));
+  int ret = compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast<uLong>(pxr24_size));
+  if (ret != Z_OK) {
+    return false;
+  }
+  outBuf.resize(static_cast<size_t>(dest_len));
+#endif
+
+  return true;
+}
+
+// Overloaded version that takes ChannelInfo instead of EXRChannelInfo
+// Note: Uses requested_pixel_type which is the file format, not the input type
+static bool CompressPxr24(std::vector<unsigned char>& outBuf,
+                          const unsigned char *inPtr, size_t inLen,
+                          int data_width, int num_lines,
+                          size_t num_channels,
+                          const std::vector<ChannelInfo>& channels) {
+  // PXR24 stores HALF as 2 bytes, UINT as 4 bytes, FLOAT as 3 bytes (truncated)
+  // Data is stored with byte plane separation and delta encoding
+  // Use requested_pixel_type which is the actual format in the file
+  size_t pxr24_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    int ch_pixels = data_width * num_lines;
+    int file_type = channels[c].requested_pixel_type;
+
+    if (file_type == TINYEXR_PIXELTYPE_UINT) {
+      pxr24_size += static_cast<size_t>(ch_pixels) * 4;
+    } else if (file_type == TINYEXR_PIXELTYPE_HALF) {
+      pxr24_size += static_cast<size_t>(ch_pixels) * 2;
+    } else {  // FLOAT
+      pxr24_size += static_cast<size_t>(ch_pixels) * 3;
+    }
+  }
+
+  // Create PXR24 format data
+  std::vector<unsigned char> pxr24_buf(pxr24_size);
+  const unsigned char* in_p = inPtr;
+  unsigned char* out_p = pxr24_buf.data();
+
+  // PXR24 uses:
+  // 1. Byte plane separation: bytes are stored by plane (all high bytes, then next-high, etc.)
+  // 2. Delta encoding: each pixel is stored as difference from previous pixel
+  for (int line = 0; line < num_lines; line++) {
+    for (size_t c = 0; c < num_channels; c++) {
+      int w = data_width;
+      int file_type = channels[c].requested_pixel_type;
+
+      if (file_type == TINYEXR_PIXELTYPE_UINT) {
+        // UINT: 4 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        unsigned char* ptr2 = out_p + w * 2;
+        unsigned char* ptr3 = out_p + w * 3;
+        out_p += w * 4;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned int pixel;
+          memcpy(&pixel, in_p, 4);
+          in_p += 4;
+          unsigned int diff = pixel - prevPixel;
+          prevPixel = pixel;
+
+          ptr0[x] = static_cast<unsigned char>(diff >> 24);
+          ptr1[x] = static_cast<unsigned char>(diff >> 16);
+          ptr2[x] = static_cast<unsigned char>(diff >> 8);
+          ptr3[x] = static_cast<unsigned char>(diff);
+        }
+      } else if (file_type == TINYEXR_PIXELTYPE_HALF) {
+        // HALF: 2 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        out_p += w * 2;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned short h;
+          memcpy(&h, in_p, 2);
+          in_p += 2;
+          unsigned int pixel = h;
+          unsigned int diff = pixel - prevPixel;
+          prevPixel = pixel;
+
+          ptr0[x] = static_cast<unsigned char>(diff >> 8);
+          ptr1[x] = static_cast<unsigned char>(diff);
+        }
+      } else {  // FLOAT
+        // FLOAT: convert to 24-bit, then 3 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        unsigned char* ptr2 = out_p + w * 2;
+        out_p += w * 3;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          float f;
+          memcpy(&f, in_p, 4);
+          in_p += 4;
+          unsigned int pixel24 = float_to_float24(f);
+          unsigned int diff = pixel24 - prevPixel;
+          prevPixel = pixel24;
+
+          // Store as 24-bit diff (shifted to upper bits for proper reconstruction)
+          ptr0[x] = static_cast<unsigned char>(diff >> 16);
+          ptr1[x] = static_cast<unsigned char>(diff >> 8);
+          ptr2[x] = static_cast<unsigned char>(diff);
+        }
+      }
+    }
+  }
+
+  // Compress with zlib
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+  mz_ulong dest_len = mz_compressBound(static_cast<mz_ulong>(pxr24_size));
+  outBuf.resize(dest_len);
+  int ret = mz_compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast<mz_ulong>(pxr24_size));
+  if (ret != MZ_OK) {
+    return false;
+  }
+  outBuf.resize(static_cast<size_t>(dest_len));
+#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1)
+  int outSize;
+  unsigned char* ret = stbi_zlib_compress(pxr24_buf.data(), static_cast<int>(pxr24_size), &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  outBuf.assign(ret, ret + outSize);
+  free(ret);
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+  int outSize;
+  unsigned char* ret = nanoz_compress(pxr24_buf.data(), pxr24_size, &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  outBuf.assign(ret, ret + outSize);
+  free(ret);
+#else
+  uLongf dest_len = compressBound(static_cast<uLong>(pxr24_size));
+  outBuf.resize(static_cast<size_t>(dest_len));
+  int ret = compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast<uLong>(pxr24_size));
+  if (ret != Z_OK) {
+    return false;
+  }
+  outBuf.resize(static_cast<size_t>(dest_len));
+#endif
+
+  return true;
+}
+
+// ============================================================================
+// B44/B44A compression (encoding)
+// ============================================================================
+
+// Shift and round for B44 pack (matches OpenEXR's shiftAndRound)
+static inline int B44ShiftAndRound(int x, int shift) {
+  // Compute y = x * pow(2, -shift), rounded to nearest integer
+  // In case of a tie, round to the even one
+  x <<= 1;
+  int a = (1 << shift) - 1;
+  shift += 1;
+  int b = (x >> shift) & 1;
+  return (x + a + b) >> shift;
+}
+
+// Pack a 4x4 block of HALF values into 14 bytes (matches OpenEXR's pack())
+// Returns the number of bytes written (14 for normal, 3 for flat if flatfields=true)
+static int PackB44Block(unsigned char* out, const unsigned short* block, bool flatfields, bool exactmax) {
+  int d[16];
+  int r[15];
+  int rMin, rMax;
+  unsigned short t[16];
+  unsigned short tMax;
+  int shift = -1;
+
+  const int bias = 0x20;
+
+  // Convert half-float values to ordered-magnitude representation
+  // This ensures that if t[i] > t[j], then half[i] > half[j] as floats
+  for (int i = 0; i < 16; ++i) {
+    if ((block[i] & 0x7c00) == 0x7c00) {
+      t[i] = 0x8000;  // NaN/Inf -> neutral value
+    } else if (block[i] & 0x8000) {
+      t[i] = ~block[i];  // Negative: invert all bits
+    } else {
+      t[i] = block[i] | 0x8000;  // Positive: set sign bit
+    }
+  }
+
+  // Find maximum t value
+  tMax = 0;
+  for (int i = 0; i < 16; ++i) {
+    if (tMax < t[i]) tMax = t[i];
+  }
+
+  // Compute running differences and find valid shift
+  do {
+    shift += 1;
+
+    // Compute absolute differences from tMax, shifted and rounded
+    for (int i = 0; i < 16; ++i) {
+      d[i] = B44ShiftAndRound(tMax - t[i], shift);
+    }
+
+    // Convert to running differences (specific pattern for B44)
+    r[0] = d[0] - d[4] + bias;
+    r[1] = d[4] - d[8] + bias;
+    r[2] = d[8] - d[12] + bias;
+
+    r[3] = d[0] - d[1] + bias;
+    r[4] = d[4] - d[5] + bias;
+    r[5] = d[8] - d[9] + bias;
+    r[6] = d[12] - d[13] + bias;
+
+    r[7]  = d[1] - d[2] + bias;
+    r[8]  = d[5] - d[6] + bias;
+    r[9]  = d[9] - d[10] + bias;
+    r[10] = d[13] - d[14] + bias;
+
+    r[11] = d[2] - d[3] + bias;
+    r[12] = d[6] - d[7] + bias;
+    r[13] = d[10] - d[11] + bias;
+    r[14] = d[14] - d[15] + bias;
+
+    rMin = r[0];
+    rMax = r[0];
+    for (int i = 1; i < 15; ++i) {
+      if (rMin > r[i]) rMin = r[i];
+      if (rMax < r[i]) rMax = r[i];
+    }
+  } while (rMin < 0 || rMax > 0x3f);
+
+  // Check for flat block (all pixels same value)
+  if (rMin == bias && rMax == bias && flatfields) {
+    // Encode as 3 bytes: t[0] and marker 0xfc
+    out[0] = static_cast<unsigned char>(t[0] >> 8);
+    out[1] = static_cast<unsigned char>(t[0]);
+    out[2] = 0xfc;  // Flat block marker (shift >= 13)
+    return 3;
+  }
+
+  if (exactmax) {
+    // Adjust t[0] so the max pixel is represented accurately
+    t[0] = tMax - static_cast<unsigned short>(d[0] << shift);
+  }
+
+  // Pack t[0], shift, and r[0]..r[14] into 14 bytes
+  out[0]  = static_cast<unsigned char>(t[0] >> 8);
+  out[1]  = static_cast<unsigned char>(t[0]);
+  out[2]  = static_cast<unsigned char>((shift << 2) | (r[0] >> 4));
+  out[3]  = static_cast<unsigned char>((r[0] << 4) | (r[1] >> 2));
+  out[4]  = static_cast<unsigned char>((r[1] << 6) | r[2]);
+  out[5]  = static_cast<unsigned char>((r[3] << 2) | (r[4] >> 4));
+  out[6]  = static_cast<unsigned char>((r[4] << 4) | (r[5] >> 2));
+  out[7]  = static_cast<unsigned char>((r[5] << 6) | r[6]);
+  out[8]  = static_cast<unsigned char>((r[7] << 2) | (r[8] >> 4));
+  out[9]  = static_cast<unsigned char>((r[8] << 4) | (r[9] >> 2));
+  out[10] = static_cast<unsigned char>((r[9] << 6) | r[10]);
+  out[11] = static_cast<unsigned char>((r[11] << 2) | (r[12] >> 4));
+  out[12] = static_cast<unsigned char>((r[12] << 4) | (r[13] >> 2));
+  out[13] = static_cast<unsigned char>((r[13] << 6) | r[14]);
+
+  return 14;
+}
+
+static bool CompressB44(std::vector<unsigned char>& outBuf,
+                        const unsigned char *inPtr, size_t inLen,
+                        int data_width, int num_lines,
+                        size_t num_channels,
+                        const EXRChannelInfo *channels,
+                        bool is_b44a) {
+  // Calculate number of 4x4 blocks
+  int num_blocks_x = (data_width + 3) / 4;
+  int num_blocks_y = (num_lines + 3) / 4;
+
+  // Estimate output size (14 bytes per block per HALF channel)
+  size_t max_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      max_size += static_cast<size_t>(num_blocks_x) * num_blocks_y * 14;
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT ||
+               channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+      max_size += static_cast<size_t>(data_width) * num_lines * 4;
+    } else {
+      max_size += static_cast<size_t>(data_width) * num_lines * 2;
+    }
+  }
+
+  outBuf.resize(max_size);
+  unsigned char* out_p = outBuf.data();
+
+  // Process each channel
+  size_t in_offset = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    if (channels[c].pixel_type != TINYEXR_PIXELTYPE_HALF) {
+      // Non-HALF channels are stored uncompressed
+      size_t ch_bytes = static_cast<size_t>(data_width) * num_lines;
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT ||
+          channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        ch_bytes *= 4;
+      } else {
+        ch_bytes *= 2;
+      }
+      memcpy(out_p, inPtr + in_offset, ch_bytes);
+      out_p += ch_bytes;
+      in_offset += ch_bytes;
+      continue;
+    }
+
+    // Process HALF channel in 4x4 blocks
+    const unsigned short* ch_ptr = reinterpret_cast<const unsigned short*>(inPtr + in_offset);
+
+    for (int by = 0; by < num_blocks_y; by++) {
+      for (int bx = 0; bx < num_blocks_x; bx++) {
+        unsigned short block[16];
+
+        // Gather block pixels with edge replication for padding
+        for (int dy = 0; dy < 4; dy++) {
+          int y = by * 4 + dy;
+          int src_y = (y >= num_lines) ? (num_lines - 1) : y;
+
+          for (int dx = 0; dx < 4; dx++) {
+            int x = bx * 4 + dx;
+            int src_x = (x >= data_width) ? (data_width - 1) : x;
+
+            block[dy * 4 + dx] = ch_ptr[src_y * data_width + src_x];
+          }
+        }
+
+        // Pack block - PackB44Block handles flat block detection internally
+        // flatfields = is_b44a, exactmax = true (for better accuracy)
+        int bytes_written = PackB44Block(out_p, block, is_b44a, true);
+        out_p += bytes_written;
+      }
+    }
+
+    in_offset += static_cast<size_t>(data_width) * num_lines * 2;
+  }
+
+  // Resize to actual size
+  outBuf.resize(static_cast<size_t>(out_p - outBuf.data()));
+
+  return true;
+}
+
+// Overloaded version that takes ChannelInfo instead of EXRChannelInfo
+// Note: Uses requested_pixel_type which is the file format, not the input type
+static bool CompressB44(std::vector<unsigned char>& outBuf,
+                        const unsigned char *inPtr, size_t inLen,
+                        int data_width, int num_lines,
+                        size_t num_channels,
+                        const std::vector<ChannelInfo>& channels,
+                        bool is_b44a) {
+  // Calculate number of 4x4 blocks
+  int num_blocks_x = (data_width + 3) / 4;
+  int num_blocks_y = (num_lines + 3) / 4;
+
+  // Estimate output size - use requested_pixel_type (file format)
+  size_t max_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    int file_type = channels[c].requested_pixel_type;
+    if (file_type == TINYEXR_PIXELTYPE_HALF) {
+      max_size += static_cast<size_t>(num_blocks_x) * num_blocks_y * 14;
+    } else if (file_type == TINYEXR_PIXELTYPE_UINT ||
+               file_type == TINYEXR_PIXELTYPE_FLOAT) {
+      max_size += static_cast<size_t>(data_width) * num_lines * 4;
+    } else {
+      max_size += static_cast<size_t>(data_width) * num_lines * 2;
+    }
+  }
+
+  outBuf.resize(max_size);
+  unsigned char* out_p = outBuf.data();
+
+  size_t in_offset = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    int file_type = channels[c].requested_pixel_type;
+    if (file_type != TINYEXR_PIXELTYPE_HALF) {
+      size_t ch_bytes = static_cast<size_t>(data_width) * num_lines;
+      if (file_type == TINYEXR_PIXELTYPE_UINT ||
+          file_type == TINYEXR_PIXELTYPE_FLOAT) {
+        ch_bytes *= 4;
+      } else {
+        ch_bytes *= 2;
+      }
+      memcpy(out_p, inPtr + in_offset, ch_bytes);
+      out_p += ch_bytes;
+      in_offset += ch_bytes;
+      continue;
+    }
+
+    const unsigned short* ch_ptr = reinterpret_cast<const unsigned short*>(inPtr + in_offset);
+
+    for (int by = 0; by < num_blocks_y; by++) {
+      for (int bx = 0; bx < num_blocks_x; bx++) {
+        unsigned short block[16];
+
+        // Gather block pixels with edge replication for padding
+        for (int dy = 0; dy < 4; dy++) {
+          int y = by * 4 + dy;
+          int src_y = (y >= num_lines) ? (num_lines - 1) : y;
+
+          for (int dx = 0; dx < 4; dx++) {
+            int x = bx * 4 + dx;
+            int src_x = (x >= data_width) ? (data_width - 1) : x;
+
+            block[dy * 4 + dx] = ch_ptr[src_y * data_width + src_x];
+          }
+        }
+
+        // Pack block - PackB44Block handles flat block detection internally
+        int bytes_written = PackB44Block(out_p, block, is_b44a, true);
+        out_p += bytes_written;
+      }
+    }
+
+    in_offset += static_cast<size_t>(data_width) * num_lines * 2;
+  }
+
+  outBuf.resize(static_cast<size_t>(out_p - outBuf.data()));
+  return true;
+}
+
 #if TINYEXR_USE_ZFP
 
 struct ZFPCompressionParam {
@@ -4090,6 +5335,257 @@ static bool DecodePixelData(/* out */ unsigned char **out_images,
     (void)num_channels;
     return false;
 #endif
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    // PXR24 compression: Use true PXR24 decompression
+    // PXR24 truncates FLOAT to 24-bits, HALF/UINT pass through unchanged
+    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
+                                      static_cast<size_t>(num_lines) *
+                                      pixel_data_size);
+
+    if (!tinyexr::DecompressPxr24(
+            reinterpret_cast<unsigned char *>(&outBuf.at(0)), outBuf.size(),
+            data_ptr, static_cast<size_t>(data_len),
+            width, num_lines, static_cast<size_t>(num_channels), channels)) {
+      return false;
+    }
+
+    // Process decompressed data (same as ZIP path)
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+              &outBuf.at(v * static_cast<size_t>(pixel_data_size) *
+                             static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            tinyexr::FP16 hf;
+            tinyexr::cpy2(&(hf.u), line_ptr + u);
+            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+              unsigned short *image =
+                  reinterpret_cast<unsigned short **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = hf.u;
+            } else {  // HALF -> FLOAT
+              tinyexr::FP32 f32 = half_to_float(hf);
+              float *image = reinterpret_cast<float **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = f32.f;
+            }
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false);
+
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            unsigned int val;
+            tinyexr::cpy4(&val, line_ptr + u);
+            tinyexr::swap4(&val);
+
+            unsigned int *image =
+                reinterpret_cast<unsigned int **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false);
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const float *line_ptr = reinterpret_cast<float *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            float val;
+            tinyexr::cpy4(&val, line_ptr + u);
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            float *image = reinterpret_cast<float **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else {
+        return false;
+      }
+    }
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    // B44/B44A compression: Use true B44 block decompression
+    // B44 is a lossy block compression for HALF data (4x4 blocks -> 14 bytes)
+    bool is_b44a = (compression_type == TINYEXR_COMPRESSIONTYPE_B44A);
+
+    // Compute outBuf size matching DecompressB44's output layout: per-channel
+    // sequential data using subsampled dimensions for each channel.
+    size_t b44_out_size = 0;
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      int xs = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+      int ys = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+      size_t cw = static_cast<size_t>((width     + xs - 1) / xs);
+      size_t ch = static_cast<size_t>((num_lines + ys - 1) / ys);
+      size_t bpp = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) ? 2u : 4u;
+      b44_out_size += cw * ch * bpp;
+    }
+
+    std::vector<unsigned char> outBuf(b44_out_size);
+
+    if (!tinyexr::DecompressB44(
+            reinterpret_cast<unsigned char *>(&outBuf.at(0)), outBuf.size(),
+            data_ptr, static_cast<size_t>(data_len),
+            width, num_lines, static_cast<size_t>(num_channels), channels,
+            is_b44a)) {
+      return false;
+    }
+
+    // Process decompressed data - B44 returns data organized per channel,
+    // using subsampled dimensions (ch_width/ch_height based on x/y_sampling).
+    // Accumulate ch_offset based on actual subsampled sizes of preceding
+    // channels to handle mixed channel types and subsampling correctly.
+    size_t ch_offset = 0;
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      int xs = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+      int ys = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+      size_t ch_width  = static_cast<size_t>((width     + xs - 1) / xs);
+      size_t ch_height = static_cast<size_t>((num_lines + ys - 1) / ys);
+      size_t ch_bytes = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) ? 2 : 4;
+
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        for (size_t v = 0; v < ch_height; v++) {
+          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+              &outBuf.at(ch_offset + v * ch_width * 2));
+          for (size_t u = 0; u < ch_width; u++) {
+            tinyexr::FP16 hf;
+            tinyexr::cpy2(&(hf.u), line_ptr + u);
+            // B44 stream stores data in little-endian order (same as the
+            // encoder's buf); reverse the byte swap the encoder applied.
+            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+              unsigned short *image =
+                  reinterpret_cast<unsigned short **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = hf.u;
+            } else {  // HALF -> FLOAT
+              tinyexr::FP32 f32 = half_to_float(hf);
+              float *image = reinterpret_cast<float **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = f32.f;
+            }
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false);
+
+        for (size_t v = 0; v < ch_height; v++) {
+          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
+              &outBuf.at(ch_offset + v * ch_width * 4));
+          for (size_t u = 0; u < ch_width; u++) {
+            unsigned int val;
+            tinyexr::cpy4(&val, line_ptr + u);
+            tinyexr::swap4(&val);
+
+            unsigned int *image =
+                reinterpret_cast<unsigned int **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false);
+        for (size_t v = 0; v < ch_height; v++) {
+          const float *line_ptr = reinterpret_cast<float *>(
+              &outBuf.at(ch_offset + v * ch_width * 4));
+          for (size_t u = 0; u < ch_width; u++) {
+            float val;
+            tinyexr::cpy4(&val, line_ptr + u);
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            float *image = reinterpret_cast<float **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else {
+        return false;
+      }
+      ch_offset += ch_width * ch_height * ch_bytes;
+    }
   } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) {
     for (size_t c = 0; c < num_channels; c++) {
       for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
@@ -4217,7 +5713,7 @@ static bool DecodePixelData(/* out */ unsigned char **out_images,
 static bool DecodeTiledPixelData(
     unsigned char **out_images, int *width, int *height,
     const int *requested_pixel_types, const unsigned char *data_ptr,
-    size_t data_len, int compression_type, int line_order, int data_width,
+    size_t data_len, int compression_type, int data_width,
     int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x,
     int tile_size_y, size_t pixel_data_size, size_t num_attributes,
     const EXRAttribute *attributes, size_t num_channels,
@@ -4243,8 +5739,9 @@ static bool DecodeTiledPixelData(
   }
 
   // Image size = tile size.
+  // Line order within tiles is always increasing.
   return DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len,
-                         compression_type, line_order, (*width), tile_size_y,
+                         compression_type, /* line_order*/ 0, (*width), tile_size_y,
                          /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0,
                          (*height), pixel_data_size, num_attributes, attributes,
                          num_channels, channels, channel_offset_list);
@@ -4514,6 +6011,13 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header,
 #endif
       }
 
+      // PXR24, B44, B44A compression types
+      if (data[0] == TINYEXR_COMPRESSIONTYPE_PXR24 ||
+          data[0] == TINYEXR_COMPRESSIONTYPE_B44 ||
+          data[0] == TINYEXR_COMPRESSIONTYPE_B44A) {
+        ok = true;
+      }
+
       if (!ok) {
         if (err) {
           (*err) = "Unknown compression type.";
@@ -4866,7 +6370,7 @@ static int LevelIndex(int lx, int ly, int tile_level_mode, int num_x_levels) {
   default:
     return -1;
   }
-//  return 0;
+  return 0;
 }
 
 static int LevelSize(int toplevel_size, int level, int tile_rounding_mode) {
@@ -4935,10 +6439,12 @@ static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header,
   std::atomic<int> tile_count(0);
 
   int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+#if (TINYEXR_MAX_THREADS > 0)
+  num_threads = std::min(num_threads,TINYEXR_MAX_THREADS);
+#endif
   if (num_threads > int(num_tiles)) {
     num_threads = int(num_tiles);
   }
-
   for (int t = 0; t < num_threads; t++) {
     workers.emplace_back(std::thread([&]()
       {
@@ -5017,7 +6523,6 @@ static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header,
       &(exr_image->tiles[tile_idx].height),
       exr_header->requested_pixel_types, data_ptr,
       static_cast<size_t>(data_len), exr_header->compression_type,
-      exr_header->line_order,
       exr_image->width, exr_image->height,
       tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x,
       exr_header->tile_size_y, static_cast<size_t>(pixel_data_size),
@@ -5078,16 +6583,23 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
     num_scanline_blocks = 32;
   } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
     num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    num_scanline_blocks = 32;
+  }
 
 #if TINYEXR_USE_ZFP
+  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
     tinyexr::ZFPCompressionParam zfp_compression_param;
     if (!FindZFPCompressionParam(&zfp_compression_param,
                                  exr_header->custom_attributes,
                                  int(exr_header->num_custom_attributes), err)) {
       return TINYEXR_ERROR_INVALID_HEADER;
     }
-#endif
   }
+#endif
 
   if (exr_header->data_window.max_x < exr_header->data_window.min_x ||
       exr_header->data_window.max_y < exr_header->data_window.min_y) {
@@ -5291,10 +6803,12 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
     std::atomic<int> y_count(0);
 
     int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+#if (TINYEXR_MAX_THREADS > 0)
+    num_threads = std::min(num_threads,TINYEXR_MAX_THREADS);
+#endif
     if (num_threads > int(num_blocks)) {
       num_threads = int(num_blocks);
     }
-
     for (int t = 0; t < num_threads; t++) {
       workers.emplace_back(std::thread([&]() {
         int y = 0;
@@ -5369,10 +6883,11 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
                 if (line_no < 0) {
                   invalid_data = true;
                 } else {
+                  // Line order is increasing because we read in line offset table order.
                   if (!tinyexr::DecodePixelData(
                           exr_image->images, exr_header->requested_pixel_types,
                           data_ptr, static_cast<size_t>(data_len),
-                          exr_header->compression_type, exr_header->line_order,
+                          exr_header->compression_type, /* line_order*/ 0,
                           int(data_width), int(data_height), int(data_width), y, line_no,
                           num_lines, static_cast<size_t>(pixel_data_size),
                           static_cast<size_t>(
@@ -5860,7 +7375,7 @@ static bool ReconstructTileOffsets(OffsetData& offset_data,
         if (size_t(tileX) >= offset_data.offsets[size_t(level_idx)][size_t(tileY)].size()) {
           return false;
         }
-
+        
         offset_data.offsets[size_t(level_idx)][size_t(tileY)][size_t(tileX)] = tileOffset;
       }
     }
@@ -5914,6 +7429,11 @@ static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header,
     num_scanline_blocks = 32;
   } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
     num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    num_scanline_blocks = 32;
   }
 
   if (exr_header->data_window.max_x < exr_header->data_window.min_x ||
@@ -6187,18 +7707,15 @@ int EXRLayers(const char *filename, const char **layer_names[], int *num_layers,
 }
 
 int LoadEXR(float **out_rgba, int *width, int *height, const char *filename,
-            const char **err, int *num_chans) {
+            const char **err) {
   return LoadEXRWithLayer(out_rgba, width, height, filename,
-                          /* layername */ NULL, err, num_chans);
+                          /* layername */ NULL, err);
 }
 
 int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
                      const char *filename, const char *layername,
-                     const char **err, int *num_chans) {
-    if (num_chans)
-        *num_chans = 0;
-
-    if (out_rgba == NULL) {
+                     const char **err) {
+  if (out_rgba == NULL) {
     tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err);
     return TINYEXR_ERROR_INVALID_ARGUMENT;
   }
@@ -6293,9 +7810,6 @@ int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
   }
 
   if (channels.size() == 1) {
-    if (num_chans)
-      *num_chans = 1;
-
     int chIdx = int(channels.front().index);
     // Grayscale channel only.
 
@@ -6374,9 +7888,6 @@ int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
       return TINYEXR_ERROR_INVALID_DATA;
     }
 
-    if (num_chans)
-        *num_chans = (idxA != -1) ? 4 : 3;
-
     (*out_rgba) = reinterpret_cast<float *>(
         malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
                static_cast<size_t>(exr_image.height)));
@@ -6555,6 +8066,7 @@ int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
 
   ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err);
   if (ret != TINYEXR_SUCCESS) {
+    FreeEXRHeader(&exr_header);
     return ret;
   }
 
@@ -6568,6 +8080,8 @@ int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
   InitEXRImage(&exr_image);
   ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err);
   if (ret != TINYEXR_SUCCESS) {
+    FreeEXRHeader(&exr_header);
+    FreeEXRImage(&exr_image);
     return ret;
   }
 
@@ -6871,7 +8385,7 @@ struct MemoryMappedFile {
     if (read_bytes != size) {
       // TODO: Try to read data until reading `size` bytes.
       fclose(fp);
-      size = 0;
+      size = 0; 
       data = nullptr;
       return;
     }
@@ -7133,7 +8647,7 @@ static bool EncodePixelData(/* out */ std::vector<unsigned char>& out_data,
   } else if ((compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
     (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) {
 #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
-    std::vector<unsigned char> block(buminiz::mz_compressBound(
+    std::vector<unsigned char> block(mz_compressBound(
       static_cast<unsigned long>(buf.size())));
 #elif TINYEXR_USE_STB_ZLIB
     // there is no compressBound() function, so we use a value that
@@ -7238,6 +8752,70 @@ static bool EncodePixelData(/* out */ std::vector<unsigned char>& out_data,
     (void)compression_param;
     return false;
 #endif
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    // PXR24 compression: True PXR24 (truncates FLOAT to 24-bits + zlib)
+    std::vector<unsigned char> block;
+
+    if (!tinyexr::CompressPxr24(block,
+                         reinterpret_cast<const unsigned char *>(&buf.at(0)),
+                         buf.size(), width, num_lines,
+                         channels.size(), channels)) {
+      if (err) {
+        (*err) += "PXR24 compression failed.\n";
+      }
+      return false;
+    }
+
+    out_data.insert(out_data.end(), block.begin(), block.end());
+
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    // B44/B44A compression: True B44 block compression for HALF data
+    bool is_b44a = (compression_type == TINYEXR_COMPRESSIONTYPE_B44A);
+    std::vector<unsigned char> block;
+
+    // CompressB44 expects per-channel sequential layout, but buf is
+    // scanline-interleaved: within each row, channels are stored contiguously
+    // (channel_offset_list[c] * width bytes into the row), and rows are
+    // stacked.  Convert to per-channel sequential before compressing.
+    std::vector<unsigned char> seq_buf(buf_size);
+    unsigned char *seq_p = seq_buf.data();
+    for (size_t c = 0; c < channels.size(); c++) {
+      int file_type = channels[c].requested_pixel_type;
+      // HALF is 2 bytes; FLOAT and UINT are both 4 bytes
+      size_t ch_size = (file_type == TINYEXR_PIXELTYPE_HALF) ? 2 : 4;
+      for (int y = 0; y < num_lines; y++) {
+        const unsigned char *src =
+            &buf[y * pixel_data_size * width + channel_offset_list[c] * width];
+        size_t row_bytes = static_cast<size_t>(width) * ch_size;
+        memcpy(seq_p, src, row_bytes);
+#if !TINYEXR_LITTLE_ENDIAN
+        // buf has already been byte-swapped to little-endian for file output.
+        // CompressB44 reads HALF values as host-endian unsigned shorts, so
+        // un-swap the bytes back to host-endian for correct B44 encoding.
+        if (file_type == TINYEXR_PIXELTYPE_HALF) {
+          unsigned short *p = reinterpret_cast<unsigned short *>(seq_p);
+          for (int x = 0; x < width; x++) {
+            tinyexr::swap2(p + x);
+          }
+        }
+#endif
+        seq_p += row_bytes;
+      }
+    }
+
+    if (!tinyexr::CompressB44(block,
+                         reinterpret_cast<const unsigned char *>(seq_buf.data()),
+                         seq_buf.size(), width, num_lines,
+                         channels.size(), channels, is_b44a)) {
+      if (err) {
+        (*err) += "B44 compression failed.\n";
+      }
+      return false;
+    }
+
+    out_data.insert(out_data.end(), block.begin(), block.end());
+
   } else {
     return false;
   }
@@ -7282,6 +8860,9 @@ static int EncodeTiledLevel(const EXRImage* level_image, const EXRHeader* exr_he
   std::atomic<int> tile_count(0);
 
   int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+#if (TINYEXR_MAX_THREADS > 0)
+  num_threads = std::min(num_threads,TINYEXR_MAX_THREADS);
+#endif
   if (num_threads > int(num_tiles)) {
     num_threads = int(num_tiles);
   }
@@ -7378,6 +8959,11 @@ static int NumScanlines(int compression_type) {
     num_scanlines = 32;
   } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
     num_scanlines = 16;
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    num_scanlines = 16;  // PXR24 uses 16 scanlines per block (same as ZIP)
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    num_scanlines = 32;  // B44/B44A uses 32 scanlines per block
   }
   return num_scanlines;
 }
@@ -7531,7 +9117,9 @@ static int EncodeChunk(const EXRImage* exr_image, const EXRHeader* exr_header,
     std::atomic<int> block_count(0);
 
     int num_threads = std::min(std::max(1, int(std::thread::hardware_concurrency())), num_blocks);
-
+#if (TINYEXR_MAX_THREADS > 0)
+    num_threads = std::min(num_threads,TINYEXR_MAX_THREADS);
+#endif
     for (int t = 0; t < num_threads; t++) {
       workers.emplace_back(std::thread([&]() {
         int i = 0;
@@ -7708,7 +9296,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
         if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) {
           SetErrorMessage("Failed to precalculate Tile info",
                           err);
-          return (size_t)TINYEXR_ERROR_INVALID_DATA;
+          return TINYEXR_ERROR_INVALID_DATA;
         }
         int ntiles = InitTileOffsets(offset_data[i], exr_headers[i], num_x_tiles, num_y_tiles);
         if (ntiles > 0) {
@@ -7716,8 +9304,8 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
         } else {
           SetErrorMessage("Failed to compute Tile offsets",
                           err);
-          return (size_t)TINYEXR_ERROR_INVALID_DATA;
-
+          return TINYEXR_ERROR_INVALID_DATA;
+          
         }
         total_chunk_count += chunk_count[i];
       }
@@ -7918,7 +9506,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
   // Allocating required memory
   if (total_size == 0) { // something went wrong
     tinyexr::SetErrorMessage("Output memory size is zero", err);
-    return (size_t)TINYEXR_ERROR_INVALID_DATA;
+    return TINYEXR_ERROR_INVALID_DATA;
   }
   (*memory_out) = static_cast<unsigned char*>(malloc(size_t(total_size)));
 
@@ -7939,7 +9527,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
           sum += num_bytes;
           if (sum > total_size) {
             tinyexr::SetErrorMessage("Invalid offset bytes in Tiled Part image.", err);
-            return (size_t)TINYEXR_ERROR_INVALID_DATA;
+            return TINYEXR_ERROR_INVALID_DATA;
           }
 
           memcpy(memory_ptr,
@@ -7954,7 +9542,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
       sum += num_bytes;
       if (sum > total_size) {
         tinyexr::SetErrorMessage("Invalid offset bytes in Part image.", err);
-        return (size_t)TINYEXR_ERROR_INVALID_DATA;
+        return TINYEXR_ERROR_INVALID_DATA;
       }
       std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data[i].offsets[0][0];
       memcpy(memory_ptr, reinterpret_cast<unsigned char*>(&offsets[0]), num_bytes);
@@ -7969,7 +9557,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
         sum += 4;
         if (sum > total_size) {
           tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err);
-          return (size_t)TINYEXR_ERROR_INVALID_DATA;
+          return TINYEXR_ERROR_INVALID_DATA;
         }
         unsigned int part_number = i;
         swap4(&part_number);
@@ -7979,7 +9567,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
       sum += data_lists[i][j].size();
       if (sum > total_size) {
         tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err);
-        return (size_t)TINYEXR_ERROR_INVALID_DATA;
+        return TINYEXR_ERROR_INVALID_DATA;
       }
       memcpy(memory_ptr, &data_lists[i][j][0], data_lists[i][j].size());
       memory_ptr += data_lists[i][j].size();
@@ -7988,7 +9576,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
 
   if (sum != total_size) {
     tinyexr::SetErrorMessage("Corrupted Part image chunk data.", err);
-    return (size_t)TINYEXR_ERROR_INVALID_DATA;
+    return TINYEXR_ERROR_INVALID_DATA;
   }
 
   return size_t(total_size);  // OK
@@ -8080,7 +9668,7 @@ size_t SaveEXRMultipartImageToMemory(const EXRImage* exr_images,
                                      const EXRHeader** exr_headers,
                                      unsigned int num_parts,
                                      unsigned char** memory_out, const char** err) {
-  if (exr_images == NULL || exr_headers == NULL || num_parts < 2 ||
+  if (exr_images == NULL || exr_headers == NULL || num_parts == 0 ||
       memory_out == NULL) {
     tinyexr::SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory",
                               err);
@@ -8094,7 +9682,7 @@ int SaveEXRMultipartImageToFile(const EXRImage* exr_images,
                                 unsigned int num_parts,
                                 const char* filename,
                                 const char** err) {
-  if (exr_images == NULL || exr_headers == NULL || num_parts < 2) {
+  if (exr_images == NULL || exr_headers == NULL || num_parts == 0) {
     tinyexr::SetErrorMessage("Invalid argument for SaveEXRMultipartImageToFile",
                               err);
     return TINYEXR_ERROR_INVALID_ARGUMENT;
@@ -8226,7 +9814,7 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) {
 
     if (attr_name.compare("compression") == 0) {
       compression_type = data[0];
-      if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) {
+      if (compression_type > TINYEXR_COMPRESSIONTYPE_B44A) {
         std::stringstream ss;
         ss << "Unsupported compression type : " << compression_type;
         tinyexr::SetErrorMessage(ss.str(), err);
@@ -8235,6 +9823,9 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) {
 
       if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
         num_scanline_blocks = 16;
+      } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+                 compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+        num_scanline_blocks = 32;
       }
 
     } else if (attr_name.compare("channels") == 0) {
@@ -8583,20 +10174,7 @@ int EXRNumLevels(const EXRImage* exr_image) {
   if(exr_image->images) return 1; // scanlines
   int levels = 1;
   const EXRImage* level_image = exr_image;
-
-#if 0
-  while ((level_image = level_image->next_level))
-      ++levels;
-#else
-  for (; ;)
-  {
-      level_image = level_image->next_level;
-      if (!level_image)
-          break;
-      ++levels;
-  }
-#endif
-
+  while((level_image = level_image->next_level)) ++levels;
   return levels;
 }
 
@@ -8608,16 +10186,19 @@ int FreeEXRImage(EXRImage *exr_image) {
   if (exr_image->next_level) {
     FreeEXRImage(exr_image->next_level);
     delete exr_image->next_level;
+    exr_image->next_level = NULL;
   }
 
   for (int i = 0; i < exr_image->num_channels; i++) {
     if (exr_image->images && exr_image->images[i]) {
       free(exr_image->images[i]);
+      exr_image->images[i] = NULL;
     }
   }
 
   if (exr_image->images) {
     free(exr_image->images);
+    exr_image->images = NULL;
   }
 
   if (exr_image->tiles) {
@@ -8625,15 +10206,21 @@ int FreeEXRImage(EXRImage *exr_image) {
       for (int i = 0; i < exr_image->num_channels; i++) {
         if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) {
           free(exr_image->tiles[tid].images[i]);
+          exr_image->tiles[tid].images[i] = NULL;
         }
       }
       if (exr_image->tiles[tid].images) {
         free(exr_image->tiles[tid].images);
+        exr_image->tiles[tid].images = NULL;
       }
     }
     free(exr_image->tiles);
+    exr_image->tiles = NULL;
   }
 
+  exr_image->num_channels = 0;
+  exr_image->num_tiles = 0;
+
   return TINYEXR_SUCCESS;
 }
 
@@ -8787,6 +10374,71 @@ int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers,
       exr_headers, num_headers, exr_version, file.data, file.size, err);
 }
 
+// ========================================================================
+// Refactored loader functions using Reader class for safer memory access
+// ========================================================================
+
+namespace {  // anonymous namespace for internal helpers
+
+// Parse EXR version header using Reader class
+static int ParseEXRVersionWithReader(EXRVersion *version, tinyexr::Reader& reader) {
+  if (version == NULL) {
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+
+  if (reader.length() < tinyexr::kEXRVersionSize) {
+    reader.add_error("Insufficient data size for EXR version header");
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  // Check magic number: 0x76, 0x2f, 0x31, 0x01
+  uint8_t header[4];
+  if (!reader.read(4, header)) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  const uint8_t expected_header[] = {0x76, 0x2f, 0x31, 0x01};
+  if (header[0] != expected_header[0] || header[1] != expected_header[1] ||
+      header[2] != expected_header[2] || header[3] != expected_header[3]) {
+    reader.add_error("Invalid EXR magic number");
+    return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
+  }
+
+  // Parse version byte (must be 2)
+  uint8_t version_byte;
+  if (!reader.read1(&version_byte)) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  if (version_byte != 2) {
+    reader.add_error("Invalid EXR version");
+    return TINYEXR_ERROR_INVALID_EXR_VERSION;
+  }
+
+  version->version = 2;
+
+  // Parse flags byte
+  uint8_t flags;
+  if (!reader.read1(&flags)) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  version->tiled = (flags & 0x2) ? true : false;       // 9th bit
+  version->long_name = (flags & 0x4) ? true : false;   // 10th bit
+  version->non_image = (flags & 0x8) ? true : false;   // 11th bit (deep image)
+  version->multipart = (flags & 0x10) ? true : false;  // 12th bit
+
+  // Skip remaining 2 bytes to complete the 8-byte version header
+  uint8_t dummy[2];
+  if (!reader.read(2, dummy)) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+}  // anonymous namespace
+
 int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory,
                               size_t size) {
   if (version == NULL || memory == NULL) {
@@ -8797,51 +10449,13 @@ int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory,
     return TINYEXR_ERROR_INVALID_DATA;
   }
 
-  const unsigned char *marker = memory;
+  // Use Reader class for safer memory access
+  tinyexr::Reader reader(memory, size, tinyexr::Endian::Little);
+  int ret = ParseEXRVersionWithReader(version, reader);
 
-  // Header check.
-  {
-    const char header[] = {0x76, 0x2f, 0x31, 0x01};
-
-    if (memcmp(marker, header, 4) != 0) {
-      return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
-    }
-    marker += 4;
-  }
-
-  version->tiled = false;
-  version->long_name = false;
-  version->non_image = false;
-  version->multipart = false;
-
-  // Parse version header.
-  {
-    // must be 2
-    if (marker[0] != 2) {
-      return TINYEXR_ERROR_INVALID_EXR_VERSION;
-    }
-
-    if (version == NULL) {
-      return TINYEXR_SUCCESS;  // May OK
-    }
-
-    version->version = 2;
-
-    if (marker[1] & 0x2) {  // 9th bit
-      version->tiled = true;
-    }
-    if (marker[1] & 0x4) {  // 10th bit
-      version->long_name = true;
-    }
-    if (marker[1] & 0x8) {        // 11th bit
-      version->non_image = true;  // (deep image)
-    }
-    if (marker[1] & 0x10) {  // 12th bit
-      version->multipart = true;
-    }
-  }
-
-  return TINYEXR_SUCCESS;
+  // Note: errors are accumulated in reader.errors() but not propagated
+  // to maintain compatibility with existing API
+  return ret;
 }
 
 int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) {
@@ -9035,7 +10649,7 @@ int LoadEXRMultipartImageFromFile(EXRImage *exr_images,
 }
 
 int SaveEXRToMemory(const float *data, int width, int height, int components,
-            const int save_as_fp16, const unsigned char **outbuf, const char **err) {
+            const int save_as_fp16, unsigned char **outbuf, const char **err) {
 
   if ((components == 1) || components == 3 || components == 4) {
     // OK
@@ -9074,13 +10688,19 @@ int SaveEXRToMemory(const float *data, int width, int height, int components,
     images[3].resize(static_cast<size_t>(width * height));
 
     // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers
-    for (size_t i = 0; i < static_cast<size_t>(width * height); i++) {
-      images[0][i] = data[static_cast<size_t>(components) * i + 0];
-      images[1][i] = data[static_cast<size_t>(components) * i + 1];
-      images[2][i] = data[static_cast<size_t>(components) * i + 2];
-      if (components == 4) {
-        images[3][i] = data[static_cast<size_t>(components) * i + 3];
-      }
+    if (components == 4) {
+        for (size_t i = 0; i < static_cast<size_t>(width * height); i++) {
+          images[0][i] = data[static_cast<size_t>(components) * i + 0];
+          images[1][i] = data[static_cast<size_t>(components) * i + 1];
+          images[2][i] = data[static_cast<size_t>(components) * i + 2];
+          images[3][i] = data[static_cast<size_t>(components) * i + 3];
+        }
+    } else {
+       for (size_t i = 0; i < static_cast<size_t>(width * height); i++) {
+          images[0][i] = data[static_cast<size_t>(components) * i + 0];
+          images[1][i] = data[static_cast<size_t>(components) * i + 1];
+          images[2][i] = data[static_cast<size_t>(components) * i + 2];
+        }
     }
   }
 
@@ -9225,13 +10845,19 @@ int SaveEXR(const float *data, int width, int height, int components,
     images[3].resize(pixel_count);
 
     // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers
-    for (size_t i = 0; i < pixel_count; i++) {
-      images[0][i] = data[static_cast<size_t>(components) * i + 0];
-      images[1][i] = data[static_cast<size_t>(components) * i + 1];
-      images[2][i] = data[static_cast<size_t>(components) * i + 2];
-      if (components == 4) {
-        images[3][i] = data[static_cast<size_t>(components) * i + 3];
-      }
+    if (components == 4) {
+       for (size_t i = 0; i < pixel_count; i++) {
+         images[0][i] = data[static_cast<size_t>(components) * i + 0];
+         images[1][i] = data[static_cast<size_t>(components) * i + 1];
+         images[2][i] = data[static_cast<size_t>(components) * i + 2];
+         images[3][i] = data[static_cast<size_t>(components) * i + 3];
+       }
+    } else {
+       for (size_t i = 0; i < pixel_count; i++) {
+         images[0][i] = data[static_cast<size_t>(components) * i + 0];
+         images[1][i] = data[static_cast<size_t>(components) * i + 1];
+         images[2][i] = data[static_cast<size_t>(components) * i + 2];
+       }
     }
   }
 
@@ -9314,9 +10940,6 @@ int SaveEXR(const float *data, int width, int height, int components,
   }
 
   int ret = SaveEXRImageToFile(&image, &header, outfilename, err);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
 
   free(header.channels);
   free(header.pixel_types);
@@ -9325,6 +10948,394 @@ int SaveEXR(const float *data, int width, int height, int components,
   return ret;
 }
 
+// ----------------------------------------------------------------
+// Spectral EXR API implementations
+// ----------------------------------------------------------------
+
+// Helper to format wavelength with comma as decimal separator (European convention)
+void EXRFormatWavelength(char *buffer, size_t buffer_size, float wavelength_nm) {
+  if (!buffer || buffer_size < 16) return;
+
+  // Format with 6 decimal places
+  int whole = static_cast<int>(wavelength_nm);
+  int frac = static_cast<int>((wavelength_nm - whole) * 1000000.0f + 0.5f);
+
+#ifdef _MSC_VER
+  sprintf_s(buffer, buffer_size, "%d,%06d", whole, frac);
+#else
+  snprintf(buffer, buffer_size, "%d,%06d", whole, frac);
+#endif
+}
+
+// Create spectral channel name for emissive spectrum
+void EXRSpectralChannelName(char *buffer, size_t buffer_size,
+                            float wavelength_nm, int stokes_component) {
+  if (!buffer || buffer_size < 32) return;
+
+  char wavelength_str[32];
+  EXRFormatWavelength(wavelength_str, sizeof(wavelength_str), wavelength_nm);
+
+#ifdef _MSC_VER
+  sprintf_s(buffer, buffer_size, "S%d.%snm", stokes_component, wavelength_str);
+#else
+  snprintf(buffer, buffer_size, "S%d.%snm", stokes_component, wavelength_str);
+#endif
+}
+
+// Create spectral channel name for reflective spectrum
+void EXRReflectiveChannelName(char *buffer, size_t buffer_size,
+                              float wavelength_nm) {
+  if (!buffer || buffer_size < 32) return;
+
+  char wavelength_str[32];
+  EXRFormatWavelength(wavelength_str, sizeof(wavelength_str), wavelength_nm);
+
+#ifdef _MSC_VER
+  sprintf_s(buffer, buffer_size, "T.%snm", wavelength_str);
+#else
+  snprintf(buffer, buffer_size, "T.%snm", wavelength_str);
+#endif
+}
+
+// Parse wavelength from spectral channel name
+float EXRParseSpectralChannelWavelength(const char *channel_name) {
+  if (!channel_name) return -1.0f;
+
+  const char *p = channel_name;
+
+  // Skip prefix: "S{n}." or "T."
+  if (*p == 'S' && p[1] >= '0' && p[1] <= '3' && p[2] == '.') {
+    p += 3;
+  } else if (*p == 'T' && p[1] == '.') {
+    p += 2;
+  } else {
+    return -1.0f;
+  }
+
+  // Parse wavelength with comma as decimal separator
+  // Format: "550,000000nm"
+  char wavelength_str[64];
+  size_t len = 0;
+  while (*p && *p != 'n' && len < sizeof(wavelength_str) - 1) {
+    wavelength_str[len++] = (*p == ',') ? '.' : *p;
+    p++;
+  }
+  wavelength_str[len] = '\0';
+
+  // Check for "nm" suffix
+  if (*p != 'n' || p[1] != 'm') {
+    return -1.0f;
+  }
+
+  return static_cast<float>(atof(wavelength_str));
+}
+
+// Get Stokes component from channel name
+int EXRGetStokesComponent(const char *channel_name) {
+  if (!channel_name) return -1;
+
+  if (channel_name[0] == 'S' &&
+      channel_name[1] >= '0' && channel_name[1] <= '3' &&
+      channel_name[2] == '.') {
+    return channel_name[1] - '0';
+  }
+
+  return -1;
+}
+
+// Check if channel name is a spectral channel
+int EXRIsSpectralChannel(const char *channel_name) {
+  if (!channel_name) return 0;
+
+  // Check for "S{n}.{wavelength}nm" pattern
+  if (channel_name[0] == 'S' &&
+      channel_name[1] >= '0' && channel_name[1] <= '3' &&
+      channel_name[2] == '.') {
+    return EXRParseSpectralChannelWavelength(channel_name) > 0.0f ? 1 : 0;
+  }
+
+  // Check for "T.{wavelength}nm" pattern
+  if (channel_name[0] == 'T' && channel_name[1] == '.') {
+    return EXRParseSpectralChannelWavelength(channel_name) > 0.0f ? 1 : 0;
+  }
+
+  return 0;
+}
+
+// Helper to find custom attribute by name
+static const EXRAttribute* FindCustomAttribute(const EXRHeader *exr_header,
+                                                const char *name) {
+  if (!exr_header || !name) return NULL;
+
+  for (int i = 0; i < exr_header->num_custom_attributes; i++) {
+    if (strcmp(exr_header->custom_attributes[i].name, name) == 0) {
+      return &exr_header->custom_attributes[i];
+    }
+  }
+  return NULL;
+}
+
+// Get spectrum type from EXR header
+int EXRGetSpectrumType(const EXRHeader *exr_header) {
+  if (!exr_header) return -1;
+
+  // Check for spectralLayoutVersion attribute
+  const EXRAttribute *layout_attr = FindCustomAttribute(exr_header, "spectralLayoutVersion");
+  if (!layout_attr) return -1;
+
+  // Check channel names to determine type
+  int has_stokes = 0;
+  int has_reflective = 0;
+  int has_emissive = 0;
+
+  for (int i = 0; i < exr_header->num_channels; i++) {
+    const char *name = exr_header->channels[i].name;
+    if (name[0] == 'T' && name[1] == '.') {
+      has_reflective = 1;
+    } else if (name[0] == 'S' && name[1] >= '0' && name[1] <= '3' && name[2] == '.') {
+      has_emissive = 1;
+      if (name[1] != '0') {
+        has_stokes = 1;
+      }
+    }
+  }
+
+  if (has_reflective) return TINYEXR_SPECTRUM_REFLECTIVE;
+  if (has_stokes) return TINYEXR_SPECTRUM_POLARISED;
+  if (has_emissive) return TINYEXR_SPECTRUM_EMISSIVE;
+
+  return -1;
+}
+
+// Get wavelengths from EXR header channels
+int EXRGetWavelengths(const EXRHeader *exr_header,
+                      float *wavelengths, int max_wavelengths) {
+  if (!exr_header || !wavelengths || max_wavelengths <= 0) return 0;
+
+  int count = 0;
+
+  for (int i = 0; i < exr_header->num_channels && count < max_wavelengths; i++) {
+    float wl = EXRParseSpectralChannelWavelength(exr_header->channels[i].name);
+    if (wl > 0.0f) {
+      // Check if wavelength already in list
+      int found = 0;
+      for (int j = 0; j < count; j++) {
+        if (std::fabs(static_cast<double>(wavelengths[j] - wl)) < 0.01) {
+          found = 1;
+          break;
+        }
+      }
+      if (!found) {
+        wavelengths[count++] = wl;
+      }
+    }
+  }
+
+  // Sort wavelengths
+  for (int i = 0; i < count - 1; i++) {
+    for (int j = i + 1; j < count; j++) {
+      if (wavelengths[i] > wavelengths[j]) {
+        float tmp = wavelengths[i];
+        wavelengths[i] = wavelengths[j];
+        wavelengths[j] = tmp;
+      }
+    }
+  }
+
+  return count;
+}
+
+// Get spectral units from EXR header
+const char* EXRGetSpectralUnits(const EXRHeader *exr_header) {
+  if (!exr_header) return NULL;
+
+  // Try ROOT/units first (spectral-exr format)
+  const EXRAttribute *attr = FindCustomAttribute(exr_header, "ROOT/units");
+  if (attr && attr->value && attr->size > 0) {
+    return reinterpret_cast<const char*>(attr->value);
+  }
+
+  // Try emissiveUnits
+  attr = FindCustomAttribute(exr_header, "emissiveUnits");
+  if (attr && attr->value && attr->size > 0) {
+    return reinterpret_cast<const char*>(attr->value);
+  }
+
+  return NULL;
+}
+
+// Helper to add a string attribute
+static int AddStringAttribute(EXRHeader *exr_header, const char *name, const char *value) {
+  if (!exr_header || !name || !value) return TINYEXR_ERROR_INVALID_ARGUMENT;
+
+  int new_count = exr_header->num_custom_attributes + 1;
+  if (new_count > TINYEXR_MAX_CUSTOM_ATTRIBUTES) {
+    return TINYEXR_ERROR_DATA_TOO_LARGE;
+  }
+
+  // Reallocate attributes array
+  EXRAttribute *new_attrs = static_cast<EXRAttribute*>(
+      realloc(exr_header->custom_attributes,
+              sizeof(EXRAttribute) * static_cast<size_t>(new_count)));
+  if (!new_attrs) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  exr_header->custom_attributes = new_attrs;
+  EXRAttribute *attr = &exr_header->custom_attributes[exr_header->num_custom_attributes];
+
+  // Initialize the new attribute
+  memset(attr, 0, sizeof(EXRAttribute));
+
+#ifdef _MSC_VER
+  strncpy_s(attr->name, sizeof(attr->name), name, 255);
+  strncpy_s(attr->type, sizeof(attr->type), "string", 255);
+#else
+  strncpy(attr->name, name, 255);
+  attr->name[255] = '\0';
+  strncpy(attr->type, "string", 255);
+  attr->type[255] = '\0';
+#endif
+
+  size_t value_len = strlen(value) + 1;  // Include null terminator
+  attr->value = static_cast<unsigned char*>(malloc(value_len));
+  if (!attr->value) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  memcpy(attr->value, value, value_len);
+  attr->size = static_cast<int>(value_len);
+
+  exr_header->num_custom_attributes = new_count;
+
+  return TINYEXR_SUCCESS;
+}
+
+// Helper to add an int attribute
+static int AddIntAttribute(EXRHeader *exr_header, const char *name, int value) {
+  if (!exr_header || !name) return TINYEXR_ERROR_INVALID_ARGUMENT;
+
+  int new_count = exr_header->num_custom_attributes + 1;
+  if (new_count > TINYEXR_MAX_CUSTOM_ATTRIBUTES) {
+    return TINYEXR_ERROR_DATA_TOO_LARGE;
+  }
+
+  // Reallocate attributes array
+  EXRAttribute *new_attrs = static_cast<EXRAttribute*>(
+      realloc(exr_header->custom_attributes,
+              sizeof(EXRAttribute) * static_cast<size_t>(new_count)));
+  if (!new_attrs) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  exr_header->custom_attributes = new_attrs;
+  EXRAttribute *attr = &exr_header->custom_attributes[exr_header->num_custom_attributes];
+
+  // Initialize the new attribute
+  memset(attr, 0, sizeof(EXRAttribute));
+
+#ifdef _MSC_VER
+  strncpy_s(attr->name, sizeof(attr->name), name, 255);
+  strncpy_s(attr->type, sizeof(attr->type), "int", 255);
+#else
+  strncpy(attr->name, name, 255);
+  attr->name[255] = '\0';
+  strncpy(attr->type, "int", 255);
+  attr->type[255] = '\0';
+#endif
+
+  attr->value = static_cast<unsigned char*>(malloc(sizeof(int)));
+  if (!attr->value) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  memcpy(attr->value, &value, sizeof(int));
+  attr->size = sizeof(int);
+
+  exr_header->num_custom_attributes = new_count;
+
+  return TINYEXR_SUCCESS;
+}
+
+// Set spectral attributes on EXR header
+int EXRSetSpectralAttributes(EXRHeader *exr_header,
+                             int spectrum_type,
+                             const char *units) {
+  if (!exr_header) return TINYEXR_ERROR_INVALID_ARGUMENT;
+
+  int ret;
+
+  // Add spectralLayoutVersion (always "1.0")
+  ret = AddStringAttribute(exr_header, "spectralLayoutVersion", "1.0");
+  if (ret != TINYEXR_SUCCESS) return ret;
+
+  // Add units attribute based on spectrum type
+  if (units && strlen(units) > 0) {
+    if (spectrum_type == TINYEXR_SPECTRUM_REFLECTIVE) {
+      ret = AddStringAttribute(exr_header, "ROOT/units", units);
+    } else {
+      ret = AddStringAttribute(exr_header, "emissiveUnits", units);
+    }
+    if (ret != TINYEXR_SUCCESS) return ret;
+  }
+
+  // Add polarisation handedness for polarised images
+  if (spectrum_type == TINYEXR_SPECTRUM_POLARISED) {
+    ret = AddStringAttribute(exr_header, "polarisationHandedness", "left");
+    if (ret != TINYEXR_SUCCESS) return ret;
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+// Check if file contains spectral data
+int IsSpectralEXR(const char *filename) {
+  EXRVersion version;
+  int ret = ParseEXRVersionFromFile(&version, filename);
+  if (ret != TINYEXR_SUCCESS) return ret;
+
+  const char *err = NULL;
+  EXRHeader header;
+  InitEXRHeader(&header);
+
+  ret = ParseEXRHeaderFromFile(&header, &version, filename, &err);
+  if (ret != TINYEXR_SUCCESS) {
+    if (err) FreeEXRErrorMessage(err);
+    return ret;
+  }
+
+  // Check for spectralLayoutVersion attribute
+  int is_spectral = (FindCustomAttribute(&header, "spectralLayoutVersion") != NULL);
+
+  FreeEXRHeader(&header);
+
+  return is_spectral ? TINYEXR_SUCCESS : TINYEXR_ERROR_INVALID_DATA;
+}
+
+// Check if memory contains spectral EXR data
+int IsSpectralEXRFromMemory(const unsigned char *memory, size_t size) {
+  if (!memory || size < 8) return TINYEXR_ERROR_INVALID_DATA;
+
+  EXRVersion version;
+  int ret = ParseEXRVersionFromMemory(&version, memory, size);
+  if (ret != TINYEXR_SUCCESS) return ret;
+
+  const char *err = NULL;
+  EXRHeader header;
+  InitEXRHeader(&header);
+
+  ret = ParseEXRHeaderFromMemory(&header, &version, memory, size, &err);
+  if (ret != TINYEXR_SUCCESS) {
+    if (err) FreeEXRErrorMessage(err);
+    return ret;
+  }
+
+  // Check for spectralLayoutVersion attribute
+  int is_spectral = (FindCustomAttribute(&header, "spectralLayoutVersion") != NULL);
+
+  FreeEXRHeader(&header);
+
+  return is_spectral ? TINYEXR_SUCCESS : TINYEXR_ERROR_INVALID_DATA;
+}
+
 #ifdef __clang__
 // zero-as-null-pointer-constant
 #pragma clang diagnostic pop
diff --git a/encoder/basisu_enc.cpp b/encoder/basisu_enc.cpp
index 83631b9..e92b4b8 100644
--- a/encoder/basisu_enc.cpp
+++ b/encoder/basisu_enc.cpp
@@ -28,9 +28,7 @@
 
 #include <vector>
 
-#ifndef TINYEXR_USE_ZFP
-#define TINYEXR_USE_ZFP (1)
-#endif
+#define TINYEXR_USE_MINIZ (0)
 #include "3rdparty/tinyexr.h"
 
 #ifndef MINIZ_HEADER_FILE_ONLY
@@ -3439,7 +3437,7 @@ namespace basisu
 			return false;
 		return write_vec_to_file(pFilename, file_data);
 	}
-		
+
 	bool read_exr(const char* pFilename, imagef& img, int& n_chans)
 	{
 		n_chans = 0;
@@ -3447,8 +3445,8 @@ namespace basisu
 		int width = 0, height = 0;
 		float* out_rgba = nullptr;
 		const char* err = nullptr;
-		
-		int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err, &n_chans);
+
+		int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err);
 		if (status != 0)
 		{
 			error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? err : "?");
@@ -3457,7 +3455,7 @@ namespace basisu
 			return false;
 		}
 
-		const uint32_t MAX_SUPPORTED_DIM = 65536;
+		const uint32_t MAX_SUPPORTED_DIM = 32768;
 		if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM))
 		{
 			error_printf("Invalid dimensions of .EXR image \"%s\"!\n", pFilename);
@@ -3466,32 +3464,60 @@ namespace basisu
 		}
 
 		img.resize(width, height);
+
+		memcpy((void*)img.get_ptr(), out_rgba, static_cast<size_t>(sizeof(float) * 4 * img.get_total_pixels()));
 		
-		if (n_chans == 1)
+		free(out_rgba);
+		out_rgba = nullptr;
+
+		uint32_t total_all_same_rgba = 0, total_all_same_rgb = 0, total_has_alpha = 0;
+
+		for (int y = 0; y < height; y++)
 		{
-			const float* pSrc = out_rgba;
-			vec4F* pDst = img.get_ptr();
-
-			for (int y = 0; y < height; y++)
+			for (int x = 0; x < width; x++)
 			{
-				for (int x = 0; x < width; x++)
-				{
-					(*pDst)[0] = pSrc[0];
-					(*pDst)[1] = pSrc[1];
-					(*pDst)[2] = pSrc[2];
-					(*pDst)[3] = 1.0f;
+				const vec4F& p = img(x, y);
 
-					pSrc += 4;
-					++pDst;
-				}
-			}
+				if ((p[0] == p[1]) && (p[0] == p[2]))
+					total_all_same_rgb++;
+
+				const float a = p[3];
+
+				if ((a == p[0]) && (a == p[1]) && (a == p[2]))
+					total_all_same_rgba++;
+								
+				if (a != 1.0f)
+					total_has_alpha++;
+
+			} // x
+		} // y
+
+		const uint32_t total_pixels = width * height;
+		if (total_all_same_rgba == total_pixels)
+		{
+			// TinyEXR loads single channel EXR images into all output channels (including alpha) - assume they are luminance and fix our alpha.
+			// Odds are this is an opaque luminance-only image, not a true alpha channel image. (As of early 2026 we don't support any HDR format with alpha, anyway.)
+			for (int y = 0; y < height; y++)
+				for (int x = 0; x < width; x++)
+					img(x, y)[3] = 1.0f;
+			
+			n_chans = 1;
+		}
+		else if (total_has_alpha)
+		{
+			n_chans = 4;
+		}
+		else if (total_all_same_rgb == total_pixels)
+		{
+			n_chans = 1;
 		}
 		else
 		{
-			memcpy((void *)img.get_ptr(), out_rgba, static_cast<size_t>(sizeof(float) * 4 * img.get_total_pixels()));
+			n_chans = 3;
 		}
 
-		free(out_rgba);
+		//fmt_printf("Number of detected EXR channels: {}\n", n_chans);
+				
 		return true;
 	}
 
@@ -3513,6 +3539,8 @@ namespace basisu
 		memcpy((void *)img.get_ptr(), out_rgba, width * height * sizeof(float) * 4);
 		free(out_rgba);
 
+		// TODO: detect luminance-only etc.
+
 		return true;
 	}
 
diff --git a/encoder/basisu_tinyexr.cpp b/encoder/basisu_tinyexr.cpp
index 9606914..6d3828a 100644
--- a/encoder/basisu_tinyexr.cpp
+++ b/encoder/basisu_tinyexr.cpp
@@ -5,34 +5,27 @@
 #endif
 #endif
 
+// Pull in our local fork of the miniz library. (Binomial wrote the original miniz library. Basisu was tested with this specific version.)
 #define MINIZ_HEADER_FILE_ONLY
 #define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
 #include "basisu_miniz.h"
 
-// Force tinyexr to use zlib-style compression API's, then we'll direct them to our own customized copy of miniz. (Binomial wrote the original miniz library.)
-// This allows us to use tinyexr.h without modify it at all, or relying on zlib.
+// A bit of a hack to force tinyexr to use plain zlib-style compression API's, then we'll direct them to our own customized copy of miniz with #define's.
+// This allows us to use tinyexr.h without modifying it at all, or relying on zlib, or pulling in a system-wide miniz dependency. 
+// This assumes tinyexr.h doesn't include zlib.h (it doesn't: "Please include your own zlib-compatible API header before...")
+// (Time will tell how fragile this is in reality.)
 #define TINYEXR_USE_MINIZ (0)
 
-enum { Z_OK = 0, Z_STREAM_END = 1, Z_NEED_DICT = 2, Z_ERRNO = -1, Z_STREAM_ERROR = -2, Z_DATA_ERROR = -3, Z_MEM_ERROR = -4, Z_BUF_ERROR = -5, Z_VERSION_ERROR = -6, Z_PARAM_ERROR = -10000 };
-typedef unsigned long uLongf;
-typedef unsigned long uLong;
+#define Z_OK buminiz::MZ_OK
+#define uLong buminiz::mz_ulong
+#define uLongf buminiz::mz_ulong
+
 typedef unsigned char Byte;
 typedef Byte Bytef;
 
-uLong compressBound(uLong src_size)
-{
-    return buminiz::mz_compressBound(src_size);
-}
-
-int compress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLen)
-{
-    return buminiz::mz_compress(dest, destLen, source, sourceLen);
-}
-
-int uncompress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLen)
-{
-    return buminiz::mz_uncompress(dest, destLen, source, sourceLen);
-}
+#define compressBound	buminiz::mz_compressBound
+#define compress		buminiz::mz_compress
+#define uncompress		buminiz::mz_uncompress
 
 #ifdef _MSC_VER
 #pragma warning (disable: 4060)
@@ -40,6 +33,7 @@ int uncompress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLe
 #pragma warning (disable: 4245)
 #pragma warning (disable: 4505)
 #pragma warning (disable: 4702)
+#pragma warning (disable: 4530) // warning C4530: C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc
 #endif
 
 #define TINYEXR_IMPLEMENTATION
diff --git a/encoder_lib/encoder_lib.vcxproj b/encoder_lib/encoder_lib.vcxproj
index 6318f21..9d4c1b9 100644
--- a/encoder_lib/encoder_lib.vcxproj
+++ b/encoder_lib/encoder_lib.vcxproj
@@ -28,11 +28,11 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\encoder\3rdparty\android_astc_decomp.cpp" />
-    <ClCompile Include="..\encoder\3rdparty\tinyexr.cpp" />
     <ClCompile Include="..\encoder\basisu_astc_hdr_6x6_enc.cpp" />
     <ClCompile Include="..\encoder\basisu_astc_hdr_common.cpp" />
     <ClCompile Include="..\encoder\basisu_astc_ldr_common.cpp" />
     <ClCompile Include="..\encoder\basisu_astc_ldr_encode.cpp" />
+    <ClCompile Include="..\encoder\basisu_tinyexr.cpp" />
     <ClCompile Include="..\encoder\basisu_uastc_hdr_4x4_enc.cpp" />
     <ClCompile Include="..\encoder\basisu_backend.cpp" />
     <ClCompile Include="..\encoder\basisu_basis_file.cpp" />
diff --git a/encoder_lib/encoder_lib.vcxproj.filters b/encoder_lib/encoder_lib.vcxproj.filters
index 80e8fd0..8d0ad48 100644
--- a/encoder_lib/encoder_lib.vcxproj.filters
+++ b/encoder_lib/encoder_lib.vcxproj.filters
@@ -84,9 +84,6 @@
     <ClCompile Include="..\encoder\pvpngreader.cpp">
       <Filter>Source Files\encoder</Filter>
     </ClCompile>
-    <ClCompile Include="..\encoder\3rdparty\tinyexr.cpp">
-      <Filter>Source Files\encoder\3rdparty</Filter>
-    </ClCompile>
     <ClCompile Include="..\zstd\zstd.c">
       <Filter>Source Files\encoder\3rdparty</Filter>
     </ClCompile>
@@ -105,6 +102,9 @@
     <ClCompile Include="..\encoder\basisu_astc_ldr_encode.cpp">
       <Filter>Source Files\encoder</Filter>
     </ClCompile>
+    <ClCompile Include="..\encoder\basisu_tinyexr.cpp">
+      <Filter>Source Files\encoder</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\transcoder\basisu_astc_helpers.h">
diff --git a/webgl/encoder/CMakeLists.txt b/webgl/encoder/CMakeLists.txt
index 91dcd3d..8027b8e 100644
--- a/webgl/encoder/CMakeLists.txt
+++ b/webgl/encoder/CMakeLists.txt
@@ -67,7 +67,7 @@ if(EMSCRIPTEN)
     ../../encoder/basisu_astc_hdr_common.cpp
     ../../encoder/basisu_astc_ldr_common.cpp
     ../../encoder/basisu_astc_ldr_encode.cpp
-    ../../encoder/3rdparty/tinyexr.cpp
+    ../../encoder/basisu_tinyexr.cpp
   )
   if(KTX2_ZSTANDARD)
     list(APPEND SRC_LIST ../../zstd/zstd.c)