diff --git a/CMakeLists.txt b/CMakeLists.txt
index a91f23b..23ccb14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -247,7 +247,7 @@ set(ENCODER_LIB_SRC_LIST
     encoder/basisu_astc_hdr_common.cpp
     encoder/basisu_astc_ldr_common.cpp
     encoder/basisu_astc_ldr_encode.cpp
-    encoder/3rdparty/tinyexr.cpp
+    encoder/basisu_tinyexr.cpp
     transcoder/basisu_transcoder.cpp
     encoder/basisu_astc_hdr_6x6_enc.h
     encoder/basisu_astc_hdr_common.h
diff --git a/encoder/3rdparty/tinyexr.h b/encoder/3rdparty/tinyexr.h
index 2b759ee..dfd7164 100644
--- a/encoder/3rdparty/tinyexr.h
+++ b/encoder/3rdparty/tinyexr.h
@@ -1,5 +1,3 @@
-// rg 8/23/2024: I fixed some minor undefined behavior in this module (signed 32-bit left shifts).
-
 #ifndef TINYEXR_H_
 #define TINYEXR_H_
 /*
@@ -109,9 +107,6 @@ extern "C" {
 // required if this flag is 0 and TINYEXR_USE_STB_ZLIB is 0.
 #ifndef TINYEXR_USE_MINIZ
 #define TINYEXR_USE_MINIZ (1)
-#ifndef MINIZ_HEADER_FILE_ONLY
-#define MINIZ_HEADER_FILE_ONLY (1)
-#endif
 #endif
 
 // Use the ZIP implementation of stb_image.h and stb_image_write.h.
@@ -136,7 +131,11 @@ extern "C" {
 
 #ifndef TINYEXR_USE_THREAD
 #define TINYEXR_USE_THREAD (0)  // No threaded loading.
-// http://computation.llnl.gov/projects/floating-point-compression
+#else
+// When using threading a reduced custom upperbound can be specified by setting TINYEXR_MAX_THREADS
+#ifndef TINYEXR_MAX_THREADS // if not defined define it as 0 meaning upper limit is taken from hardware_concurrency()
+#define TINYEXR_MAX_THREADS (0)
+#endif
 #endif
 
 #ifndef TINYEXR_USE_OPENMP
@@ -147,6 +146,41 @@ extern "C" {
 #endif
 #endif
 
+#ifndef TINYEXR_USE_COMPILER_FP16
+#define TINYEXR_USE_COMPILER_FP16 (0)
+#endif
+
+#if TINYEXR_USE_COMPILER_FP16
+#ifndef _MSC_VER
+#if defined( __GNUC__ ) || defined( __clang__ )
+#if defined( __SSE2__ )
+#if ( __GNUC__ > 11 ) || ( __clang_major__ > 14 )
+#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
+#define __STDC_WANT_IEC_60559_TYPES_EXT__
+#endif
+#include <float.h>
+#include <math.h>
+#define TINYEXR_FP16_COMPILER_TYPE _Float16
+#endif
+#endif
+#if defined( __ARM_NEON__ ) || defined( __ARM_NEON )
+#define TINYEXR_FP16_COMPILER_TYPE __fp16
+#endif
+#endif
+#else
+#if (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
+#include <intrin.h>
+#define TINYEXR_FP16_COMPILER_TYPE uint16_t
+#endif
+#endif
+#endif
+
+#ifdef TINYEXR_FP16_COMPILER_TYPE
+#define TINYEXR_HAS_FP16_COMPILER_TYPE (1)
+#else
+#define TINYEXR_HAS_FP16_COMPILER_TYPE (0)
+#endif
+
 #define TINYEXR_SUCCESS (0)
 #define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1)
 #define TINYEXR_ERROR_INVALID_EXR_VERSION (-2)
@@ -178,6 +212,11 @@ extern "C" {
 #define TINYEXR_COMPRESSIONTYPE_ZIPS (2)
 #define TINYEXR_COMPRESSIONTYPE_ZIP (3)
 #define TINYEXR_COMPRESSIONTYPE_PIZ (4)
+#define TINYEXR_COMPRESSIONTYPE_PXR24 (5)
+#define TINYEXR_COMPRESSIONTYPE_B44 (6)
+#define TINYEXR_COMPRESSIONTYPE_B44A (7)
+#define TINYEXR_COMPRESSIONTYPE_DWAA (8)   // Not yet supported
+#define TINYEXR_COMPRESSIONTYPE_DWAB (9)   // Not yet supported
 #define TINYEXR_COMPRESSIONTYPE_ZFP (128)  // TinyEXR extension
 
 #define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0)
@@ -191,6 +230,12 @@ extern "C" {
 #define TINYEXR_TILE_ROUND_DOWN (0)
 #define TINYEXR_TILE_ROUND_UP (1)
 
+// Spectral EXR types (based on JCGT 2021 paper and spectral-exr format)
+// https://jcgt.org/published/0010/03/01/
+#define TINYEXR_SPECTRUM_REFLECTIVE (0)   // T.{wavelength}nm channels
+#define TINYEXR_SPECTRUM_EMISSIVE (1)     // S0.{wavelength}nm channels
+#define TINYEXR_SPECTRUM_POLARISED (2)    // S0-S3.{wavelength}nm channels
+
 typedef struct TEXRVersion {
   int version;    // this must be 2
   // tile format image;
@@ -346,7 +391,7 @@ extern int LoadEXR(float **out_rgba, int *width, int *height,
 // the function will return `TINYEXR_ERROR_LAYER_NOT_FOUND`.
 extern int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
                             const char *filename, const char *layer_name,
-                            const char **err, int *num_chans = NULL);
+                            const char **err);
 
 //
 // Get layer infos from EXR file.
@@ -391,7 +436,7 @@ extern int IsEXRFromMemory(const unsigned char *memory, size_t size);
 // error
 extern int SaveEXRToMemory(const float *data, const int width, const int height,
                    const int components, const int save_as_fp16,
-                   const unsigned char **buffer, const char **err);
+                   unsigned char **buffer, const char **err);
 
 // @deprecated { Not recommended, but handy to use. }
 // Saves single-frame OpenEXR image to a buffer. Assume EXR image contains RGB(A) channels.
@@ -599,6 +644,61 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
                              const unsigned char *memory, size_t size,
                              const char **err);
 
+// Spectral EXR API (based on JCGT 2021 paper and spectral-exr format)
+// https://jcgt.org/published/0010/03/01/
+// https://github.com/afichet/spectral-exr
+
+// Check if an EXR file contains spectral data (has spectralLayoutVersion attribute)
+// Returns TINYEXR_SUCCESS if spectral, TINYEXR_ERROR_INVALID_DATA if not
+extern int IsSpectralEXR(const char *filename);
+extern int IsSpectralEXRFromMemory(const unsigned char *memory, size_t size);
+
+// Get spectrum type from EXR header (TINYEXR_SPECTRUM_REFLECTIVE/EMISSIVE/POLARISED)
+// Returns spectrum type, or -1 if not a spectral EXR
+extern int EXRGetSpectrumType(const EXRHeader *exr_header);
+
+// Format wavelength with European decimal convention (comma as separator)
+// Output format: "550,000000" for 550.0nm
+// buffer must be at least 32 bytes
+extern void EXRFormatWavelength(char *buffer, size_t buffer_size, float wavelength_nm);
+
+// Create spectral channel name
+// For emissive: "S{stokes}.{wavelength}nm" (e.g., "S0.550,000000nm")
+// For reflective: "T.{wavelength}nm" (e.g., "T.550,000000nm")
+// buffer must be at least 64 bytes
+extern void EXRSpectralChannelName(char *buffer, size_t buffer_size,
+                                   float wavelength_nm, int stokes_component);
+extern void EXRReflectiveChannelName(char *buffer, size_t buffer_size,
+                                     float wavelength_nm);
+
+// Parse wavelength from spectral channel name
+// Returns wavelength in nm, or -1.0 if not a valid spectral channel name
+extern float EXRParseSpectralChannelWavelength(const char *channel_name);
+
+// Get Stokes component from channel name (0-3, or -1 if not polarised/invalid)
+extern int EXRGetStokesComponent(const char *channel_name);
+
+// Check if channel name is a spectral channel (S{n}.{wavelength}nm or T.{wavelength}nm)
+extern int EXRIsSpectralChannel(const char *channel_name);
+
+// Get wavelengths from EXR header channels
+// Returns number of unique wavelengths found
+// wavelengths array must be pre-allocated, max_wavelengths is its size
+extern int EXRGetWavelengths(const EXRHeader *exr_header,
+                             float *wavelengths, int max_wavelengths);
+
+// Helper to add spectral attributes to an EXRHeader
+// This sets spectralLayoutVersion and ROOT/units (or emissiveUnits for emissive)
+// spectrum_type: TINYEXR_SPECTRUM_REFLECTIVE, EMISSIVE, or POLARISED
+// units: unit string (e.g., "W.m^-2.sr^-1.nm^-1" for radiance)
+extern int EXRSetSpectralAttributes(EXRHeader *exr_header,
+                                    int spectrum_type,
+                                    const char *units);
+
+// Get spectral units from EXR header
+// Returns NULL if not found, otherwise pointer to units string (valid until header is freed)
+extern const char* EXRGetSpectralUnits(const EXRHeader *exr_header);
+
 #ifdef __cplusplus
 }
 #endif
@@ -632,6 +732,7 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
 #endif
 
 #include <algorithm>
+#include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -644,6 +745,9 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
 #include <vector>
 #include <set>
 
+// Include Reader class with error stack for safer memory reading
+#include "exr_reader.hh"
+
 // https://stackoverflow.com/questions/5047971/how-do-i-check-for-c11-support
 #if __cplusplus > 199711L || (defined(_MSC_VER) && _MSC_VER >= 1900)
 #define TINYEXR_HAS_CXX11 (1)
@@ -664,7 +768,7 @@ extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
 #endif
 
 #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
-#include "../basisu_miniz.h"
+#include <miniz.h>
 #else
 //  Issue #46. Please include your own zlib-compatible API header before
 //  including `tinyexr.h`
@@ -776,7 +880,7 @@ static void SetWarningMessage(const std::string &msg, const char **warn) {
 
 static const int kEXRVersionSize = 8;
 
-static void cpy2(unsigned short *dst_val, const unsigned short *src_val) {
+static void inline cpy2(unsigned short *dst_val, const unsigned short *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -784,7 +888,7 @@ static void cpy2(unsigned short *dst_val, const unsigned short *src_val) {
   dst[1] = src[1];
 }
 
-static void swap2(unsigned short *val) {
+static void inline swap2(unsigned short *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -806,7 +910,7 @@ static void swap2(unsigned short *val) {
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"
 #endif
-static void cpy4(int *dst_val, const int *src_val) {
+static void inline cpy4(int *dst_val, const int *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -816,7 +920,7 @@ static void cpy4(int *dst_val, const int *src_val) {
   dst[3] = src[3];
 }
 
-static void cpy4(unsigned int *dst_val, const unsigned int *src_val) {
+static void inline cpy4(unsigned int *dst_val, const unsigned int *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -826,7 +930,7 @@ static void cpy4(unsigned int *dst_val, const unsigned int *src_val) {
   dst[3] = src[3];
 }
 
-static void cpy4(float *dst_val, const float *src_val) {
+static void inline cpy4(float *dst_val, const float *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -843,7 +947,7 @@ static void cpy4(float *dst_val, const float *src_val) {
 #pragma GCC diagnostic pop
 #endif
 
-static void swap4(unsigned int *val) {
+static void inline swap4(unsigned int *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -858,7 +962,7 @@ static void swap4(unsigned int *val) {
 #endif
 }
 
-static void swap4(int *val) {
+static void inline swap4(int *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -873,7 +977,7 @@ static void swap4(int *val) {
 #endif
 }
 
-static void swap4(float *val) {
+static void inline swap4(float *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -889,7 +993,7 @@ static void swap4(float *val) {
 }
 
 #if 0
-static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) {
+static void inline cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) {
   unsigned char *dst = reinterpret_cast<unsigned char *>(dst_val);
   const unsigned char *src = reinterpret_cast<const unsigned char *>(src_val);
 
@@ -904,7 +1008,7 @@ static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64
 }
 #endif
 
-static void swap8(tinyexr::tinyexr_uint64 *val) {
+static void inline swap8(tinyexr::tinyexr_uint64 *val) {
 #if TINYEXR_LITTLE_ENDIAN
   (void)val;
 #else
@@ -924,6 +1028,11 @@ static void swap8(tinyexr::tinyexr_uint64 *val) {
 }
 
 // https://gist.github.com/rygorous/2156668
+#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0)
+union FP32 {
+  float f;
+};
+#else
 union FP32 {
   unsigned int u;
   float f;
@@ -939,12 +1048,21 @@ union FP32 {
 #endif
   } s;
 };
+#endif
 
 #ifdef __clang__
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wpadded"
 #endif
 
+#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0)
+union FP16 {
+  TINYEXR_FP16_COMPILER_TYPE f;
+  unsigned short u;
+};
+
+#else
+
 union FP16 {
   unsigned short u;
   struct {
@@ -959,11 +1077,32 @@ union FP16 {
 #endif
   } s;
 };
+#endif
 
 #ifdef __clang__
 #pragma clang diagnostic pop
 #endif
 
+#if TINYEXR_HAS_FP16_COMPILER_TYPE && (TINYEXR_USE_COMPILER_FP16 > 0)
+static inline FP32 half_to_float(FP16 h) {  // half -> float using the compiler/hardware FP16 support selected above
+  FP32 o;
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
+   o.f =_mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(static_cast<int> (h.u))));  // convert the raw 16 bits with _mm_cvtph_ps, keep lane 0
+#else
+   o.f = static_cast<float> (h.f);  // native half type: a plain cast widens it
+#endif
+  return o;
+}
+static inline FP16 float_to_half_full(FP32 f) {  // float -> half using the compiler/hardware FP16 support selected above
+  FP16 o;
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
+  o.f  = static_cast<TINYEXR_FP16_COMPILER_TYPE> (_mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f.f), _MM_FROUND_CUR_DIRECTION)));  // lane 0 of _mm_cvtps_ph; rounding mode argument is _MM_FROUND_CUR_DIRECTION
+#else
+  o.f = static_cast<TINYEXR_FP16_COMPILER_TYPE> (f.f);  // native half type: a plain cast narrows it
+#endif
+  return o;
+}
+#else
 static FP32 half_to_float(FP16 h) {
   static const FP32 magic = {113 << 23};
   static const unsigned int shifted_exp = 0x7c00
@@ -1023,7 +1162,7 @@ static FP16 float_to_half_full(FP32 f) {
   o.s.Sign = f.s.Sign;
   return o;
 }
-
+#endif
 // NOTE: From OpenEXR code
 // #define IMF_INCREASING_Y  0
 // #define IMF_DECREASING_Y  1
@@ -1367,11 +1506,11 @@ static bool CompressZip(unsigned char *dst,
   // Compress the data using miniz
   //
 
-  buminiz::mz_ulong outSize = buminiz::mz_compressBound(src_size);
-  int ret = buminiz::mz_compress(
+  mz_ulong outSize = mz_compressBound(src_size);
+  int ret = mz_compress(
       dst, &outSize, static_cast<const unsigned char *>(&tmpBuf.at(0)),
       src_size);
-  if (ret != buminiz::MZ_OK) {
+  if (ret != MZ_OK) {
     return false;
   }
 
@@ -1396,7 +1535,7 @@ static bool CompressZip(unsigned char *dst,
 
   memcpy(dst, ret, outSize);
   free(ret);
-
+  
   compressedSize = outSize;
 #else
   uLong outSize = compressBound(static_cast<uLong>(src_size));
@@ -1431,8 +1570,8 @@ static bool DecompressZip(unsigned char *dst,
 
 #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
   int ret =
-      buminiz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
-  if (buminiz::MZ_OK != ret) {
+      mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size);
+  if (MZ_OK != ret) {
     return false;
   }
 #elif TINYEXR_USE_STB_ZLIB
@@ -2144,7 +2283,7 @@ inline void outputBits(int nBits, long long bits, long long &c, int &lc,
 
 inline long long getBits(int nBits, long long &c, int &lc, const char *&in) {
   while (lc < nBits) {
-    c = (long long)((unsigned long long)c << 8) | *(reinterpret_cast<const unsigned char *>(in++));
+    c = static_cast<long long>(static_cast<unsigned long long>(c) << 8) | *(reinterpret_cast<const unsigned char *>(in++));
     lc += 8;
   }
 
@@ -2720,7 +2859,7 @@ static int hufEncode            // return: output size (in bits)
 
 #define getChar(c, lc, in)                   \
   {                                          \
-    c = ((unsigned long long)c << 8) | *(unsigned char *)(in++); \
+    c = (static_cast<unsigned long long>(c) << 8) | *(unsigned char *)(in++); \
     lc += 8;                                 \
   }
 
@@ -2882,7 +3021,7 @@ static bool hufDecode(const long long *hcode,  // i : encoding table
   lc -= i;
 
   while (lc > 0) {
-    const HufDec pl = hdecod[((unsigned long long)c << (HUF_DECBITS - lc)) & HUF_DECMASK];
+    const HufDec pl = hdecod[(static_cast<unsigned long long>(c) << (HUF_DECBITS - lc)) & HUF_DECMASK];
 
     if (pl.len) {
       lc -= pl.len;
@@ -3357,6 +3496,1112 @@ static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr,
 }
 #endif  // TINYEXR_USE_PIZ
 
+// ============================================================================
+// PXR24 decompression
+// ============================================================================
+
+// Decodes one PXR24-compressed chunk into outPtr (capacity outBufSize bytes).
+//
+// PXR24 zlib-compresses a byte-plane-separated, delta-encoded stream in which
+// FLOAT samples keep only their upper 24 bits (3 bytes); HALF (2 bytes) and
+// UINT (4 bytes) samples are stored without modification. Every channel is
+// treated as full width (channel subsampling is not handled here).
+//
+// Returns false if the dimensions are negative, the decoded data would not fit
+// in outBufSize, or the payload does not inflate to exactly the expected size.
+static bool DecompressPxr24(unsigned char *outPtr, size_t outBufSize,
+                            const unsigned char *inPtr, size_t inLen,
+                            int data_width, int num_lines,
+                            size_t num_channels,
+                            const EXRChannelInfo *channels) {
+  if (data_width < 0 || num_lines < 0) {
+    return false;
+  }
+
+  // Work in size_t so that large chunks cannot overflow `int`. The bound below
+  // guarantees that ch_pixels * 4 (the widest sample) fits in size_t.
+  const size_t w = static_cast<size_t>(data_width);
+  const size_t h = static_cast<size_t>(num_lines);
+  if (w != 0 && h > (SIZE_MAX / 4) / w) {
+    return false;
+  }
+  const size_t ch_pixels = w * h;
+
+  // pxr24_size: bytes in the inflated PXR24 stream.
+  // decoded_size: bytes written to outPtr (FLOAT expands from 3 to 4 bytes).
+  size_t pxr24_size = 0;
+  size_t decoded_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    size_t stored_bpp = 3;
+    size_t decoded_bpp = 4;
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+      stored_bpp = 4;
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      stored_bpp = 2;
+      decoded_bpp = 2;
+    }
+    const size_t ch_decoded = ch_pixels * decoded_bpp;
+    if (decoded_size > SIZE_MAX - ch_decoded) {
+      return false;
+    }
+    decoded_size += ch_decoded;
+    pxr24_size += ch_pixels * stored_bpp;  // <= decoded_size, cannot overflow
+  }
+
+  // Reject chunks whose decoded form would overrun the destination buffer.
+  if (decoded_size > outBufSize) {
+    return false;
+  }
+
+  // Allocate buffer for zlib-decompressed PXR24 data
+  std::vector<unsigned char> pxr24_buf(pxr24_size);
+  size_t uncomp_size = pxr24_size;
+
+  // PXR24 uses raw zlib compression
+  if (pxr24_size == inLen) {
+    // Uncompressed - copy directly
+    if (inLen > 0) {
+      memcpy(pxr24_buf.data(), inPtr, inLen);
+    }
+  } else {
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+    mz_ulong dest_len = static_cast<mz_ulong>(pxr24_size);
+    int ret = mz_uncompress(pxr24_buf.data(), &dest_len, inPtr, static_cast<mz_ulong>(inLen));
+    if (ret != MZ_OK) {
+      return false;
+    }
+    uncomp_size = static_cast<size_t>(dest_len);
+#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1)
+    int outLen = stbi_zlib_decode_buffer(reinterpret_cast<char*>(pxr24_buf.data()),
+        static_cast<int>(pxr24_size), reinterpret_cast<const char*>(inPtr), static_cast<int>(inLen));
+    if (outLen < 0) {
+      return false;
+    }
+    uncomp_size = static_cast<size_t>(outLen);
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+    uint64_t outLen = 0;
+    nanoz_status_t ret = nanoz_uncompress(inPtr, inLen, pxr24_size, pxr24_buf.data(), &outLen);
+    if (ret != NANOZ_SUCCESS) {
+      return false;
+    }
+    uncomp_size = static_cast<size_t>(outLen);
+#else
+    uLongf dest_len = static_cast<uLongf>(pxr24_size);
+    int ret = uncompress(pxr24_buf.data(), &dest_len, inPtr, static_cast<uLong>(inLen));
+    if (ret != Z_OK) {
+      return false;
+    }
+    uncomp_size = static_cast<size_t>(dest_len);
+#endif
+  }
+
+  if (uncomp_size != pxr24_size) {
+    return false;
+  }
+
+  // Undo the byte-plane separation and delta encoding. Per scanline, each
+  // channel is stored as consecutive planes of w bytes (most significant
+  // first) and every sample is a difference from the previous one.
+  const unsigned char* in_p = pxr24_buf.data();
+  unsigned char* out_p = outPtr;
+
+  for (size_t line = 0; line < h; line++) {
+    for (size_t c = 0; c < num_channels; c++) {
+      const bool is_half = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF);
+      const bool is_uint = (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT);
+      const size_t planes = is_uint ? 4 : (is_half ? 2 : 3);
+
+      unsigned int pixel = 0;
+      for (size_t x = 0; x < w; x++) {
+        // Gather this sample's difference, most significant plane first.
+        unsigned int diff = 0;
+        for (size_t p = 0; p < planes; p++) {
+          diff = (diff << 8) | in_p[p * w + x];
+        }
+        if (is_half) {
+          pixel += diff;
+          const unsigned short half_bits = static_cast<unsigned short>(pixel);
+          memcpy(out_p, &half_bits, 2);
+          out_p += 2;
+        } else {
+          // FLOAT carries only the upper 24 bits; the low byte is zero.
+          pixel += is_uint ? diff : (diff << 8);
+          memcpy(out_p, &pixel, 4);
+          out_p += 4;
+        }
+      }
+      in_p += planes * w;
+    }
+  }
+
+  return true;
+}
+
+// ============================================================================
+// B44/B44A decompression
+// ============================================================================
+
+// B44 compresses 4x4 blocks of HALF values to 14 bytes
+// B44A can compress flat regions to 3 bytes
+
+// B44 lookup tables
+// expTable: converts half-float value to exp(half/8) for p_linear channels
+// logTable: converts half-float value to 8*log(half) for p_linear channels
+// Note: These tables are primarily for luminance channels with p_linear attribute
+static unsigned short g_b44_exp_table[65536];
+static unsigned short g_b44_log_table[65536];
+static bool g_b44_tables_initialized = false;
+
+// Converts IEEE 754 binary16 bits to float (used to build the B44 tables).
+// Handles signed zero, subnormals, normals, infinities and NaNs.
+static inline float B44HalfToFloat(unsigned short h) {
+  // Unsigned arithmetic throughout: shifting a signed 1 into bit 31 is not
+  // portable.
+  const unsigned int sign = (static_cast<unsigned int>(h) & 0x8000u) << 16;
+  const unsigned int e = (h >> 10) & 0x1fu;
+  const unsigned int m = h & 0x3ffu;
+
+  if (e == 0 && m != 0) {
+    // Subnormal: value = m * 2^-24, exactly representable as a float.
+    const float f = static_cast<float>(m) * (1.0f / 16777216.0f);
+    return sign ? -f : f;
+  }
+
+  unsigned int bits;
+  if (e == 0) {
+    bits = sign;  // +/- zero
+  } else if (e == 31) {
+    bits = sign | 0x7f800000u | (m << 13);  // Inf or NaN (payload kept)
+  } else {
+    bits = sign | ((e + 112u) << 23) | (m << 13);  // rebias 15 -> 127
+  }
+
+  float result;
+  memcpy(&result, &bits, sizeof(result));
+  return result;
+}
+
+static inline unsigned short B44FloatToHalf(float f) {
+  // Converts a float to IEEE 754 binary16 bits by truncation (no rounding).
+  unsigned int bits;
+  memcpy(&bits, &f, sizeof(bits));
+  const unsigned int sign = (bits >> 16) & 0x8000u;
+  const unsigned int e = (bits >> 23) & 0xffu;
+  const unsigned int m = bits & 0x7fffffu;
+
+  if (e == 255) {
+    // Inf or NaN; force a mantissa bit so a NaN never collapses to Inf.
+    const unsigned int nan_bit = (m != 0 && (m >> 13) == 0) ? 1u : 0u;
+    return static_cast<unsigned short>(sign | 0x7c00u | (m >> 13) | nan_bit);
+  }
+  if (e > 142) {
+    return static_cast<unsigned short>(sign | 0x7c00u);  // overflow -> Inf
+  }
+  if (e >= 113) {
+    return static_cast<unsigned short>(sign | ((e - 112u) << 10) | (m >> 13));
+  }
+  if (e < 103) {
+    return static_cast<unsigned short>(sign);  // underflows to zero
+  }
+  // Half subnormal mantissa = (1.m * 2^23) >> (126 - e), for e in [103, 112].
+  return static_cast<unsigned short>(sign | ((m | 0x800000u) >> (126u - e)));
+}
+
+// Populates g_b44_exp_table / g_b44_log_table on first use; later calls are
+// no-ops. The guard is a plain bool, so the first call is not synchronized
+// against concurrent callers.
+static void InitB44Tables() {
+  if (g_b44_tables_initialized) {
+    return;
+  }
+
+  // Visit every 16-bit pattern; both tables are indexed by raw half bits.
+  for (unsigned int bits = 0; bits <= 0xffffu; ++bits) {
+    const unsigned short half_bits = static_cast<unsigned short>(bits);
+    const bool inf_or_nan = (half_bits & 0x7c00) == 0x7c00;
+
+    // Exp table entry: exp(value / 8), with Inf/NaN inputs mapped to 0.
+    unsigned short exp_entry;
+    if (inf_or_nan) {
+      exp_entry = 0;
+    } else if (half_bits >= 0x558c && half_bits < 0x8000) {
+      // Inputs in [0x558c, 0x8000) are pinned to 0x7bff (largest finite half).
+      exp_entry = 0x7bff;
+    } else {
+      const double v = static_cast<double>(B44HalfToFloat(half_bits));
+      exp_entry = B44FloatToHalf(static_cast<float>(std::exp(v / 8.0)));
+    }
+    g_b44_exp_table[bits] = exp_entry;
+
+    // Log table entry: 8 * log(value); 0 for Inf/NaN and any value <= 0.
+    unsigned short log_entry = 0;
+    if (!inf_or_nan && half_bits <= 0x8000) {
+      const float v = B44HalfToFloat(half_bits);
+      if (v > 0.0f) {
+        const double scaled = 8.0 * std::log(static_cast<double>(v));
+        log_entry = B44FloatToHalf(static_cast<float>(scaled));
+      }
+    }
+    g_b44_log_table[bits] = log_entry;
+  }
+
+  // Mark the tables ready so subsequent calls return immediately.
+  g_b44_tables_initialized = true;
+}
+
+// Table lookup in g_b44_exp_table (exp(h / 8), for p_linear channels); the table is all zeros until InitB44Tables has run.
+static inline unsigned short B44ConvertFromLinear(unsigned short h) {
+  return g_b44_exp_table[h];
+}
+
+// Table lookup in g_b44_log_table (8 * log(h), for p_linear channels); the table is all zeros until InitB44Tables has run.
+static inline unsigned short B44ConvertToLinear(unsigned short h) {
+  return g_b44_log_table[h];
+}
+
+// Unpacks a 14-byte B44 block into 16 half values (matches OpenEXR unpack14).
+static void UnpackB44Block(unsigned short dst[16], const unsigned char src[14]) {
+  // src[0..1] hold t[0] big-endian, still in ordered-magnitude form.
+  unsigned short s0 = (static_cast<unsigned short>(src[0]) << 8) | src[1];
+
+  // Top 6 bits of src[2] are the shift; bias (0x20 << shift) is subtracted from every scaled 6-bit delta.
+  unsigned short shift = src[2] >> 2;
+  unsigned short bias = static_cast<unsigned short>(0x20u << shift);
+  // NOTE(review): shift may be 0..63 but 1u << shift below needs shift < 32;
+  // presumably callers divert large-shift (flat) blocks first - confirm.
+  // Delta chain: s0->s4->s8->s12 first, then s[i]->s[i+1] across each group of 4.
+
+  unsigned short s4 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s0) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[2]) << 4) |
+                               (static_cast<unsigned int>(src[3]) >> 4)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s8 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s4) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[3]) << 2) |
+                               (static_cast<unsigned int>(src[4]) >> 6)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s12 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s8) +
+    static_cast<unsigned int>(src[4] & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s1 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s0) +
+    static_cast<unsigned int>(src[5] >> 2) * (1u << shift) - bias);
+
+  unsigned short s5 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s4) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[5]) << 4) |
+                               (static_cast<unsigned int>(src[6]) >> 4)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s9 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s8) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[6]) << 2) |
+                               (static_cast<unsigned int>(src[7]) >> 6)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s13 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s12) +
+    static_cast<unsigned int>(src[7] & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s2 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s1) +
+    static_cast<unsigned int>(src[8] >> 2) * (1u << shift) - bias);
+
+  unsigned short s6 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s5) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[8]) << 4) |
+                               (static_cast<unsigned int>(src[9]) >> 4)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s10 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s9) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[9]) << 2) |
+                               (static_cast<unsigned int>(src[10]) >> 6)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s14 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s13) +
+    static_cast<unsigned int>(src[10] & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s3 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s2) +
+    static_cast<unsigned int>(src[11] >> 2) * (1u << shift) - bias);
+
+  unsigned short s7 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s6) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[11]) << 4) |
+                               (static_cast<unsigned int>(src[12]) >> 4)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s11 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s10) +
+    static_cast<unsigned int>(((static_cast<unsigned int>(src[12]) << 2) |
+                               (static_cast<unsigned int>(src[13]) >> 6)) & 0x3fu) * (1u << shift) - bias);
+
+  unsigned short s15 = static_cast<unsigned short>(
+    static_cast<unsigned int>(s14) +
+    static_cast<unsigned int>(src[13] & 0x3fu) * (1u << shift) - bias);
+
+  // Store the reconstructed values (ordered-magnitude form) in index order.
+  dst[0] = s0;   dst[1] = s1;   dst[2] = s2;   dst[3] = s3;
+  dst[4] = s4;   dst[5] = s5;   dst[6] = s6;   dst[7] = s7;
+  dst[8] = s8;   dst[9] = s9;   dst[10] = s10; dst[11] = s11;
+  dst[12] = s12; dst[13] = s13; dst[14] = s14; dst[15] = s15;
+
+  // Convert each value from ordered-magnitude form to raw half bits.
+  for (int i = 0; i < 16; i++) {
+    if (dst[i] & 0x8000) {
+      dst[i] &= 0x7fff;  // Positive: clear sign bit
+    } else {
+      dst[i] = ~dst[i];  // Negative: invert all bits
+    }
+  }
+}
+
+// Expands a 3-byte flat B44 block: all 16 outputs receive the same value.
+// Only src[0] and src[1] (a big-endian 16-bit value) are read here.
+static void UnpackB44FlatBlock(unsigned short dst[16], const unsigned char src[3]) {
+  const unsigned int packed =
+      (static_cast<unsigned int>(src[0]) << 8) | static_cast<unsigned int>(src[1]);
+
+  // Ordered-magnitude -> half bits: with the top bit set just clear it,
+  // otherwise take the 16-bit complement.
+  const unsigned short fill_value = static_cast<unsigned short>(
+      (packed & 0x8000u) ? (packed & 0x7fffu) : (~packed & 0xffffu));
+
+  unsigned short *const end = dst + 16;
+  for (unsigned short *p = dst; p != end; ++p) {
+    *p = fill_value;
+  }
+}
+
+// Decodes one B44/B44A chunk into outPtr as consecutive per-channel planes. Returns false when
+// the input is truncated or outBufSize cannot hold the decoded data.
+static bool DecompressB44(unsigned char *outPtr, size_t outBufSize,
+                          const unsigned char *inPtr, size_t inLen, int data_width,
+                          int num_lines, size_t num_channels,
+                          const EXRChannelInfo *channels, bool is_b44a) {
+  (void)is_b44a;  // Flat block detection doesn't depend on B44/B44A for decoding
+  InitB44Tables();
+
+  // Validate that the output buffer is large enough for the decoded data.
+  // Every product and sum is overflow-checked, and the ceil-divisions are done
+  // in 64-bit because (dimension + sampling - 1) can exceed INT_MAX.
+  {
+    size_t expected_out = 0;
+    for (size_t c = 0; c < num_channels; c++) {
+      int xs = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+      int ys = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+      size_t cw = static_cast<size_t>((static_cast<long long>(data_width) + xs - 1) / xs);
+      size_t ch = static_cast<size_t>((static_cast<long long>(num_lines) + ys - 1) / ys);
+      size_t bpp = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) ? 2u : 4u;
+      // Check cw * ch overflow
+      if (cw != 0 && ch > (SIZE_MAX / cw)) return false;
+      size_t pixels = cw * ch;
+      // Check pixels * bpp overflow
+      if (pixels > (SIZE_MAX / bpp)) return false;
+      size_t ch_total = pixels * bpp;
+      // Check accumulation overflow
+      if (expected_out > SIZE_MAX - ch_total) return false;
+      expected_out += ch_total;
+    }
+    if (expected_out > outBufSize) return false;
+  }
+
+  const unsigned char* in_p = inPtr;
+  const unsigned char* in_end = inPtr + inLen;
+
+  // First pass: decompress all channels into scratch buffers.
+  // For non-HALF channels, save the pointer and byte count so we can copy
+  // the raw data to the output buffer in the second pass.
+  std::vector<std::vector<unsigned short>> scratch_buffers(num_channels);
+  std::vector<const unsigned char *> nonhalf_ptrs(num_channels, nullptr);
+  std::vector<size_t> nonhalf_sizes(num_channels, 0);
+
+  for (size_t c = 0; c < num_channels; c++) {
+    // Per-channel dimensions from sampling (64-bit ceil-division, see above)
+    int x_sampling = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+    int y_sampling = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+    int ch_width = static_cast<int>((static_cast<long long>(data_width) + x_sampling - 1) / x_sampling);
+    int ch_height = static_cast<int>((static_cast<long long>(num_lines) + y_sampling - 1) / y_sampling);
+
+    // B44 only works with HALF pixel types
+    if (channels[c].pixel_type != TINYEXR_PIXELTYPE_HALF) {
+      // For non-HALF channels, data is stored uncompressed; record position
+      size_t ch_bytes = static_cast<size_t>(ch_width) * ch_height;
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT ||
+          channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        ch_bytes *= 4;
+      } else {
+        ch_bytes *= 2;
+      }
+      if (ch_bytes > static_cast<size_t>(in_end - in_p)) return false;  // compare sizes, never form in_p + n
+      nonhalf_ptrs[c] = in_p;
+      nonhalf_sizes[c] = ch_bytes;
+      in_p += ch_bytes;
+      continue;
+    }
+
+    // Calculate block dimensions (rounded up to multiple of 4)
+    int padded_width = ((ch_width + 3) / 4) * 4;
+    int padded_height = ((ch_height + 3) / 4) * 4;
+    int num_blocks_x = padded_width / 4;
+    int num_blocks_y = padded_height / 4;
+
+    // Allocate scratch buffer for this channel
+    scratch_buffers[c].resize(static_cast<size_t>(padded_width) * padded_height);
+
+    // Process blocks
+    for (int by = 0; by < num_blocks_y; by++) {
+      for (int bx = 0; bx < num_blocks_x; bx++) {
+        unsigned short block[16];
+
+        if (static_cast<size_t>(in_end - in_p) < 3) return false;
+
+        // Check for flat block (shift >= 13)
+        if (in_p[2] >= (13 << 2)) {
+          // 3-byte flat block
+          UnpackB44FlatBlock(block, in_p);
+          in_p += 3;
+        } else {
+          // Regular 14-byte block
+          if (static_cast<size_t>(in_end - in_p) < 14) return false;
+          UnpackB44Block(block, in_p);
+          in_p += 14;
+        }
+
+        // Apply p_linear conversion (log table) if needed
+        if (channels[c].p_linear) {
+          for (int i = 0; i < 16; i++) {
+            block[i] = g_b44_log_table[block[i]];
+          }
+        }
+
+        // Store block in scratch buffer
+        for (int dy = 0; dy < 4; dy++) {
+          int y = by * 4 + dy;
+          for (int dx = 0; dx < 4; dx++) {
+            int x = bx * 4 + dx;
+            scratch_buffers[c][static_cast<size_t>(y) * padded_width + x] = block[dy * 4 + dx];
+          }
+        }
+      }
+    }
+  }
+
+  // Second pass: copy from scratch buffers to output in per-channel format.
+  // Output format: all data for channel 0, then all data for channel 1, etc.
+  // ch_offset in DecodePixelData is accumulated per preceding channel sizes.
+  unsigned char* out_p = outPtr;
+  for (size_t c = 0; c < num_channels; c++) {
+    int x_sampling = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+    int y_sampling = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+    int ch_width = static_cast<int>((static_cast<long long>(data_width) + x_sampling - 1) / x_sampling);
+    int ch_height = static_cast<int>((static_cast<long long>(num_lines) + y_sampling - 1) / y_sampling);
+    int padded_width = ((ch_width + 3) / 4) * 4;
+
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      for (int y = 0; y < ch_height; y++) {
+        for (int x = 0; x < ch_width; x++) {
+          unsigned short val = scratch_buffers[c][static_cast<size_t>(y) * padded_width + x];
+          // Write as little-endian bytes so DecodePixelData's swap2 (LE->host)
+          // works correctly on both little- and big-endian platforms.
+          tinyexr::swap2(&val);
+          memcpy(out_p, &val, sizeof(val));
+          out_p += sizeof(val);
+        }
+      }
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT ||
+               channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+      // Non-HALF data is stored uncompressed; copy from saved pointer
+      if (nonhalf_sizes[c] > 0) {
+        memcpy(out_p, nonhalf_ptrs[c], nonhalf_sizes[c]);
+        out_p += nonhalf_sizes[c];
+      }
+    }
+  }
+
+  return true;
+}
+
+// ============================================================================
+// PXR24 compression (encoding)
+// ============================================================================
+
+// Convert float32 to float24 (PXR24 format)
+static inline unsigned int float_to_float24(float f) {
+  // Returns the 24-bit PXR24 pattern (1 sign, 8 exponent, 15 mantissa bits) in the low 24 bits.
+  unsigned int bits;
+  memcpy(&bits, &f, sizeof(bits));  // well-defined bit copy; reading an inactive union member is UB in C++
+  unsigned int s = bits & 0x80000000;
+  unsigned int e = bits & 0x7f800000;
+  unsigned int m = bits & 0x007fffff;
+
+  if (e == 0x7f800000) {
+    if (m) {
+      // NaN - keep the sign and the 15 leftmost mantissa bits; force a non-zero mantissa
+      m >>= 8;
+      return (s >> 8) | (e >> 8) | m | (m == 0 ? 1 : 0);
+    } else {
+      // Infinity
+      return (s >> 8) | (e >> 8);
+    }
+  }
+
+  // Finite - round mantissa to 15 bits
+  unsigned int i = ((e | m) + (m & 0x00000080)) >> 8;
+
+  if (i >= 0x7f8000) {
+    // Rounding would produce the infinity pattern - truncate instead of round
+    i = (e | m) >> 8;
+  }
+
+  return (s >> 8) | i;
+}
+
+// Encodes scanline-interleaved pixel data as PXR24 (byte planes + per-row deltas, then zlib).
+// Returns false if the selected zlib backend reports failure; inLen is currently not consulted.
+static bool CompressPxr24(std::vector<unsigned char>& outBuf, const unsigned char *inPtr,
+                          size_t inLen, int data_width, int num_lines, size_t num_channels,
+                          const EXRChannelInfo *channels) {
+  // PXR24 stores HALF as 2 bytes, UINT as 4 bytes, FLOAT as 3 bytes (rounded to 24 bits)
+  // Data is stored with byte plane separation and delta encoding
+  size_t pxr24_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    const size_t ch_pixels = static_cast<size_t>(data_width) * static_cast<size_t>(num_lines);  // size_t math: an int product can overflow
+
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+      pxr24_size += ch_pixels * 4;
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      pxr24_size += ch_pixels * 2;
+    } else {  // FLOAT
+      pxr24_size += ch_pixels * 3;
+    }
+  }
+
+  // Create PXR24 format data
+  std::vector<unsigned char> pxr24_buf(pxr24_size);
+  const unsigned char* in_p = inPtr;
+  unsigned char* out_p = pxr24_buf.data();
+
+  // PXR24 uses:
+  // 1. Byte plane separation: bytes are stored by plane (all high bytes, then next-high, etc.)
+  // 2. Delta encoding: each pixel is stored as difference from previous pixel
+  for (int line = 0; line < num_lines; line++) {
+    for (size_t c = 0; c < num_channels; c++) {
+      int w = data_width;
+
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        // UINT: 4 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        unsigned char* ptr2 = out_p + w * 2;
+        unsigned char* ptr3 = out_p + w * 3;
+        out_p += w * 4;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned int pixel;
+          memcpy(&pixel, in_p, 4);
+          in_p += 4;
+          unsigned int diff = pixel - prevPixel;
+          prevPixel = pixel;
+
+          ptr0[x] = static_cast<unsigned char>(diff >> 24);
+          ptr1[x] = static_cast<unsigned char>(diff >> 16);
+          ptr2[x] = static_cast<unsigned char>(diff >> 8);
+          ptr3[x] = static_cast<unsigned char>(diff);
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        // HALF: 2 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        out_p += w * 2;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned short h;
+          memcpy(&h, in_p, 2);
+          in_p += 2;
+          unsigned int pixel = h;
+          unsigned int diff = pixel - prevPixel;
+          prevPixel = pixel;
+
+          ptr0[x] = static_cast<unsigned char>(diff >> 8);
+          ptr1[x] = static_cast<unsigned char>(diff);
+        }
+      } else {  // FLOAT
+        // FLOAT: convert to 24-bit, then 3 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        unsigned char* ptr2 = out_p + w * 2;
+        out_p += w * 3;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          float f;
+          memcpy(&f, in_p, 4);
+          in_p += 4;
+          unsigned int pixel24 = float_to_float24(f);
+          unsigned int diff = pixel24 - prevPixel;
+          prevPixel = pixel24;
+
+          // Store the low 24 bits of the diff, most-significant byte plane first
+          ptr0[x] = static_cast<unsigned char>(diff >> 16);
+          ptr1[x] = static_cast<unsigned char>(diff >> 8);
+          ptr2[x] = static_cast<unsigned char>(diff);
+        }
+      }
+    }
+  }
+
+  // Compress with zlib
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+  mz_ulong dest_len = mz_compressBound(static_cast<mz_ulong>(pxr24_size));
+  outBuf.resize(dest_len);
+  int ret = mz_compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast<mz_ulong>(pxr24_size));
+  if (ret != MZ_OK) {
+    return false;
+  }
+  outBuf.resize(static_cast<size_t>(dest_len));
+#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1)
+  int outSize;
+  unsigned char* ret = stbi_zlib_compress(pxr24_buf.data(), static_cast<int>(pxr24_size), &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  outBuf.assign(ret, ret + outSize);
+  free(ret);
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+  int outSize;
+  unsigned char* ret = nanoz_compress(pxr24_buf.data(), pxr24_size, &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  outBuf.assign(ret, ret + outSize);
+  free(ret);
+#else
+  uLongf dest_len = compressBound(static_cast<uLong>(pxr24_size));
+  outBuf.resize(static_cast<size_t>(dest_len));
+  int ret = compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast<uLong>(pxr24_size));
+  if (ret != Z_OK) {
+    return false;
+  }
+  outBuf.resize(static_cast<size_t>(dest_len));
+#endif
+
+  return true;
+}
+
+// Overloaded version that takes ChannelInfo instead of EXRChannelInfo
+// Note: Uses requested_pixel_type which is the file format, not the input type
+// ChannelInfo flavour of the PXR24 encoder: layout is chosen by channels[c].requested_pixel_type.
+// Returns false if the selected zlib backend reports failure; inLen is currently not consulted.
+static bool CompressPxr24(std::vector<unsigned char>& outBuf, const unsigned char *inPtr,
+                          size_t inLen, int data_width, int num_lines, size_t num_channels,
+                          const std::vector<ChannelInfo>& channels) {
+  // PXR24 stores HALF as 2 bytes, UINT as 4 bytes, FLOAT as 3 bytes (rounded to 24 bits)
+  // Data is stored with byte plane separation and delta encoding
+  // Use requested_pixel_type which is the actual format in the file
+  size_t pxr24_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    const size_t ch_pixels = static_cast<size_t>(data_width) * static_cast<size_t>(num_lines);  // size_t math: an int product can overflow
+    int file_type = channels[c].requested_pixel_type;
+
+    if (file_type == TINYEXR_PIXELTYPE_UINT) {
+      pxr24_size += ch_pixels * 4;
+    } else if (file_type == TINYEXR_PIXELTYPE_HALF) {
+      pxr24_size += ch_pixels * 2;
+    } else {  // FLOAT
+      pxr24_size += ch_pixels * 3;
+    }
+  }
+
+  // Create PXR24 format data
+  std::vector<unsigned char> pxr24_buf(pxr24_size);
+  const unsigned char* in_p = inPtr;
+  unsigned char* out_p = pxr24_buf.data();
+
+  // PXR24 uses:
+  // 1. Byte plane separation: bytes are stored by plane (all high bytes, then next-high, etc.)
+  // 2. Delta encoding: each pixel is stored as difference from previous pixel
+  for (int line = 0; line < num_lines; line++) {
+    for (size_t c = 0; c < num_channels; c++) {
+      int w = data_width;
+      int file_type = channels[c].requested_pixel_type;
+
+      if (file_type == TINYEXR_PIXELTYPE_UINT) {
+        // UINT: 4 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        unsigned char* ptr2 = out_p + w * 2;
+        unsigned char* ptr3 = out_p + w * 3;
+        out_p += w * 4;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned int pixel;
+          memcpy(&pixel, in_p, 4);
+          in_p += 4;
+          unsigned int diff = pixel - prevPixel;
+          prevPixel = pixel;
+
+          ptr0[x] = static_cast<unsigned char>(diff >> 24);
+          ptr1[x] = static_cast<unsigned char>(diff >> 16);
+          ptr2[x] = static_cast<unsigned char>(diff >> 8);
+          ptr3[x] = static_cast<unsigned char>(diff);
+        }
+      } else if (file_type == TINYEXR_PIXELTYPE_HALF) {
+        // HALF: 2 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        out_p += w * 2;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          unsigned short h;
+          memcpy(&h, in_p, 2);
+          in_p += 2;
+          unsigned int pixel = h;
+          unsigned int diff = pixel - prevPixel;
+          prevPixel = pixel;
+
+          ptr0[x] = static_cast<unsigned char>(diff >> 8);
+          ptr1[x] = static_cast<unsigned char>(diff);
+        }
+      } else {  // FLOAT
+        // FLOAT: convert to 24-bit, then 3 byte planes with delta encoding
+        unsigned char* ptr0 = out_p;
+        unsigned char* ptr1 = out_p + w;
+        unsigned char* ptr2 = out_p + w * 2;
+        out_p += w * 3;
+
+        unsigned int prevPixel = 0;
+        for (int x = 0; x < w; x++) {
+          float f;
+          memcpy(&f, in_p, 4);
+          in_p += 4;
+          unsigned int pixel24 = float_to_float24(f);
+          unsigned int diff = pixel24 - prevPixel;
+          prevPixel = pixel24;
+
+          // Store the low 24 bits of the diff, most-significant byte plane first
+          ptr0[x] = static_cast<unsigned char>(diff >> 16);
+          ptr1[x] = static_cast<unsigned char>(diff >> 8);
+          ptr2[x] = static_cast<unsigned char>(diff);
+        }
+      }
+    }
+  }
+
+  // Compress with zlib
+#if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
+  mz_ulong dest_len = mz_compressBound(static_cast<mz_ulong>(pxr24_size));
+  outBuf.resize(dest_len);
+  int ret = mz_compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast<mz_ulong>(pxr24_size));
+  if (ret != MZ_OK) {
+    return false;
+  }
+  outBuf.resize(static_cast<size_t>(dest_len));
+#elif defined(TINYEXR_USE_STB_ZLIB) && (TINYEXR_USE_STB_ZLIB==1)
+  int outSize;
+  unsigned char* ret = stbi_zlib_compress(pxr24_buf.data(), static_cast<int>(pxr24_size), &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  outBuf.assign(ret, ret + outSize);
+  free(ret);
+#elif defined(TINYEXR_USE_NANOZLIB) && (TINYEXR_USE_NANOZLIB==1)
+  int outSize;
+  unsigned char* ret = nanoz_compress(pxr24_buf.data(), pxr24_size, &outSize, 8);
+  if (!ret) {
+    return false;
+  }
+  outBuf.assign(ret, ret + outSize);
+  free(ret);
+#else
+  uLongf dest_len = compressBound(static_cast<uLong>(pxr24_size));
+  outBuf.resize(static_cast<size_t>(dest_len));
+  int ret = compress(outBuf.data(), &dest_len, pxr24_buf.data(), static_cast<uLong>(pxr24_size));
+  if (ret != Z_OK) {
+    return false;
+  }
+  outBuf.resize(static_cast<size_t>(dest_len));
+#endif
+
+  return true;
+}
+
+// ============================================================================
+// B44/B44A compression (encoding)
+// ============================================================================
+
+// Shift and round for B44 pack (matches OpenEXR's shiftAndRound)
+static inline int B44ShiftAndRound(int x, int shift) {
+  // Returns x / 2^shift rounded to the nearest integer; ties go to the even result.
+  // The arithmetic is carried out on 2*x with one extra bit of shift.
+  const int doubled = x << 1;
+  const int round_add = (1 << shift) - 1;
+  const int total_shift = shift + 1;
+  const int tie_bit = (doubled >> total_shift) & 1;
+  return (doubled + round_add + tie_bit) >> total_shift;
+}
+
+// Pack a 4x4 block of HALF values into 14 bytes (matches OpenEXR's pack())
+// Returns the number of bytes written (14 for normal, 3 for flat if flatfields=true)
+static int PackB44Block(unsigned char* out, const unsigned short* block, bool flatfields, bool exactmax) {
+  // Inputs:
+  //   out        - destination; 14 bytes are written, or 3 for a flat block
+  //   block      - 16 half-float bit patterns, indexed as row * 4 + column
+  //   flatfields - permit the 3-byte encoding when every running difference is zero
+  //   exactmax   - rewrite the stored base value from the block maximum
+  // The return value is the number of bytes stored in out.
+  // Normal layout: bytes 0-1 base value, top six bits of byte 2 the shift, then
+  // fifteen 6-bit biased running differences packed back to back.
+
+  const int kBias = 0x20;   // added to every running difference before storing
+  const int kRMax = 0x3f;   // largest biased difference a 6-bit field can hold
+
+  unsigned short ordered[16];  // ordered-magnitude form of block[]
+  int delta[16];               // (maxOrdered - ordered[i]) scaled down by `shift`
+  int run[15];                 // biased running differences
+  int runLo = 0;
+  int runHi = 0;
+
+  // Step 1: map each half to an unsigned value whose integer ordering follows
+  // the float ordering, and track the largest such value.
+  unsigned short maxOrdered = 0;
+  for (int i = 0; i < 16; ++i) {
+    const unsigned short h = block[i];
+    if ((h & 0x7c00) == 0x7c00) {
+      ordered[i] = 0x8000;  // NaN/Inf -> neutral value
+    } else if (h & 0x8000) {
+      ordered[i] = static_cast<unsigned short>(~h);  // negative: invert all bits
+    } else {
+      ordered[i] = static_cast<unsigned short>(h | 0x8000);  // positive: set the top bit
+    }
+    if (maxOrdered < ordered[i]) maxOrdered = ordered[i];
+  }
+
+  // Step 2: try shift = 0, 1, 2, ... until every biased running difference
+  // fits in the range [0, 0x3f].
+  int shift = 0;
+  for (;; ++shift) {
+    for (int i = 0; i < 16; ++i) {
+      delta[i] = B44ShiftAndRound(maxOrdered - ordered[i], shift);
+    }
+
+    // run[0..2]: differences going down the first column.
+    for (int row = 0; row < 3; ++row) {
+      run[row] = delta[row * 4] - delta[(row + 1) * 4] + kBias;
+    }
+    // run[3..14]: differences between horizontal neighbours, grouped by
+    // column pair (0-1, then 1-2, then 2-3), four rows per group.
+    for (int col = 0; col < 3; ++col) {
+      for (int row = 0; row < 4; ++row) {
+        const int at = row * 4 + col;
+        run[3 + col * 4 + row] = delta[at] - delta[at + 1] + kBias;
+      }
+    }
+
+    runLo = run[0];
+    runHi = run[0];
+    for (int i = 1; i < 15; ++i) {
+      if (run[i] < runLo) runLo = run[i];
+      if (run[i] > runHi) runHi = run[i];
+    }
+    if (runLo >= 0 && runHi <= kRMax) break;
+  }
+
+  // Step 3: flat block -- every running difference equals the bias, so only
+  // the first ordered value and a marker byte are stored.
+  if (flatfields && runLo == kBias && runHi == kBias) {
+    out[0] = static_cast<unsigned char>(ordered[0] >> 8);
+    out[1] = static_cast<unsigned char>(ordered[0]);
+    out[2] = 0xfc;  // flat-block marker (shift field >= 13)
+    return 3;
+  }
+
+  // Step 4: optionally replace the base value with the maximum minus the
+  // rescaled first delta, so the max pixel is represented accurately.
+  if (exactmax) {
+    ordered[0] = maxOrdered - static_cast<unsigned short>(delta[0] << shift);
+  }
+
+  // Step 5: emit the 16-bit base (high byte first), then four 3-byte groups.
+  // Each group packs four 6-bit fields: the first group leads with the shift,
+  // the remaining groups lead with run[3], run[7] and run[11].
+  out[0] = static_cast<unsigned char>(ordered[0] >> 8);
+  out[1] = static_cast<unsigned char>(ordered[0]);
+  for (int g = 0; g < 4; ++g) {
+    const int lead = (g == 0) ? shift : run[g * 4 - 1];
+    const int a = run[g * 4];
+    const int b = run[g * 4 + 1];
+    const int c = run[g * 4 + 2];
+    unsigned char* dst = out + 2 + g * 3;
+    dst[0] = static_cast<unsigned char>((lead << 2) | (a >> 4));
+    dst[1] = static_cast<unsigned char>((a << 4) | (b >> 2));
+    dst[2] = static_cast<unsigned char>((b << 6) | c);
+  }
+
+  return 14;
+}
+
+// Packs planar per-channel data into B44/B44A: HALF channels as 4x4 blocks, others copied raw.
+// is_b44a enables 3-byte flat blocks. Always returns true; inLen is currently not consulted.
+static bool CompressB44(std::vector<unsigned char>& outBuf,
+                        const unsigned char *inPtr, size_t inLen, int data_width,
+                        int num_lines, size_t num_channels,
+                        const EXRChannelInfo *channels, bool is_b44a) {
+  // Calculate number of 4x4 blocks
+  int num_blocks_x = (data_width + 3) / 4;
+  int num_blocks_y = (num_lines + 3) / 4;
+
+  // Estimate output size (14 bytes per block per HALF channel)
+  size_t max_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+      max_size += static_cast<size_t>(num_blocks_x) * num_blocks_y * 14;
+    } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT ||
+               channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+      max_size += static_cast<size_t>(data_width) * num_lines * 4;
+    } else {
+      max_size += static_cast<size_t>(data_width) * num_lines * 2;
+    }
+  }
+
+  outBuf.resize(max_size);
+  unsigned char* out_p = outBuf.data();
+
+  // Process each channel
+  size_t in_offset = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    if (channels[c].pixel_type != TINYEXR_PIXELTYPE_HALF) {
+      // Non-HALF channels are stored uncompressed
+      size_t ch_bytes = static_cast<size_t>(data_width) * num_lines;
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT ||
+          channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        ch_bytes *= 4;
+      } else {
+        ch_bytes *= 2;
+      }
+      memcpy(out_p, inPtr + in_offset, ch_bytes);
+      out_p += ch_bytes;
+      in_offset += ch_bytes;
+      continue;
+    }
+
+    // Process HALF channel in 4x4 blocks
+    const unsigned char* ch_ptr = inPtr + in_offset;  // byte pointer: samples are fetched with memcpy, no alignment assumed
+
+    for (int by = 0; by < num_blocks_y; by++) {
+      for (int bx = 0; bx < num_blocks_x; bx++) {
+        unsigned short block[16];
+
+        // Gather block pixels with edge replication for padding
+        for (int dy = 0; dy < 4; dy++) {
+          int y = by * 4 + dy;
+          int src_y = (y >= num_lines) ? (num_lines - 1) : y;
+
+          for (int dx = 0; dx < 4; dx++) {
+            int x = bx * 4 + dx;
+            int src_x = (x >= data_width) ? (data_width - 1) : x;
+
+            memcpy(&block[dy * 4 + dx], ch_ptr + (static_cast<size_t>(src_y) * data_width + src_x) * sizeof(unsigned short), sizeof(unsigned short));
+          }
+        }
+
+        // Pack block - PackB44Block handles flat block detection internally
+        // flatfields = is_b44a, exactmax = true (for better accuracy)
+        int bytes_written = PackB44Block(out_p, block, is_b44a, true);
+        out_p += bytes_written;
+      }
+    }
+
+    in_offset += static_cast<size_t>(data_width) * num_lines * 2;
+  }
+
+  // Resize to actual size
+  outBuf.resize(static_cast<size_t>(out_p - outBuf.data()));
+
+  return true;
+}
+
+// Overloaded version that takes ChannelInfo instead of EXRChannelInfo
+// Note: Uses requested_pixel_type which is the file format, not the input type
+// ChannelInfo flavour of the B44/B44A packer: layout follows channels[c].requested_pixel_type.
+// is_b44a enables 3-byte flat blocks. Always returns true; inLen is currently not consulted.
+static bool CompressB44(std::vector<unsigned char>& outBuf,
+                        const unsigned char *inPtr, size_t inLen, int data_width,
+                        int num_lines, size_t num_channels,
+                        const std::vector<ChannelInfo>& channels, bool is_b44a) {
+  // Calculate number of 4x4 blocks
+  int num_blocks_x = (data_width + 3) / 4;
+  int num_blocks_y = (num_lines + 3) / 4;
+
+  // Estimate output size - use requested_pixel_type (file format)
+  size_t max_size = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    int file_type = channels[c].requested_pixel_type;
+    if (file_type == TINYEXR_PIXELTYPE_HALF) {
+      max_size += static_cast<size_t>(num_blocks_x) * num_blocks_y * 14;
+    } else if (file_type == TINYEXR_PIXELTYPE_UINT ||
+               file_type == TINYEXR_PIXELTYPE_FLOAT) {
+      max_size += static_cast<size_t>(data_width) * num_lines * 4;
+    } else {
+      max_size += static_cast<size_t>(data_width) * num_lines * 2;
+    }
+  }
+
+  outBuf.resize(max_size);
+  unsigned char* out_p = outBuf.data();
+
+  size_t in_offset = 0;
+  for (size_t c = 0; c < num_channels; c++) {
+    int file_type = channels[c].requested_pixel_type;
+    if (file_type != TINYEXR_PIXELTYPE_HALF) {
+      size_t ch_bytes = static_cast<size_t>(data_width) * num_lines;
+      if (file_type == TINYEXR_PIXELTYPE_UINT ||
+          file_type == TINYEXR_PIXELTYPE_FLOAT) {
+        ch_bytes *= 4;
+      } else {
+        ch_bytes *= 2;
+      }
+      memcpy(out_p, inPtr + in_offset, ch_bytes);
+      out_p += ch_bytes;
+      in_offset += ch_bytes;
+      continue;
+    }
+
+    const unsigned char* ch_ptr = inPtr + in_offset;  // byte pointer: samples are fetched with memcpy, no alignment assumed
+
+    for (int by = 0; by < num_blocks_y; by++) {
+      for (int bx = 0; bx < num_blocks_x; bx++) {
+        unsigned short block[16];
+
+        // Gather block pixels with edge replication for padding
+        for (int dy = 0; dy < 4; dy++) {
+          int y = by * 4 + dy;
+          int src_y = (y >= num_lines) ? (num_lines - 1) : y;
+
+          for (int dx = 0; dx < 4; dx++) {
+            int x = bx * 4 + dx;
+            int src_x = (x >= data_width) ? (data_width - 1) : x;
+
+            memcpy(&block[dy * 4 + dx], ch_ptr + (static_cast<size_t>(src_y) * data_width + src_x) * sizeof(unsigned short), sizeof(unsigned short));
+          }
+        }
+
+        // Pack block - PackB44Block handles flat block detection internally
+        int bytes_written = PackB44Block(out_p, block, is_b44a, true);
+        out_p += bytes_written;
+      }
+    }
+
+    in_offset += static_cast<size_t>(data_width) * num_lines * 2;
+  }
+
+  outBuf.resize(static_cast<size_t>(out_p - outBuf.data()));
+  return true;
+}
+
 #if TINYEXR_USE_ZFP
 
 struct ZFPCompressionParam {
@@ -4090,6 +5335,257 @@ static bool DecodePixelData(/* out */ unsigned char **out_images,
     (void)num_channels;
     return false;
 #endif
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    // PXR24 compression: Use true PXR24 decompression
+    // PXR24 truncates FLOAT to 24-bits, HALF/UINT pass through unchanged
+    std::vector<unsigned char> outBuf(static_cast<size_t>(width) *
+                                      static_cast<size_t>(num_lines) *
+                                      pixel_data_size);
+
+    if (!tinyexr::DecompressPxr24(
+            reinterpret_cast<unsigned char *>(&outBuf.at(0)), outBuf.size(),
+            data_ptr, static_cast<size_t>(data_len),
+            width, num_lines, static_cast<size_t>(num_channels), channels)) {
+      return false;
+    }
+
+    // Process decompressed data (same as ZIP path)
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+              &outBuf.at(v * static_cast<size_t>(pixel_data_size) *
+                             static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            tinyexr::FP16 hf;
+            tinyexr::cpy2(&(hf.u), line_ptr + u);
+            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+              unsigned short *image =
+                  reinterpret_cast<unsigned short **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = hf.u;
+            } else {  // HALF -> FLOAT
+              tinyexr::FP32 f32 = half_to_float(hf);
+              float *image = reinterpret_cast<float **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = f32.f;
+            }
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false);
+
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            unsigned int val;
+            tinyexr::cpy4(&val, line_ptr + u);
+            tinyexr::swap4(&val);
+
+            unsigned int *image =
+                reinterpret_cast<unsigned int **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false);
+        for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
+          const float *line_ptr = reinterpret_cast<float *>(
+              &outBuf.at(v * pixel_data_size * static_cast<size_t>(width) +
+                         channel_offset_list[c] * static_cast<size_t>(width)));
+          for (size_t u = 0; u < static_cast<size_t>(width); u++) {
+            float val;
+            tinyexr::cpy4(&val, line_ptr + u);
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            float *image = reinterpret_cast<float **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else {
+        return false;
+      }
+    }
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    // B44/B44A compression: Use true B44 block decompression
+    // B44 is a lossy block compression for HALF data (4x4 blocks -> 14 bytes)
+    bool is_b44a = (compression_type == TINYEXR_COMPRESSIONTYPE_B44A);
+
+    // Compute outBuf size matching DecompressB44's output layout: per-channel
+    // sequential data using subsampled dimensions for each channel.
+    size_t b44_out_size = 0;
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      int xs = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+      int ys = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+      size_t cw = static_cast<size_t>((width     + xs - 1) / xs);
+      size_t ch = static_cast<size_t>((num_lines + ys - 1) / ys);
+      size_t bpp = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) ? 2u : 4u;
+      b44_out_size += cw * ch * bpp;
+    }
+
+    std::vector<unsigned char> outBuf(b44_out_size);
+
+    if (!tinyexr::DecompressB44(
+            reinterpret_cast<unsigned char *>(&outBuf.at(0)), outBuf.size(),
+            data_ptr, static_cast<size_t>(data_len),
+            width, num_lines, static_cast<size_t>(num_channels), channels,
+            is_b44a)) {
+      return false;
+    }
+
+    // Process decompressed data - B44 returns data organized per channel,
+    // using subsampled dimensions (ch_width/ch_height based on x/y_sampling).
+    // Accumulate ch_offset based on actual subsampled sizes of preceding
+    // channels to handle mixed channel types and subsampling correctly.
+    size_t ch_offset = 0;
+    for (size_t c = 0; c < static_cast<size_t>(num_channels); c++) {
+      int xs = channels[c].x_sampling > 0 ? channels[c].x_sampling : 1;
+      int ys = channels[c].y_sampling > 0 ? channels[c].y_sampling : 1;
+      size_t ch_width  = static_cast<size_t>((width     + xs - 1) / xs);
+      size_t ch_height = static_cast<size_t>((num_lines + ys - 1) / ys);
+      size_t ch_bytes = (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) ? 2 : 4;
+
+      if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) {
+        for (size_t v = 0; v < ch_height; v++) {
+          const unsigned short *line_ptr = reinterpret_cast<unsigned short *>(
+              &outBuf.at(ch_offset + v * ch_width * 2));
+          for (size_t u = 0; u < ch_width; u++) {
+            tinyexr::FP16 hf;
+            tinyexr::cpy2(&(hf.u), line_ptr + u);
+            // B44 stream stores data in little-endian order (same as the
+            // encoder's buf); reverse the byte swap the encoder applied.
+            tinyexr::swap2(reinterpret_cast<unsigned short *>(&hf.u));
+
+            if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) {
+              unsigned short *image =
+                  reinterpret_cast<unsigned short **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = hf.u;
+            } else {  // HALF -> FLOAT
+              tinyexr::FP32 f32 = half_to_float(hf);
+              float *image = reinterpret_cast<float **>(out_images)[c];
+              if (line_order == 0) {
+                image += (static_cast<size_t>(line_no) + v) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              } else {
+                image += (static_cast<size_t>(height) - 1U -
+                          (static_cast<size_t>(line_no) + v)) *
+                             static_cast<size_t>(x_stride) +
+                         u;
+              }
+              *image = f32.f;
+            }
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT, false);
+
+        for (size_t v = 0; v < ch_height; v++) {
+          const unsigned int *line_ptr = reinterpret_cast<unsigned int *>(
+              &outBuf.at(ch_offset + v * ch_width * 4));
+          for (size_t u = 0; u < ch_width; u++) {
+            unsigned int val;
+            tinyexr::cpy4(&val, line_ptr + u);
+            tinyexr::swap4(&val);
+
+            unsigned int *image =
+                reinterpret_cast<unsigned int **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) {
+        TINYEXR_CHECK_AND_RETURN_C(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT, false);
+        for (size_t v = 0; v < ch_height; v++) {
+          const float *line_ptr = reinterpret_cast<float *>(
+              &outBuf.at(ch_offset + v * ch_width * 4));
+          for (size_t u = 0; u < ch_width; u++) {
+            float val;
+            tinyexr::cpy4(&val, line_ptr + u);
+            tinyexr::swap4(reinterpret_cast<unsigned int *>(&val));
+
+            float *image = reinterpret_cast<float **>(out_images)[c];
+            if (line_order == 0) {
+              image += (static_cast<size_t>(line_no) + v) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            } else {
+              image += (static_cast<size_t>(height) - 1U -
+                        (static_cast<size_t>(line_no) + v)) *
+                           static_cast<size_t>(x_stride) +
+                       u;
+            }
+            *image = val;
+          }
+        }
+      } else {
+        return false;
+      }
+      ch_offset += ch_width * ch_height * ch_bytes;
+    }
   } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) {
     for (size_t c = 0; c < num_channels; c++) {
       for (size_t v = 0; v < static_cast<size_t>(num_lines); v++) {
@@ -4217,7 +5713,7 @@ static bool DecodePixelData(/* out */ unsigned char **out_images,
 static bool DecodeTiledPixelData(
     unsigned char **out_images, int *width, int *height,
     const int *requested_pixel_types, const unsigned char *data_ptr,
-    size_t data_len, int compression_type, int line_order, int data_width,
+    size_t data_len, int compression_type, int data_width,
     int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x,
     int tile_size_y, size_t pixel_data_size, size_t num_attributes,
     const EXRAttribute *attributes, size_t num_channels,
@@ -4243,8 +5739,9 @@ static bool DecodeTiledPixelData(
   }
 
   // Image size = tile size.
+  // Line order within tiles is always increasing.
   return DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len,
-                         compression_type, line_order, (*width), tile_size_y,
+                         compression_type, /* line_order*/ 0, (*width), tile_size_y,
                          /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0,
                          (*height), pixel_data_size, num_attributes, attributes,
                          num_channels, channels, channel_offset_list);
@@ -4514,6 +6011,13 @@ static int ParseEXRHeader(HeaderInfo *info, bool *empty_header,
 #endif
       }
 
+      // PXR24, B44, B44A compression types
+      if (data[0] == TINYEXR_COMPRESSIONTYPE_PXR24 ||
+          data[0] == TINYEXR_COMPRESSIONTYPE_B44 ||
+          data[0] == TINYEXR_COMPRESSIONTYPE_B44A) {
+        ok = true;
+      }
+
       if (!ok) {
         if (err) {
           (*err) = "Unknown compression type.";
@@ -4866,7 +6370,7 @@ static int LevelIndex(int lx, int ly, int tile_level_mode, int num_x_levels) {
   default:
     return -1;
   }
-//  return 0;
+  return 0;
 }
 
 static int LevelSize(int toplevel_size, int level, int tile_rounding_mode) {
@@ -4935,10 +6439,12 @@ static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header,
   std::atomic<int> tile_count(0);
 
   int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+#if (TINYEXR_MAX_THREADS > 0)
+  num_threads = std::min(num_threads,TINYEXR_MAX_THREADS);
+#endif
   if (num_threads > int(num_tiles)) {
     num_threads = int(num_tiles);
   }
-
   for (int t = 0; t < num_threads; t++) {
     workers.emplace_back(std::thread([&]()
       {
@@ -5017,7 +6523,6 @@ static int DecodeTiledLevel(EXRImage* exr_image, const EXRHeader* exr_header,
       &(exr_image->tiles[tile_idx].height),
       exr_header->requested_pixel_types, data_ptr,
       static_cast<size_t>(data_len), exr_header->compression_type,
-      exr_header->line_order,
       exr_image->width, exr_image->height,
       tile_coordinates[0], tile_coordinates[1], exr_header->tile_size_x,
       exr_header->tile_size_y, static_cast<size_t>(pixel_data_size),
@@ -5078,16 +6583,23 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
     num_scanline_blocks = 32;
   } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
     num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    num_scanline_blocks = 32;
+  }
 
 #if TINYEXR_USE_ZFP
+  if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
     tinyexr::ZFPCompressionParam zfp_compression_param;
     if (!FindZFPCompressionParam(&zfp_compression_param,
                                  exr_header->custom_attributes,
                                  int(exr_header->num_custom_attributes), err)) {
       return TINYEXR_ERROR_INVALID_HEADER;
     }
-#endif
   }
+#endif
 
   if (exr_header->data_window.max_x < exr_header->data_window.min_x ||
       exr_header->data_window.max_y < exr_header->data_window.min_y) {
@@ -5291,10 +6803,12 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
     std::atomic<int> y_count(0);
 
     int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+#if (TINYEXR_MAX_THREADS > 0)
+    num_threads = std::min(num_threads,TINYEXR_MAX_THREADS);
+#endif
     if (num_threads > int(num_blocks)) {
       num_threads = int(num_blocks);
     }
-
     for (int t = 0; t < num_threads; t++) {
       workers.emplace_back(std::thread([&]() {
         int y = 0;
@@ -5369,10 +6883,11 @@ static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header,
                 if (line_no < 0) {
                   invalid_data = true;
                 } else {
+                  // Line order is increasing because we read in line offset table order.
                   if (!tinyexr::DecodePixelData(
                           exr_image->images, exr_header->requested_pixel_types,
                           data_ptr, static_cast<size_t>(data_len),
-                          exr_header->compression_type, exr_header->line_order,
+                          exr_header->compression_type, /* line_order*/ 0,
                           int(data_width), int(data_height), int(data_width), y, line_no,
                           num_lines, static_cast<size_t>(pixel_data_size),
                           static_cast<size_t>(
@@ -5860,7 +7375,7 @@ static bool ReconstructTileOffsets(OffsetData& offset_data,
         if (size_t(tileX) >= offset_data.offsets[size_t(level_idx)][size_t(tileY)].size()) {
           return false;
         }
-
+        
         offset_data.offsets[size_t(level_idx)][size_t(tileY)][size_t(tileX)] = tileOffset;
       }
     }
@@ -5914,6 +7429,11 @@ static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header,
     num_scanline_blocks = 32;
   } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
     num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    num_scanline_blocks = 16;
+  } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    num_scanline_blocks = 32;
   }
 
   if (exr_header->data_window.max_x < exr_header->data_window.min_x ||
@@ -6187,18 +7707,15 @@ int EXRLayers(const char *filename, const char **layer_names[], int *num_layers,
 }
 
 int LoadEXR(float **out_rgba, int *width, int *height, const char *filename,
-            const char **err, int *num_chans) {
+            const char **err) {
   return LoadEXRWithLayer(out_rgba, width, height, filename,
-                          /* layername */ NULL, err, num_chans);
+                          /* layername */ NULL, err);
 }
 
 int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
                      const char *filename, const char *layername,
-                     const char **err, int *num_chans) {
-    if (num_chans)
-        *num_chans = 0;
-
-    if (out_rgba == NULL) {
+                     const char **err) {
+  if (out_rgba == NULL) {
     tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err);
     return TINYEXR_ERROR_INVALID_ARGUMENT;
   }
@@ -6293,9 +7810,6 @@ int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
   }
 
   if (channels.size() == 1) {
-    if (num_chans)
-      *num_chans = 1;
-
     int chIdx = int(channels.front().index);
     // Grayscale channel only.
 
@@ -6374,9 +7888,6 @@ int LoadEXRWithLayer(float **out_rgba, int *width, int *height,
       return TINYEXR_ERROR_INVALID_DATA;
     }
 
-    if (num_chans)
-        *num_chans = (idxA != -1) ? 4 : 3;
-
     (*out_rgba) = reinterpret_cast<float *>(
         malloc(4 * sizeof(float) * static_cast<size_t>(exr_image.width) *
                static_cast<size_t>(exr_image.height)));
@@ -6555,6 +8066,7 @@ int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
 
   ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err);
   if (ret != TINYEXR_SUCCESS) {
+    FreeEXRHeader(&exr_header);
     return ret;
   }
 
@@ -6568,6 +8080,8 @@ int LoadEXRFromMemory(float **out_rgba, int *width, int *height,
   InitEXRImage(&exr_image);
   ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err);
   if (ret != TINYEXR_SUCCESS) {
+    FreeEXRHeader(&exr_header);
+    FreeEXRImage(&exr_image);
     return ret;
   }
 
@@ -6871,7 +8385,7 @@ struct MemoryMappedFile {
     if (read_bytes != size) {
       // TODO: Try to read data until reading `size` bytes.
       fclose(fp);
-      size = 0;
+      size = 0; 
       data = nullptr;
       return;
     }
@@ -7133,7 +8647,7 @@ static bool EncodePixelData(/* out */ std::vector<unsigned char>& out_data,
   } else if ((compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) ||
     (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) {
 #if defined(TINYEXR_USE_MINIZ) && (TINYEXR_USE_MINIZ==1)
-    std::vector<unsigned char> block(buminiz::mz_compressBound(
+    std::vector<unsigned char> block(mz_compressBound(
       static_cast<unsigned long>(buf.size())));
 #elif TINYEXR_USE_STB_ZLIB
     // there is no compressBound() function, so we use a value that
@@ -7238,6 +8752,70 @@ static bool EncodePixelData(/* out */ std::vector<unsigned char>& out_data,
     (void)compression_param;
     return false;
 #endif
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    // PXR24 compression: True PXR24 (truncates FLOAT to 24-bits + zlib)
+    std::vector<unsigned char> block;
+
+    if (!tinyexr::CompressPxr24(block,
+                         reinterpret_cast<const unsigned char *>(&buf.at(0)),
+                         buf.size(), width, num_lines,
+                         channels.size(), channels)) {
+      if (err) {
+        (*err) += "PXR24 compression failed.\n";
+      }
+      return false;
+    }
+
+    out_data.insert(out_data.end(), block.begin(), block.end());
+
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    // B44/B44A compression: True B44 block compression for HALF data
+    bool is_b44a = (compression_type == TINYEXR_COMPRESSIONTYPE_B44A);
+    std::vector<unsigned char> block;
+
+    // CompressB44 expects per-channel sequential layout, but buf is
+    // scanline-interleaved: within each row, channels are stored contiguously
+    // (channel_offset_list[c] * width bytes into the row), and rows are
+    // stacked.  Convert to per-channel sequential before compressing.
+    std::vector<unsigned char> seq_buf(buf_size);
+    unsigned char *seq_p = seq_buf.data();
+    for (size_t c = 0; c < channels.size(); c++) {
+      int file_type = channels[c].requested_pixel_type;
+      // HALF is 2 bytes; FLOAT and UINT are both 4 bytes
+      size_t ch_size = (file_type == TINYEXR_PIXELTYPE_HALF) ? 2 : 4;
+      for (int y = 0; y < num_lines; y++) {
+        const unsigned char *src =
+            &buf[y * pixel_data_size * width + channel_offset_list[c] * width];
+        size_t row_bytes = static_cast<size_t>(width) * ch_size;
+        memcpy(seq_p, src, row_bytes);
+#if !TINYEXR_LITTLE_ENDIAN
+        // buf has already been byte-swapped to little-endian for file output.
+        // CompressB44 reads HALF values as host-endian unsigned shorts, so
+        // un-swap the bytes back to host-endian for correct B44 encoding.
+        if (file_type == TINYEXR_PIXELTYPE_HALF) {
+          unsigned short *p = reinterpret_cast<unsigned short *>(seq_p);
+          for (int x = 0; x < width; x++) {
+            tinyexr::swap2(p + x);
+          }
+        }
+#endif
+        seq_p += row_bytes;
+      }
+    }
+
+    if (!tinyexr::CompressB44(block,
+                         reinterpret_cast<const unsigned char *>(seq_buf.data()),
+                         seq_buf.size(), width, num_lines,
+                         channels.size(), channels, is_b44a)) {
+      if (err) {
+        (*err) += "B44 compression failed.\n";
+      }
+      return false;
+    }
+
+    out_data.insert(out_data.end(), block.begin(), block.end());
+
   } else {
     return false;
   }
@@ -7282,6 +8860,9 @@ static int EncodeTiledLevel(const EXRImage* level_image, const EXRHeader* exr_he
   std::atomic<int> tile_count(0);
 
   int num_threads = std::max(1, int(std::thread::hardware_concurrency()));
+#if (TINYEXR_MAX_THREADS > 0)
+  num_threads = std::min(num_threads,TINYEXR_MAX_THREADS);
+#endif
   if (num_threads > int(num_tiles)) {
     num_threads = int(num_tiles);
   }
@@ -7378,6 +8959,11 @@ static int NumScanlines(int compression_type) {
     num_scanlines = 32;
   } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) {
     num_scanlines = 16;
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_PXR24) {
+    num_scanlines = 16;  // PXR24 uses 16 scanlines per block (same as ZIP)
+  } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+             compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+    num_scanlines = 32;  // B44/B44A uses 32 scanlines per block
   }
   return num_scanlines;
 }
@@ -7531,7 +9117,9 @@ static int EncodeChunk(const EXRImage* exr_image, const EXRHeader* exr_header,
     std::atomic<int> block_count(0);
 
     int num_threads = std::min(std::max(1, int(std::thread::hardware_concurrency())), num_blocks);
-
+#if (TINYEXR_MAX_THREADS > 0)
+    num_threads = std::min(num_threads,TINYEXR_MAX_THREADS);
+#endif
     for (int t = 0; t < num_threads; t++) {
       workers.emplace_back(std::thread([&]() {
         int i = 0;
@@ -7708,7 +9296,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
         if (!PrecalculateTileInfo(num_x_tiles, num_y_tiles, exr_headers[i])) {
           SetErrorMessage("Failed to precalculate Tile info",
                           err);
-          return (size_t)TINYEXR_ERROR_INVALID_DATA;
+          return TINYEXR_ERROR_INVALID_DATA;
         }
         int ntiles = InitTileOffsets(offset_data[i], exr_headers[i], num_x_tiles, num_y_tiles);
         if (ntiles > 0) {
@@ -7716,8 +9304,8 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
         } else {
           SetErrorMessage("Failed to compute Tile offsets",
                           err);
-          return (size_t)TINYEXR_ERROR_INVALID_DATA;
-
+          return TINYEXR_ERROR_INVALID_DATA;
+          
         }
         total_chunk_count += chunk_count[i];
       }
@@ -7918,7 +9506,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
   // Allocating required memory
   if (total_size == 0) { // something went wrong
     tinyexr::SetErrorMessage("Output memory size is zero", err);
-    return (size_t)TINYEXR_ERROR_INVALID_DATA;
+    return TINYEXR_ERROR_INVALID_DATA;
   }
   (*memory_out) = static_cast<unsigned char*>(malloc(size_t(total_size)));
 
@@ -7939,7 +9527,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
           sum += num_bytes;
           if (sum > total_size) {
             tinyexr::SetErrorMessage("Invalid offset bytes in Tiled Part image.", err);
-            return (size_t)TINYEXR_ERROR_INVALID_DATA;
+            return TINYEXR_ERROR_INVALID_DATA;
           }
 
           memcpy(memory_ptr,
@@ -7954,7 +9542,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
       sum += num_bytes;
       if (sum > total_size) {
         tinyexr::SetErrorMessage("Invalid offset bytes in Part image.", err);
-        return (size_t)TINYEXR_ERROR_INVALID_DATA;
+        return TINYEXR_ERROR_INVALID_DATA;
       }
       std::vector<tinyexr::tinyexr_uint64>& offsets = offset_data[i].offsets[0][0];
       memcpy(memory_ptr, reinterpret_cast<unsigned char*>(&offsets[0]), num_bytes);
@@ -7969,7 +9557,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
         sum += 4;
         if (sum > total_size) {
           tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err);
-          return (size_t)TINYEXR_ERROR_INVALID_DATA;
+          return TINYEXR_ERROR_INVALID_DATA;
         }
         unsigned int part_number = i;
         swap4(&part_number);
@@ -7979,7 +9567,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
       sum += data_lists[i][j].size();
       if (sum > total_size) {
         tinyexr::SetErrorMessage("Buffer overrun in reading Part image chunk data.", err);
-        return (size_t)TINYEXR_ERROR_INVALID_DATA;
+        return TINYEXR_ERROR_INVALID_DATA;
       }
       memcpy(memory_ptr, &data_lists[i][j][0], data_lists[i][j].size());
       memory_ptr += data_lists[i][j].size();
@@ -7988,7 +9576,7 @@ static size_t SaveEXRNPartImageToMemory(const EXRImage* exr_images,
 
   if (sum != total_size) {
     tinyexr::SetErrorMessage("Corrupted Part image chunk data.", err);
-    return (size_t)TINYEXR_ERROR_INVALID_DATA;
+    return TINYEXR_ERROR_INVALID_DATA;
   }
 
   return size_t(total_size);  // OK
@@ -8080,7 +9668,7 @@ size_t SaveEXRMultipartImageToMemory(const EXRImage* exr_images,
                                      const EXRHeader** exr_headers,
                                      unsigned int num_parts,
                                      unsigned char** memory_out, const char** err) {
-  if (exr_images == NULL || exr_headers == NULL || num_parts < 2 ||
+  if (exr_images == NULL || exr_headers == NULL || num_parts == 0 ||
       memory_out == NULL) {
     tinyexr::SetErrorMessage("Invalid argument for SaveEXRNPartImageToMemory",
                               err);
@@ -8094,7 +9682,7 @@ int SaveEXRMultipartImageToFile(const EXRImage* exr_images,
                                 unsigned int num_parts,
                                 const char* filename,
                                 const char** err) {
-  if (exr_images == NULL || exr_headers == NULL || num_parts < 2) {
+  if (exr_images == NULL || exr_headers == NULL || num_parts == 0) {
     tinyexr::SetErrorMessage("Invalid argument for SaveEXRMultipartImageToFile",
                               err);
     return TINYEXR_ERROR_INVALID_ARGUMENT;
@@ -8226,7 +9814,7 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) {
 
     if (attr_name.compare("compression") == 0) {
       compression_type = data[0];
-      if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) {
+      if (compression_type > TINYEXR_COMPRESSIONTYPE_B44A) {
         std::stringstream ss;
         ss << "Unsupported compression type : " << compression_type;
         tinyexr::SetErrorMessage(ss.str(), err);
@@ -8235,6 +9823,9 @@ int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) {
 
       if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) {
         num_scanline_blocks = 16;
+      } else if (compression_type == TINYEXR_COMPRESSIONTYPE_B44 ||
+                 compression_type == TINYEXR_COMPRESSIONTYPE_B44A) {
+        num_scanline_blocks = 32;
       }
 
     } else if (attr_name.compare("channels") == 0) {
@@ -8583,20 +10174,7 @@ int EXRNumLevels(const EXRImage* exr_image) {
   if(exr_image->images) return 1; // scanlines
   int levels = 1;
   const EXRImage* level_image = exr_image;
-
-#if 0
-  while ((level_image = level_image->next_level))
-      ++levels;
-#else
-  for (; ;)
-  {
-      level_image = level_image->next_level;
-      if (!level_image)
-          break;
-      ++levels;
-  }
-#endif
-
+  while((level_image = level_image->next_level)) ++levels;
   return levels;
 }
 
@@ -8608,16 +10186,19 @@ int FreeEXRImage(EXRImage *exr_image) {
   if (exr_image->next_level) {
     FreeEXRImage(exr_image->next_level);
     delete exr_image->next_level;
+    exr_image->next_level = NULL;
   }
 
   for (int i = 0; i < exr_image->num_channels; i++) {
     if (exr_image->images && exr_image->images[i]) {
       free(exr_image->images[i]);
+      exr_image->images[i] = NULL;
     }
   }
 
   if (exr_image->images) {
     free(exr_image->images);
+    exr_image->images = NULL;
   }
 
   if (exr_image->tiles) {
@@ -8625,15 +10206,21 @@ int FreeEXRImage(EXRImage *exr_image) {
       for (int i = 0; i < exr_image->num_channels; i++) {
         if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) {
           free(exr_image->tiles[tid].images[i]);
+          exr_image->tiles[tid].images[i] = NULL;
         }
       }
       if (exr_image->tiles[tid].images) {
         free(exr_image->tiles[tid].images);
+        exr_image->tiles[tid].images = NULL;
       }
     }
     free(exr_image->tiles);
+    exr_image->tiles = NULL;
   }
 
+  exr_image->num_channels = 0;
+  exr_image->num_tiles = 0;
+
   return TINYEXR_SUCCESS;
 }
 
@@ -8787,6 +10374,71 @@ int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers,
       exr_headers, num_headers, exr_version, file.data, file.size, err);
 }
 
+// ========================================================================
+// Refactored loader functions using Reader class for safer memory access
+// ========================================================================
+
+namespace {  // anonymous namespace for internal helpers
+
+// Parse the EXR version header (magic number, version byte, flag byte and two
+// skipped bytes) from `reader` into `version`; returns a TINYEXR_* status code.
+static int ParseEXRVersionWithReader(EXRVersion *version, tinyexr::Reader& reader) {
+  if (version == NULL) {
+    return TINYEXR_ERROR_INVALID_ARGUMENT;
+  }
+  if (reader.length() < tinyexr::kEXRVersionSize) {
+    reader.add_error("Insufficient data size for EXR version header");
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+
+  // Check magic number: 0x76, 0x2f, 0x31, 0x01
+  uint8_t header[4];
+  if (!reader.read(4, header)) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  if (header[0] != 0x76 || header[1] != 0x2f || header[2] != 0x31 ||
+      header[3] != 0x01) {
+    reader.add_error("Invalid EXR magic number");
+    return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
+  }
+
+  // Clear the flags before the version check (as the replaced implementation
+  // did) so a rejected version byte never leaves stale flags in `version`.
+  version->tiled = false;
+  version->long_name = false;
+  version->non_image = false;
+  version->multipart = false;
+
+  // Parse version byte (must be 2)
+  uint8_t version_byte;
+  if (!reader.read1(&version_byte)) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  if (version_byte != 2) {
+    reader.add_error("Invalid EXR version");
+    return TINYEXR_ERROR_INVALID_EXR_VERSION;
+  }
+  version->version = 2;
+
+  // Parse flags byte
+  uint8_t flags;
+  if (!reader.read1(&flags)) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  version->tiled = (flags & 0x2) ? true : false;       // 9th bit
+  version->long_name = (flags & 0x4) ? true : false;   // 10th bit
+  version->non_image = (flags & 0x8) ? true : false;   // 11th bit (deep image)
+  version->multipart = (flags & 0x10) ? true : false;  // 12th bit
+  // Skip remaining 2 bytes to complete the 8-byte version header
+  uint8_t dummy[2];
+  if (!reader.read(2, dummy)) {
+    return TINYEXR_ERROR_INVALID_DATA;
+  }
+  return TINYEXR_SUCCESS;
+}
+
+}  // anonymous namespace
+
 int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory,
                               size_t size) {
   if (version == NULL || memory == NULL) {
@@ -8797,51 +10449,13 @@ int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory,
     return TINYEXR_ERROR_INVALID_DATA;
   }
 
-  const unsigned char *marker = memory;
+  // Use Reader class for safer memory access
+  tinyexr::Reader reader(memory, size, tinyexr::Endian::Little);
+  int ret = ParseEXRVersionWithReader(version, reader);
 
-  // Header check.
-  {
-    const char header[] = {0x76, 0x2f, 0x31, 0x01};
-
-    if (memcmp(marker, header, 4) != 0) {
-      return TINYEXR_ERROR_INVALID_MAGIC_NUMBER;
-    }
-    marker += 4;
-  }
-
-  version->tiled = false;
-  version->long_name = false;
-  version->non_image = false;
-  version->multipart = false;
-
-  // Parse version header.
-  {
-    // must be 2
-    if (marker[0] != 2) {
-      return TINYEXR_ERROR_INVALID_EXR_VERSION;
-    }
-
-    if (version == NULL) {
-      return TINYEXR_SUCCESS;  // May OK
-    }
-
-    version->version = 2;
-
-    if (marker[1] & 0x2) {  // 9th bit
-      version->tiled = true;
-    }
-    if (marker[1] & 0x4) {  // 10th bit
-      version->long_name = true;
-    }
-    if (marker[1] & 0x8) {        // 11th bit
-      version->non_image = true;  // (deep image)
-    }
-    if (marker[1] & 0x10) {  // 12th bit
-      version->multipart = true;
-    }
-  }
-
-  return TINYEXR_SUCCESS;
+  // Note: errors are accumulated in reader.errors() but not propagated
+  // to maintain compatibility with existing API
+  return ret;
 }
 
 int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) {
@@ -9035,7 +10649,7 @@ int LoadEXRMultipartImageFromFile(EXRImage *exr_images,
 }
 
 int SaveEXRToMemory(const float *data, int width, int height, int components,
-            const int save_as_fp16, const unsigned char **outbuf, const char **err) {
+            const int save_as_fp16, unsigned char **outbuf, const char **err) {
 
   if ((components == 1) || components == 3 || components == 4) {
     // OK
@@ -9074,13 +10688,19 @@ int SaveEXRToMemory(const float *data, int width, int height, int components,
     images[3].resize(static_cast<size_t>(width * height));
 
     // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers
-    for (size_t i = 0; i < static_cast<size_t>(width * height); i++) {
-      images[0][i] = data[static_cast<size_t>(components) * i + 0];
-      images[1][i] = data[static_cast<size_t>(components) * i + 1];
-      images[2][i] = data[static_cast<size_t>(components) * i + 2];
-      if (components == 4) {
-        images[3][i] = data[static_cast<size_t>(components) * i + 3];
-      }
+    if (components == 4) {
+        for (size_t i = 0; i < static_cast<size_t>(width * height); i++) {
+          images[0][i] = data[static_cast<size_t>(components) * i + 0];
+          images[1][i] = data[static_cast<size_t>(components) * i + 1];
+          images[2][i] = data[static_cast<size_t>(components) * i + 2];
+          images[3][i] = data[static_cast<size_t>(components) * i + 3];
+        }
+    } else {
+       for (size_t i = 0; i < static_cast<size_t>(width * height); i++) {
+          images[0][i] = data[static_cast<size_t>(components) * i + 0];
+          images[1][i] = data[static_cast<size_t>(components) * i + 1];
+          images[2][i] = data[static_cast<size_t>(components) * i + 2];
+        }
     }
   }
 
@@ -9225,13 +10845,19 @@ int SaveEXR(const float *data, int width, int height, int components,
     images[3].resize(pixel_count);
 
     // Split RGB(A)RGB(A)RGB(A)... into R, G and B(and A) layers
-    for (size_t i = 0; i < pixel_count; i++) {
-      images[0][i] = data[static_cast<size_t>(components) * i + 0];
-      images[1][i] = data[static_cast<size_t>(components) * i + 1];
-      images[2][i] = data[static_cast<size_t>(components) * i + 2];
-      if (components == 4) {
-        images[3][i] = data[static_cast<size_t>(components) * i + 3];
-      }
+    if (components == 4) {
+       for (size_t i = 0; i < pixel_count; i++) {
+         images[0][i] = data[static_cast<size_t>(components) * i + 0];
+         images[1][i] = data[static_cast<size_t>(components) * i + 1];
+         images[2][i] = data[static_cast<size_t>(components) * i + 2];
+         images[3][i] = data[static_cast<size_t>(components) * i + 3];
+       }
+    } else {
+       for (size_t i = 0; i < pixel_count; i++) {
+         images[0][i] = data[static_cast<size_t>(components) * i + 0];
+         images[1][i] = data[static_cast<size_t>(components) * i + 1];
+         images[2][i] = data[static_cast<size_t>(components) * i + 2];
+       }
     }
   }
 
@@ -9314,9 +10940,6 @@ int SaveEXR(const float *data, int width, int height, int components,
   }
 
   int ret = SaveEXRImageToFile(&image, &header, outfilename, err);
-  if (ret != TINYEXR_SUCCESS) {
-    return ret;
-  }
 
   free(header.channels);
   free(header.pixel_types);
@@ -9325,6 +10948,394 @@ int SaveEXR(const float *data, int width, int height, int components,
   return ret;
 }
 
+// ----------------------------------------------------------------
+// Spectral EXR API implementations
+// ----------------------------------------------------------------
+
+// Writes wavelength_nm into buffer as "<whole>,<6-digit fraction>" (comma as decimal separator); no-op if buffer is NULL or buffer_size < 16.
+void EXRFormatWavelength(char *buffer, size_t buffer_size, float wavelength_nm) {
+  if (!buffer || buffer_size < 16) return;
+
+  // Split into the integer part and a fraction rounded to 6 decimal places
+  int whole = static_cast<int>(wavelength_nm);
+  int frac = static_cast<int>((wavelength_nm - whole) * 1000000.0f + 0.5f);
+  if (frac >= 1000000) { whole += 1; frac -= 1000000; }  // rounding carried over: emit "2,000000", not "1,1000000"
+#ifdef _MSC_VER
+  sprintf_s(buffer, buffer_size, "%d,%06d", whole, frac);
+#else
+  snprintf(buffer, buffer_size, "%d,%06d", whole, frac);
+#endif
+}
+
+// Builds an emissive/Stokes channel name "S<stokes_component>.<wavelength>nm" (e.g. "S0.550,000000nm"); no-op if buffer is NULL or buffer_size < 32.
+void EXRSpectralChannelName(char *buffer, size_t buffer_size,
+                            float wavelength_nm, int stokes_component) {  // stokes_component is printed as-is; no 0..3 range check here
+  if (!buffer || buffer_size < 32) return;  // buffer is left unmodified on rejection
+
+  char wavelength_str[32];  // receives the "<whole>,<6 digits>" text from EXRFormatWavelength
+  EXRFormatWavelength(wavelength_str, sizeof(wavelength_str), wavelength_nm);
+
+#ifdef _MSC_VER
+  sprintf_s(buffer, buffer_size, "S%d.%snm", stokes_component, wavelength_str);
+#else
+  snprintf(buffer, buffer_size, "S%d.%snm", stokes_component, wavelength_str);
+#endif
+}
+
+// Builds a reflective channel name "T.<wavelength>nm" (e.g. "T.550,000000nm"); no-op if buffer is NULL or buffer_size < 32.
+void EXRReflectiveChannelName(char *buffer, size_t buffer_size,
+                              float wavelength_nm) {
+  if (!buffer || buffer_size < 32) return;  // buffer is left unmodified on rejection
+
+  char wavelength_str[32];  // receives the "<whole>,<6 digits>" text from EXRFormatWavelength
+  EXRFormatWavelength(wavelength_str, sizeof(wavelength_str), wavelength_nm);
+
+#ifdef _MSC_VER
+  sprintf_s(buffer, buffer_size, "T.%snm", wavelength_str);
+#else
+  snprintf(buffer, buffer_size, "T.%snm", wavelength_str);
+#endif
+}
+
+// Extracts the wavelength from "S{0-3}.<wl>nm" or "T.<wl>nm"; returns -1.0f for NULL, an unknown prefix, or a missing "nm" suffix.
+float EXRParseSpectralChannelWavelength(const char *channel_name) {
+  if (!channel_name) return -1.0f;
+
+  const char *p = channel_name;
+
+  // Skip prefix: "S{n}." (n in 0..3) or "T."; && short-circuits, so p[1]/p[2] are read only after the previous char matched
+  if (*p == 'S' && p[1] >= '0' && p[1] <= '3' && p[2] == '.') {
+    p += 3;
+  } else if (*p == 'T' && p[1] == '.') {
+    p += 2;
+  } else {
+    return -1.0f;
+  }
+
+  // Copy characters up to the first 'n' (or end of string / 63-char limit), turning ',' into '.' for atof
+  // Format: "550,000000nm"
+  char wavelength_str[64];
+  size_t len = 0;
+  while (*p && *p != 'n' && len < sizeof(wavelength_str) - 1) {
+    wavelength_str[len++] = (*p == ',') ? '.' : *p;
+    p++;
+  }
+  wavelength_str[len] = '\0';
+
+  // Check for "nm" suffix (also rejects names where the copy stopped at end of string or at the length limit)
+  if (*p != 'n' || p[1] != 'm') {
+    return -1.0f;
+  }
+
+  return static_cast<float>(atof(wavelength_str));  // NOTE(review): atof reports no errors; empty/non-numeric text presumably yields 0, not -1 (callers in this file test "> 0.0f")
+}
+
+// Returns the Stokes index (0..3) encoded in an "S{n}." channel-name prefix, or -1 for NULL or any other name (including "T." names).
+int EXRGetStokesComponent(const char *channel_name) {
+  if (!channel_name) return -1;
+
+  if (channel_name[0] == 'S' &&
+      channel_name[1] >= '0' && channel_name[1] <= '3' &&
+      channel_name[2] == '.') {  // only the prefix is inspected; the wavelength/"nm" part is not validated here
+    return channel_name[1] - '0';  // ASCII digit -> integer
+  }
+
+  return -1;
+}
+
+// Returns 1 if channel_name has an "S{0-3}." or "T." prefix and its wavelength parses to a value > 0; otherwise 0 (NULL included).
+int EXRIsSpectralChannel(const char *channel_name) {
+  if (!channel_name) return 0;
+
+  // Check for "S{n}.{wavelength}nm" pattern
+  if (channel_name[0] == 'S' &&
+      channel_name[1] >= '0' && channel_name[1] <= '3' &&
+      channel_name[2] == '.') {
+    return EXRParseSpectralChannelWavelength(channel_name) > 0.0f ? 1 : 0;  // parser returns -1 on a malformed name
+  }
+
+  // Check for "T.{wavelength}nm" pattern
+  if (channel_name[0] == 'T' && channel_name[1] == '.') {
+    return EXRParseSpectralChannelWavelength(channel_name) > 0.0f ? 1 : 0;  // same test as the "S" branch
+  }
+
+  return 0;
+}
+
+// Linear search of exr_header->custom_attributes for an exact (strcmp) name match; returns the first hit, or NULL if an argument is NULL or nothing matches.
+static const EXRAttribute* FindCustomAttribute(const EXRHeader *exr_header,
+                                                const char *name) {
+  if (!exr_header || !name) return NULL;
+
+  for (int i = 0; i < exr_header->num_custom_attributes; i++) {
+    if (strcmp(exr_header->custom_attributes[i].name, name) == 0) {  // case-sensitive comparison
+      return &exr_header->custom_attributes[i];  // pointer into the header's own array, not a copy
+    }
+  }
+  return NULL;
+}
+
+// Classifies a header as TINYEXR_SPECTRUM_REFLECTIVE / _POLARISED / _EMISSIVE from its channel-name prefixes; returns -1 if it cannot be classified.
+int EXRGetSpectrumType(const EXRHeader *exr_header) {
+  if (!exr_header) return -1;
+
+  // A header without a "spectralLayoutVersion" custom attribute is reported as -1 regardless of its channels
+  const EXRAttribute *layout_attr = FindCustomAttribute(exr_header, "spectralLayoutVersion");
+  if (!layout_attr) return -1;
+
+  // Check channel names to determine type (only the prefix is examined, not the wavelength or "nm" suffix)
+  int has_stokes = 0;
+  int has_reflective = 0;
+  int has_emissive = 0;
+
+  for (int i = 0; i < exr_header->num_channels; i++) {
+    const char *name = exr_header->channels[i].name;
+    if (name[0] == 'T' && name[1] == '.') {
+      has_reflective = 1;
+    } else if (name[0] == 'S' && name[1] >= '0' && name[1] <= '3' && name[2] == '.') {
+      has_emissive = 1;
+      if (name[1] != '0') {  // any S1..S3 channel marks the image as polarised
+        has_stokes = 1;
+      }
+    }
+  }
+
+  if (has_reflective) return TINYEXR_SPECTRUM_REFLECTIVE;  // precedence: reflective, then polarised, then emissive
+  if (has_stokes) return TINYEXR_SPECTRUM_POLARISED;
+  if (has_emissive) return TINYEXR_SPECTRUM_EMISSIVE;
+
+  return -1;  // layout attribute present but no channel carries a spectral prefix
+}
+
+// Fills wavelengths[] with the distinct positive wavelengths parsed from the channel names, sorted ascending; returns how many were stored (0 on bad arguments).
+int EXRGetWavelengths(const EXRHeader *exr_header,
+                      float *wavelengths, int max_wavelengths) {
+  if (!exr_header || !wavelengths || max_wavelengths <= 0) return 0;
+
+  int count = 0;
+
+  for (int i = 0; i < exr_header->num_channels && count < max_wavelengths; i++) {  // scanning stops once the output is full, before sorting
+    float wl = EXRParseSpectralChannelWavelength(exr_header->channels[i].name);
+    if (wl > 0.0f) {  // skips non-spectral names (parser returns -1) and non-positive values
+      // Check if wavelength already in list (values closer than 0.01 are treated as the same wavelength)
+      int found = 0;
+      for (int j = 0; j < count; j++) {
+        if (std::fabs(static_cast<double>(wavelengths[j] - wl)) < 0.01) {
+          found = 1;
+          break;
+        }
+      }
+      if (!found) {
+        wavelengths[count++] = wl;
+      }
+    }
+  }
+
+  // Sort wavelengths ascending, in place, with nested-loop pairwise swaps
+  for (int i = 0; i < count - 1; i++) {
+    for (int j = i + 1; j < count; j++) {
+      if (wavelengths[i] > wavelengths[j]) {
+        float tmp = wavelengths[i];
+        wavelengths[i] = wavelengths[j];
+        wavelengths[j] = tmp;
+      }
+    }
+  }
+
+  return count;
+}
+
+// Returns the value bytes of the "ROOT/units" custom attribute, else of "emissiveUnits", else NULL; the pointer aliases the header's storage (no copy).
+const char* EXRGetSpectralUnits(const EXRHeader *exr_header) {
+  if (!exr_header) return NULL;
+
+  // Try ROOT/units first (spectral-exr format)
+  const EXRAttribute *attr = FindCustomAttribute(exr_header, "ROOT/units");
+  if (attr && attr->value && attr->size > 0) {
+    return reinterpret_cast<const char*>(attr->value);  // NOTE(review): no check that value[size-1] is '\0'; confirm parsed attributes are NUL-terminated
+  }
+
+  // Try emissiveUnits
+  attr = FindCustomAttribute(exr_header, "emissiveUnits");
+  if (attr && attr->value && attr->size > 0) {
+    return reinterpret_cast<const char*>(attr->value);  // NOTE(review): same termination assumption as above; the attribute type is not checked either
+  }
+
+  return NULL;
+}
+
+// Appends a custom attribute of type "string" holding a malloc'ed copy of value (terminator included); returns TINYEXR_SUCCESS or an error code.
+static int AddStringAttribute(EXRHeader *exr_header, const char *name, const char *value) {
+  if (!exr_header || !name || !value) return TINYEXR_ERROR_INVALID_ARGUMENT;
+
+  int new_count = exr_header->num_custom_attributes + 1;
+  if (new_count > TINYEXR_MAX_CUSTOM_ATTRIBUTES) {
+    return TINYEXR_ERROR_DATA_TOO_LARGE;
+  }
+
+  // Grow the attributes array by one slot; existing entries are preserved by realloc
+  EXRAttribute *new_attrs = static_cast<EXRAttribute*>(
+      realloc(exr_header->custom_attributes,
+              sizeof(EXRAttribute) * static_cast<size_t>(new_count)));
+  if (!new_attrs) {
+    return TINYEXR_ERROR_INVALID_DATA;  // allocation failure is reported with this code; the header is left unchanged
+  }
+
+  exr_header->custom_attributes = new_attrs;
+  EXRAttribute *attr = &exr_header->custom_attributes[exr_header->num_custom_attributes];  // the newly added last slot
+
+  // Initialize the new attribute
+  memset(attr, 0, sizeof(EXRAttribute));
+
+#ifdef _MSC_VER
+  strncpy_s(attr->name, sizeof(attr->name), name, 255);
+  strncpy_s(attr->type, sizeof(attr->type), "string", 255);
+#else
+  strncpy(attr->name, name, 255);  // names longer than 255 chars are truncated
+  attr->name[255] = '\0';
+  strncpy(attr->type, "string", 255);
+  attr->type[255] = '\0';
+#endif
+
+  size_t value_len = strlen(value) + 1;  // Include null terminator
+  attr->value = static_cast<unsigned char*>(malloc(value_len));
+  if (!attr->value) {
+    return TINYEXR_ERROR_INVALID_DATA;  // array stays grown, but num_custom_attributes is not bumped, so the slot is not exposed
+  }
+  memcpy(attr->value, value, value_len);
+  attr->size = static_cast<int>(value_len);  // stored size counts the trailing '\0'
+
+  exr_header->num_custom_attributes = new_count;  // publish the slot only after it is fully initialized
+
+  return TINYEXR_SUCCESS;
+}
+
+// Appends a custom attribute of type "int" holding a malloc'ed copy of value's raw bytes; returns TINYEXR_SUCCESS or an error code.
+static int AddIntAttribute(EXRHeader *exr_header, const char *name, int value) {
+  if (!exr_header || !name) return TINYEXR_ERROR_INVALID_ARGUMENT;
+
+  int new_count = exr_header->num_custom_attributes + 1;
+  if (new_count > TINYEXR_MAX_CUSTOM_ATTRIBUTES) {
+    return TINYEXR_ERROR_DATA_TOO_LARGE;
+  }
+
+  // Grow the attributes array by one slot; existing entries are preserved by realloc
+  EXRAttribute *new_attrs = static_cast<EXRAttribute*>(
+      realloc(exr_header->custom_attributes,
+              sizeof(EXRAttribute) * static_cast<size_t>(new_count)));
+  if (!new_attrs) {
+    return TINYEXR_ERROR_INVALID_DATA;  // allocation failure is reported with this code; the header is left unchanged
+  }
+
+  exr_header->custom_attributes = new_attrs;
+  EXRAttribute *attr = &exr_header->custom_attributes[exr_header->num_custom_attributes];  // the newly added last slot
+
+  // Initialize the new attribute
+  memset(attr, 0, sizeof(EXRAttribute));
+
+#ifdef _MSC_VER
+  strncpy_s(attr->name, sizeof(attr->name), name, 255);
+  strncpy_s(attr->type, sizeof(attr->type), "int", 255);
+#else
+  strncpy(attr->name, name, 255);  // names longer than 255 chars are truncated
+  attr->name[255] = '\0';
+  strncpy(attr->type, "int", 255);
+  attr->type[255] = '\0';
+#endif
+
+  attr->value = static_cast<unsigned char*>(malloc(sizeof(int)));
+  if (!attr->value) {
+    return TINYEXR_ERROR_INVALID_DATA;  // array stays grown, but num_custom_attributes is not bumped, so the slot is not exposed
+  }
+  memcpy(attr->value, &value, sizeof(int));  // bytes are copied in host order; NOTE(review): confirm the writer handles endianness
+  attr->size = sizeof(int);
+
+  exr_header->num_custom_attributes = new_count;  // publish the slot only after it is fully initialized
+
+  return TINYEXR_SUCCESS;
+}
+
+// Appends the spectral custom attributes (layout version, optional units, handedness for polarised data) to exr_header; returns the first error hit or TINYEXR_SUCCESS.
+int EXRSetSpectralAttributes(EXRHeader *exr_header,
+                             int spectrum_type,
+                             const char *units) {
+  if (!exr_header) return TINYEXR_ERROR_INVALID_ARGUMENT;
+
+  int ret;
+
+  // Add spectralLayoutVersion (always "1.0")
+  ret = AddStringAttribute(exr_header, "spectralLayoutVersion", "1.0");
+  if (ret != TINYEXR_SUCCESS) return ret;
+
+  // Add units attribute based on spectrum type (skipped when units is NULL or empty)
+  if (units && strlen(units) > 0) {
+    if (spectrum_type == TINYEXR_SPECTRUM_REFLECTIVE) {
+      ret = AddStringAttribute(exr_header, "ROOT/units", units);
+    } else {
+      ret = AddStringAttribute(exr_header, "emissiveUnits", units);  // used for every non-reflective spectrum_type
+    }
+    if (ret != TINYEXR_SUCCESS) return ret;  // attributes added before the failure are not rolled back
+  }
+
+  // Add polarisation handedness for polarised images (the value is hard-coded to "left")
+  if (spectrum_type == TINYEXR_SPECTRUM_POLARISED) {
+    ret = AddStringAttribute(exr_header, "polarisationHandedness", "left");
+    if (ret != TINYEXR_SUCCESS) return ret;
+  }
+
+  return TINYEXR_SUCCESS;
+}
+
+// Returns TINYEXR_SUCCESS if the file's header has a "spectralLayoutVersion" custom attribute, TINYEXR_ERROR_INVALID_DATA if not, or the parser's error code.
+int IsSpectralEXR(const char *filename) {
+  EXRVersion version;
+  int ret = ParseEXRVersionFromFile(&version, filename);
+  if (ret != TINYEXR_SUCCESS) return ret;
+
+  const char *err = NULL;
+  EXRHeader header;
+  InitEXRHeader(&header);
+
+  ret = ParseEXRHeaderFromFile(&header, &version, filename, &err);
+  if (ret != TINYEXR_SUCCESS) {
+    if (err) FreeEXRErrorMessage(err);  // the error text is discarded; only the code is returned
+    return ret;  // NOTE(review): header is not freed on this path; confirm the parser leaves nothing allocated on failure
+  }
+
+  // Check for spectralLayoutVersion attribute (presence only; its value and the channel names are not inspected)
+  int is_spectral = (FindCustomAttribute(&header, "spectralLayoutVersion") != NULL);
+
+  FreeEXRHeader(&header);
+
+  return is_spectral ? TINYEXR_SUCCESS : TINYEXR_ERROR_INVALID_DATA;  // a status code, not a boolean: 'spectral' maps to TINYEXR_SUCCESS
+}
+
+// In-memory variant: returns TINYEXR_SUCCESS if the header has a "spectralLayoutVersion" custom attribute, TINYEXR_ERROR_INVALID_DATA if not (or on NULL / fewer than 8 bytes), or the parser's error code.
+int IsSpectralEXRFromMemory(const unsigned char *memory, size_t size) {
+  if (!memory || size < 8) return TINYEXR_ERROR_INVALID_DATA;
+
+  EXRVersion version;
+  int ret = ParseEXRVersionFromMemory(&version, memory, size);
+  if (ret != TINYEXR_SUCCESS) return ret;
+
+  const char *err = NULL;
+  EXRHeader header;
+  InitEXRHeader(&header);
+
+  ret = ParseEXRHeaderFromMemory(&header, &version, memory, size, &err);
+  if (ret != TINYEXR_SUCCESS) {
+    if (err) FreeEXRErrorMessage(err);  // the error text is discarded; only the code is returned
+    return ret;  // NOTE(review): header is not freed on this path; confirm the parser leaves nothing allocated on failure
+  }
+
+  // Check for spectralLayoutVersion attribute (presence only; its value and the channel names are not inspected)
+  int is_spectral = (FindCustomAttribute(&header, "spectralLayoutVersion") != NULL);
+
+  FreeEXRHeader(&header);
+
+  return is_spectral ? TINYEXR_SUCCESS : TINYEXR_ERROR_INVALID_DATA;  // a status code, not a boolean: 'spectral' maps to TINYEXR_SUCCESS
+}
+
 #ifdef __clang__
 // zero-as-null-pointer-constant
 #pragma clang diagnostic pop
diff --git a/encoder/basisu_enc.cpp b/encoder/basisu_enc.cpp
index 83631b9..e92b4b8 100644
--- a/encoder/basisu_enc.cpp
+++ b/encoder/basisu_enc.cpp
@@ -28,9 +28,7 @@
 
 #include <vector>
 
-#ifndef TINYEXR_USE_ZFP
-#define TINYEXR_USE_ZFP (1)
-#endif
+#define TINYEXR_USE_MINIZ (0)
 #include "3rdparty/tinyexr.h"
 
 #ifndef MINIZ_HEADER_FILE_ONLY
@@ -3439,7 +3437,7 @@ namespace basisu
 			return false;
 		return write_vec_to_file(pFilename, file_data);
 	}
-		
+
 	bool read_exr(const char* pFilename, imagef& img, int& n_chans)
 	{
 		n_chans = 0;
@@ -3447,8 +3445,8 @@ namespace basisu
 		int width = 0, height = 0;
 		float* out_rgba = nullptr;
 		const char* err = nullptr;
-		
-		int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err, &n_chans);
+
+		int status = LoadEXRWithLayer(&out_rgba, &width, &height, pFilename, nullptr, &err);
 		if (status != 0)
 		{
 			error_printf("Failed loading .EXR image \"%s\"! (TinyEXR error: %s)\n", pFilename, err ? err : "?");
@@ -3457,7 +3455,7 @@ namespace basisu
 			return false;
 		}
 
-		const uint32_t MAX_SUPPORTED_DIM = 65536;
+		const uint32_t MAX_SUPPORTED_DIM = 32768;
 		if ((width < 1) || (height < 1) || (width > (int)MAX_SUPPORTED_DIM) || (height > (int)MAX_SUPPORTED_DIM))
 		{
 			error_printf("Invalid dimensions of .EXR image \"%s\"!\n", pFilename);
@@ -3466,32 +3464,60 @@ namespace basisu
 		}
 
 		img.resize(width, height);
+
+		memcpy((void*)img.get_ptr(), out_rgba, static_cast<size_t>(sizeof(float) * 4 * img.get_total_pixels()));
 		
-		if (n_chans == 1)
+		free(out_rgba);
+		out_rgba = nullptr;
+
+		uint32_t total_all_same_rgba = 0, total_all_same_rgb = 0, total_has_alpha = 0;
+
+		for (int y = 0; y < height; y++)
 		{
-			const float* pSrc = out_rgba;
-			vec4F* pDst = img.get_ptr();
-
-			for (int y = 0; y < height; y++)
+			for (int x = 0; x < width; x++)
 			{
-				for (int x = 0; x < width; x++)
-				{
-					(*pDst)[0] = pSrc[0];
-					(*pDst)[1] = pSrc[1];
-					(*pDst)[2] = pSrc[2];
-					(*pDst)[3] = 1.0f;
+				const vec4F& p = img(x, y);
 
-					pSrc += 4;
-					++pDst;
-				}
-			}
+				if ((p[0] == p[1]) && (p[0] == p[2]))
+					total_all_same_rgb++;
+
+				const float a = p[3];
+
+				if ((a == p[0]) && (a == p[1]) && (a == p[2]))
+					total_all_same_rgba++;
+								
+				if (a != 1.0f)
+					total_has_alpha++;
+
+			} // x
+		} // y
+
+		const uint32_t total_pixels = width * height;
+		if (total_all_same_rgba == total_pixels)
+		{
+			// TinyEXR loads single channel EXR images into all output channels (including alpha) - assume they are luminance and fix our alpha.
+			// Odds are this is an opaque luminance-only image, not a true alpha channel image. (As of early 2026 we don't support any HDR format with alpha, anyway.)
+			for (int y = 0; y < height; y++)
+				for (int x = 0; x < width; x++)
+					img(x, y)[3] = 1.0f;
+			
+			n_chans = 1;
+		}
+		else if (total_has_alpha)
+		{
+			n_chans = 4;
+		}
+		else if (total_all_same_rgb == total_pixels)
+		{
+			n_chans = 1;
 		}
 		else
 		{
-			memcpy((void *)img.get_ptr(), out_rgba, static_cast<size_t>(sizeof(float) * 4 * img.get_total_pixels()));
+			n_chans = 3;
 		}
 
-		free(out_rgba);
+		//fmt_printf("Number of detected EXR channels: {}\n", n_chans);
+				
 		return true;
 	}
 
@@ -3513,6 +3539,8 @@ namespace basisu
 		memcpy((void *)img.get_ptr(), out_rgba, width * height * sizeof(float) * 4);
 		free(out_rgba);
 
+		// TODO: detect luminance-only etc.
+
 		return true;
 	}
 
diff --git a/encoder/basisu_tinyexr.cpp b/encoder/basisu_tinyexr.cpp
index 9606914..6d3828a 100644
--- a/encoder/basisu_tinyexr.cpp
+++ b/encoder/basisu_tinyexr.cpp
@@ -5,34 +5,27 @@
 #endif
 #endif
 
+// Pull in our local fork of the miniz library. (Binomial wrote the original miniz library. Basisu was tested with this specific version.)
 #define MINIZ_HEADER_FILE_ONLY
 #define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
 #include "basisu_miniz.h"
 
-// Force tinyexr to use zlib-style compression API's, then we'll direct them to our own customized copy of miniz. (Binomial wrote the original miniz library.)
-// This allows us to use tinyexr.h without modify it at all, or relying on zlib.
+// A bit of a hack: force tinyexr to use plain zlib-style compression APIs, then redirect those calls to our own customized copy of miniz via #defines.
+// This allows us to use tinyexr.h without modifying it at all, relying on zlib, or pulling in a system-wide miniz dependency.
+// This assumes tinyexr.h doesn't include zlib.h (it doesn't: "Please include your own zlib-compatible API header before...")
+// (Time will tell how fragile this is in reality.)
 #define TINYEXR_USE_MINIZ (0)
 
-enum { Z_OK = 0, Z_STREAM_END = 1, Z_NEED_DICT = 2, Z_ERRNO = -1, Z_STREAM_ERROR = -2, Z_DATA_ERROR = -3, Z_MEM_ERROR = -4, Z_BUF_ERROR = -5, Z_VERSION_ERROR = -6, Z_PARAM_ERROR = -10000 };
-typedef unsigned long uLongf;
-typedef unsigned long uLong;
+#define Z_OK buminiz::MZ_OK
+#define uLong buminiz::mz_ulong
+#define uLongf buminiz::mz_ulong
+
 typedef unsigned char Byte;
 typedef Byte Bytef;
 
-uLong compressBound(uLong src_size)
-{
-    return buminiz::mz_compressBound(src_size);
-}
-
-int compress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLen)
-{
-    return buminiz::mz_compress(dest, destLen, source, sourceLen);
-}
-
-int uncompress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLen)
-{
-    return buminiz::mz_uncompress(dest, destLen, source, sourceLen);
-}
+#define compressBound	buminiz::mz_compressBound
+#define compress		buminiz::mz_compress
+#define uncompress		buminiz::mz_uncompress
 
 #ifdef _MSC_VER
 #pragma warning (disable: 4060)
@@ -40,6 +33,7 @@ int uncompress(Bytef* dest, uLongf* destLen, const Bytef* source, uLong sourceLe
 #pragma warning (disable: 4245)
 #pragma warning (disable: 4505)
 #pragma warning (disable: 4702)
+#pragma warning (disable: 4530) // warning C4530: C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc
 #endif
 
 #define TINYEXR_IMPLEMENTATION
diff --git a/encoder_lib/encoder_lib.vcxproj b/encoder_lib/encoder_lib.vcxproj
index 6318f21..9d4c1b9 100644
--- a/encoder_lib/encoder_lib.vcxproj
+++ b/encoder_lib/encoder_lib.vcxproj
@@ -28,11 +28,11 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\encoder\3rdparty\android_astc_decomp.cpp" />
-    <ClCompile Include="..\encoder\3rdparty\tinyexr.cpp" />
     <ClCompile Include="..\encoder\basisu_astc_hdr_6x6_enc.cpp" />
     <ClCompile Include="..\encoder\basisu_astc_hdr_common.cpp" />
     <ClCompile Include="..\encoder\basisu_astc_ldr_common.cpp" />
     <ClCompile Include="..\encoder\basisu_astc_ldr_encode.cpp" />
+    <ClCompile Include="..\encoder\basisu_tinyexr.cpp" />
     <ClCompile Include="..\encoder\basisu_uastc_hdr_4x4_enc.cpp" />
     <ClCompile Include="..\encoder\basisu_backend.cpp" />
     <ClCompile Include="..\encoder\basisu_basis_file.cpp" />
diff --git a/encoder_lib/encoder_lib.vcxproj.filters b/encoder_lib/encoder_lib.vcxproj.filters
index 80e8fd0..8d0ad48 100644
--- a/encoder_lib/encoder_lib.vcxproj.filters
+++ b/encoder_lib/encoder_lib.vcxproj.filters
@@ -84,9 +84,6 @@
     <ClCompile Include="..\encoder\pvpngreader.cpp">
       <Filter>Source Files\encoder</Filter>
     </ClCompile>
-    <ClCompile Include="..\encoder\3rdparty\tinyexr.cpp">
-      <Filter>Source Files\encoder\3rdparty</Filter>
-    </ClCompile>
     <ClCompile Include="..\zstd\zstd.c">
       <Filter>Source Files\encoder\3rdparty</Filter>
     </ClCompile>
@@ -105,6 +102,9 @@
     <ClCompile Include="..\encoder\basisu_astc_ldr_encode.cpp">
       <Filter>Source Files\encoder</Filter>
     </ClCompile>
+    <ClCompile Include="..\encoder\basisu_tinyexr.cpp">
+      <Filter>Source Files\encoder</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\transcoder\basisu_astc_helpers.h">
diff --git a/webgl/encoder/CMakeLists.txt b/webgl/encoder/CMakeLists.txt
index 91dcd3d..8027b8e 100644
--- a/webgl/encoder/CMakeLists.txt
+++ b/webgl/encoder/CMakeLists.txt
@@ -67,7 +67,7 @@ if(EMSCRIPTEN)
     ../../encoder/basisu_astc_hdr_common.cpp
     ../../encoder/basisu_astc_ldr_common.cpp
     ../../encoder/basisu_astc_ldr_encode.cpp
-    ../../encoder/3rdparty/tinyexr.cpp
+    ../../encoder/basisu_tinyexr.cpp
   )
   if(KTX2_ZSTANDARD)
     list(APPEND SRC_LIST ../../zstd/zstd.c)