mirror of
https://github.com/BinomialLLC/basis_universal.git
synced 2026-06-08 08:33:53 +00:00
This update impacts the UASTC HDR 4x4, UASTC HDR 6x6i, and XUASTC LDR formats. New KTX2 files in these specific formats using v2.1 can't be loaded using v2.0, however we've kept backwards compatibility with all of our previously written KTX2 files. (v2.1 can transcode all previously written files.) This is part of our effort to be compatible with Khronos's specification. bc6hf encoder's smooth block path updated to handle inputs with texels close to MAX_HALF_FLOAT. Adding command line options to basisu tool so it can write either v2.0 or v1.6 compatible UASTC HDR 6x6i files. The default is v1.6. Updating WebGL KTX2 testbed so on ASTC devices it automatically switches between sRGB or linear internal formats for the created texture.
4854 lines
144 KiB
C++
4854 lines
144 KiB
C++
// basisu_astc_helpers.h
|
|
// Be sure to define BASISU_ASTC_HELPERS_IMPLEMENTATION somewhere to get the implementation, otherwise you only get the header.
|
|
#ifndef BASISU_ASTC_HELPERS_HEADER
|
|
#define BASISU_ASTC_HELPERS_HEADER
|
|
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <math.h>
|
|
#include <fenv.h>
|
|
|
|
namespace astc_helpers
|
|
{
|
|
const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid
|
|
const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels
|
|
const uint32_t MAX_BLOCK_PIXELS = MAX_BLOCK_DIM * MAX_BLOCK_DIM;
|
|
const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values
|
|
const uint32_t MAX_CEM_ENDPOINT_VALS = 8; // see Table 94. ASTC LDR/HDR color endpoint modes (max 8 values to encode any CEM, minimum 2)
|
|
|
|
// The number of BISE values needed to encode endpoints for each CEM.
|
|
const uint32_t NUM_MODE0_ENDPOINTS = 2, NUM_MODE4_ENDPOINTS = 4;
|
|
const uint32_t NUM_MODE6_ENDPOINTS = 4, NUM_MODE8_ENDPOINTS = 6, NUM_MODE9_ENDPOINTS = 6; // LDR RGB
|
|
const uint32_t NUM_MODE10_ENDPOINTS = 6, NUM_MODE12_ENDPOINTS = 8, NUM_MODE13_ENDPOINTS = 8; // LDR RGBA
|
|
const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4; // hdr
|
|
|
|
const uint32_t MAX_WEIGHTS = 32; // max supported # of weights (or "selectors") in any mode, i.e. the max # of colors per endpoint pair
|
|
const uint32_t MAX_WEIGHT_INTERPOLANT_VALUE = 64; // grid texel weights must range from [0,64], i.e. the weight interpolant range is [0,64]
|
|
|
|
// 14 unique block dimensions supported by ASTC
|
|
static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
|
|
extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];
|
|
|
|
// The Color Endpoint Modes (CEM's).
// Values run 0-15; get_num_cem_values() derives the endpoint value count from (cem >> 2).
enum cems
{
    CEM_LDR_LUM_DIRECT = 0,
    CEM_LDR_LUM_BASE_PLUS_OFS = 1,
    CEM_HDR_LUM_LARGE_RANGE = 2,
    CEM_HDR_LUM_SMALL_RANGE = 3,
    CEM_LDR_LUM_ALPHA_DIRECT = 4,
    CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS = 5,
    CEM_LDR_RGB_BASE_SCALE = 6,
    CEM_HDR_RGB_BASE_SCALE = 7,
    CEM_LDR_RGB_DIRECT = 8,
    CEM_LDR_RGB_BASE_PLUS_OFFSET = 9,
    CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A = 10,
    CEM_HDR_RGB = 11,
    CEM_LDR_RGBA_DIRECT = 12,
    CEM_LDR_RGBA_BASE_PLUS_OFFSET = 13,
    CEM_HDR_RGB_LDR_ALPHA = 14,
    CEM_HDR_RGB_HDR_ALPHA = 15
};
|
|
|
|
// All Bounded Integer Sequence Coding (BISE or ISE) ranges.
// The enumerator value is the ISE range index; the name gives the number of levels in that range.
// Weights: Ranges [0,11] are valid.
// Endpoints: Ranges [4,20] are valid.
enum bise_levels
{
    BISE_2_LEVELS = 0,
    BISE_3_LEVELS = 1,
    BISE_4_LEVELS = 2,
    BISE_5_LEVELS = 3,
    BISE_6_LEVELS = 4,
    BISE_8_LEVELS = 5,
    BISE_10_LEVELS = 6,
    BISE_12_LEVELS = 7,
    BISE_16_LEVELS = 8,
    BISE_20_LEVELS = 9,
    BISE_24_LEVELS = 10,
    BISE_32_LEVELS = 11,
    BISE_40_LEVELS = 12,
    BISE_48_LEVELS = 13,
    BISE_64_LEVELS = 14,
    BISE_80_LEVELS = 15,
    BISE_96_LEVELS = 16,
    BISE_128_LEVELS = 17,
    BISE_160_LEVELS = 18,
    BISE_192_LEVELS = 19,
    BISE_256_LEVELS = 20
};
|
|
|
|
const uint32_t TOTAL_ISE_RANGES = 21;
|
|
|
|
// Indices of the supported ASTC block dimensions, ordered by ascending sample (texel) count.
enum
{
    cBLOCK_SIZE_4x4 = 0, // 16 samples
    cBLOCK_SIZE_5x4 = 1, // 20 samples
    cBLOCK_SIZE_5x5 = 2, // 25 samples
    cBLOCK_SIZE_6x5 = 3, // 30 samples

    cBLOCK_SIZE_6x6 = 4, // 36 samples
    cBLOCK_SIZE_8x5 = 5, // 40 samples
    cBLOCK_SIZE_8x6 = 6, // 48 samples
    cBLOCK_SIZE_10x5 = 7, // 50 samples

    cBLOCK_SIZE_10x6 = 8, // 60 samples
    cBLOCK_SIZE_8x8 = 9, // 64 samples
    cBLOCK_SIZE_10x8 = 10, // 80 samples
    cBLOCK_SIZE_10x10 = 11, // 100 samples

    cBLOCK_SIZE_12x10 = 12, // 120 samples
    cBLOCK_SIZE_12x12 = 13, // 144 samples

    cTOTAL_BLOCK_SIZES = 14
};
|
|
|
|
// Valid endpoint ISE ranges
|
|
const uint32_t FIRST_VALID_ENDPOINT_ISE_RANGE = BISE_6_LEVELS; // 4
|
|
const uint32_t LAST_VALID_ENDPOINT_ISE_RANGE = BISE_256_LEVELS; // 20
|
|
const uint32_t TOTAL_ENDPOINT_ISE_RANGES = LAST_VALID_ENDPOINT_ISE_RANGE - FIRST_VALID_ENDPOINT_ISE_RANGE + 1;
|
|
|
|
// Valid weight ISE ranges
|
|
const uint32_t FIRST_VALID_WEIGHT_ISE_RANGE = BISE_2_LEVELS; // 0
|
|
const uint32_t LAST_VALID_WEIGHT_ISE_RANGE = BISE_32_LEVELS; // 11
|
|
const uint32_t TOTAL_WEIGHT_ISE_RANGES = LAST_VALID_WEIGHT_ISE_RANGE - FIRST_VALID_WEIGHT_ISE_RANGE + 1;
|
|
|
|
// The ISE range table.
|
|
extern const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3]; // 0=bits (0 to 8), 1=trits (0 or 1), 2=quints (0 or 1)
|
|
|
|
// Possible Color Component Select values, used in dual plane mode.
// The CCS component will be interpolated using the 2nd weight plane.
// Naming: CCS_<first-plane components>_<second-plane component>.
enum ccs
{
    CCS_GBA_R = 0,
    CCS_RBA_G = 1,
    CCS_RGA_B = 2,
    CCS_RGB_A = 3
};
|
|
|
|
// A physical (packed) ASTC block, stored as four 32-bit words (128 bits).
struct astc_block
{
    uint32_t m_vals[4];
};
|
|
|
|
const uint32_t MAX_PARTITIONS = 4; // Max # of partitions or subsets for single plane mode
|
|
const uint32_t MAX_DUAL_PLANE_PARTITIONS = 3; // Max # of partitions or subsets for dual plane mode
|
|
const uint32_t NUM_PARTITION_PATTERNS = 1024; // Total # of partition pattern seeds (10-bits)
|
|
const uint32_t MAX_ENDPOINTS = 18; // Maximum # of endpoint values in a block
|
|
|
|
struct log_astc_block
|
|
{
|
|
bool m_error_flag;
|
|
|
|
bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;
|
|
|
|
uint8_t m_user_mode; // user defined value, not used in this module
|
|
|
|
// Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
|
|
uint8_t m_grid_width, m_grid_height; // weight grid dimensions, not the dimension of the block
|
|
|
|
bool m_dual_plane;
|
|
|
|
uint8_t m_weight_ise_range; // 0-11
|
|
uint8_t m_endpoint_ise_range; // 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking
|
|
|
|
uint8_t m_color_component_selector; // 0-3, controls which channel uses the 2nd (odd) weights, only used in dual plane mode
|
|
|
|
uint8_t m_num_partitions; // or the # of subsets, 1-4 (1-3 if dual plane mode)
|
|
uint16_t m_partition_id; // 10-bits, must be 0 if m_num_partitions==1
|
|
|
|
uint8_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's
|
|
|
|
union
|
|
{
|
|
// ISE weight grid values. In dual plane mode, the order is p0,p1, p0,p1, etc.
|
|
uint8_t m_weights[MAX_GRID_WEIGHTS];
|
|
uint16_t m_solid_color[4];
|
|
};
|
|
|
|
// ISE endpoint values
|
|
// Endpoint order examples:
|
|
// 1 subset LA : LL0 LH0 AL0 AH0
|
|
// 1 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0
|
|
// 1 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0
|
|
// 2 subset LA : LL0 LH0 AL0 AH0 LL1 LH1 AL1 AH1
|
|
// 2 subset RGB : RL0 RH0 GL0 GH0 BL0 BH0 RL1 RH1 GL1 GH1 BL1 BH1
|
|
// 2 subset RGBA : RL0 RH0 GL0 GH0 BL0 BH0 AL0 AH0 RL1 RH1 GL1 GH1 BL1 BH1 AL1 AH1
|
|
uint8_t m_endpoints[MAX_ENDPOINTS];
|
|
|
|
void clear()
|
|
{
|
|
memset(this, 0, sizeof(*this));
|
|
}
|
|
};
|
|
|
|
// Debug-only range check on the half-open interval [l, h): asserts l <= v < h, then returns v unchanged.
// The (void) casts keep the parameters "used" when assert() is compiled out.
inline int bounds_check(int v, int l, int h)
{
    (void)v; (void)l; (void)h;
    assert(v >= l && v < h);
    return v;
}

// Unsigned overload of the half-open [l, h) check above.
inline uint32_t bounds_check(uint32_t v, uint32_t l, uint32_t h)
{
    (void)v; (void)l; (void)h;
    assert(v >= l && v < h);
    return v;
}
|
|
|
|
// Returns bits [low, high] (both inclusive) of val, shifted down so bit `low` lands at bit 0.
// The field may be 1 to 32 bits wide.
inline uint32_t get_bits(uint32_t val, int low, int high)
{
    const int num_bits = (high - low) + 1;
    assert((num_bits >= 1) && (num_bits <= 32));
    // Shifting a 32-bit value by a negative amount or by >= 32 is undefined.
    assert((low >= 0) && (low <= 31));

    val >>= low;

    // A full 32-bit field needs no mask (and 1u << 32 would be undefined).
    if (num_bits != 32)
        val &= ((1u << num_bits) - 1);

    return val;
}
|
|
|
|
// Returns the number of levels in the given ISE range.
|
|
inline uint32_t get_ise_levels(uint32_t ise_range)
|
|
{
|
|
assert(ise_range < TOTAL_ISE_RANGES);
|
|
return (1 + 2 * g_ise_range_table[ise_range][1] + 4 * g_ise_range_table[ise_range][2]) << g_ise_range_table[ise_range][0];
|
|
}
|
|
|
|
inline int get_ise_sequence_bits(int count, int range)
|
|
{
|
|
// See 18.22 Data Size Determination - note this will be <= the # of bits actually written by encode_bise(). (It's magic.)
|
|
int total_bits = g_ise_range_table[range][0] * count;
|
|
total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5;
|
|
total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3;
|
|
return total_bits;
|
|
}
|
|
|
|
inline uint32_t weight_interpolate(uint32_t l, uint32_t h, uint32_t w)
|
|
{
|
|
assert(w <= MAX_WEIGHT_INTERPOLANT_VALUE);
|
|
return (l * (64 - w) + h * w + 32) >> 6;
|
|
}
|
|
|
|
void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats = nullptr);
|
|
|
|
// Bit counters, split into header, endpoint and weight bits. All start at zero.
struct pack_stats
{
    uint32_t m_header_bits;
    uint32_t m_endpoint_bits;
    uint32_t m_weight_bits;

    inline pack_stats() { clear(); }

    // Resets every counter to zero. Plain assignment is used (rather than memset) so this
    // struct does not depend on <string.h>, which this header does not include itself.
    inline void clear()
    {
        m_header_bits = 0;
        m_endpoint_bits = 0;
        m_weight_bits = 0;
    }
};
|
|
|
|
// Bit flags (may be OR'd together) for pack_astc_block()'s validate_flags parameter.
enum
{
    cValidateEarlyOutAtEndpointISEChecks = 1,
    cValidateSkipFinalEndpointWeightPacking = 2,
};
|
|
|
|
// Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
|
|
bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr, uint32_t validate_flags = 0);
|
|
|
|
// Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
|
|
void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a, pack_stats *pStats = nullptr);
|
|
|
|
// Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
|
|
void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats = nullptr);
|
|
|
|
// These helpers are all quite slow, but are useful for table preparation.
|
|
|
|
// Dequantizes ISE encoded endpoint val to [0,255]
|
|
uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range); // NOTE(review): this said "ISE ranges 4-11", but the valid endpoint ranges declared in this header are [4,20] (FIRST/LAST_VALID_ENDPOINT_ISE_RANGE) - confirm against the implementation
|
|
|
|
// Dequantizes ISE encoded weight val to [0,64]
|
|
uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range); // NOTE(review): this said "ISE ranges 0-10", but the valid weight ranges declared in this header are [0,11] (FIRST/LAST_VALID_WEIGHT_ISE_RANGE) - confirm against the implementation
|
|
|
|
uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range);
|
|
uint32_t find_nearest_bise_weight(int v, uint32_t ise_range);
|
|
|
|
void create_quant_tables(
|
|
uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
|
|
uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
|
|
uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
|
|
uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
|
|
uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
|
|
bool weight_flag); // false if block endpoints, true if weights
|
|
|
|
// True if the CEM is LDR.
|
|
bool is_cem_ldr(uint32_t mode);
|
|
// True if the CEM is HDR; defined simply as the negation of is_cem_ldr().
inline bool is_cem_hdr(uint32_t mode) { return !is_cem_ldr(mode); }
|
|
|
|
bool does_cem_have_alpha(uint32_t mode);
|
|
|
|
// True if the passed in dimensions are a valid ASTC block size. There are 14 supported configs, from 4x4 (8bpp) to 12x12 (.89bpp).
|
|
bool is_valid_block_size(uint32_t w, uint32_t h);
|
|
|
|
// w/h must be a valid ASTC block size, or it returns cBLOCK_SIZE_4x4
|
|
uint32_t get_block_size_index(uint32_t w, uint32_t h);
|
|
|
|
float get_bitrate_from_block_size(uint32_t w, uint32_t h);
|
|
|
|
uint32_t get_texel_partition_from_table(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t subsets, uint32_t x, uint32_t y);
|
|
|
|
bool block_has_any_hdr_cems(const log_astc_block& log_blk);
|
|
bool block_has_any_ldr_cems(const log_astc_block& log_blk);
|
|
|
|
// Returns the # of endpoint values for the given CEM.
|
|
inline uint32_t get_num_cem_values(uint32_t cem)
{
    assert(cem <= 15);

    // CEMs are grouped four to a class (cem >> 2); classes 0..3 use 2, 4, 6 and 8 values.
    const uint32_t cem_class = cem >> 2;
    return (cem_class + 1) * 2;
}
|
|
|
|
struct dequant_table
|
|
{
|
|
basisu::vector<uint8_t> m_val_to_ise; // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
|
|
basisu::vector<uint8_t> m_ISE_to_val; // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
|
|
basisu::vector<uint8_t> m_ISE_to_rank; // returns the level rank index given an ISE symbol, [levels]
|
|
basisu::vector<uint8_t> m_rank_to_ISE; // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
|
|
|
|
void init(bool weight_flag, uint32_t num_levels)
|
|
{
|
|
m_val_to_ise.resize(weight_flag ? (MAX_WEIGHT_INTERPOLANT_VALUE + 1) : 256);
|
|
m_ISE_to_val.resize(num_levels);
|
|
m_ISE_to_rank.resize(num_levels);
|
|
m_rank_to_ISE.resize(num_levels);
|
|
}
|
|
|
|
uint32_t get_rank_to_val(uint32_t rank) const
|
|
{
|
|
const uint32_t ise = m_rank_to_ISE[rank];
|
|
const uint32_t val = m_ISE_to_val[ise];
|
|
return val;
|
|
}
|
|
|
|
uint32_t get_val_to_rank(uint32_t val)
|
|
{
|
|
const uint32_t ise = m_val_to_ise[val];
|
|
const uint32_t rank = m_ISE_to_rank[ise];
|
|
return rank;
|
|
}
|
|
};
|
|
|
|
struct dequant_tables
|
|
{
|
|
dequant_table m_weights[TOTAL_WEIGHT_ISE_RANGES];
|
|
dequant_table m_endpoints[TOTAL_ENDPOINT_ISE_RANGES];
|
|
bool m_initialized_flag = false;
|
|
|
|
const dequant_table& get_weight_tab(uint32_t range) const
|
|
{
|
|
assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
|
|
return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
|
|
}
|
|
|
|
dequant_table& get_weight_tab(uint32_t range)
|
|
{
|
|
assert((range >= FIRST_VALID_WEIGHT_ISE_RANGE) && (range <= LAST_VALID_WEIGHT_ISE_RANGE));
|
|
return m_weights[range - FIRST_VALID_WEIGHT_ISE_RANGE];
|
|
}
|
|
|
|
const dequant_table& get_endpoint_tab(uint32_t range) const
|
|
{
|
|
assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
|
|
return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
|
|
}
|
|
|
|
dequant_table& get_endpoint_tab(uint32_t range)
|
|
{
|
|
assert((range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (range <= LAST_VALID_ENDPOINT_ISE_RANGE));
|
|
return m_endpoints[range - FIRST_VALID_ENDPOINT_ISE_RANGE];
|
|
}
|
|
|
|
void init()
|
|
{
|
|
if (m_initialized_flag)
|
|
return;
|
|
|
|
for (uint32_t range = FIRST_VALID_WEIGHT_ISE_RANGE; range <= LAST_VALID_WEIGHT_ISE_RANGE; range++)
|
|
{
|
|
const uint32_t num_levels = get_ise_levels(range);
|
|
dequant_table& tab = get_weight_tab(range);
|
|
|
|
tab.init(true, num_levels);
|
|
|
|
create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), tab.m_ISE_to_rank.data(), tab.m_rank_to_ISE.data(), range, true);
|
|
}
|
|
|
|
for (uint32_t range = FIRST_VALID_ENDPOINT_ISE_RANGE; range <= LAST_VALID_ENDPOINT_ISE_RANGE; range++)
|
|
{
|
|
const uint32_t num_levels = get_ise_levels(range);
|
|
dequant_table& tab = get_endpoint_tab(range);
|
|
|
|
tab.init(false, num_levels);
|
|
|
|
create_quant_tables(tab.m_val_to_ise.data(), tab.m_ISE_to_val.data(), tab.m_ISE_to_rank.data(), tab.m_rank_to_ISE.data(), range, false);
|
|
}
|
|
|
|
m_initialized_flag = true;
|
|
}
|
|
};
|
|
|
|
extern dequant_tables g_dequant_tables;
|
|
void init_tables();
|
|
|
|
// One bilinear sample: a source coordinate plus a 2x2 set of weights.
struct weighted_sample
{
    uint8_t m_src_x;
    uint8_t m_src_y;
    uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
};
|
|
|
|
void compute_upsample_weights(
|
|
int block_width, int block_height,
|
|
int weight_grid_width, int weight_grid_height,
|
|
weighted_sample* pWeights); // there will be block_width * block_height bilinear samples
|
|
|
|
void upsample_weight_grid(
|
|
uint32_t bx, uint32_t by, // destination/to dimension
|
|
uint32_t wx, uint32_t wy, // source/from dimension
|
|
const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
|
|
uint8_t* pDst_weights); // [by][bx]
|
|
|
|
void upsample_weight_grid_xuastc_ldr(
|
|
uint32_t bx, uint32_t by, // destination/to dimension
|
|
uint32_t wx, uint32_t wy, // source/from dimension
|
|
const uint8_t* pSrc_weights0, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
|
|
uint8_t* pDst_weights0, // [by][bx]
|
|
const uint8_t* pSrc_weights1, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
|
|
uint8_t* pDst_weights1); // [by][bx]
|
|
|
|
bool is_small_block(uint32_t block_width, uint32_t block_height);
|
|
|
|
// Procedurally returns the texel partition/subset index given the block coordinate and config (very slow).
|
|
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);
|
|
|
|
// Returns the texel partition/subset index given the block coordinate and config - table lookup, but currently ONLY 2-3 SUBSETS to save RAM.
|
|
int get_precomputed_texel_partition(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions);
|
|
|
|
void blue_contract(
|
|
int r, int g, int b, int a,
|
|
int& dr, int& dg, int& db, int& da);
|
|
|
|
void bit_transfer_signed(int& a, int& b);
|
|
|
|
void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t* pE);
|
|
|
|
// A half float, held as its raw 16-bit pattern.
typedef uint16_t half_float;
half_float float_to_half(float val, bool toward_zero);
float half_to_float(half_float hval);

// Converts a 16-bit qlog value (5-bit exponent, 11-bit mantissa) to half float bits.
// Notes:
// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
// However, this is not lossless in the general sense.
inline half_float qlog16_to_half(int k)
{
    assert((k >= 0) && (k <= 0xFFFF));

    const int E = (k & 0xF800) >> 11; // top 5 bits
    const int M = k & 0x7FF;          // low 11 bits

    // Piecewise-linear remap of the mantissa, one segment per sub-range of M.
    int Mt;
    if (M >= 1536)
        Mt = 5 * M - 2048;
    else if (M >= 512)
        Mt = 4 * M - 512;
    else
        Mt = 3 * M;

    return (half_float)((E << 10) + (Mt >> 3));
}
|
|
|
|
const int MAX_RGB9E5 = 0xff80;
|
|
void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
|
|
uint32_t pack_rgb9e5(float r, float g, float b);
|
|
|
|
// Output pixel format selected when decoding a block.
enum decode_mode
{
    cDecodeModeSRGB8 = 0, // returns uint8_t's, not valid on HDR blocks
    cDecodeModeLDR8 = 1, // returns uint8_t's, not valid on HDR blocks
    cDecodeModeHDR16 = 2, // returns uint16_t's (half floats), valid on all LDR/HDR blocks
    cDecodeModeRGB9E5 = 3 // returns uint32_t's, packed as RGB 9E5 (shared exponent), see https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
};
|
|
|
|
// Decodes logical block to output pixels.
|
|
// pPixels must point to either 32-bit pixel values (SRGB8/LDR8/9E5) or 64-bit pixel values (HDR16)
|
|
bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode);
|
|
|
|
// Assuming the ASTC logical block is valid, this checks for the extra XUASTC LDR constraints.
|
|
bool is_block_xuastc_ldr(const log_astc_block& log_blk);
|
|
|
|
// XUASTC LDR only - primary assumption is the logical block comes directly from our supercompressor. DO NOT call on general ASTC blocks.
|
|
bool decode_block_xuastc_ldr(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode, const uint8_t* pUpsampled_weights_to_use = nullptr, uint32_t start_x = 0, uint32_t start_y = 0, uint32_t end_x = 0, uint32_t end_y = 0);
|
|
|
|
void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t *pBits128, uint32_t bit_ofs);
|
|
|
|
// Unpack a physical ASTC encoded GPU texture block to a logical block description.
|
|
bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height);
|
|
|
|
uint8_t& get_weight(log_astc_block& log_block, uint32_t plane_index, uint32_t idx);
|
|
uint8_t get_weight(const log_astc_block& log_block, uint32_t plane_index, uint32_t idx);
|
|
void extract_weights(const log_astc_block& log_block, uint8_t* pWeights, uint32_t plane_index);
|
|
void set_weights(log_astc_block& log_block, const uint8_t* pWeights, uint32_t plane_index);
|
|
uint32_t get_total_weights(const log_astc_block& log_block);
|
|
|
|
uint8_t* get_endpoints(log_astc_block& log_block, uint32_t partition_index);
|
|
const uint8_t* get_endpoints(const log_astc_block& log_block, uint32_t partition_index);
|
|
|
|
const char* get_cem_name(uint32_t cem_index);
|
|
bool cem_is_ldr_direct(uint32_t cem_index);
|
|
bool cem_is_ldr_base_scale(uint32_t cem_index);
|
|
bool cem_is_ldr_base_plus_ofs(uint32_t cem_index);
|
|
|
|
bool cem_supports_bc(uint32_t cem);
|
|
|
|
void bit_transfer_signed_dec(int& a, int& b);
|
|
void bit_transfer_signed_enc(int& a, int& b);
|
|
|
|
bool cem8_or_12_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index);
|
|
bool cem9_or_13_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index);
|
|
bool used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index);
|
|
|
|
uint32_t get_base_cem_without_alpha(uint32_t cem);
|
|
|
|
int apply_delta_to_bise_endpoint_val(uint32_t endpoint_ise_range, int ise_val, int delta);
|
|
|
|
// index range: [0,NUM_ASTC_BLOCK_SIZES-1]
|
|
void get_astc_block_size_by_index(uint32_t index, uint32_t& width, uint32_t& height);
|
|
|
|
// -1 if invalid
|
|
int find_astc_block_size_index(uint32_t width, uint32_t height);
|
|
|
|
// 8-bit linear8 or sRGB8, le/he are [0,255], w is [0,64]
|
|
inline int channel_interpolate(int le, int he, int w, bool astc_srgb_decode)
|
|
{
|
|
assert((w >= 0) && (w <= 64));
|
|
assert((le >= 0) && (le <= 255));
|
|
assert((he >= 0) && (he <= 255));
|
|
|
|
if (astc_srgb_decode)
|
|
{
|
|
le = (le << 8) | 0x80;
|
|
he = (he << 8) | 0x80;
|
|
}
|
|
else
|
|
{
|
|
le = (le << 8) | le;
|
|
he = (he << 8) | he;
|
|
}
|
|
|
|
return astc_helpers::weight_interpolate(le, he, w) >> 8;
|
|
}
|
|
|
|
} // namespace astc_helpers
|
|
|
|
#endif // BASISU_ASTC_HELPERS_HEADER
|
|
|
|
//------------------------------------------------------------------
|
|
|
|
#ifdef BASISU_ASTC_HELPERS_IMPLEMENTATION
|
|
|
|
namespace astc_helpers
|
|
{
|
|
// Local min/max helpers; when the two arguments compare equal, b is returned.
template<typename T> inline T my_min(T a, T b) { return (a < b) ? a : b; }
template<typename T> inline T my_max(T a, T b) { return (a > b) ? a : b; }
|
|
|
|
// The supported ASTC block dimensions as { width, height }, in the same order as the cBLOCK_SIZE_* indices.
const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2] = {
    { 4, 4 }, { 5, 4 }, { 5, 5 }, { 6, 5 },
    { 6, 6 }, { 8, 5 }, { 8, 6 }, { 10, 5 },
    { 10, 6 }, { 8, 8 }, { 10, 8 }, { 10, 10 },
    { 12, 10 }, { 12, 12 }
};
|
|
|
|
// Per ISE range: { # of plain bits, has trit (0/1), has quint (0/1) }.
const int8_t g_ise_range_table[TOTAL_ISE_RANGES][3] =
{
    //b t q
    //2 3 5     // rng      ise_index  notes
    { 1, 0, 0 }, // 0..1     0
    { 0, 1, 0 }, // 0..2     1
    { 2, 0, 0 }, // 0..3     2
    { 0, 0, 1 }, // 0..4     3
    { 1, 1, 0 }, // 0..5     4  min endpoint ISE index
    { 3, 0, 0 }, // 0..7     5
    { 1, 0, 1 }, // 0..9     6
    { 2, 1, 0 }, // 0..11    7
    { 4, 0, 0 }, // 0..15    8
    { 2, 0, 1 }, // 0..19    9
    { 3, 1, 0 }, // 0..23    10
    { 5, 0, 0 }, // 0..31    11 max weight ISE index
    { 3, 0, 1 }, // 0..39    12
    { 4, 1, 0 }, // 0..47    13
    { 6, 0, 0 }, // 0..63    14
    { 4, 0, 1 }, // 0..79    15
    { 5, 1, 0 }, // 0..95    16
    { 7, 0, 0 }, // 0..127   17
    { 5, 0, 1 }, // 0..159   18
    { 6, 1, 0 }, // 0..191   19
    { 8, 0, 0 }, // 0..255   20
};
|
|
|
|
// ORs a code of 0..9 bits into the byte stream at pDst, starting at bit_offset, then advances bit_offset.
// Bytes are addressed individually (bit 0 of byte 0 first). The destination bits are assumed to be
// zero already, since the write is an OR. A codesize of 0 is a no-op.
static inline void astc_set_bits_1_to_9(uint32_t* pDst, uint32_t& bit_offset, uint32_t code, uint32_t codesize)
{
    assert(codesize <= 9);
    if (!codesize)
        return;

    uint8_t* pBuf = reinterpret_cast<uint8_t*>(pDst);

    const uint32_t byte_index = bit_offset >> 3;
    const uint32_t bit_in_byte = bit_offset & 7;

    // At most 9 + 7 = 16 bits once aligned, so the code touches one or two bytes.
    const uint32_t aligned = code << bit_in_byte;

    pBuf[byte_index] |= (uint8_t)aligned;

    // Spill into the next byte only when the code crosses the byte boundary.
    if (codesize > (8 - bit_in_byte))
        pBuf[byte_index + 1] |= (uint8_t)(aligned >> 8);

    bit_offset += codesize;
}
|
|
|
|
// Returns bits [low, high] (both inclusive) of `bits`, right-aligned. The field must be 1..31 bits wide.
static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high)
{
    // The mask is built unsigned: a signed (1 << 31) would overflow int for a 31-bit field.
    const uint32_t field_mask = (1u << (high - low + 1)) - 1u;
    return (bits >> low) & field_mask;
}
|
|
|
|
// Writes bits to output in an endian safe way.
// ORs the low total_bits (at most 31) bits of value into the byte stream at pOutput, starting at
// bit_pos, and advances bit_pos. Destination bits are assumed to be zero already (the write is an OR).
static inline void astc_set_bits(uint32_t* pOutput, uint32_t& bit_pos, uint32_t value, uint32_t total_bits)
{
    assert(total_bits <= 31);
    assert(value < (1u << total_bits));

    uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput);

    while (total_bits)
    {
        // Emit no more than what is left of the current byte. Both quantities are small and
        // unsigned, so a plain comparison replaces the original signed my_min<int> round trip.
        const uint32_t room_in_byte = 8 - (bit_pos & 7);
        const uint32_t bits_to_write = (total_bits < room_in_byte) ? total_bits : room_in_byte;

        // The uint8_t cast drops everything that belongs to later bytes.
        pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7));

        bit_pos += bits_to_write;
        total_bits -= bits_to_write;
        value >>= bits_to_write;
    }
}
|
|
|
|
// Maps three quints combined as q0 + 5*q1 + 25*q2 (0..124) to the 7-bit packed code written by astc_encode_quints().
static const uint8_t g_astc_quint_encode[125] =
{
    0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57,
    58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104,
    105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54,
    126, 127, 94, 95, 62, 39, 47, 55, 63, 7 /*31 - results in the same decode as 7*/
};
|
|
|
|
// Encodes 3 values to output, usable for any range that uses quints and bits
|
|
static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t* pStats)
|
|
{
|
|
// First extract the quints and the bits from the 3 input values
|
|
int quints = 0, bits[3];
|
|
const uint32_t bit_mask = (1 << n) - 1;
|
|
for (int i = 0; i < 3; i++)
|
|
{
|
|
static const int s_muls[3] = { 1, 5, 25 };
|
|
|
|
const int t = pValues[i] >> n;
|
|
|
|
quints += t * s_muls[i];
|
|
bits[i] = pValues[i] & bit_mask;
|
|
}
|
|
|
|
// Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits.
|
|
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
|
|
|
|
assert(quints < 125);
|
|
const int T = g_astc_quint_encode[quints];
|
|
|
|
// Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
|
|
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
|
|
(bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
|
|
|
|
if (pStats)
|
|
*pStats += n * 3 + 7;
|
|
}
|
|
|
|
// Maps five trits combined as t0 + 3*t1 + 9*t2 + 27*t3 + 81*t4 (0..242) to the 8-bit packed code written by astc_encode_trits().
static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
    43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154,
    131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202,
    208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224,
    225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159,
    191, 223, 124, 125, 126 };
|
|
|
|
// Encodes 5 values to output, usable for any range that uses trits and bits
|
|
static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t *pStats)
|
|
{
|
|
// First extract the trits and the bits from the 5 input values
|
|
int trits = 0, bits[5];
|
|
const uint32_t bit_mask = (1 << n) - 1;
|
|
for (int i = 0; i < 5; i++)
|
|
{
|
|
static const int s_muls[5] = { 1, 3, 9, 27, 81 };
|
|
|
|
const int t = pValues[i] >> n;
|
|
|
|
trits += t * s_muls[i];
|
|
bits[i] = pValues[i] & bit_mask;
|
|
}
|
|
|
|
// Encode the trits, by inverting the bit manipulations done by the decoder, converting 5 trits into 8-bits.
|
|
// See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding
|
|
|
|
assert(trits < 243);
|
|
const int T = g_astc_trit_encode[trits];
|
|
|
|
// Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94.
|
|
astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2);
|
|
|
|
astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
|
|
(bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
|
|
|
|
if (pStats)
|
|
*pStats += n * 5 + 8;
|
|
}
|
|
|
|
// Packs values using ASTC's BISE to output buffer.
|
|
void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats)
|
|
{
|
|
uint32_t temp[5] = { 0 };
|
|
|
|
const int num_bits = g_ise_range_table[range][0];
|
|
|
|
int group_size = 0;
|
|
if (g_ise_range_table[range][1])
|
|
group_size = 5;
|
|
else if (g_ise_range_table[range][2])
|
|
group_size = 3;
|
|
|
|
#ifndef NDEBUG
|
|
const uint32_t num_levels = get_ise_levels(range);
|
|
for (int i = 0; i < num_vals; i++)
|
|
{
|
|
assert(pSrc_vals[i] < num_levels);
|
|
}
|
|
#endif
|
|
|
|
if (group_size)
|
|
{
|
|
// Range has trits or quints - pack each group of 5 or 3 values
|
|
const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3);
|
|
|
|
for (int group_index = 0; group_index < total_groups; group_index++)
|
|
{
|
|
uint8_t vals[5] = { 0 };
|
|
|
|
const int limit = my_min(group_size, num_vals - group_index * group_size);
|
|
for (int i = 0; i < limit; i++)
|
|
vals[i] = pSrc_vals[group_index * group_size + i];
|
|
|
|
// Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed.
|
|
// get_ise_sequence_bits() returns the # of bits that must be written for proper decoding.
|
|
if (group_size == 5)
|
|
astc_encode_trits(temp, vals, bit_pos, num_bits, pStats);
|
|
else
|
|
astc_encode_quints(temp, vals, bit_pos, num_bits, pStats);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int i = 0; i < num_vals; i++)
|
|
astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
|
|
|
|
if (pStats)
|
|
*pStats += num_vals * num_bits;
|
|
}
|
|
|
|
pDst[0] |= temp[0]; pDst[1] |= temp[1];
|
|
pDst[2] |= temp[2]; pDst[3] |= temp[3];
|
|
}
|
|
|
|
// Reverses the order of all 32 bits (bit 0 <-> bit 31, bit 1 <-> bit 30, ...).
inline uint32_t rev_dword(uint32_t bits)
{
    // Swap progressively smaller groups: halves, bytes, nibbles, bit pairs, then single bits.
    uint32_t v = (bits << 16) | (bits >> 16);
    v = ((v & 0x00ff00ff) << 8) | ((v & 0xff00ff00) >> 8);
    v = ((v & 0x0f0f0f0f) << 4) | ((v & 0xf0f0f0f0) >> 4);
    v = ((v & 0x33333333) << 2) | ((v & 0xcccccccc) >> 2);
    v = ((v & 0x55555555) << 1) | ((v & 0xaaaaaaaa) >> 1);
    return v;
}
|
|
|
|
// True if value is non-negative and fits in num_bits unsigned bits, i.e. 0 <= value < 2^num_bits.
static inline bool is_packable(int value, int num_bits)
{
    assert((num_bits >= 1) && (num_bits < 31));

    if (value < 0)
        return false;

    const int exclusive_limit = 1 << num_bits;
    return value < exclusive_limit;
}
|
|
|
|
static bool get_config_bits(const log_astc_block &log_block, uint32_t &config_bits)
|
|
{
|
|
config_bits = 0;
|
|
|
|
const int W = log_block.m_grid_width, H = log_block.m_grid_height;
|
|
|
|
const uint32_t P = log_block.m_weight_ise_range >= 6; // high precision
|
|
const uint32_t Dp_P = (log_block.m_dual_plane << 1) | P; // pack dual plane+high precision bits
|
|
|
|
// See Tables 81-82
|
|
// Compute p from weight range
|
|
uint32_t p = 2 + log_block.m_weight_ise_range - (P ? 6 : 0);
|
|
|
|
// Rearrange p's bits to p0 p2 p1
|
|
p = (p >> 1) + ((p & 1) << 2);
|
|
|
|
// Try encoding each row of table 82.
|
|
|
|
// W+4 H+2
|
|
if (is_packable(W - 4, 2) && is_packable(H - 2, 2))
|
|
{
|
|
config_bits = (Dp_P << 9) | ((W - 4) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | (p & 3);
|
|
return true;
|
|
}
|
|
|
|
// W+8 H+2
|
|
if (is_packable(W - 8, 2) && is_packable(H - 2, 2))
|
|
{
|
|
config_bits = (Dp_P << 9) | ((W - 8) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 4 | (p & 3);
|
|
return true;
|
|
}
|
|
|
|
// W+2 H+8
|
|
if (is_packable(W - 2, 2) && is_packable(H - 8, 2))
|
|
{
|
|
config_bits = (Dp_P << 9) | ((H - 8) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 8 | (p & 3);
|
|
return true;
|
|
}
|
|
|
|
// W+2 H+6
|
|
if (is_packable(W - 2, 2) && is_packable(H - 6, 1))
|
|
{
|
|
config_bits = (Dp_P << 9) | ((H - 6) << 7) | ((W - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
|
|
return true;
|
|
}
|
|
|
|
// W+2 H+2
|
|
if (is_packable(W - 2, 1) && is_packable(H - 2, 2))
|
|
{
|
|
config_bits = (Dp_P << 9) | ((W) << 7) | ((H - 2) << 5) | ((p & 4) << 2) | 12 | (p & 3);
|
|
return true;
|
|
}
|
|
|
|
// 12 H+2
|
|
if ((W == 12) && is_packable(H - 2, 2))
|
|
{
|
|
config_bits = (Dp_P << 9) | ((H - 2) << 5) | (p << 2);
|
|
return true;
|
|
}
|
|
|
|
// W+2 12
|
|
if ((H == 12) && is_packable(W - 2, 2))
|
|
{
|
|
config_bits = (Dp_P << 9) | (1 << 7) | ((W - 2) << 5) | (p << 2);
|
|
return true;
|
|
}
|
|
|
|
// 6 10
|
|
if ((W == 6) && (H == 10))
|
|
{
|
|
config_bits = (Dp_P << 9) | (3 << 7) | (p << 2);
|
|
return true;
|
|
}
|
|
|
|
// 10 6
|
|
if ((W == 10) && (H == 6))
|
|
{
|
|
config_bits = (Dp_P << 9) | (0b1101 << 5) | (p << 2);
|
|
return true;
|
|
}
|
|
|
|
// W+6 H+6 (no dual plane or high prec)
|
|
if ((!Dp_P) && is_packable(W - 6, 2) && is_packable(H - 6, 2))
|
|
{
|
|
config_bits = ((H - 6) << 9) | 256 | ((W - 6) << 5) | (p << 2);
|
|
return true;
|
|
}
|
|
|
|
// Failed: unsupported weight grid dimensions or config.
|
|
return false;
|
|
}
|
|
|
|
// Packs the logical block `log_block` into the 128-bit physical block `phys_block`.
//
// phys_block                - always cleared first, then filled in as packing progresses.
// pExpected_endpoint_range  - optional. Set to -1 on entry. If the only problem is that
//                             log_block.m_endpoint_ise_range differs from the range this function
//                             computes, the computed range is stored here before returning false.
// pStats                    - optional. Header/endpoint/weight bit counters are incremented as fields are
//                             processed (header counts may already be updated when a later check fails).
// validate_flags            - cValidateEarlyOutAtEndpointISEChecks returns true right before the endpoint
//                             ISE range is computed; cValidateSkipFinalEndpointWeightPacking returns true
//                             right before the endpoint and weight values are written.
//
// Returns false if the logical block is flagged as an error or fails any of the validity checks below.
bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats, uint32_t validate_flags)
{
	// Debug-only sanity check: the component selector must be 0 without dual plane, at most 3 with it.
	assert(log_block.m_dual_plane ? (log_block.m_color_component_selector <= 3) : (log_block.m_color_component_selector == 0));

	memset(&phys_block, 0, sizeof(phys_block));

	if (pExpected_endpoint_range)
		*pExpected_endpoint_range = -1;

	assert(!log_block.m_error_flag);
	if (log_block.m_error_flag)
		return false;

	// Solid color blocks are emitted as void extent blocks and need nothing else.
	if (log_block.m_solid_color_flag_ldr)
	{
		pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats);
		return true;
	}

	if (log_block.m_solid_color_flag_hdr)
	{
		pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats);
		return true;
	}

	// Small helper so every header field can report its size in one line.
	const auto count_header_bits = [pStats](uint32_t num_bits)
	{
		if (pStats)
			pStats->m_header_bits += num_bits;
	};

	const bool partitions_ok = (log_block.m_num_partitions >= 1) && (log_block.m_num_partitions <= MAX_PARTITIONS);
	// Max usable weight range is 11
	const bool weight_range_ok = (log_block.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
	// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
	const bool endpoint_range_ok = (log_block.m_endpoint_ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (log_block.m_endpoint_ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
	const bool ccs_ok = (log_block.m_color_component_selector <= 3);

	if (!(partitions_ok && weight_range_ok && endpoint_range_ok && ccs_ok))
		return false;

	// TODO: sanity check grid width/height vs. block's physical width/height

	uint32_t config_bits = 0;
	if (!get_config_bits(log_block, config_bits))
		return false;

	// bit_pos is the running write position for the fields packed forwards from bit 0.
	uint32_t bit_pos = 0;
	astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11);
	count_header_bits(11);

	const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
	const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);

	// 18.24 Illegal Encodings
	const bool weights_legal = (total_grid_weights != 0) && (total_grid_weights <= MAX_GRID_WEIGHTS) && (total_weight_bits >= 24) && (total_weight_bits <= 96);
	if (!weights_legal)
		return false;

	// Bits stored directly below the weight data (extra CEM bits and/or the plane 2 component selector).
	uint32_t total_extra_bits = 0;

	astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2);
	count_header_bits(2);

	if (log_block.m_num_partitions == 1)
	{
		// Single partition: no partition pattern, and a plain 4-bit CEM.
		if (log_block.m_partition_id)
			return false;
		if (log_block.m_color_endpoint_modes[0] > 15)
			return false;

		astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4);
		count_header_bits(4);
	}
	else
	{
		if (log_block.m_partition_id >= NUM_PARTITION_PATTERNS)
			return false;

		astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10);
		count_header_bits(10);

		uint32_t min_cem = UINT32_MAX, max_cem = 0;
		for (uint32_t part = 0; part < log_block.m_num_partitions; part++)
		{
			const uint32_t cem = log_block.m_color_endpoint_modes[part];
			if (cem > max_cem)
				max_cem = cem;
			if (cem < min_cem)
				min_cem = cem;
		}

		if (max_cem > 15)
			return false;

		// Ensure CEM range is contiguous (the CEM classes, cem >> 2, may span at most two adjacent values)
		if ((max_cem >> 2) > ((min_cem >> 2) + 1))
			return false;

		// See tables 79/80
		uint32_t encoded_cem;
		if (min_cem == max_cem)
		{
			// All partitions share one CEM: selector bits 00 followed by the 4-bit CEM.
			encoded_cem = log_block.m_color_endpoint_modes[0] << 2;
		}
		else
		{
			uint32_t class_sel = 1 + (min_cem >> 2);
			if (class_sel > 3)
				class_sel = 3;

			encoded_cem = class_sel;

			// See tables at 23.11 Color Endpoint Mode
			for (uint32_t part = 0; part < log_block.m_num_partitions; part++)
			{
				const int M = log_block.m_color_endpoint_modes[part] & 3;

				// Per-partition class bit, relative to the base class picked by the selector.
				const int C = (log_block.m_color_endpoint_modes[part] >> 2) - (class_sel - 1);
				if ((C & 1) != C)
					return false;

				encoded_cem |= (C << (2 + part)) | (M << (2 + log_block.m_num_partitions + 2 * part));
			}

			total_extra_bits = 3 * log_block.m_num_partitions - 4;

			if ((total_weight_bits + total_extra_bits) > 128)
				return false;

			// Everything above the low 6 bits is stored directly below the weight bits.
			uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits;
			astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits);
			count_header_bits(total_extra_bits);
		}

		astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6);
		count_header_bits(6);
	}

	if (log_block.m_dual_plane)
	{
		if (log_block.m_num_partitions > 3)
			return false;

		total_extra_bits += 2;

		// The component selector sits below the weights and below any extra CEM bits.
		uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits;
		astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2);
		count_header_bits(2);
	}

	const uint32_t total_config_bits = bit_pos + total_extra_bits;
	const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
	if (num_remaining_bits < 0)
		return false;

	// Each CEM uses 2 values for class 0 plus 2 more per class step.
	uint32_t total_cem_vals = 0;
	for (uint32_t part = 0; part < log_block.m_num_partitions; part++)
		total_cem_vals += 2 + 2 * (log_block.m_color_endpoint_modes[part] >> 2);

	if (total_cem_vals > MAX_ENDPOINTS)
		return false;

	if (validate_flags & cValidateEarlyOutAtEndpointISEChecks)
		return true;

	// Pick the largest endpoint ISE range whose encoded size still fits into the remaining bits.
	int endpoint_ise_range = -1;
	for (int candidate = 20; candidate > 0; candidate--)
	{
		const int needed_bits = get_ise_sequence_bits(total_cem_vals, candidate);
		if (needed_bits > num_remaining_bits)
			continue;

		endpoint_ise_range = candidate;
		break;
	}

	// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
	if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
		return false;

	// Ensure the caller utilized the right endpoint ISE range.
	if ((int)log_block.m_endpoint_ise_range != endpoint_ise_range)
	{
		if (pExpected_endpoint_range)
			*pExpected_endpoint_range = endpoint_ise_range;
		return false;
	}

	if (pStats)
	{
		pStats->m_endpoint_bits += get_ise_sequence_bits(total_cem_vals, endpoint_ise_range);
		pStats->m_weight_bits += get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
	}

	if (validate_flags & cValidateSkipFinalEndpointWeightPacking)
		return true;

	// Pack endpoints forwards
	encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);

	// Pack weights backwards: encode them into a scratch block starting at bit 0, then mirror all 128 bits
	// (dword order reversed, each dword bit-reversed) into the physical block.
	uint32_t weight_data[4] = { 0 };
	encode_bise(weight_data, log_block.m_weights, 0, total_grid_weights, log_block.m_weight_ise_range);

	for (uint32_t src = 0; src < 4; src++)
		phys_block.m_vals[3 - src] |= rev_dword(weight_data[src]);

	return true;
}
|
|
|
|
// Expands a num_src_bits wide value to num_dst_bits by repeating its bit pattern from the top down.
// The last repetition is truncated (its low bits are dropped) when it does not fit completely.
static inline uint32_t bit_replication_scale(uint32_t src, int num_src_bits, int num_dst_bits)
{
	assert(num_src_bits <= num_dst_bits);
	assert((src & ((1 << num_src_bits) - 1)) == src);

	uint32_t result = 0;
	int shift = num_dst_bits - num_src_bits;

	// Whole copies of the source pattern.
	while (shift >= 0)
	{
		result |= src << shift;
		shift -= num_src_bits;
	}

	// Partial copy at the bottom, if any bits are left to fill.
	if (shift > -num_src_bits)
		result |= src >> -shift;

	return result;
}
|
|
|
|
uint32_t dequant_bise_endpoint(uint32_t val, uint32_t ise_range)
|
|
{
|
|
assert((ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE));
|
|
assert(val < get_ise_levels(ise_range));
|
|
|
|
uint32_t u = 0;
|
|
|
|
switch (ise_range)
|
|
{
|
|
case 5:
|
|
{
|
|
u = bit_replication_scale(val, 3, 8);
|
|
break;
|
|
}
|
|
case 8:
|
|
{
|
|
u = bit_replication_scale(val, 4, 8);
|
|
break;
|
|
}
|
|
case 11:
|
|
{
|
|
u = bit_replication_scale(val, 5, 8);
|
|
break;
|
|
}
|
|
case 14:
|
|
{
|
|
u = bit_replication_scale(val, 6, 8);
|
|
break;
|
|
}
|
|
case 17:
|
|
{
|
|
u = bit_replication_scale(val, 7, 8);
|
|
break;
|
|
}
|
|
case 20:
|
|
{
|
|
u = val;
|
|
break;
|
|
}
|
|
case 4:
|
|
case 6:
|
|
case 7:
|
|
case 9:
|
|
case 10:
|
|
case 12:
|
|
case 13:
|
|
case 15:
|
|
case 16:
|
|
case 18:
|
|
case 19:
|
|
{
|
|
const uint32_t num_bits = g_ise_range_table[ise_range][0];
|
|
const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
|
|
const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
|
|
|
|
// compute Table 103 row index
|
|
const int range_index = (num_bits * 2 + (num_quints ? 1 : 0)) - 2;
|
|
|
|
assert(range_index >= 0 && range_index <= 10);
|
|
|
|
uint32_t bits = val & ((1 << num_bits) - 1);
|
|
uint32_t tval = val >> num_bits;
|
|
|
|
assert(tval < (num_trits ? 3U : 5U));
|
|
|
|
uint32_t a = bits & 1;
|
|
uint32_t b = (bits >> 1) & 1;
|
|
uint32_t c = (bits >> 2) & 1;
|
|
uint32_t d = (bits >> 3) & 1;
|
|
uint32_t e = (bits >> 4) & 1;
|
|
uint32_t f = (bits >> 5) & 1;
|
|
|
|
uint32_t A = a ? 511 : 0;
|
|
uint32_t B = 0;
|
|
|
|
switch (range_index)
|
|
{
|
|
case 2:
|
|
{
|
|
// 876543210
|
|
// b000b0bb0
|
|
B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
|
|
break;
|
|
}
|
|
case 3:
|
|
{
|
|
// 876543210
|
|
// b0000bb00
|
|
B = (b << 2) | (b << 3) | (b << 8);
|
|
break;
|
|
}
|
|
case 4:
|
|
{
|
|
// 876543210
|
|
// cb000cbcb
|
|
B = b | (c << 1) | (b << 2) | (c << 3) | (b << 7) | (c << 8);
|
|
break;
|
|
}
|
|
case 5:
|
|
{
|
|
// 876543210
|
|
// cb0000cbc
|
|
B = c | (b << 1) | (c << 2) | (b << 7) | (c << 8);
|
|
break;
|
|
}
|
|
case 6:
|
|
{
|
|
// 876543210
|
|
// dcb000dcb
|
|
B = b | (c << 1) | (d << 2) | (b << 6) | (c << 7) | (d << 8);
|
|
break;
|
|
}
|
|
case 7:
|
|
{
|
|
// 876543210
|
|
// dcb0000dc
|
|
B = c | (d << 1) | (b << 6) | (c << 7) | (d << 8);
|
|
break;
|
|
}
|
|
case 8:
|
|
{
|
|
// 876543210
|
|
// edcb000ed
|
|
B = d | (e << 1) | (b << 5) | (c << 6) | (d << 7) | (e << 8);
|
|
break;
|
|
}
|
|
case 9:
|
|
{
|
|
// 876543210
|
|
// edcb0000e
|
|
B = e | (b << 5) | (c << 6) | (d << 7) | (e << 8);
|
|
break;
|
|
}
|
|
case 10:
|
|
{
|
|
// 876543210
|
|
// fedcb000f
|
|
B = f | (b << 4) | (c << 5) | (d << 6) | (e << 7) | (f << 8);
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
static uint8_t C_vals[11] = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
|
|
uint32_t C = C_vals[range_index];
|
|
uint32_t D = tval;
|
|
|
|
u = D * C + B;
|
|
u = u ^ A;
|
|
u = (A & 0x80) | (u >> 2);
|
|
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
assert(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return u;
|
|
}
|
|
|
|
uint32_t dequant_bise_weight(uint32_t val, uint32_t ise_range)
|
|
{
|
|
assert(val < get_ise_levels(ise_range));
|
|
|
|
uint32_t u = 0;
|
|
switch (ise_range)
|
|
{
|
|
case 0:
|
|
{
|
|
u = val ? 63 : 0;
|
|
break;
|
|
}
|
|
case 1: // 0-2
|
|
{
|
|
const uint8_t s_tab_0_2[3] = { 0, 32, 63 };
|
|
u = s_tab_0_2[val];
|
|
break;
|
|
}
|
|
case 2: // 0-3
|
|
{
|
|
u = bit_replication_scale(val, 2, 6);
|
|
break;
|
|
}
|
|
case 3: // 0-4
|
|
{
|
|
const uint8_t s_tab_0_4[5] = { 0, 16, 32, 47, 63 };
|
|
u = s_tab_0_4[val];
|
|
break;
|
|
}
|
|
case 5: // 0-7
|
|
{
|
|
u = bit_replication_scale(val, 3, 6);
|
|
break;
|
|
}
|
|
case 8: // 0-15
|
|
{
|
|
u = bit_replication_scale(val, 4, 6);
|
|
break;
|
|
}
|
|
case 11: // 0-31
|
|
{
|
|
u = bit_replication_scale(val, 5, 6);
|
|
break;
|
|
}
|
|
case 4: // 0-5
|
|
case 6: // 0-9
|
|
case 7: // 0-11
|
|
case 9: // 0-19
|
|
case 10: // 0-23
|
|
{
|
|
const uint32_t num_bits = g_ise_range_table[ise_range][0];
|
|
const uint32_t num_trits = g_ise_range_table[ise_range][1]; BASISU_NOTE_UNUSED(num_trits);
|
|
const uint32_t num_quints = g_ise_range_table[ise_range][2]; BASISU_NOTE_UNUSED(num_quints);
|
|
|
|
// compute Table 103 row index
|
|
const int range_index = num_bits * 2 + (num_quints ? 1 : 0);
|
|
|
|
// Extract bits and tris/quints from value
|
|
const uint32_t bits = val & ((1u << num_bits) - 1);
|
|
const uint32_t D = val >> num_bits;
|
|
|
|
assert(D < (num_trits ? 3U : 5U));
|
|
|
|
// Now dequantize
|
|
// See Table 103. ASTC weight unquantization parameters
|
|
static const uint32_t C_table[5] = { 50, 28, 23, 13, 11 };
|
|
|
|
const uint32_t a = bits & 1, b = (bits >> 1) & 1, c = (bits >> 2) & 1;
|
|
|
|
const uint32_t A = (a == 0) ? 0 : 0x7F;
|
|
|
|
uint32_t B = 0;
|
|
if (range_index == 4)
|
|
B = ((b << 6) | (b << 2) | (b << 0));
|
|
else if (range_index == 5)
|
|
B = ((b << 6) | (b << 1));
|
|
else if (range_index == 6)
|
|
B = ((c << 6) | (b << 5) | (c << 1) | (b << 0));
|
|
|
|
const uint32_t C = C_table[range_index - 2];
|
|
|
|
u = D * C + B;
|
|
u = u ^ A;
|
|
u = (A & 0x20) | (u >> 2);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0);
|
|
break;
|
|
}
|
|
|
|
if (u > 32)
|
|
u++;
|
|
|
|
return u;
|
|
}
|
|
|
|
// Returns the nearest ISE symbol given a [0,255] endpoint value.
|
|
uint32_t find_nearest_bise_endpoint(int v, uint32_t ise_range)
|
|
{
|
|
assert(ise_range >= FIRST_VALID_ENDPOINT_ISE_RANGE && ise_range <= LAST_VALID_ENDPOINT_ISE_RANGE);
|
|
|
|
const uint32_t total_levels = get_ise_levels(ise_range);
|
|
int best_e = INT_MAX, best_index = 0;
|
|
for (uint32_t i = 0; i < total_levels; i++)
|
|
{
|
|
const int qv = dequant_bise_endpoint(i, ise_range);
|
|
int e = (int)labs(v - qv);
|
|
if (e < best_e)
|
|
{
|
|
best_e = e;
|
|
best_index = i;
|
|
if (!best_e)
|
|
break;
|
|
}
|
|
}
|
|
return best_index;
|
|
}
|
|
|
|
// Returns the nearest ISE weight given a [0,64] endpoint value.
|
|
uint32_t find_nearest_bise_weight(int v, uint32_t ise_range)
|
|
{
|
|
assert(ise_range >= FIRST_VALID_WEIGHT_ISE_RANGE && ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
|
|
assert(v <= (int)MAX_WEIGHT_INTERPOLANT_VALUE);
|
|
|
|
const uint32_t total_levels = get_ise_levels(ise_range);
|
|
int best_e = INT_MAX, best_index = 0;
|
|
for (uint32_t i = 0; i < total_levels; i++)
|
|
{
|
|
const int qv = dequant_bise_weight(i, ise_range);
|
|
int e = (int)labs(v - qv);
|
|
if (e < best_e)
|
|
{
|
|
best_e = e;
|
|
best_index = i;
|
|
if (!best_e)
|
|
break;
|
|
}
|
|
}
|
|
return best_index;
|
|
}
|
|
|
|
void create_quant_tables(
|
|
uint8_t* pVal_to_ise, // [0-255] or [0-64] value to nearest ISE symbol, array size is [256] or [65]
|
|
uint8_t* pISE_to_val, // ASTC encoded ISE symbol to [0,255] or [0,64] value, [levels]
|
|
uint8_t* pISE_to_rank, // returns the level rank index given an ISE symbol, [levels]
|
|
uint8_t* pRank_to_ISE, // returns the ISE symbol given a level rank, inverse of pISE_to_rank, [levels]
|
|
uint32_t ise_range, // ise range, [4,20] for endpoints, [0,11] for weights
|
|
bool weight_flag) // false if block endpoints, true if weights
|
|
{
|
|
const uint32_t num_dequant_vals = weight_flag ? (MAX_WEIGHT_INTERPOLANT_VALUE + 1) : 256;
|
|
|
|
for (uint32_t i = 0; i < num_dequant_vals; i++)
|
|
{
|
|
uint32_t bise_index = weight_flag ? astc_helpers::find_nearest_bise_weight(i, ise_range) : astc_helpers::find_nearest_bise_endpoint(i, ise_range);
|
|
|
|
if (pVal_to_ise)
|
|
pVal_to_ise[i] = (uint8_t)bise_index;
|
|
|
|
if (pISE_to_val)
|
|
pISE_to_val[bise_index] = weight_flag ? (uint8_t)astc_helpers::dequant_bise_weight(bise_index, ise_range) : (uint8_t)astc_helpers::dequant_bise_endpoint(bise_index, ise_range);
|
|
}
|
|
|
|
if (pISE_to_rank || pRank_to_ISE)
|
|
{
|
|
const uint32_t num_levels = get_ise_levels(ise_range);
|
|
|
|
if (!g_ise_range_table[ise_range][1] && !g_ise_range_table[ise_range][2])
|
|
{
|
|
// Only bits
|
|
for (uint32_t i = 0; i < num_levels; i++)
|
|
{
|
|
if (pISE_to_rank)
|
|
pISE_to_rank[i] = (uint8_t)i;
|
|
|
|
if (pRank_to_ISE)
|
|
pRank_to_ISE[i] = (uint8_t)i;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Range has trits or quints
|
|
uint32_t vals[256];
|
|
for (uint32_t i = 0; i < num_levels; i++)
|
|
{
|
|
uint32_t v = weight_flag ? astc_helpers::dequant_bise_weight(i, ise_range) : astc_helpers::dequant_bise_endpoint(i, ise_range);
|
|
|
|
// Low=ISE value
|
|
// High=dequantized value
|
|
vals[i] = (v << 16) | i;
|
|
}
|
|
|
|
// Sorts by dequantized value
|
|
std::sort(vals, vals + num_levels);
|
|
|
|
for (uint32_t rank = 0; rank < num_levels; rank++)
|
|
{
|
|
uint32_t ise_val = (uint8_t)vals[rank];
|
|
|
|
if (pISE_to_rank)
|
|
pISE_to_rank[ise_val] = (uint8_t)rank;
|
|
|
|
if (pRank_to_ISE)
|
|
pRank_to_ISE[rank] = (uint8_t)ise_val;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Writes an LDR void extent (solid color) block: a two byte header, the remaining extent bytes
// left at 0xFF, and the four 16-bit color components stored little endian in bytes 8-15 (R, G, B, A).
void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats)
{
	uint8_t* pBytes = (uint8_t*)&blk.m_vals[0];

	// Start from all ones, then patch the header bytes.
	memset(pBytes, 0xFF, 16);
	pBytes[0] = 0b11111100;
	pBytes[1] = 0b11111101; // bit 9 of the block is cleared here (the HDR variant leaves it set)

	const uint16_t comps[4] = { rh, gh, bh, ah };
	for (uint32_t i = 0; i < 4; i++)
	{
		pBytes[8 + i * 2] = (uint8_t)comps[i];
		pBytes[9 + i * 2] = (uint8_t)(comps[i] >> 8);
	}

	if (pStats)
		pStats->m_header_bits += 16 + 64;
}
|
|
|
|
// rh-ah are half-floats
|
|
void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats *pStats)
|
|
{
|
|
uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
|
|
memset(pDst, 0xFF, 16);
|
|
|
|
pDst[0] = 0b11111100;
|
|
|
|
pDst[8] = (uint8_t)rh;
|
|
pDst[9] = (uint8_t)(rh >> 8);
|
|
pDst[10] = (uint8_t)gh;
|
|
pDst[11] = (uint8_t)(gh >> 8);
|
|
pDst[12] = (uint8_t)bh;
|
|
pDst[13] = (uint8_t)(bh >> 8);
|
|
pDst[14] = (uint8_t)ah;
|
|
pDst[15] = (uint8_t)(ah >> 8);
|
|
|
|
if (pStats)
|
|
pStats->m_header_bits += 8 + 64;
|
|
}
|
|
|
|
bool is_cem_ldr(uint32_t mode)
|
|
{
|
|
switch (mode)
|
|
{
|
|
case CEM_LDR_LUM_DIRECT:
|
|
case CEM_LDR_LUM_BASE_PLUS_OFS:
|
|
case CEM_LDR_LUM_ALPHA_DIRECT:
|
|
case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
|
|
case CEM_LDR_RGB_BASE_SCALE:
|
|
case CEM_LDR_RGB_DIRECT:
|
|
case CEM_LDR_RGB_BASE_PLUS_OFFSET:
|
|
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
|
|
case CEM_LDR_RGBA_DIRECT:
|
|
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool does_cem_have_alpha(uint32_t mode)
|
|
{
|
|
switch (mode)
|
|
{
|
|
case CEM_LDR_LUM_ALPHA_DIRECT:
|
|
case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
|
|
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
|
|
case CEM_LDR_RGBA_DIRECT:
|
|
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
|
|
case CEM_HDR_RGB_LDR_ALPHA:
|
|
case CEM_HDR_RGB_HDR_ALPHA:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Returns true if w x h (width x height, in texels) is one of the 14 supported 2D block sizes.
bool is_valid_block_size(uint32_t w, uint32_t h)
{
	// { width, height }; the table position matches the block size index noted in the comments.
	static const uint8_t s_block_sizes[14][2] =
	{
		{ 4, 4 },   // 0
		{ 5, 4 },   // 1
		{ 5, 5 },   // 2
		{ 6, 5 },   // 3
		{ 6, 6 },   // 4
		{ 8, 5 },   // 5
		{ 8, 6 },   // 6
		{ 10, 5 },  // 7
		{ 10, 6 },  // 8
		{ 8, 8 },   // 9
		{ 10, 8 },  // 10
		{ 10, 10 }, // 11
		{ 12, 10 }, // 12
		{ 12, 12 }  // 13
	};

	for (const auto& dims : s_block_sizes)
	{
		if ((w == dims[0]) && (h == dims[1]))
			return true;
	}

	return false;
}
|
|
|
|
uint32_t get_block_size_index(uint32_t w, uint32_t h)
|
|
{
|
|
assert(is_valid_block_size(w, h));
|
|
|
|
const uint32_t t = w * h;
|
|
|
|
if (t <= 36)
|
|
{
|
|
if (t == 36)
|
|
return cBLOCK_SIZE_6x6;
|
|
else if (t == 16)
|
|
return cBLOCK_SIZE_4x4;
|
|
else if (t == 25)
|
|
return cBLOCK_SIZE_5x5;
|
|
else if (t == 20)
|
|
return cBLOCK_SIZE_5x4;
|
|
else if (t == 30)
|
|
return cBLOCK_SIZE_6x5;
|
|
}
|
|
else if (t <= 64)
|
|
{
|
|
if (t == 64)
|
|
return cBLOCK_SIZE_8x8;
|
|
else if (t == 60)
|
|
return cBLOCK_SIZE_10x6;
|
|
else if (t == 50)
|
|
return cBLOCK_SIZE_10x5;
|
|
else if (t == 48)
|
|
return cBLOCK_SIZE_8x6;
|
|
else if (t == 40)
|
|
return cBLOCK_SIZE_8x5;
|
|
}
|
|
else
|
|
{
|
|
if (t == 80)
|
|
return cBLOCK_SIZE_10x8;
|
|
else if (t == 100)
|
|
return cBLOCK_SIZE_10x10;
|
|
else if (t == 120)
|
|
return cBLOCK_SIZE_12x10;
|
|
else if (t == 144)
|
|
return cBLOCK_SIZE_12x12;
|
|
}
|
|
|
|
assert(0);
|
|
return cBLOCK_SIZE_4x4;
|
|
}
|
|
|
|
// Returns the standard ASTC bitrate (bits per texel, as the rounded figures tabulated below)
// for a valid block size from the ASTC spec.
// 0=invalid block size
float get_bitrate_from_block_size(uint32_t w, uint32_t h)
{
	struct size_rate
	{
		uint8_t m_w, m_h;
		float m_bpp;
	};

	static const size_rate s_rates[] =
	{
		{ 4, 4, 8.0f },
		{ 5, 4, 6.4f },
		{ 5, 5, 5.12f },
		{ 6, 5, 4.27f },
		{ 6, 6, 3.56f },
		{ 8, 5, 3.20f },
		{ 8, 6, 2.67f },
		{ 10, 5, 2.56f },
		{ 10, 6, 2.13f },
		{ 8, 8, 2.00f },
		{ 10, 8, 1.60f },
		{ 10, 10, 1.28f },
		{ 12, 10, 1.07f },
		{ 12, 12, .89f }
	};

	for (const size_rate& entry : s_rates)
	{
		if ((w == entry.m_w) && (h == entry.m_h))
			return entry.m_bpp;
	}

	return 0.0f;
}
|
|
|
|
bool block_has_any_hdr_cems(const log_astc_block& log_blk)
|
|
{
|
|
assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
|
|
|
|
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
|
|
if (is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
bool block_has_any_ldr_cems(const log_astc_block& log_blk)
|
|
{
|
|
assert((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= MAX_PARTITIONS));
|
|
|
|
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
|
|
if (!is_cem_hdr(log_blk.m_color_endpoint_modes[i]))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
dequant_tables g_dequant_tables;
|
|
|
|
void precompute_texel_partitions();
|
|
|
|
// TODO: this is called twice when using the encoder, first init_rank_tabs=false then init_rank_tabs=true.
|
|
void init_tables()
|
|
{
|
|
g_dequant_tables.init();
|
|
|
|
precompute_texel_partitions();
|
|
}
|
|
|
|
void compute_upsample_weights(
|
|
int block_width, int block_height,
|
|
int weight_grid_width, int weight_grid_height,
|
|
weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
|
|
{
|
|
const uint32_t scaleX = (1024 + block_width / 2) / (block_width - 1);
|
|
const uint32_t scaleY = (1024 + block_height / 2) / (block_height - 1);
|
|
|
|
for (int texelY = 0; texelY < block_height; texelY++)
|
|
{
|
|
for (int texelX = 0; texelX < block_width; texelX++)
|
|
{
|
|
const uint32_t gX = (scaleX * texelX * (weight_grid_width - 1) + 32) >> 6;
|
|
const uint32_t gY = (scaleY * texelY * (weight_grid_height - 1) + 32) >> 6;
|
|
const uint32_t jX = gX >> 4;
|
|
const uint32_t jY = gY >> 4;
|
|
const uint32_t fX = gX & 0xf;
|
|
const uint32_t fY = gY & 0xf;
|
|
const uint32_t w11 = (fX * fY + 8) >> 4;
|
|
const uint32_t w10 = fY - w11;
|
|
const uint32_t w01 = fX - w11;
|
|
const uint32_t w00 = 16 - fX - fY + w11;
|
|
|
|
weighted_sample& s = pWeights[texelX + texelY * block_width];
|
|
s.m_src_x = (uint8_t)jX;
|
|
s.m_src_y = (uint8_t)jY;
|
|
s.m_weights[0][0] = (uint8_t)w00;
|
|
s.m_weights[0][1] = (uint8_t)w01;
|
|
s.m_weights[1][0] = (uint8_t)w10;
|
|
s.m_weights[1][1] = (uint8_t)w11;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Should be dequantized [0,64] weights
|
|
void upsample_weight_grid(
|
|
uint32_t bx, uint32_t by, // destination/to dimension
|
|
uint32_t wx, uint32_t wy, // source/from dimension
|
|
const uint8_t* pSrc_weights, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
|
|
uint8_t* pDst_weights) // [by][bx]
|
|
{
|
|
assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
|
|
assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));
|
|
|
|
const uint32_t total_src_weights = wx * wy;
|
|
const uint32_t total_dst_weights = bx * by;
|
|
|
|
if (total_src_weights == total_dst_weights)
|
|
{
|
|
assert((bx == wx) && (by == wy));
|
|
|
|
memcpy(pDst_weights, pSrc_weights, total_src_weights);
|
|
return;
|
|
}
|
|
|
|
weighted_sample weights[12 * 12];
|
|
compute_upsample_weights(bx, by, wx, wy, weights);
|
|
|
|
const weighted_sample* pS = weights;
|
|
|
|
for (uint32_t y = 0; y < by; y++)
|
|
{
|
|
for (uint32_t x = 0; x < bx; x++, ++pS)
|
|
{
|
|
const uint32_t w00 = pS->m_weights[0][0];
|
|
const uint32_t w01 = pS->m_weights[0][1];
|
|
const uint32_t w10 = pS->m_weights[1][0];
|
|
const uint32_t w11 = pS->m_weights[1][1];
|
|
|
|
assert(w00 || w01 || w10 || w11);
|
|
|
|
const uint32_t sx = pS->m_src_x, sy = pS->m_src_y;
|
|
|
|
uint32_t total = 8;
|
|
if (w00) total += pSrc_weights[bounds_check(sx + sy * wx, 0U, total_src_weights)] * w00;
|
|
if (w01) total += pSrc_weights[bounds_check(sx + 1 + sy * wx, 0U, total_src_weights)] * w01;
|
|
if (w10) total += pSrc_weights[bounds_check(sx + (sy + 1) * wx, 0U, total_src_weights)] * w10;
|
|
if (w11) total += pSrc_weights[bounds_check(sx + 1 + (sy + 1) * wx, 0U, total_src_weights)] * w11;
|
|
|
|
pDst_weights[x + y * bx] = (uint8_t)(total >> 4);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Upsamples one or two wx x wy grids of dequantized [0,64] weights to per-texel weights of a bx x by block.
// The interpolation factors are computed once per texel and shared by both planes.
// The second plane is processed only when pDst_weights1 is non-null (pSrc_weights1 is then read).
// The grid must be smaller than the block in at least one dimension.
void upsample_weight_grid_xuastc_ldr(
	uint32_t bx, uint32_t by, // destination/to dimension
	uint32_t wx, uint32_t wy, // source/from dimension
	const uint8_t* pSrc_weights0, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
	uint8_t* pDst_weights0, // [by][bx]
	const uint8_t* pSrc_weights1, // these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
	uint8_t* pDst_weights1) // [by][bx]
{
	assert((bx >= 2) && (by >= 2) && (bx <= 12) && (by <= 12));
	assert((wx >= 2) && (wy >= 2) && (wx <= bx) && (wy <= by));

	assert((bx != wx) || (by != wy));

	// Grid position advance per texel step along each axis.
	const uint32_t step_x = ((1024 + bx / 2) / (bx - 1)) * (wx - 1);
	const uint32_t step_y = ((1024 + by / 2) / (by - 1)) * (wy - 1);

	uint32_t accum_y = 32;
	for (uint32_t ty = 0; ty < by; ty++, accum_y += step_y)
	{
		// Grid position with 4 fractional bits: upper bits select the cell, low 4 bits are the fraction.
		const uint32_t grid_y = accum_y >> 6;
		const uint32_t frac_y = grid_y & 0xf;

		const uint32_t row0 = (grid_y >> 4) * wx; // start of the upper source row
		const uint32_t row1 = row0 + wx;          // start of the row below it
		const uint32_t dst_row = ty * bx;

		uint32_t accum_x = 32;
		for (uint32_t tx = 0; tx < bx; tx++, accum_x += step_x)
		{
			const uint32_t grid_x = accum_x >> 6;
			const uint32_t frac_x = grid_x & 0xf;
			const uint32_t col = grid_x >> 4;

			// Blend factors; the first digit is the y offset of the neighbor, the second its x offset.
			const uint32_t w11 = (frac_x * frac_y + 8) >> 4;
			const uint32_t w10 = frac_y - w11;
			const uint32_t w01 = frac_x - w11;
			const uint32_t w00 = 16 - frac_x - frac_y + w11;

			assert(w00 || w01 || w10 || w11);

			const uint32_t i00 = row0 + col, i01 = i00 + 1;
			const uint32_t i10 = row1 + col, i11 = i10 + 1;

			// Neighbors with a zero factor are never read, which keeps edge texels inside the grid.
			const auto blend = [=](const uint8_t* pSrc) -> uint8_t
			{
				uint32_t sum = 8; // rounding bias for the final >> 4
				if (w00) sum += pSrc[i00] * w00;
				if (w01) sum += pSrc[i01] * w01;
				if (w10) sum += pSrc[i10] * w10;
				if (w11) sum += pSrc[i11] * w11;
				return (uint8_t)(sum >> 4);
			};

			pDst_weights0[dst_row + tx] = blend(pSrc_weights0);

			if (pDst_weights1)
				pDst_weights1[dst_row + tx] = blend(pSrc_weights1);
		} // tx
	} // ty
}
|
|
|
|
// 32-bit integer hash built from a fixed sequence of shift/xor/add/subtract mixing steps.
// All arithmetic wraps modulo 2^32. compute_texel_partition() uses it to scramble the partition seed.
inline uint32_t hash52(uint32_t v)
{
	uint32_t h = v;

	h = h ^ (h >> 15);
	h = h - (h << 17);
	h = h + (h << 7);
	h = h + (h << 4);
	h = h ^ (h >> 5);
	h = h + (h << 16);
	h = h ^ (h >> 7);
	h = h ^ (h >> 3);
	h = h ^ (h << 6);
	h = h ^ (h >> 17);

	return h;
}
|
|
|
|
bool is_small_block(uint32_t block_width, uint32_t block_height)
|
|
{
|
|
assert((block_width >= MIN_BLOCK_DIM) && (block_width <= MAX_BLOCK_DIM));
|
|
assert((block_height >= MIN_BLOCK_DIM) && (block_height <= MAX_BLOCK_DIM));
|
|
|
|
const uint32_t num_blk_pixels = block_width * block_height;
|
|
|
|
return num_blk_pixels < 31;
|
|
}
|
|
|
|
// small_block = num_blk_pixels < 31
|
|
int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block)
|
|
{
|
|
assert(zIn == 0);
|
|
|
|
const uint32_t x = small_block ? xIn << 1 : xIn;
|
|
const uint32_t y = small_block ? yIn << 1 : yIn;
|
|
const uint32_t z = small_block ? zIn << 1 : zIn;
|
|
const uint32_t seed = seedIn + 1024 * (num_partitions - 1);
|
|
const uint32_t rnum = hash52(seed);
|
|
|
|
uint8_t seed1 = (uint8_t)(rnum & 0xf);
|
|
uint8_t seed2 = (uint8_t)((rnum >> 4) & 0xf);
|
|
uint8_t seed3 = (uint8_t)((rnum >> 8) & 0xf);
|
|
uint8_t seed4 = (uint8_t)((rnum >> 12) & 0xf);
|
|
uint8_t seed5 = (uint8_t)((rnum >> 16) & 0xf);
|
|
uint8_t seed6 = (uint8_t)((rnum >> 20) & 0xf);
|
|
uint8_t seed7 = (uint8_t)((rnum >> 24) & 0xf);
|
|
uint8_t seed8 = (uint8_t)((rnum >> 28) & 0xf);
|
|
uint8_t seed9 = (uint8_t)((rnum >> 18) & 0xf);
|
|
uint8_t seed10 = (uint8_t)((rnum >> 22) & 0xf);
|
|
uint8_t seed11 = (uint8_t)((rnum >> 26) & 0xf);
|
|
uint8_t seed12 = (uint8_t)(((rnum >> 30) | (rnum << 2)) & 0xf);
|
|
|
|
seed1 = (uint8_t)(seed1 * seed1);
|
|
seed2 = (uint8_t)(seed2 * seed2);
|
|
seed3 = (uint8_t)(seed3 * seed3);
|
|
seed4 = (uint8_t)(seed4 * seed4);
|
|
seed5 = (uint8_t)(seed5 * seed5);
|
|
seed6 = (uint8_t)(seed6 * seed6);
|
|
seed7 = (uint8_t)(seed7 * seed7);
|
|
seed8 = (uint8_t)(seed8 * seed8);
|
|
seed9 = (uint8_t)(seed9 * seed9);
|
|
seed10 = (uint8_t)(seed10 * seed10);
|
|
seed11 = (uint8_t)(seed11 * seed11);
|
|
seed12 = (uint8_t)(seed12 * seed12);
|
|
|
|
const int shA = (seed & 2) != 0 ? 4 : 5;
|
|
const int shB = (num_partitions == 3) ? 6 : 5;
|
|
const int sh1 = (seed & 1) != 0 ? shA : shB;
|
|
const int sh2 = (seed & 1) != 0 ? shB : shA;
|
|
const int sh3 = (seed & 0x10) != 0 ? sh1 : sh2;
|
|
|
|
seed1 = (uint8_t)(seed1 >> sh1);
|
|
seed2 = (uint8_t)(seed2 >> sh2);
|
|
seed3 = (uint8_t)(seed3 >> sh1);
|
|
seed4 = (uint8_t)(seed4 >> sh2);
|
|
seed5 = (uint8_t)(seed5 >> sh1);
|
|
seed6 = (uint8_t)(seed6 >> sh2);
|
|
seed7 = (uint8_t)(seed7 >> sh1);
|
|
seed8 = (uint8_t)(seed8 >> sh2);
|
|
seed9 = (uint8_t)(seed9 >> sh3);
|
|
seed10 = (uint8_t)(seed10 >> sh3);
|
|
seed11 = (uint8_t)(seed11 >> sh3);
|
|
seed12 = (uint8_t)(seed12 >> sh3);
|
|
|
|
const int a = 0x3f & (seed1 * x + seed2 * y + seed11 * z + (rnum >> 14));
|
|
const int b = 0x3f & (seed3 * x + seed4 * y + seed12 * z + (rnum >> 10));
|
|
const int c = (num_partitions >= 3) ? 0x3f & (seed5 * x + seed6 * y + seed9 * z + (rnum >> 6)) : 0;
|
|
const int d = (num_partitions >= 4) ? 0x3f & (seed7 * x + seed8 * y + seed10 * z + (rnum >> 2)) : 0;
|
|
|
|
return (a >= b && a >= c && a >= d) ? 0
|
|
: (b >= c && b >= d) ? 1
|
|
: (c >= d) ? 2
|
|
: 3;
|
|
}
|
|
|
|
// Precomputed partition patterns for each 10-bit seed and small/large block sizes for 2-3 subsets.
|
|
// This costs 144KB of RAM and some init, but considering the sheer complexity of compute_texel_partition() and how hotly it's called in the compressors and transcoders that's worth it.
|
|
// Byte packing:
|
|
// low 4 bits=small blocks (on valid up to 6x5)
|
|
// high 4 bits=large blocks (6x6 or larger)
|
|
|
|
static uint8_t g_texel_partitions[NUM_PARTITION_PATTERNS][12][12]; // [seed][y][x]
|
|
|
|
// Cross-checks the precomputed partition table against compute_texel_partition() for every
// block size, seed and texel. The whole body is currently compiled out by the outer "#if 0",
// so this function does nothing.
void sanity_check_texel_partition_tables()
{
#if 0
#if defined(_DEBUG) || defined(DEBUG)
	for (uint32_t size_index = 0; size_index < cTOTAL_BLOCK_SIZES; size_index++)
	{
		const uint32_t bw = g_astc_block_sizes[size_index][0];
		const uint32_t bh = g_astc_block_sizes[size_index][1];
		const bool small_flag = is_small_block(bw, bh);

		assert(get_block_size_index(bw, bh) == size_index);

		for (uint32_t s = 0; s < NUM_PARTITION_PATTERNS; s++)
		{
			for (uint32_t y = 0; y < bh; y++)
			{
				for (uint32_t x = 0; x < bw; x++)
				{
					for (uint32_t subsets = 2; subsets <= 3; subsets++)
					{
						const uint32_t expected = compute_texel_partition(s, x, y, 0, subsets, small_flag);
						assert(get_precomputed_texel_partition(bw, bh, s, x, y, subsets) == (int)expected);
					}
				} // x
			} // y
		} // s
	}

	printf("precompute_texel_partitions: Sanity check OK\n");
#endif
#endif
}
|
|
|
|
void precompute_texel_partition()
|
|
{
|
|
for (uint32_t seed = 0; seed < NUM_PARTITION_PATTERNS; seed++)
|
|
{
|
|
for (uint32_t y = 0; y < MAX_BLOCK_DIM; y++)
|
|
{
|
|
for (uint32_t x = 0; x < MAX_BLOCK_DIM; x++)
|
|
{
|
|
uint32_t k = 0;
|
|
|
|
// small block (width*height<31)
|
|
if ((x <= 6) && (y <= 5))
|
|
{
|
|
uint32_t v2 = compute_texel_partition(seed, x, y, 0, 2, true); assert(v2 <= 1);
|
|
uint32_t v3 = compute_texel_partition(seed, x, y, 0, 3, true); assert(v3 <= 2);
|
|
k |= v2 | (v3 << 2);
|
|
}
|
|
|
|
// not small block
|
|
{
|
|
uint32_t v2 = compute_texel_partition(seed, x, y, 0, 2, false); assert(v2 <= 1);
|
|
uint32_t v3 = compute_texel_partition(seed, x, y, 0, 3, false); assert(v3 <= 2);
|
|
k |= ((v2 | (v3 << 2)) << 4);
|
|
}
|
|
|
|
assert(k <= 255);
|
|
|
|
g_texel_partitions[seed][y][x] = (uint8_t)k;
|
|
} // x
|
|
} // y
|
|
} // seed
|
|
}
|
|
|
|
int get_precomputed_texel_partition(uint32_t block_width, uint32_t block_height, uint32_t seed, uint32_t x, uint32_t y, uint32_t subsets)
|
|
{
|
|
assert(seed < NUM_PARTITION_PATTERNS);
|
|
assert((subsets >= 2) && (subsets <= 3));
|
|
assert((x < block_width) && (y < block_height));
|
|
|
|
const uint32_t v = g_texel_partitions[seed][y][x];
|
|
|
|
uint32_t shift = (subsets == 3) ? 2 : 0;
|
|
shift += ((block_width * block_height) >= 31) * 4;
|
|
uint32_t res = (v >> shift) & 3;
|
|
|
|
// sanity checking
|
|
assert(res == (uint32_t)compute_texel_partition(seed, x, y, 0, subsets, is_small_block(block_width, block_height)));
|
|
|
|
return res;
|
|
}
|
|
|
|
void precompute_texel_partitions()
|
|
{
|
|
if (!g_texel_partitions[0][0][0])
|
|
precompute_texel_partition();
|
|
|
|
sanity_check_texel_partition_tables();
|
|
}
|
|
|
|
// Applies the ASTC "blue contraction" transform to one RGBA endpoint:
// red and green are each averaged with blue; blue and alpha pass through unchanged.
//   r, g, b, a     - input endpoint components
//   dr, dg, db, da - receive the transformed components
void blue_contract(
	int r, int g, int b, int a,
	int &dr, int &dg, int &db, int &da)
{
	const int contracted_r = (r + b) >> 1;
	const int contracted_g = (g + b) >> 1;

	dr = contracted_r;
	dg = contracted_g;
	db = b;
	da = a;
}
|
|
|
|
// Moves the top bit (0x80) of 'a' into the top bit of 'b' (after shifting 'b' right by one),
// then turns the remaining bits of 'a' into a signed 6-bit value in [-32,31].
// Both arguments are updated in place.
inline void bit_transfer_signed(int& a, int& b)
{
	b >>= 1;
	b |= (a & 0x80);

	a = (a >> 1) & 0x3F;

	// Values 32..63 represent the negative half of the 6-bit two's complement range.
	if (a >= 0x20)
		a -= 0x40;
}
|
|
|
|
// Returns 'a' limited to the range [l, h]. The lower bound is tested first.
static inline int clamp(int a, int l, int h)
{
	return (a < l) ? l : ((a > h) ? h : a);
}
|
|
|
|
// Returns 'a' limited to the range [l, h]. The lower bound is tested first.
// A NaN input compares false against both bounds and is returned unchanged.
static inline float clampf(float a, float l, float h)
{
	return (a < l) ? l : ((a > h) ? h : a);
}
|
|
|
|
// Sign-extends the low 'num_src_bits' bits of 'src' to a full int.
//   src          - value whose low num_src_bits bits hold a two's complement number
//   num_src_bits - width of the source field, in [2,31] (asserted)
// Returns the field's signed value; bits of 'src' above the field are ignored.
inline int sign_extend(int src, int num_src_bits)
{
	assert((num_src_bits >= 2) && (num_src_bits <= 31));

	// Build the masks in unsigned arithmetic: (1 << 31) - 1 on a signed int would overflow
	// when num_src_bits == 31.
	const uint32_t value_mask = (1u << num_src_bits) - 1u;
	const uint32_t sign_bit = 1u << (num_src_bits - 1);
	const uint32_t bits = (uint32_t)src;

	if ((bits & sign_bit) != 0)
		return (int)(bits | ~value_mask); // negative: fill everything above the field with ones

	return (int)(bits & value_mask); // non-negative: clear everything above the field
}
|
|
|
|
// endpoints is [4][2]
|
|
void decode_endpoint(uint32_t cem_index, int (*pEndpoints)[2], const uint8_t *pE)
|
|
{
|
|
assert(cem_index <= CEM_HDR_RGB_HDR_ALPHA);
|
|
|
|
int v0 = pE[0], v1 = pE[1];
|
|
|
|
int& e0_r = pEndpoints[0][0], &e0_g = pEndpoints[1][0], &e0_b = pEndpoints[2][0], &e0_a = pEndpoints[3][0];
|
|
int& e1_r = pEndpoints[0][1], &e1_g = pEndpoints[1][1], &e1_b = pEndpoints[2][1], &e1_a = pEndpoints[3][1];
|
|
|
|
switch (cem_index)
|
|
{
|
|
case CEM_LDR_LUM_DIRECT:
|
|
{
|
|
e0_r = v0; e1_r = v1;
|
|
e0_g = v0; e1_g = v1;
|
|
e0_b = v0; e1_b = v1;
|
|
e0_a = 0xFF; e1_a = 0xFF;
|
|
break;
|
|
}
|
|
case CEM_LDR_LUM_BASE_PLUS_OFS:
|
|
{
|
|
int l0 = (v0 >> 2) | (v1 & 0xc0);
|
|
int l1 = l0 + (v1 & 0x3f);
|
|
|
|
if (l1 > 0xFF)
|
|
l1 = 0xFF;
|
|
|
|
e0_r = l0; e1_r = l1;
|
|
e0_g = l0; e1_g = l1;
|
|
e0_b = l0; e1_b = l1;
|
|
e0_a = 0xFF; e1_a = 0xFF;
|
|
break;
|
|
}
|
|
case CEM_LDR_LUM_ALPHA_DIRECT:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3];
|
|
|
|
e0_r = v0; e1_r = v1;
|
|
e0_g = v0; e1_g = v1;
|
|
e0_b = v0; e1_b = v1;
|
|
e0_a = v2; e1_a = v3;
|
|
break;
|
|
}
|
|
case CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3];
|
|
|
|
bit_transfer_signed(v1, v0);
|
|
bit_transfer_signed(v3, v2);
|
|
|
|
e0_r = v0; e1_r = v0 + v1;
|
|
e0_g = v0; e1_g = v0 + v1;
|
|
e0_b = v0; e1_b = v0 + v1;
|
|
e0_a = v2; e1_a = v2 + v3;
|
|
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
{
|
|
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
|
|
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case CEM_LDR_RGB_BASE_SCALE:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3];
|
|
|
|
e0_r = (v0 * v3) >> 8; e1_r = v0;
|
|
e0_g = (v1 * v3) >> 8; e1_g = v1;
|
|
e0_b = (v2 * v3) >> 8; e1_b = v2;
|
|
e0_a = 0xFF; e1_a = 0xFF;
|
|
|
|
break;
|
|
}
|
|
case CEM_LDR_RGB_DIRECT:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
|
|
|
|
if ((v1 + v3 + v5) >= (v0 + v2 + v4))
|
|
{
|
|
e0_r = v0; e1_r = v1;
|
|
e0_g = v2; e1_g = v3;
|
|
e0_b = v4; e1_b = v5;
|
|
e0_a = 0xFF; e1_a = 0xFF;
|
|
}
|
|
else
|
|
{
|
|
blue_contract(v1, v3, v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
|
|
blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case CEM_LDR_RGB_BASE_PLUS_OFFSET:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
|
|
|
|
bit_transfer_signed(v1, v0);
|
|
bit_transfer_signed(v3, v2);
|
|
bit_transfer_signed(v5, v4);
|
|
|
|
if ((v1 + v3 + v5) >= 0)
|
|
{
|
|
e0_r = v0; e1_r = v0 + v1;
|
|
e0_g = v2; e1_g = v2 + v3;
|
|
e0_b = v4; e1_b = v4 + v5;
|
|
e0_a = 0xFF; e1_a = 0xFF;
|
|
}
|
|
else
|
|
{
|
|
blue_contract(v0 + v1, v2 + v3, v4 + v5, 0xFF, e0_r, e0_g, e0_b, e0_a);
|
|
blue_contract(v0, v2, v4, 0xFF, e1_r, e1_g, e1_b, e1_a);
|
|
}
|
|
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
{
|
|
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
|
|
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
|
|
|
|
e0_r = (v0 * v3) >> 8; e1_r = v0;
|
|
e0_g = (v1 * v3) >> 8; e1_g = v1;
|
|
e0_b = (v2 * v3) >> 8; e1_b = v2;
|
|
e0_a = v4; e1_a = v5;
|
|
|
|
break;
|
|
}
|
|
case CEM_LDR_RGBA_DIRECT:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
|
|
|
|
if ((v1 + v3 + v5) >= (v0 + v2 + v4))
|
|
{
|
|
e0_r = v0; e1_r = v1;
|
|
e0_g = v2; e1_g = v3;
|
|
e0_b = v4; e1_b = v5;
|
|
e0_a = v6; e1_a = v7;
|
|
}
|
|
else
|
|
{
|
|
blue_contract(v1, v3, v5, v7, e0_r, e0_g, e0_b, e0_a);
|
|
blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5], v6 = pE[6], v7 = pE[7];
|
|
|
|
bit_transfer_signed(v1, v0);
|
|
bit_transfer_signed(v3, v2);
|
|
bit_transfer_signed(v5, v4);
|
|
bit_transfer_signed(v7, v6);
|
|
|
|
if ((v1 + v3 + v5) >= 0)
|
|
{
|
|
e0_r = v0; e1_r = v0 + v1;
|
|
e0_g = v2; e1_g = v2 + v3;
|
|
e0_b = v4; e1_b = v4 + v5;
|
|
e0_a = v6; e1_a = v6 + v7;
|
|
}
|
|
else
|
|
{
|
|
blue_contract(v0 + v1, v2 + v3, v4 + v5, v6 + v7, e0_r, e0_g, e0_b, e0_a);
|
|
blue_contract(v0, v2, v4, v6, e1_r, e1_g, e1_b, e1_a);
|
|
}
|
|
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
{
|
|
pEndpoints[c][0] = clamp(pEndpoints[c][0], 0, 255);
|
|
pEndpoints[c][1] = clamp(pEndpoints[c][1], 0, 255);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case CEM_HDR_LUM_LARGE_RANGE:
|
|
{
|
|
int y0, y1;
|
|
if (v1 >= v0)
|
|
{
|
|
y0 = (v0 << 4);
|
|
y1 = (v1 << 4);
|
|
}
|
|
else
|
|
{
|
|
y0 = (v1 << 4) + 8;
|
|
y1 = (v0 << 4) - 8;
|
|
}
|
|
|
|
e0_r = y0; e1_r = y1;
|
|
e0_g = y0; e1_g = y1;
|
|
e0_b = y0; e1_b = y1;
|
|
e0_a = 0x780; e1_a = 0x780;
|
|
|
|
break;
|
|
}
|
|
case CEM_HDR_LUM_SMALL_RANGE:
|
|
{
|
|
int y0, y1, d;
|
|
|
|
if ((v0 & 0x80) != 0)
|
|
{
|
|
y0 = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
|
|
d = (v1 & 0x1F) << 2;
|
|
}
|
|
else
|
|
{
|
|
y0 = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
|
|
d = (v1 & 0x0F) << 1;
|
|
}
|
|
|
|
y1 = y0 + d;
|
|
if (y1 > 0xFFF)
|
|
y1 = 0xFFF;
|
|
|
|
e0_r = y0; e1_r = y1;
|
|
e0_g = y0; e1_g = y1;
|
|
e0_b = y0; e1_b = y1;
|
|
e0_a = 0x780; e1_a = 0x780;
|
|
|
|
break;
|
|
}
|
|
case CEM_HDR_RGB_BASE_SCALE:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3];
|
|
|
|
int modeval = ((v0 & 0xC0) >> 6) | ((v1 & 0x80) >> 5) | ((v2 & 0x80) >> 4);
|
|
|
|
int majcomp, mode;
|
|
if ((modeval & 0xC) != 0xC)
|
|
{
|
|
majcomp = modeval >> 2;
|
|
mode = modeval & 3;
|
|
}
|
|
else if (modeval != 0xF)
|
|
{
|
|
majcomp = modeval & 3;
|
|
mode = 4;
|
|
}
|
|
else
|
|
{
|
|
majcomp = 0;
|
|
mode = 5;
|
|
}
|
|
|
|
int red = v0 & 0x3f;
|
|
int green = v1 & 0x1f;
|
|
int blue = v2 & 0x1f;
|
|
int scale = v3 & 0x1f;
|
|
|
|
int x0 = (v1 >> 6) & 1;
|
|
int x1 = (v1 >> 5) & 1;
|
|
int x2 = (v2 >> 6) & 1;
|
|
int x3 = (v2 >> 5) & 1;
|
|
int x4 = (v3 >> 7) & 1;
|
|
int x5 = (v3 >> 6) & 1;
|
|
int x6 = (v3 >> 5) & 1;
|
|
|
|
int ohm = 1 << mode;
|
|
if (ohm & 0x30) green |= x0 << 6;
|
|
if (ohm & 0x3A) green |= x1 << 5;
|
|
if (ohm & 0x30) blue |= x2 << 6;
|
|
if (ohm & 0x3A) blue |= x3 << 5;
|
|
if (ohm & 0x3D) scale |= x6 << 5;
|
|
if (ohm & 0x2D) scale |= x5 << 6;
|
|
if (ohm & 0x04) scale |= x4 << 7;
|
|
if (ohm & 0x3B) red |= x4 << 6;
|
|
if (ohm & 0x04) red |= x3 << 6;
|
|
if (ohm & 0x10) red |= x5 << 7;
|
|
if (ohm & 0x0F) red |= x2 << 7;
|
|
if (ohm & 0x05) red |= x1 << 8;
|
|
if (ohm & 0x0A) red |= x0 << 8;
|
|
if (ohm & 0x05) red |= x0 << 9;
|
|
if (ohm & 0x02) red |= x6 << 9;
|
|
if (ohm & 0x01) red |= x3 << 10;
|
|
if (ohm & 0x02) red |= x5 << 10;
|
|
|
|
static const int s_shamts[6] = { 1,1,2,3,4,5 };
|
|
|
|
const int shamt = s_shamts[mode];
|
|
red <<= shamt;
|
|
green <<= shamt;
|
|
blue <<= shamt;
|
|
scale <<= shamt;
|
|
|
|
if (mode != 5)
|
|
{
|
|
green = red - green;
|
|
blue = red - blue;
|
|
}
|
|
|
|
if (majcomp == 1)
|
|
std::swap(red, green);
|
|
|
|
if (majcomp == 2)
|
|
std::swap(red, blue);
|
|
|
|
e1_r = clamp(red, 0, 0xFFF);
|
|
e1_g = clamp(green, 0, 0xFFF);
|
|
e1_b = clamp(blue, 0, 0xFFF);
|
|
e1_a = 0x780;
|
|
|
|
e0_r = clamp(red - scale, 0, 0xFFF);
|
|
e0_g = clamp(green - scale, 0, 0xFFF);
|
|
e0_b = clamp(blue - scale, 0, 0xFFF);
|
|
e0_a = 0x780;
|
|
|
|
break;
|
|
}
|
|
case CEM_HDR_RGB_HDR_ALPHA:
|
|
case CEM_HDR_RGB_LDR_ALPHA:
|
|
case CEM_HDR_RGB:
|
|
{
|
|
int v2 = pE[2], v3 = pE[3], v4 = pE[4], v5 = pE[5];
|
|
|
|
int majcomp = ((v4 & 0x80) >> 7) | ((v5 & 0x80) >> 6);
|
|
|
|
e0_a = 0x780;
|
|
e1_a = 0x780;
|
|
|
|
if (majcomp == 3)
|
|
{
|
|
e0_r = v0 << 4;
|
|
e0_g = v2 << 4;
|
|
e0_b = (v4 & 0x7f) << 5;
|
|
|
|
e1_r = v1 << 4;
|
|
e1_g = v3 << 4;
|
|
e1_b = (v5 & 0x7f) << 5;
|
|
}
|
|
else
|
|
{
|
|
int mode = ((v1 & 0x80) >> 7) | ((v2 & 0x80) >> 6) | ((v3 & 0x80) >> 5);
|
|
int va = v0 | ((v1 & 0x40) << 2);
|
|
int vb0 = v2 & 0x3f;
|
|
int vb1 = v3 & 0x3f;
|
|
int vc = v1 & 0x3f;
|
|
int vd0 = v4 & 0x7f;
|
|
int vd1 = v5 & 0x7f;
|
|
|
|
static const int s_dbitstab[8] = { 7,6,7,6,5,6,5,6 };
|
|
vd0 = sign_extend(vd0, s_dbitstab[mode]);
|
|
vd1 = sign_extend(vd1, s_dbitstab[mode]);
|
|
|
|
int x0 = (v2 >> 6) & 1;
|
|
int x1 = (v3 >> 6) & 1;
|
|
int x2 = (v4 >> 6) & 1;
|
|
int x3 = (v5 >> 6) & 1;
|
|
int x4 = (v4 >> 5) & 1;
|
|
int x5 = (v5 >> 5) & 1;
|
|
|
|
int ohm = 1 << mode;
|
|
if (ohm & 0xA4) va |= x0 << 9;
|
|
if (ohm & 0x08) va |= x2 << 9;
|
|
if (ohm & 0x50) va |= x4 << 9;
|
|
if (ohm & 0x50) va |= x5 << 10;
|
|
if (ohm & 0xA0) va |= x1 << 10;
|
|
if (ohm & 0xC0) va |= x2 << 11;
|
|
if (ohm & 0x04) vc |= x1 << 6;
|
|
if (ohm & 0xE8) vc |= x3 << 6;
|
|
if (ohm & 0x20) vc |= x2 << 7;
|
|
if (ohm & 0x5B) vb0 |= x0 << 6;
|
|
if (ohm & 0x5B) vb1 |= x1 << 6;
|
|
if (ohm & 0x12) vb0 |= x2 << 7;
|
|
if (ohm & 0x12) vb1 |= x3 << 7;
|
|
|
|
int shamt = (mode >> 1) ^ 3;
|
|
va = (uint32_t)va << shamt;
|
|
vb0 = (uint32_t)vb0 << shamt;
|
|
vb1 = (uint32_t)vb1 << shamt;
|
|
vc = (uint32_t)vc << shamt;
|
|
vd0 = (uint32_t)vd0 << shamt;
|
|
vd1 = (uint32_t)vd1 << shamt;
|
|
|
|
e1_r = clamp(va, 0, 0xFFF);
|
|
e1_g = clamp(va - vb0, 0, 0xFFF);
|
|
e1_b = clamp(va - vb1, 0, 0xFFF);
|
|
|
|
e0_r = clamp(va - vc, 0, 0xFFF);
|
|
e0_g = clamp(va - vb0 - vc - vd0, 0, 0xFFF);
|
|
e0_b = clamp(va - vb1 - vc - vd1, 0, 0xFFF);
|
|
|
|
if (majcomp == 1)
|
|
{
|
|
std::swap(e0_r, e0_g);
|
|
std::swap(e1_r, e1_g);
|
|
}
|
|
else if (majcomp == 2)
|
|
{
|
|
std::swap(e0_r, e0_b);
|
|
std::swap(e1_r, e1_b);
|
|
}
|
|
}
|
|
|
|
if (cem_index == CEM_HDR_RGB_LDR_ALPHA)
|
|
{
|
|
int v6 = pE[6], v7 = pE[7];
|
|
|
|
e0_a = v6;
|
|
e1_a = v7;
|
|
}
|
|
else if (cem_index == CEM_HDR_RGB_HDR_ALPHA)
|
|
{
|
|
int v6 = pE[6], v7 = pE[7];
|
|
|
|
// Extract mode bits
|
|
int mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
|
|
v6 &= 0x7F;
|
|
v7 &= 0x7F;
|
|
|
|
if (mode == 3)
|
|
{
|
|
e0_a = v6 << 5;
|
|
e1_a = v7 << 5;
|
|
}
|
|
else
|
|
{
|
|
v6 |= (v7 << (mode + 1)) & 0x780;
|
|
v7 &= (0x3F >> mode);
|
|
v7 ^= (0x20 >> mode);
|
|
v7 -= (0x20 >> mode);
|
|
|
|
//v6 <<= (4 - mode); // undefined behavior if neg
|
|
v6 = ((uint32_t)v6) << (4 - mode);
|
|
|
|
//v7 <<= (4 - mode); // undefined behavior if neg
|
|
v7 = ((uint32_t)v7) << (4 - mode);
|
|
|
|
v7 += v6;
|
|
v7 = clamp(v7, 0, 0xFFF);
|
|
e0_a = v6;
|
|
e1_a = v7;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
assert(0);
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
{
|
|
pEndpoints[c][0] = 0;
|
|
pEndpoints[c][1] = 0;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns true if the half float's exponent field is all ones (value 31), i.e. it encodes Inf or NaN.
// NOTE(review): presumably get_bits(v, 10, 14) extracts bits 10..14 inclusive - confirm against get_bits().
static inline bool is_half_inf_or_nan(half_float v)
{
	const uint32_t exponent_field = get_bits(v, 10, 14);
	return exponent_field == 31;
}
|
|
|
|
// This float->half conversion matches how "F32TO16" works on Intel GPU's.
|
|
// Converts a 32-bit float to a 16-bit half float.
//   val         - value to convert
//   toward_zero - true truncates the mantissa; false rounds it with lrintf()
// Inf and values whose exponent is too large become Inf, NaN becomes a NaN with mantissa 1,
// float zeros/denormals become (signed) zero, and small values become half denormals.
half_float float_to_half(float val, bool toward_zero)
{
	union { float f; int32_t i; uint32_t u; } bits = { val };

	const int src_mantissa = bits.i & 0x7FFFFF;
	const int src_exponent = (bits.i >> 23) & 0xFF;
	const int s = (bits.i >> 31) & 0x1;

	int e = 0, m = 0;

	if (src_exponent == 0xff)
	{
		// inf/NaN
		e = 31;
		m = (src_mantissa != 0) ? 1 : 0;
	}
	else if (src_exponent != 0)
	{
		// not zero or denormal
		const int unbiased_exp = src_exponent - 127;

		if (unbiased_exp > 15)
		{
			// Too large for a half: becomes infinity.
			e = 31;
		}
		else
		{
			float scaled_mantissa;
			if (unbiased_exp < -14)
			{
				// Result is a half denormal (e stays 0): mantissa is |val| * 2^24.
				scaled_mantissa = (1 << 24) * fabsf(bits.f);
			}
			else
			{
				// Normal half: drop the low 13 mantissa bits.
				e = unbiased_exp + 15;
				scaled_mantissa = (float)src_mantissa * (1.0f / (float)(1 << 13));
			}

			if (toward_zero)
				m = (int)truncf(scaled_mantissa);
			else
				m = (int)lrintf(scaled_mantissa);
		}
	}

	// Rounding may carry out of the 10-bit mantissa; bump the exponent instead.
	assert((0 <= m) && (m <= 1024));
	if (m == 1024)
	{
		e++;
		m = 0;
	}

	assert((s >= 0) && (s <= 1));
	assert((e >= 0) && (e <= 31));
	assert((m >= 0) && (m <= 1023));

	return (half_float)((s << 15) | (e << 10) | m);
}
|
|
|
|
// Converts a 16-bit half float to a 32-bit float.
// Zeros, denormals, Inf and NaN are all handled; a NaN keeps its mantissa bits (shifted up by 13).
float half_to_float(half_float hval)
{
	union { float f; uint32_t u; } x = { 0 };

	const uint32_t h = (uint32_t)hval;
	const uint32_t s = (h >> 15) & 1;
	uint32_t e = (h >> 10) & 0x1F;
	uint32_t m = h & 0x3FF;

	if (e == 31)
	{
		// +/- INF when m == 0, otherwise +/- NaN (m << 13 contributes nothing for INF).
		x.u = (s << 31) | 0x7f800000 | (m << 13);
		return x.f;
	}

	if (e == 0)
	{
		if (m == 0)
		{
			// +- 0
			x.u = s << 31;
			return x.f;
		}

		// Denormalized: shift the mantissa up until the implicit bit appears.
		// m is non-zero and below 0x400 here, so at least one shift is always needed.
		// e is unsigned and temporarily wraps below zero; the bias added below brings it back in range.
		do
		{
			m <<= 1;
			--e;
		} while (!(m & 0x00000400));

		++e;
		m &= ~0x00000400;
	}

	// Rebias the exponent and widen the mantissa from 10 to 23 bits.
	e = e + (127 - 15);
	m = m << 13;

	assert(s <= 1);
	assert(m <= 0x7FFFFF);
	assert(e <= 255);

	x.u = m | (e << 23) | (s << 31);
	return x.f;
}
|
|
|
|
// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
|
|
// RGB9E5 shared-exponent format parameters.
const int RGB9E5_EXPONENT_BITS = 5;
const int RGB9E5_MANTISSA_BITS = 9;
const int RGB9E5_EXP_BIAS = 15;
const int RGB9E5_MAX_VALID_BIASED_EXP = 31;

// Derived limits.
const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS);
const int RGB9E5_MANTISSA_VALUES = (1 << RGB9E5_MANTISSA_BITS);
const int MAX_RGB9E5_MANTISSA = (RGB9E5_MANTISSA_VALUES - 1);
//const int MAX_RGB9E5 = (int)(((float)MAX_RGB9E5_MANTISSA) / RGB9E5_MANTISSA_VALUES * (1 << MAX_RGB9E5_EXP));

// Note: the float expression is far below 1, so the cast to int makes this constant 0.
const int EPSILON_RGB9E5 = (int)((1.0f / (float)RGB9E5_MANTISSA_VALUES) / (float)(1 << RGB9E5_EXP_BIAS));
|
|
|
|
void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b)
|
|
{
|
|
int x = packed & 511;
|
|
int y = (packed >> 9) & 511;
|
|
int z = (packed >> 18) & 511;
|
|
int w = (packed >> 27) & 31;
|
|
|
|
const float scale = powf(2.0f, static_cast<float>(w - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
|
|
|
|
r = x * scale;
|
|
g = y * scale;
|
|
b = z * scale;
|
|
}
|
|
|
|
// floor_log2 is not correct for the denorm and zero values, but we are going to do a max of this value with the minimum rgb9e5 exponent that will hide these problem cases.
|
|
// Returns the unbiased binary exponent of x, read straight from its IEEE 754 bit pattern
// (the sign bit is ignored). For normal values this equals floor(log2(|x|)).
// Zero and denormals return -127, and Inf/NaN return 128; callers must account for these.
static inline int floor_log2(float x)
{
	static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits");

	// memcpy is the well-defined way to read a float's bits (reading an inactive union member is not).
	uint32_t raw;
	memcpy(&raw, &x, sizeof(raw));

	// Extract float exponent. Convert to int before removing the bias so small exponents
	// produce a negative result without relying on unsigned wraparound.
	return (int)((raw >> 23) & 0xFF) - 127;
}
|
|
|
|
// Returns the larger of two ints (b when they compare equal).
static inline int maximumi(int a, int b)
{
	if (a > b)
		return a;
	return b;
}

// Returns the larger of two floats (b when the comparison a > b is false, including when either is NaN).
static inline float maximumf(float a, float b)
{
	if (a > b)
		return a;
	return b;
}
|
|
|
|
uint32_t pack_rgb9e5(float r, float g, float b)
|
|
{
|
|
r = clampf(r, 0.0f, MAX_RGB9E5);
|
|
g = clampf(g, 0.0f, MAX_RGB9E5);
|
|
b = clampf(b, 0.0f, MAX_RGB9E5);
|
|
|
|
float maxrgb = maximumf(maximumf(r, g), b);
|
|
int exp_shared = maximumi(-RGB9E5_EXP_BIAS - 1, floor_log2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
|
|
assert((exp_shared >= 0) && (exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP));
|
|
|
|
float denom = powf(2.0f, (float)(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
|
|
|
|
int maxm = (int)floorf((maxrgb / denom) + 0.5f);
|
|
if (maxm == (MAX_RGB9E5_MANTISSA + 1))
|
|
{
|
|
denom *= 2;
|
|
exp_shared += 1;
|
|
assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
|
|
}
|
|
else
|
|
{
|
|
assert(maxm <= MAX_RGB9E5_MANTISSA);
|
|
}
|
|
|
|
int rm = (int)floorf((r / denom) + 0.5f);
|
|
int gm = (int)floorf((g / denom) + 0.5f);
|
|
int bm = (int)floorf((b / denom) + 0.5f);
|
|
|
|
assert((rm >= 0) && (rm <= MAX_RGB9E5_MANTISSA));
|
|
assert((gm >= 0) && (gm <= MAX_RGB9E5_MANTISSA));
|
|
assert((bm >= 0) && (bm <= MAX_RGB9E5_MANTISSA));
|
|
|
|
return rm | (gm << 9) | (bm << 18) | (exp_shared << 27);
|
|
}
|
|
|
|
// Counts the leading zero bits of x treated as a 17-bit value (bit 16 is the top bit).
// x must be <= 0x1FFFF (asserted). Returns 17 for x == 0.
static inline int clz17(uint32_t x)
{
	assert(x <= 0x1FFFF);
	x &= 0x1FFFF;

	if (x == 0)
		return 17;

	// Walk a probe bit down from bit 16 until it hits the highest set bit.
	int n = 0;
	for (uint32_t probe = 0x10000; (x & probe) == 0; probe >>= 1)
		n++;

	return n;
}
|
|
|
|
static inline uint32_t pack_rgb9e5_ldr_astc(int Cr, int Cg, int Cb)
|
|
{
|
|
int lz = clz17(Cr | Cg | Cb | 1);
|
|
if (Cr == 65535) { Cr = 65536; lz = 0; }
|
|
if (Cg == 65535) { Cg = 65536; lz = 0; }
|
|
if (Cb == 65535) { Cb = 65536; lz = 0; }
|
|
Cr <<= lz; Cg <<= lz; Cb <<= lz;
|
|
Cr = (Cr >> 8) & 0x1FF;
|
|
Cg = (Cg >> 8) & 0x1FF;
|
|
Cb = (Cb >> 8) & 0x1FF;
|
|
uint32_t exponent = 16 - lz;
|
|
uint32_t texel = (exponent << 27) | (Cb << 18) | (Cg << 9) | Cr;
|
|
return texel;
|
|
}
|
|
|
|
// Packs three half-float bit patterns into an RGB9E5 texel using integer operations only.
// Inputs above 0x7c00 are replaced by 0 and exactly 0x7c00 (+Inf) by 0x7bff before packing.
// The channel with the largest exponent sets the shared exponent; the other mantissas are
// shifted right to match it.
static inline uint32_t pack_rgb9e5_hdr_astc(int Cr, int Cg, int Cb)
{
	int c[3] = { Cr, Cg, Cb };
	int e[3];  // raw half exponent fields
	int ex[3]; // exponent fields with 0 (denormal) treated as 1

	for (uint32_t i = 0; i < 3; i++)
	{
		if (c[i] > 0x7c00)
			c[i] = 0;
		else if (c[i] == 0x7c00)
			c[i] = 0x7bff;

		e[i] = (c[i] >> 10) & 0x1F;
		ex[i] = (e[i] == 0) ? 1 : e[i];
	}

	uint32_t shift[3], expo;

	if ((e[0] | e[1] | e[2]) == 0)
	{
		// All channels are zero/denormal: the exponent and shifts depend only on mantissa bit 9.
		const uint32_t xm = ((c[0] | c[1] | c[2]) & 0x200) >> 9;
		expo = xm;
		shift[0] = shift[1] = shift[2] = xm;
	}
	else
	{
		// Pick the channel with the largest exponent (red wins ties, then green).
		uint32_t major = 2;
		if ((e[0] >= e[1]) && (e[0] >= e[2]))
			major = 0;
		else if (e[1] >= e[2])
			major = 1;

		expo = ex[major] + 1;
		for (uint32_t i = 0; i < 3; i++)
			shift[i] = ex[major] - ex[i] + 2;
	}

	int mant[3];
	for (uint32_t i = 0; i < 3; i++)
	{
		// Restore the implicit leading one for normal values, then align to the shared exponent.
		const int full_mantissa = (c[i] & 0x3FF) | ((e[i] == 0) ? 0 : 0x400);
		mant[i] = (full_mantissa >> shift[i]) & 0x1FF;
	}

	return (expo << 27) | (mant[2] << 18) | (mant[1] << 9) | (mant[0] << 0);
}
|
|
|
|
static void write_error_block(void* pPixels, uint32_t num_blk_pixels, decode_mode dec_mode)
|
|
{
|
|
// Write block error color
|
|
if (dec_mode == cDecodeModeHDR16)
|
|
{
|
|
// NaN's
|
|
memset(pPixels, 0xFF, num_blk_pixels * sizeof(half_float) * 4);
|
|
}
|
|
else if (dec_mode == cDecodeModeRGB9E5)
|
|
{
|
|
const uint32_t purple_9e5 = pack_rgb9e5(1.0f, 0.0f, 1.0f);
|
|
|
|
for (uint32_t i = 0; i < num_blk_pixels; i++)
|
|
((uint32_t*)pPixels)[i] = purple_9e5;
|
|
}
|
|
else
|
|
{
|
|
for (uint32_t i = 0; i < num_blk_pixels; i++)
|
|
((uint32_t*)pPixels)[i] = 0xFFFF00FF;
|
|
}
|
|
}
|
|
|
|
// Important: pPixels is either 32-bit/texel or 64-bit/texel.
|
|
bool decode_block(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode)
|
|
{
|
|
assert(is_valid_block_size(blk_width, blk_height));
|
|
|
|
// Basic sanity checking
|
|
if (!log_blk.m_dual_plane)
|
|
{
|
|
assert(log_blk.m_color_component_selector == 0);
|
|
}
|
|
else
|
|
{
|
|
assert(log_blk.m_color_component_selector <= 3);
|
|
}
|
|
|
|
assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());
|
|
if (!g_dequant_tables.m_endpoints[0].m_ISE_to_val.size())
|
|
return false;
|
|
|
|
const uint32_t num_blk_pixels = blk_width * blk_height;
|
|
|
|
if (log_blk.m_error_flag)
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
// Should this return false? It's not an invalid logical block config, though.
|
|
return false;
|
|
}
|
|
|
|
// Handle solid color blocks
|
|
if (log_blk.m_solid_color_flag_ldr)
|
|
{
|
|
// LDR solid block
|
|
if (dec_mode == cDecodeModeHDR16)
|
|
{
|
|
// Convert LDR pixels to half-float
|
|
half_float h[4];
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
h[c] = (log_blk.m_solid_color[c] == 0xFFFF) ? 0x3C00 : float_to_half((float)log_blk.m_solid_color[c] * (1.0f / 65536.0f), true);
|
|
|
|
for (uint32_t i = 0; i < num_blk_pixels; i++)
|
|
memcpy((uint16_t*)pPixels + i * 4, h, sizeof(half_float) * 4);
|
|
}
|
|
else if (dec_mode == cDecodeModeRGB9E5)
|
|
{
|
|
float r = (log_blk.m_solid_color[0] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[0] * (1.0f / 65536.0f));
|
|
float g = (log_blk.m_solid_color[1] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[1] * (1.0f / 65536.0f));
|
|
float b = (log_blk.m_solid_color[2] == 0xFFFF) ? 1.0f : ((float)log_blk.m_solid_color[2] * (1.0f / 65536.0f));
|
|
|
|
const uint32_t packed = pack_rgb9e5(r, g, b);
|
|
|
|
for (uint32_t i = 0; i < num_blk_pixels; i++)
|
|
((uint32_t*)pPixels)[i] = packed;
|
|
}
|
|
else
|
|
{
|
|
// Convert LDR pixels to 8-bits
|
|
for (uint32_t i = 0; i < num_blk_pixels; i++)
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
((uint8_t*)pPixels)[i * 4 + c] = (log_blk.m_solid_color[c] >> 8);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
else if (log_blk.m_solid_color_flag_hdr)
|
|
{
|
|
// HDR solid block, decode mode must be half-float or RGB9E5
|
|
if (dec_mode == cDecodeModeHDR16)
|
|
{
|
|
for (uint32_t i = 0; i < num_blk_pixels; i++)
|
|
memcpy((uint16_t*)pPixels + i * 4, log_blk.m_solid_color, sizeof(half_float) * 4);
|
|
}
|
|
else if (dec_mode == cDecodeModeRGB9E5)
|
|
{
|
|
float r = half_to_float(log_blk.m_solid_color[0]);
|
|
float g = half_to_float(log_blk.m_solid_color[1]);
|
|
float b = half_to_float(log_blk.m_solid_color[2]);
|
|
|
|
const uint32_t packed = pack_rgb9e5(r, g, b);
|
|
|
|
for (uint32_t i = 0; i < num_blk_pixels; i++)
|
|
((uint32_t*)pPixels)[i] = packed;
|
|
}
|
|
else
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Sanity check block's config
|
|
if ((log_blk.m_grid_width < 2) || (log_blk.m_grid_height < 2))
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
if ((log_blk.m_endpoint_ise_range < FIRST_VALID_ENDPOINT_ISE_RANGE) || (log_blk.m_endpoint_ise_range > LAST_VALID_ENDPOINT_ISE_RANGE))
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
if ((log_blk.m_weight_ise_range < FIRST_VALID_WEIGHT_ISE_RANGE) || (log_blk.m_weight_ise_range > LAST_VALID_WEIGHT_ISE_RANGE))
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
if ((log_blk.m_num_partitions < 1) || (log_blk.m_num_partitions > MAX_PARTITIONS))
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > MAX_DUAL_PLANE_PARTITIONS))
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
if (log_blk.m_partition_id >= NUM_PARTITION_PATTERNS)
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
if ((log_blk.m_num_partitions == 1) && (log_blk.m_partition_id > 0))
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
if (log_blk.m_color_component_selector > 3)
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
const uint32_t total_endpoint_levels = get_ise_levels(log_blk.m_endpoint_ise_range);
|
|
const uint32_t total_weight_levels = get_ise_levels(log_blk.m_weight_ise_range);
|
|
|
|
bool is_ldr_endpoints[MAX_PARTITIONS];
|
|
|
|
// Check CEM's
|
|
uint32_t total_cem_vals = 0;
|
|
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
|
|
{
|
|
if (log_blk.m_color_endpoint_modes[i] > 15)
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[i]);
|
|
|
|
is_ldr_endpoints[i] = is_cem_ldr(log_blk.m_color_endpoint_modes[i]);
|
|
}
|
|
|
|
if (total_cem_vals > MAX_ENDPOINTS)
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);
|
|
const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();
|
|
|
|
// Dequantized endpoints to [0,255]
|
|
uint8_t dequantized_endpoints[MAX_ENDPOINTS];
|
|
for (uint32_t i = 0; i < total_cem_vals; i++)
|
|
{
|
|
if (log_blk.m_endpoints[i] >= total_endpoint_levels)
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];
|
|
}
|
|
|
|
// Dequantize weights to [0,64]
|
|
uint8_t dequantized_weights[2][12 * 12];
|
|
|
|
const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);
|
|
const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();
|
|
|
|
const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;
|
|
for (uint32_t i = 0; i < total_weight_vals; i++)
|
|
{
|
|
if (log_blk.m_weights[i] >= total_weight_levels)
|
|
{
|
|
write_error_block(pPixels, num_blk_pixels, dec_mode);
|
|
return false;
|
|
}
|
|
|
|
const uint32_t plane_index = log_blk.m_dual_plane ? (i & 1) : 0;
|
|
const uint32_t grid_index = log_blk.m_dual_plane ? (i >> 1) : i;
|
|
|
|
dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];
|
|
}
|
|
|
|
// Upsample weight grid. [0,64] weights
|
|
uint8_t upsampled_weights[2][12 * 12];
|
|
|
|
upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[0][0], &upsampled_weights[0][0]);
|
|
if (log_blk.m_dual_plane)
|
|
upsample_weight_grid(blk_width, blk_height, log_blk.m_grid_width, log_blk.m_grid_height, &dequantized_weights[1][0], &upsampled_weights[1][0]);
|
|
|
|
// Decode CEM's
|
|
int endpoints[4][4][2]; // [subset][comp][l/h]
|
|
|
|
uint32_t endpoint_val_index = 0;
|
|
for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
|
|
{
|
|
const uint32_t cem_index = log_blk.m_color_endpoint_modes[subset];
|
|
|
|
decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);
|
|
|
|
endpoint_val_index += get_num_cem_values(cem_index);
|
|
}
|
|
|
|
// Decode texels
|
|
const bool small_block = num_blk_pixels < 31;
|
|
const bool use_precomputed_texel_partitions = (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
|
|
const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
|
|
|
|
bool success = true;
|
|
|
|
if (dec_mode == cDecodeModeRGB9E5)
|
|
{
|
|
// returns uint32_t's
|
|
for (uint32_t y = 0; y < blk_height; y++)
|
|
{
|
|
for (uint32_t x = 0; x < blk_width; x++)
|
|
{
|
|
const uint32_t pixel_index = x + y * blk_width;
|
|
|
|
uint32_t subset = 0;
|
|
if (log_blk.m_num_partitions > 1)
|
|
{
|
|
if (use_precomputed_texel_partitions)
|
|
{
|
|
subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
|
|
//assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia
|
|
}
|
|
else
|
|
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
|
|
}
|
|
|
|
int comp[3];
|
|
|
|
for (uint32_t c = 0; c < 3; c++)
|
|
{
|
|
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
|
|
|
|
if (is_ldr_endpoints[subset])
|
|
{
|
|
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
|
|
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
|
|
|
|
int le = endpoints[subset][c][0];
|
|
int he = endpoints[subset][c][1];
|
|
|
|
le = (le << 8) | le;
|
|
he = (he << 8) | he;
|
|
|
|
int k = weight_interpolate(le, he, w);
|
|
assert((k >= 0) && (k <= 0xFFFF));
|
|
|
|
comp[c] = k; // 1.0
|
|
}
|
|
else
|
|
{
|
|
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
|
|
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
|
|
|
|
int le = endpoints[subset][c][0] << 4;
|
|
int he = endpoints[subset][c][1] << 4;
|
|
|
|
int qlog16 = weight_interpolate(le, he, w);
|
|
|
|
comp[c] = qlog16_to_half(qlog16);
|
|
|
|
if (is_half_inf_or_nan((half_float)comp[c]))
|
|
comp[c] = 0x7BFF;
|
|
}
|
|
|
|
} // c
|
|
|
|
uint32_t packed;
|
|
if (is_ldr_endpoints[subset])
|
|
packed = pack_rgb9e5_ldr_astc(comp[0], comp[1], comp[2]);
|
|
else
|
|
packed = pack_rgb9e5_hdr_astc(comp[0], comp[1], comp[2]);
|
|
|
|
((uint32_t*)pPixels)[pixel_index] = packed;
|
|
|
|
} // x
|
|
} // y
|
|
}
|
|
else if (dec_mode == cDecodeModeHDR16)
|
|
{
|
|
// Note: must round towards zero when converting float to half for ASTC (18.19 Weight Application)
|
|
|
|
// returns half floats
|
|
for (uint32_t y = 0; y < blk_height; y++)
|
|
{
|
|
for (uint32_t x = 0; x < blk_width; x++)
|
|
{
|
|
const uint32_t pixel_index = x + y * blk_width;
|
|
|
|
uint32_t subset = 0;
|
|
if (log_blk.m_num_partitions > 1)
|
|
{
|
|
if (use_precomputed_texel_partitions)
|
|
{
|
|
subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
|
|
//assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia
|
|
}
|
|
else
|
|
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
|
|
}
|
|
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
{
|
|
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
|
|
|
|
half_float o;
|
|
|
|
if ( (is_ldr_endpoints[subset]) ||
|
|
((log_blk.m_color_endpoint_modes[subset] == CEM_HDR_RGB_LDR_ALPHA) && (c == 3)) )
|
|
{
|
|
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFF));
|
|
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFF));
|
|
|
|
int le = endpoints[subset][c][0];
|
|
int he = endpoints[subset][c][1];
|
|
|
|
le = (le << 8) | le;
|
|
he = (he << 8) | he;
|
|
|
|
int k = weight_interpolate(le, he, w);
|
|
assert((k >= 0) && (k <= 0xFFFF));
|
|
|
|
if (k == 0xFFFF)
|
|
o = 0x3C00; // 1.0
|
|
else
|
|
o = float_to_half((float)k * (1.0f / 65536.0f), true);
|
|
}
|
|
else
|
|
{
|
|
assert((endpoints[subset][c][0] >= 0) && (endpoints[subset][c][0] <= 0xFFF));
|
|
assert((endpoints[subset][c][1] >= 0) && (endpoints[subset][c][1] <= 0xFFF));
|
|
|
|
int le = endpoints[subset][c][0] << 4;
|
|
int he = endpoints[subset][c][1] << 4;
|
|
|
|
int qlog16 = weight_interpolate(le, he, w);
|
|
|
|
o = qlog16_to_half(qlog16);
|
|
|
|
if (is_half_inf_or_nan(o))
|
|
o = 0x7BFF;
|
|
}
|
|
|
|
((half_float*)pPixels)[pixel_index * 4 + c] = o;
|
|
}
|
|
|
|
} // x
|
|
} // y
|
|
}
|
|
else
|
|
{
|
|
// returns uint8_t's
|
|
for (uint32_t y = 0; y < blk_height; y++)
|
|
{
|
|
for (uint32_t x = 0; x < blk_width; x++)
|
|
{
|
|
const uint32_t pixel_index = x + y * blk_width;
|
|
|
|
uint32_t subset = 0;
|
|
if (log_blk.m_num_partitions > 1)
|
|
{
|
|
if (use_precomputed_texel_partitions)
|
|
{
|
|
subset = get_precomputed_texel_partition(blk_width, blk_height, log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
|
|
//assert((int)subset == compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block)); // extra paranoia
|
|
}
|
|
else
|
|
subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
|
|
}
|
|
|
|
if (!is_ldr_endpoints[subset])
|
|
{
|
|
((uint32_t*)pPixels)[pixel_index] = 0xFFFF00FF;
|
|
success = false;
|
|
}
|
|
else
|
|
{
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
{
|
|
const uint32_t w = upsampled_weights[(c == ccs) ? 1 : 0][pixel_index];
|
|
|
|
int le = endpoints[subset][c][0];
|
|
int he = endpoints[subset][c][1];
|
|
|
|
// FIXME: the spec is apparently wrong? this matches ARM's and Google's decoder
|
|
//if ((dec_mode == cDecodeModeSRGB8) && (c <= 2))
|
|
// See https://github.com/ARM-software/astc-encoder/issues/447
|
|
// See latest spec with recent (2023-2024) fixes:
|
|
// https://raw.githubusercontent.com/KhronosGroup/DataFormat/refs/heads/main/astc.txt
|
|
// "For _LDR endpoint modes_, each color component C is calculated from the corresponding 8 - bit endpoint components C~0~and C~1~as follows" - does this mean alpha too? I guess so. (8/15/2025.)
|
|
|
|
// 2/22/2026: See ARM errata 3922301 "ASTC decompression incorrectly rounds linear color endpoints when using unorm8 decode mode". (We currently always assume unorm8 decode mode.)
|
|
// Our ASTC/XUASTC encoders default to the sRGB decode profile, not linear, so at least our default behavior isn't impacted by this.
|
|
// https://documentation-service.arm.com/static/67ca1a5ece2747241fced502?utm_source=chatgpt.com
|
|
if (dec_mode == cDecodeModeSRGB8)
|
|
{
|
|
le = (le << 8) | 0x80;
|
|
he = (he << 8) | 0x80;
|
|
}
|
|
else
|
|
{
|
|
le = (le << 8) | le;
|
|
he = (he << 8) | he;
|
|
}
|
|
|
|
uint32_t k = weight_interpolate(le, he, w);
|
|
|
|
// FIXME (old comment - before 2023/2024 ARM etc. spec fixes): This is what the spec says to do in LDR mode, but this is not what ARM's decoder does
|
|
// See decompress_symbolic_block(), decode_texel() and unorm16_to_sf16.
|
|
// It seems to effectively divide by 65535.0 and convert to FP16, then back to float, mul by 255.0, add .5 and then convert to 8-bit.
|
|
((uint8_t*)pPixels)[pixel_index * 4 + c] = (uint8_t)(k >> 8);
|
|
}
|
|
}
|
|
|
|
} // x
|
|
} // y
|
|
}
|
|
|
|
return success;
|
|
}
|
|
|
|
bool is_block_xuastc_ldr(const log_astc_block& log_blk)
|
|
{
|
|
if (log_blk.m_error_flag)
|
|
return false;
|
|
|
|
if (log_blk.m_solid_color_flag_ldr)
|
|
return true;
|
|
|
|
if (log_blk.m_solid_color_flag_hdr)
|
|
return false;
|
|
|
|
if (log_blk.m_num_partitions > 3)
|
|
return false;
|
|
|
|
if ((log_blk.m_dual_plane) && (log_blk.m_num_partitions > 1))
|
|
return false;
|
|
|
|
// TODO: Check partition pattern ID against unique set.
|
|
|
|
for (uint32_t i = 1; i < log_blk.m_num_partitions; i++)
|
|
if (log_blk.m_color_endpoint_modes[0] != log_blk.m_color_endpoint_modes[i])
|
|
return false;
|
|
|
|
switch (log_blk.m_color_endpoint_modes[0])
|
|
{
|
|
case CEM_LDR_LUM_DIRECT:
|
|
case CEM_LDR_LUM_ALPHA_DIRECT:
|
|
case CEM_LDR_RGB_BASE_SCALE:
|
|
case CEM_LDR_RGB_DIRECT:
|
|
case CEM_LDR_RGB_BASE_PLUS_OFFSET:
|
|
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A:
|
|
case CEM_LDR_RGBA_DIRECT:
|
|
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
|
|
{
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// ~2x faster than decode_block(), but XUASTC LDR only.
|
|
// pUpsampled_weights_to_use must be at block res, [0,64], single plane blocks ONLY
|
|
bool decode_block_xuastc_ldr(const log_astc_block& log_blk, void* pPixels, uint32_t blk_width, uint32_t blk_height, decode_mode dec_mode,
|
|
const uint8_t* pUpsampled_weights_to_use, uint32_t start_x, uint32_t start_y, uint32_t end_x, uint32_t end_y)
|
|
{
|
|
if (!end_x)
|
|
end_x = blk_width;
|
|
|
|
if (!end_y)
|
|
end_y = blk_height;
|
|
|
|
assert(start_x < end_x);
|
|
assert(start_y < end_y);
|
|
assert(end_x <= blk_width);
|
|
assert(end_y <= blk_height);
|
|
|
|
assert(g_dequant_tables.m_endpoints[0].m_ISE_to_val.size());
|
|
assert((dec_mode == cDecodeModeSRGB8) || (dec_mode == cDecodeModeLDR8));
|
|
assert(is_valid_block_size(blk_width, blk_height));
|
|
assert(!log_blk.m_error_flag && !log_blk.m_solid_color_flag_hdr);
|
|
|
|
if (!log_blk.m_solid_color_flag_ldr)
|
|
{
|
|
assert(((log_blk.m_num_partitions >= 1) && (log_blk.m_num_partitions <= 3)));
|
|
assert((log_blk.m_grid_width >= 2) & (log_blk.m_grid_height >= 2));
|
|
assert((log_blk.m_grid_width <= blk_width) && (log_blk.m_grid_height <= blk_height));
|
|
assert((log_blk.m_grid_width * log_blk.m_grid_height) <= MAX_GRID_WEIGHTS);
|
|
assert((log_blk.m_num_partitions > 1) || (log_blk.m_partition_id == 0));
|
|
}
|
|
|
|
assert(is_block_xuastc_ldr(log_blk));
|
|
|
|
const uint32_t num_blk_pixels = blk_width * blk_height;
|
|
|
|
// Handle solid color blocks
|
|
if (log_blk.m_solid_color_flag_ldr)
|
|
{
|
|
// Convert LDR pixels to 8-bits
|
|
uint32_t x;
|
|
|
|
((uint8_t*)&x)[0] = (uint8_t)(log_blk.m_solid_color[0] >> 8);
|
|
((uint8_t*)&x)[1] = (uint8_t)(log_blk.m_solid_color[1] >> 8);
|
|
((uint8_t*)&x)[2] = (uint8_t)(log_blk.m_solid_color[2] >> 8);
|
|
((uint8_t*)&x)[3] = (uint8_t)(log_blk.m_solid_color[3] >> 8);
|
|
|
|
uint32_t* pDst = (uint32_t*)pPixels;
|
|
|
|
uint32_t i = 0;
|
|
while ((i + 3) < num_blk_pixels)
|
|
{
|
|
pDst[i] = x;
|
|
pDst[i + 1] = x;
|
|
pDst[i + 2] = x;
|
|
pDst[i + 3] = x;
|
|
|
|
i += 4;
|
|
}
|
|
|
|
while (i < num_blk_pixels)
|
|
pDst[i++] = x;
|
|
|
|
return true;
|
|
}
|
|
|
|
const dequant_table& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(log_blk.m_endpoint_ise_range);
|
|
const uint8_t* pEndpoint_dequant = endpoint_dequant_tab.m_ISE_to_val.data();
|
|
|
|
const dequant_table& weight_dequant_tab = g_dequant_tables.get_weight_tab(log_blk.m_weight_ise_range);
|
|
const uint8_t* pWeight_dequant = weight_dequant_tab.m_ISE_to_val.data();
|
|
|
|
// Check CEM's
|
|
const uint32_t num_cem_vals = get_num_cem_values(log_blk.m_color_endpoint_modes[0]);
|
|
const uint32_t total_cem_vals = num_cem_vals * log_blk.m_num_partitions;
|
|
|
|
assert(total_cem_vals <= MAX_ENDPOINTS);
|
|
|
|
// Dequantized endpoints to [0,255]
|
|
uint8_t dequantized_endpoints[MAX_ENDPOINTS];
|
|
|
|
for (uint32_t i = 0; i < total_cem_vals; i++)
|
|
{
|
|
assert(log_blk.m_endpoints[i] < endpoint_dequant_tab.m_ISE_to_val.size_u32());
|
|
dequantized_endpoints[i] = pEndpoint_dequant[log_blk.m_endpoints[i]];
|
|
}
|
|
|
|
// Decode CEM's
|
|
int endpoints[4][4][2]; // [subset][comp][l/h]
|
|
|
|
uint32_t endpoint_val_index = 0;
|
|
const uint32_t cem_index = log_blk.m_color_endpoint_modes[0];
|
|
|
|
uint32_t alpha_mask = 0xFF;
|
|
|
|
for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
|
|
{
|
|
assert(log_blk.m_color_endpoint_modes[subset] == cem_index);
|
|
|
|
decode_endpoint(cem_index, &endpoints[subset][0], &dequantized_endpoints[endpoint_val_index]);
|
|
|
|
alpha_mask &= endpoints[subset][3][0];
|
|
alpha_mask &= endpoints[subset][3][1];
|
|
|
|
endpoint_val_index += num_cem_vals;
|
|
}
|
|
|
|
const bool any_alpha = alpha_mask != 255;
|
|
|
|
// Dequantize weights to [0,64]
|
|
uint8_t upsampled_weights[2][12 * 12];
|
|
|
|
const uint32_t total_weight_vals = (log_blk.m_dual_plane ? 2 : 1) * log_blk.m_grid_width * log_blk.m_grid_height;
|
|
|
|
// Upsample weight grid. [0,64] weights
|
|
const uint8_t(*pUpsampled_weights)[12 * 12];
|
|
|
|
uint8_t dequantized_weights[2][12 * 12];
|
|
|
|
// For simplicity, ignore any passed in weights if dual plane
|
|
if ((pUpsampled_weights_to_use) && (!log_blk.m_dual_plane))
|
|
{
|
|
// Caller is jamming in already unpacked weights for the first plane to save time
|
|
pUpsampled_weights = reinterpret_cast<const uint8_t(*)[12 * 12]>(pUpsampled_weights_to_use);
|
|
}
|
|
else
|
|
{
|
|
if (log_blk.m_dual_plane)
|
|
{
|
|
for (uint32_t i = 0; i < total_weight_vals; i++)
|
|
{
|
|
const uint32_t plane_index = i & 1;
|
|
const uint32_t grid_index = i >> 1;
|
|
|
|
assert(log_blk.m_weights[i] < weight_dequant_tab.m_ISE_to_val.size_u32());
|
|
dequantized_weights[plane_index][grid_index] = pWeight_dequant[log_blk.m_weights[i]];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (uint32_t i = 0; i < total_weight_vals; i++)
|
|
{
|
|
assert(log_blk.m_weights[i] < weight_dequant_tab.m_ISE_to_val.size_u32());
|
|
dequantized_weights[0][i] = pWeight_dequant[log_blk.m_weights[i]];
|
|
}
|
|
}
|
|
|
|
pUpsampled_weights = &dequantized_weights[0];
|
|
|
|
if ((log_blk.m_grid_width < blk_width) || (log_blk.m_grid_height < blk_height))
|
|
{
|
|
upsample_weight_grid_xuastc_ldr(blk_width, blk_height,
|
|
log_blk.m_grid_width, log_blk.m_grid_height,
|
|
&dequantized_weights[0][0], &upsampled_weights[0][0],
|
|
log_blk.m_dual_plane ? &dequantized_weights[1][0] : nullptr, log_blk.m_dual_plane ? &upsampled_weights[1][0] : nullptr);
|
|
|
|
pUpsampled_weights = &upsampled_weights[0];
|
|
}
|
|
}
|
|
|
|
// Decode texels
|
|
const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
|
|
|
|
const uint8_t *pPart = &g_texel_partitions[log_blk.m_partition_id][0][0]; // [seed][y][x]
|
|
|
|
const bool large_block = (num_blk_pixels >= 31);
|
|
uint32_t part_shift = (log_blk.m_num_partitions == 3) ? 2 : 0;
|
|
part_shift += large_block * 4;
|
|
|
|
//uint32_t pixel_index = 0;
|
|
|
|
if (log_blk.m_num_partitions == 1)
|
|
{
|
|
// alpha, 1 subset
|
|
int le0 = endpoints[0][0][0], he0 = endpoints[0][0][1];
|
|
int le1 = endpoints[0][1][0], he1 = endpoints[0][1][1];
|
|
int le2 = endpoints[0][2][0], he2 = endpoints[0][2][1];
|
|
int le3 = endpoints[0][3][0], he3 = endpoints[0][3][1];
|
|
|
|
if (dec_mode == cDecodeModeSRGB8)
|
|
{
|
|
le0 = (le0 << 8) | 0x80; he0 = (he0 << 8) | 0x80;
|
|
le1 = (le1 << 8) | 0x80; he1 = (he1 << 8) | 0x80;
|
|
le2 = (le2 << 8) | 0x80; he2 = (he2 << 8) | 0x80;
|
|
le3 = (le3 << 8) | 0x80; he3 = (he3 << 8) | 0x80;
|
|
}
|
|
else
|
|
{
|
|
le0 = (le0 << 8) | le0; he0 = (he0 << 8) | he0;
|
|
le1 = (le1 << 8) | le1; he1 = (he1 << 8) | he1;
|
|
le2 = (le2 << 8) | le2; he2 = (he2 << 8) | he2;
|
|
le3 = (le3 << 8) | le3; he3 = (he3 << 8) | he3;
|
|
}
|
|
|
|
// no subsets
|
|
if (!any_alpha)
|
|
{
|
|
if (!log_blk.m_dual_plane)
|
|
{
|
|
for (uint32_t y = start_y; y < end_y; y++)
|
|
{
|
|
for (uint32_t x = start_x; x < end_x; x++)
|
|
{
|
|
const uint32_t pixel_index = x + y * blk_width;
|
|
|
|
const uint32_t w0 = pUpsampled_weights[0][pixel_index];
|
|
const uint32_t w1 = pUpsampled_weights[0][pixel_index];
|
|
const uint32_t w2 = pUpsampled_weights[0][pixel_index];
|
|
|
|
const uint32_t k0 = weight_interpolate(le0, he0, w0);
|
|
const uint32_t k1 = weight_interpolate(le1, he1, w1);
|
|
const uint32_t k2 = weight_interpolate(le2, he2, w2);
|
|
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255;
|
|
} // x
|
|
} // y
|
|
}
|
|
else
|
|
{
|
|
for (uint32_t y = start_y; y < end_y; y++)
|
|
{
|
|
for (uint32_t x = start_x; x < end_x; x++)
|
|
{
|
|
const uint32_t pixel_index = x + y * blk_width;
|
|
|
|
const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index];
|
|
|
|
const uint32_t k0 = weight_interpolate(le0, he0, w0);
|
|
const uint32_t k1 = weight_interpolate(le1, he1, w1);
|
|
const uint32_t k2 = weight_interpolate(le2, he2, w2);
|
|
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255;
|
|
} // x
|
|
} // y
|
|
}
|
|
}
|
|
else // (!any_alpha)
|
|
{
|
|
for (uint32_t y = start_y; y < end_y; y++)
|
|
{
|
|
for (uint32_t x = start_x; x < end_x; x++)
|
|
{
|
|
const uint32_t pixel_index = x + y * blk_width;
|
|
|
|
const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w3 = pUpsampled_weights[(3 == ccs) ? 1 : 0][pixel_index];
|
|
|
|
const uint32_t k0 = weight_interpolate(le0, he0, w0);
|
|
const uint32_t k1 = weight_interpolate(le1, he1, w1);
|
|
const uint32_t k2 = weight_interpolate(le2, he2, w2);
|
|
const uint32_t k3 = weight_interpolate(le3, he3, w3);
|
|
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 3] = (uint8_t)(k3 >> 8);
|
|
|
|
} // x
|
|
} // y
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (uint32_t subset = 0; subset < log_blk.m_num_partitions; subset++)
|
|
{
|
|
int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1];
|
|
int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1];
|
|
int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1];
|
|
int le3 = endpoints[subset][3][0], he3 = endpoints[subset][3][1];
|
|
|
|
if (dec_mode == cDecodeModeSRGB8)
|
|
{
|
|
le0 = (le0 << 8) | 0x80; he0 = (he0 << 8) | 0x80;
|
|
le1 = (le1 << 8) | 0x80; he1 = (he1 << 8) | 0x80;
|
|
le2 = (le2 << 8) | 0x80; he2 = (he2 << 8) | 0x80;
|
|
le3 = (le3 << 8) | 0x80; he3 = (he3 << 8) | 0x80;
|
|
}
|
|
else
|
|
{
|
|
le0 = (le0 << 8) | le0; he0 = (he0 << 8) | he0;
|
|
le1 = (le1 << 8) | le1; he1 = (he1 << 8) | he1;
|
|
le2 = (le2 << 8) | le2; he2 = (he2 << 8) | he2;
|
|
le3 = (le3 << 8) | le3; he3 = (he3 << 8) | he3;
|
|
}
|
|
|
|
endpoints[subset][0][0] = le0, endpoints[subset][0][1] = he0;
|
|
endpoints[subset][1][0] = le1, endpoints[subset][1][1] = he1;
|
|
endpoints[subset][2][0] = le2, endpoints[subset][2][1] = he2;
|
|
endpoints[subset][3][0] = le3, endpoints[subset][3][1] = he3;
|
|
}
|
|
|
|
// subsets
|
|
if (!any_alpha)
|
|
{
|
|
// no alpha, sRGB
|
|
for (uint32_t y = start_y; y < end_y; y++)
|
|
{
|
|
for (uint32_t x = start_x; x < end_x; x++)
|
|
{
|
|
const uint32_t pixel_index = x + y * blk_width;
|
|
|
|
const uint32_t v = pPart[y * 12 + x];
|
|
const uint32_t subset = (v >> part_shift) & 3;
|
|
|
|
const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index];
|
|
|
|
int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1];
|
|
int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1];
|
|
int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1];
|
|
|
|
const uint32_t k0 = weight_interpolate(le0, he0, w0);
|
|
const uint32_t k1 = weight_interpolate(le1, he1, w1);
|
|
const uint32_t k2 = weight_interpolate(le2, he2, w2);
|
|
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 3] = 255;
|
|
} // x
|
|
} // y
|
|
}
|
|
else
|
|
{
|
|
// alpha
|
|
for (uint32_t y = start_y; y < end_y; y++)
|
|
{
|
|
for (uint32_t x = start_x; x < end_x; x++)
|
|
{
|
|
const uint32_t pixel_index = x + y * blk_width;
|
|
|
|
const uint32_t v = pPart[y * 12 + x];
|
|
const uint32_t subset = (v >> part_shift) & 3;
|
|
|
|
const uint32_t w0 = pUpsampled_weights[(0 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w1 = pUpsampled_weights[(1 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w2 = pUpsampled_weights[(2 == ccs) ? 1 : 0][pixel_index];
|
|
const uint32_t w3 = pUpsampled_weights[(3 == ccs) ? 1 : 0][pixel_index];
|
|
|
|
int le0 = endpoints[subset][0][0], he0 = endpoints[subset][0][1];
|
|
int le1 = endpoints[subset][1][0], he1 = endpoints[subset][1][1];
|
|
int le2 = endpoints[subset][2][0], he2 = endpoints[subset][2][1];
|
|
int le3 = endpoints[subset][3][0], he3 = endpoints[subset][3][1];
|
|
|
|
const uint32_t k0 = weight_interpolate(le0, he0, w0);
|
|
const uint32_t k1 = weight_interpolate(le1, he1, w1);
|
|
const uint32_t k2 = weight_interpolate(le2, he2, w2);
|
|
const uint32_t k3 = weight_interpolate(le3, he3, w3);
|
|
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 0] = (uint8_t)(k0 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 1] = (uint8_t)(k1 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 2] = (uint8_t)(k2 >> 8);
|
|
((uint8_t*)pPixels)[pixel_index * 4 + 3] = (uint8_t)(k3 >> 8);
|
|
|
|
} // x
|
|
} // y
|
|
|
|
}
|
|
|
|
} // if (log_blk.m_num_partitions == 1)
|
|
|
|
return true;
|
|
}
|
|
|
|
//------------------------------------------------
|
|
// Physical to logical block decoding
|
|
|
|
// unsigned 128-bit int, with some signed helpers
|
|
class uint128
|
|
{
|
|
uint64_t m_lo, m_hi;
|
|
|
|
public:
|
|
uint128() = default;
|
|
inline uint128(uint64_t lo) : m_lo(lo), m_hi(0) { }
|
|
inline uint128(uint64_t lo, uint64_t hi) : m_lo(lo), m_hi(hi) { }
|
|
inline uint128(const uint128& other) : m_lo(other.m_lo), m_hi(other.m_hi) { }
|
|
|
|
inline uint128& set_signed(int64_t lo) { m_lo = lo; m_hi = (lo < 0) ? UINT64_MAX : 0; return *this; }
|
|
inline uint128& set(uint64_t lo) { m_lo = lo; m_hi = 0; return *this; }
|
|
|
|
inline explicit operator uint8_t () const { return (uint8_t)m_lo; }
|
|
inline explicit operator uint16_t () const { return (uint16_t)m_lo; }
|
|
inline explicit operator uint32_t () const { return (uint32_t)m_lo; }
|
|
inline explicit operator uint64_t () const { return m_lo; }
|
|
|
|
inline uint128& operator= (const uint128& rhs) { m_lo = rhs.m_lo; m_hi = rhs.m_hi; return *this; }
|
|
inline uint128& operator= (const uint64_t val) { m_lo = val; m_hi = 0; return *this; }
|
|
|
|
inline uint64_t get_low() const { return m_lo; }
|
|
inline uint64_t& get_low() { return m_lo; }
|
|
|
|
inline uint64_t get_high() const { return m_hi; }
|
|
inline uint64_t& get_high() { return m_hi; }
|
|
|
|
inline bool operator== (const uint128& rhs) const { return (m_lo == rhs.m_lo) && (m_hi == rhs.m_hi); }
|
|
inline bool operator!= (const uint128& rhs) const { return (m_lo != rhs.m_lo) || (m_hi != rhs.m_hi); }
|
|
|
|
inline bool operator< (const uint128& rhs) const
|
|
{
|
|
if (m_hi < rhs.m_hi)
|
|
return true;
|
|
|
|
if (m_hi == rhs.m_hi)
|
|
{
|
|
if (m_lo < rhs.m_lo)
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
inline bool operator> (const uint128& rhs) const { return (rhs < *this); }
|
|
|
|
inline bool operator<= (const uint128& rhs) const { return (*this == rhs) || (*this < rhs); }
|
|
inline bool operator>= (const uint128& rhs) const { return (*this == rhs) || (*this > rhs); }
|
|
|
|
inline bool is_zero() const { return (m_lo == 0) && (m_hi == 0); }
|
|
inline bool is_all_ones() const { return (m_lo == UINT64_MAX) && (m_hi == UINT64_MAX); }
|
|
inline bool is_non_zero() const { return (m_lo != 0) || (m_hi != 0); }
|
|
inline explicit operator bool() const { return is_non_zero(); }
|
|
inline bool is_signed() const { return ((int64_t)m_hi) < 0; }
|
|
|
|
inline bool signed_less(const uint128& rhs) const
|
|
{
|
|
const bool l_signed = is_signed(), r_signed = rhs.is_signed();
|
|
|
|
if (l_signed == r_signed)
|
|
return *this < rhs;
|
|
|
|
if (l_signed && !r_signed)
|
|
return true;
|
|
|
|
assert(!l_signed && r_signed);
|
|
return false;
|
|
}
|
|
|
|
inline bool signed_greater(const uint128& rhs) const { return rhs.signed_less(*this); }
|
|
inline bool signed_less_equal(const uint128& rhs) const { return !rhs.signed_less(*this); }
|
|
inline bool signed_greater_equal(const uint128& rhs) const { return !signed_less(rhs); }
|
|
|
|
double get_double() const
|
|
{
|
|
double res = 0;
|
|
|
|
if (m_hi)
|
|
res = (double)m_hi * pow(2.0f, 64.0f);
|
|
|
|
res += (double)m_lo;
|
|
|
|
return res;
|
|
}
|
|
|
|
double get_signed_double() const
|
|
{
|
|
if (is_signed())
|
|
return -(uint128(*this).abs().get_double());
|
|
else
|
|
return get_double();
|
|
}
|
|
|
|
inline uint128 abs() const
|
|
{
|
|
uint128 res(*this);
|
|
if (res.is_signed())
|
|
res = -res;
|
|
return res;
|
|
}
|
|
|
|
inline uint128& operator<<= (int shift)
|
|
{
|
|
assert(shift >= 0);
|
|
if (shift < 0)
|
|
return *this;
|
|
|
|
m_hi = (shift >= 64) ? ((shift >= 128) ? 0 : (m_lo << (shift - 64))) : (m_hi << shift);
|
|
|
|
if ((shift) && (shift < 64))
|
|
m_hi |= (m_lo >> (64 - shift));
|
|
|
|
m_lo = (shift >= 64) ? 0 : (m_lo << shift);
|
|
|
|
return *this;
|
|
}
|
|
|
|
inline uint128 operator<< (int shift) const { uint128 res(*this); res <<= shift; return res; }
|
|
|
|
inline uint128& operator>>= (int shift)
|
|
{
|
|
assert(shift >= 0);
|
|
if (shift < 0)
|
|
return *this;
|
|
|
|
m_lo = (shift >= 64) ? ((shift >= 128) ? 0 : (m_hi >> (shift - 64))) : (m_lo >> shift);
|
|
|
|
if ((shift) && (shift < 64))
|
|
m_lo |= (m_hi << (64 - shift));
|
|
|
|
m_hi = (shift >= 64) ? 0 : (m_hi >> shift);
|
|
|
|
return *this;
|
|
}
|
|
|
|
inline uint128 operator>> (int shift) const { uint128 res(*this); res >>= shift; return res; }
|
|
|
|
inline uint128 signed_shift_right(int shift) const
|
|
{
|
|
uint128 res(*this);
|
|
res >>= shift;
|
|
|
|
if (is_signed())
|
|
{
|
|
uint128 x(0U);
|
|
x = ~x;
|
|
x >>= shift;
|
|
res |= (~x);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
inline uint128& operator |= (const uint128& rhs) { m_lo |= rhs.m_lo; m_hi |= rhs.m_hi; return *this; }
|
|
inline uint128 operator | (const uint128& rhs) const { uint128 res(*this); res |= rhs; return res; }
|
|
|
|
inline uint128& operator &= (const uint128& rhs) { m_lo &= rhs.m_lo; m_hi &= rhs.m_hi; return *this; }
|
|
inline uint128 operator & (const uint128& rhs) const { uint128 res(*this); res &= rhs; return res; }
|
|
|
|
inline uint128& operator ^= (const uint128& rhs) { m_lo ^= rhs.m_lo; m_hi ^= rhs.m_hi; return *this; }
|
|
inline uint128 operator ^ (const uint128& rhs) const { uint128 res(*this); res ^= rhs; return res; }
|
|
|
|
inline uint128 operator ~() const { return uint128(~m_lo, ~m_hi); }
|
|
|
|
inline uint128 operator -() const { uint128 res(~*this); if (++res.m_lo == 0) ++res.m_hi; return res; }
|
|
|
|
// prefix
|
|
inline uint128 operator ++()
|
|
{
|
|
if (++m_lo == 0)
|
|
++m_hi;
|
|
return *this;
|
|
}
|
|
|
|
// postfix
|
|
inline uint128 operator ++(int)
|
|
{
|
|
uint128 res(*this);
|
|
if (++m_lo == 0)
|
|
++m_hi;
|
|
return res;
|
|
}
|
|
|
|
// prefix
|
|
inline uint128 operator --()
|
|
{
|
|
const uint64_t t = m_lo;
|
|
if (--m_lo > t)
|
|
--m_hi;
|
|
return *this;
|
|
}
|
|
|
|
// postfix
|
|
inline uint128 operator --(int)
|
|
{
|
|
const uint64_t t = m_lo;
|
|
uint128 res(*this);
|
|
if (--m_lo > t)
|
|
--m_hi;
|
|
return res;
|
|
}
|
|
|
|
inline uint128& operator+= (const uint128& rhs)
|
|
{
|
|
const uint64_t t = m_lo + rhs.m_lo;
|
|
m_hi = m_hi + rhs.m_hi + (t < m_lo);
|
|
m_lo = t;
|
|
return *this;
|
|
}
|
|
|
|
inline uint128 operator+ (const uint128& rhs) const { uint128 res(*this); res += rhs; return res; }
|
|
|
|
inline uint128& operator-= (const uint128& rhs)
|
|
{
|
|
const uint64_t t = m_lo - rhs.m_lo;
|
|
m_hi = m_hi - rhs.m_hi - (t > m_lo);
|
|
m_lo = t;
|
|
return *this;
|
|
}
|
|
|
|
inline uint128 operator- (const uint128& rhs) const { uint128 res(*this); res -= rhs; return res; }
|
|
|
|
// computes bit by bit, very slow
|
|
uint128& operator*=(const uint128& rhs)
|
|
{
|
|
uint128 temp(*this), result(0U);
|
|
|
|
for (uint128 bitmask(rhs); bitmask; bitmask >>= 1, temp <<= 1)
|
|
if (bitmask.get_low() & 1)
|
|
result += temp;
|
|
|
|
*this = result;
|
|
return *this;
|
|
}
|
|
|
|
uint128 operator*(const uint128& rhs) const { uint128 res(*this); res *= rhs; return res; }
|
|
|
|
// computes bit by bit, very slow
|
|
friend uint128 divide(const uint128& dividend, const uint128& divisor, uint128& remainder)
|
|
{
|
|
remainder = 0;
|
|
|
|
if (!divisor)
|
|
{
|
|
assert(0);
|
|
return ~uint128(0U);
|
|
}
|
|
|
|
uint128 quotient(0), one(1);
|
|
|
|
for (int i = 127; i >= 0; i--)
|
|
{
|
|
remainder = (remainder << 1) | ((dividend >> i) & one);
|
|
if (remainder >= divisor)
|
|
{
|
|
remainder -= divisor;
|
|
quotient |= (one << i);
|
|
}
|
|
}
|
|
|
|
return quotient;
|
|
}
|
|
|
|
uint128 operator/(const uint128& rhs) const { uint128 remainder, res; res = divide(*this, rhs, remainder); return res; }
|
|
uint128 operator/=(const uint128& rhs) { uint128 remainder; *this = divide(*this, rhs, remainder); return *this; }
|
|
|
|
uint128 operator%(const uint128& rhs) const { uint128 remainder; divide(*this, rhs, remainder); return remainder; }
|
|
uint128 operator%=(const uint128& rhs) { uint128 remainder; divide(*this, rhs, remainder); *this = remainder; return *this; }
|
|
|
|
void print_hex(FILE* pFile) const
|
|
{
|
|
fprintf(pFile, "0x%016llx%016llx", (unsigned long long int)m_hi, (unsigned long long int)m_lo);
|
|
}
|
|
|
|
void format_unsigned(std::string& res) const
|
|
{
|
|
basisu::vector<uint8_t> digits;
|
|
digits.reserve(39 + 1);
|
|
|
|
uint128 k(*this), ten(10);
|
|
do
|
|
{
|
|
uint128 r;
|
|
k = divide(k, ten, r);
|
|
digits.push_back((uint8_t)r);
|
|
} while (k);
|
|
|
|
for (int i = (int)digits.size() - 1; i >= 0; i--)
|
|
res += ('0' + digits[i]);
|
|
}
|
|
|
|
void format_signed(std::string& res) const
|
|
{
|
|
uint128 val(*this);
|
|
|
|
if (val.is_signed())
|
|
{
|
|
res.push_back('-');
|
|
val = -val;
|
|
}
|
|
|
|
val.format_unsigned(res);
|
|
}
|
|
|
|
void print_unsigned(FILE* pFile)
|
|
{
|
|
std::string str;
|
|
format_unsigned(str);
|
|
fprintf(pFile, "%s", str.c_str());
|
|
}
|
|
|
|
void print_signed(FILE* pFile)
|
|
{
|
|
std::string str;
|
|
format_signed(str);
|
|
fprintf(pFile, "%s", str.c_str());
|
|
}
|
|
|
|
uint128 get_reversed_bits() const
|
|
{
|
|
uint128 res;
|
|
|
|
const uint32_t* pSrc = (const uint32_t*)this;
|
|
uint32_t* pDst = (uint32_t*)&res;
|
|
|
|
pDst[0] = rev_dword(pSrc[3]);
|
|
pDst[1] = rev_dword(pSrc[2]);
|
|
pDst[2] = rev_dword(pSrc[1]);
|
|
pDst[3] = rev_dword(pSrc[0]);
|
|
|
|
return res;
|
|
}
|
|
|
|
uint128 get_byteswapped() const
|
|
{
|
|
uint128 res;
|
|
|
|
const uint8_t* pSrc = (const uint8_t*)this;
|
|
uint8_t* pDst = (uint8_t*)&res;
|
|
|
|
for (uint32_t i = 0; i < 16; i++)
|
|
pDst[i] = pSrc[15 - i];
|
|
|
|
return res;
|
|
}
|
|
|
|
inline uint64_t get_bits64(uint32_t bit_ofs, uint32_t bit_len) const
|
|
{
|
|
assert(bit_ofs < 128);
|
|
assert(bit_len && (bit_len <= 64) && ((bit_ofs + bit_len) <= 128));
|
|
|
|
uint128 res(*this);
|
|
res >>= bit_ofs;
|
|
|
|
const uint64_t bitmask = (bit_len == 64) ? UINT64_MAX : ((1ull << bit_len) - 1);
|
|
return res.get_low() & bitmask;
|
|
}
|
|
|
|
inline uint32_t get_bits(uint32_t bit_ofs, uint32_t bit_len) const
|
|
{
|
|
assert(bit_len <= 32);
|
|
return (uint32_t)get_bits64(bit_ofs, bit_len);
|
|
}
|
|
|
|
inline uint32_t next_bits(uint32_t& bit_ofs, uint32_t len) const
|
|
{
|
|
assert(len && (len <= 32));
|
|
uint32_t x = get_bits(bit_ofs, len);
|
|
bit_ofs += len;
|
|
return x;
|
|
}
|
|
|
|
inline uint128& set_bits(uint64_t val, uint32_t bit_ofs, uint32_t num_bits)
|
|
{
|
|
assert(bit_ofs < 128);
|
|
assert(num_bits && (num_bits <= 64) && ((bit_ofs + num_bits) <= 128));
|
|
|
|
uint128 bitmask(1);
|
|
bitmask = (bitmask << num_bits) - 1;
|
|
assert(uint128(val) <= bitmask);
|
|
|
|
bitmask <<= bit_ofs;
|
|
*this &= ~bitmask;
|
|
|
|
*this = *this | (uint128(val) << bit_ofs);
|
|
return *this;
|
|
}
|
|
};
|
|
|
|
static bool decode_void_extent(const uint128& bits, log_astc_block& log_blk)
|
|
{
|
|
if (bits.get_bits(10, 2) != 0b11)
|
|
return false;
|
|
|
|
uint32_t bit_ofs = 12;
|
|
const uint32_t min_s = bits.next_bits(bit_ofs, 13);
|
|
const uint32_t max_s = bits.next_bits(bit_ofs, 13);
|
|
const uint32_t min_t = bits.next_bits(bit_ofs, 13);
|
|
const uint32_t max_t = bits.next_bits(bit_ofs, 13);
|
|
assert(bit_ofs == 64);
|
|
|
|
const bool all_extents_all_ones = (min_s == 0x1FFF) && (max_s == 0x1FFF) && (min_t == 0x1FFF) && (max_t == 0x1FFF);
|
|
|
|
if (!all_extents_all_ones && ((min_s >= max_s) || (min_t >= max_t)))
|
|
return false;
|
|
|
|
const bool hdr_flag = bits.get_bits(9, 1) != 0;
|
|
|
|
if (hdr_flag)
|
|
log_blk.m_solid_color_flag_hdr = true;
|
|
else
|
|
log_blk.m_solid_color_flag_ldr = true;
|
|
|
|
log_blk.m_solid_color[0] = (uint16_t)bits.get_bits(64, 16);
|
|
log_blk.m_solid_color[1] = (uint16_t)bits.get_bits(80, 16);
|
|
log_blk.m_solid_color[2] = (uint16_t)bits.get_bits(96, 16);
|
|
log_blk.m_solid_color[3] = (uint16_t)bits.get_bits(112, 16);
|
|
|
|
if (log_blk.m_solid_color_flag_hdr)
|
|
{
|
|
for (uint32_t c = 0; c < 4; c++)
|
|
if (is_half_inf_or_nan(log_blk.m_solid_color[c]))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// One row of the block mode decoding table consumed by decode_config().
// Offsets are bit positions inside the block; a negative Dp_ofs/P_ofs means the flag is not stored,
// and a zero W_size/H_size means the dimension is just its bias.
struct astc_dec_row
{
    int8_t Dp_ofs;   // bit offset of the dual plane flag
    int8_t P_ofs;    // bit offset of the P flag (selects the second group of weight ISE ranges)
    int8_t W_ofs;    // bit offset of the grid width field
    int8_t W_size;   // size in bits of the grid width field
    int8_t H_ofs;    // bit offset of the grid height field
    int8_t H_size;   // size in bits of the grid height field
    int8_t W_bias;   // value added to the decoded width field
    int8_t H_bias;   // value added to the decoded height field
    int8_t p0_ofs;   // bit offset of bit 0 of the weight range selector p
    int8_t p1_ofs;   // bit offset of bit 1 of p
    int8_t p2_ofs;   // bit offset of bit 2 of p
};
|
|
|
|
// Block mode decoding rows, indexed by the row_index computed in decode_config().
// The trailing comment on each row gives its W_bias and H_bias.
static const astc_dec_row s_dec_rows[10] =
{
    // Dp_ofs, P_ofs, W_ofs, W_size, H_ofs, H_size, W_bias, H_bias, p0_ofs, p1_ofs, p2_ofs
    { 10,  9, 7, 2, 5, 2,  4,  2, 4, 0, 1 },    // row 0: 4 2
    { 10,  9, 7, 2, 5, 2,  8,  2, 4, 0, 1 },    // row 1: 8 2
    { 10,  9, 5, 2, 7, 2,  2,  8, 4, 0, 1 },    // row 2: 2 8
    { 10,  9, 5, 2, 7, 1,  2,  6, 4, 0, 1 },    // row 3: 2 6

    { 10,  9, 7, 1, 5, 2,  2,  2, 4, 0, 1 },    // row 4: 2 2
    { 10,  9, 0, 0, 5, 2, 12,  2, 4, 2, 3 },    // row 5: 12 2
    { 10,  9, 5, 2, 0, 0,  2, 12, 4, 2, 3 },    // row 6: 2 12
    { 10,  9, 0, 0, 0, 0,  6, 10, 4, 2, 3 },    // row 7: 6 10

    { 10,  9, 0, 0, 0, 0, 10,  6, 4, 2, 3 },    // row 8: 10 6
    { -1, -1, 5, 2, 9, 2,  6,  6, 4, 2, 3 },    // row 9: 6 6 (no dual plane or P flag)
};
|
|
|
|
static bool decode_config(const uint128& bits, log_astc_block& log_blk)
|
|
{
|
|
// Reserved
|
|
if (bits.get_bits(0, 4) == 0)
|
|
return false;
|
|
|
|
// Reserved
|
|
if ((bits.get_bits(0, 2) == 0) && (bits.get_bits(6, 3) == 0b111))
|
|
{
|
|
if (bits.get_bits(2, 4) != 0b1111)
|
|
return false;
|
|
}
|
|
|
|
// Void extent
|
|
if (bits.get_bits(0, 9) == 0b111111100)
|
|
return decode_void_extent(bits, log_blk);
|
|
|
|
// Check rows
|
|
const uint32_t x0_2 = bits.get_bits(0, 2), x2_2 = bits.get_bits(2, 2);
|
|
const uint32_t x5_4 = bits.get_bits(5, 4), x8_1 = bits.get_bits(8, 1);
|
|
const uint32_t x7_2 = bits.get_bits(7, 2);
|
|
|
|
int row_index = -1;
|
|
if (x0_2 == 0)
|
|
{
|
|
if (x7_2 == 0b00)
|
|
row_index = 5;
|
|
else if (x7_2 == 0b01)
|
|
row_index = 6;
|
|
else if (x5_4 == 0b1100)
|
|
row_index = 7;
|
|
else if (x5_4 == 0b1101)
|
|
row_index = 8;
|
|
else if (x7_2 == 0b10)
|
|
row_index = 9;
|
|
}
|
|
else
|
|
{
|
|
if (x2_2 == 0b00)
|
|
row_index = 0;
|
|
else if (x2_2 == 0b01)
|
|
row_index = 1;
|
|
else if (x2_2 == 0b10)
|
|
row_index = 2;
|
|
else if ((x2_2 == 0b11) && (x8_1 == 0))
|
|
row_index = 3;
|
|
else if ((x2_2 == 0b11) && (x8_1 == 1))
|
|
row_index = 4;
|
|
}
|
|
if (row_index < 0)
|
|
return false;
|
|
|
|
const astc_dec_row& r = s_dec_rows[row_index];
|
|
|
|
bool P = false, Dp = false;
|
|
uint32_t W = r.W_bias, H = r.H_bias;
|
|
|
|
if (r.P_ofs >= 0)
|
|
P = bits.get_bits(r.P_ofs, 1) != 0;
|
|
|
|
if (r.Dp_ofs >= 0)
|
|
Dp = bits.get_bits(r.Dp_ofs, 1) != 0;
|
|
|
|
if (r.W_size)
|
|
W += bits.get_bits(r.W_ofs, r.W_size);
|
|
|
|
if (r.H_size)
|
|
H += bits.get_bits(r.H_ofs, r.H_size);
|
|
|
|
assert((W >= MIN_GRID_DIM) && (W <= MAX_BLOCK_DIM));
|
|
assert((H >= MIN_GRID_DIM) && (H <= MAX_BLOCK_DIM));
|
|
|
|
int p0 = bits.get_bits(r.p0_ofs, 1);
|
|
int p1 = bits.get_bits(r.p1_ofs, 1);
|
|
int p2 = bits.get_bits(r.p2_ofs, 1);
|
|
|
|
uint32_t p = p0 | (p1 << 1) | (p2 << 2);
|
|
if (p < 2)
|
|
return false;
|
|
|
|
log_blk.m_grid_width = (uint8_t)W;
|
|
log_blk.m_grid_height = (uint8_t)H;
|
|
|
|
log_blk.m_weight_ise_range = (uint8_t)((p - 2) + (P * BISE_10_LEVELS));
|
|
assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);
|
|
|
|
log_blk.m_dual_plane = Dp;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Reads an unsigned 32-bit little-endian value from the 4 bytes at pBytes (no alignment requirement).
// Each byte is widened to uint32_t *before* shifting: a uint8_t operand is otherwise promoted to
// (signed) int, and shifting a byte >= 0x80 left by 24 overflows int.
static inline uint32_t read_le_dword(const uint8_t* pBytes)
{
    return ((uint32_t)pBytes[0]) |
        ((uint32_t)pBytes[1] << 8U) |
        ((uint32_t)pBytes[2] << 16U) |
        ((uint32_t)pBytes[3] << 24U);
}
|
|
|
|
// Trit block decode table: maps the packed 8-bit T value of a BISE trit block to its five trits.
// See 18.12. Integer Sequence Encoding - this table (like the quint one) was computed by executing
// the decoder functions with all possible 8/7-bit inputs.
static const uint8_t s_trit_decode[256][5] =
{
    {0,0,0,0,0},{1,0,0,0,0},{2,0,0,0,0},{0,0,2,0,0},{0,1,0,0,0},{1,1,0,0,0},{2,1,0,0,0},{1,0,2,0,0},    // T = 0..7
    {0,2,0,0,0},{1,2,0,0,0},{2,2,0,0,0},{2,0,2,0,0},{0,2,2,0,0},{1,2,2,0,0},{2,2,2,0,0},{2,0,2,0,0},    // T = 8..15
    {0,0,1,0,0},{1,0,1,0,0},{2,0,1,0,0},{0,1,2,0,0},{0,1,1,0,0},{1,1,1,0,0},{2,1,1,0,0},{1,1,2,0,0},    // T = 16..23
    {0,2,1,0,0},{1,2,1,0,0},{2,2,1,0,0},{2,1,2,0,0},{0,0,0,2,2},{1,0,0,2,2},{2,0,0,2,2},{0,0,2,2,2},    // T = 24..31
    {0,0,0,1,0},{1,0,0,1,0},{2,0,0,1,0},{0,0,2,1,0},{0,1,0,1,0},{1,1,0,1,0},{2,1,0,1,0},{1,0,2,1,0},    // T = 32..39
    {0,2,0,1,0},{1,2,0,1,0},{2,2,0,1,0},{2,0,2,1,0},{0,2,2,1,0},{1,2,2,1,0},{2,2,2,1,0},{2,0,2,1,0},    // T = 40..47
    {0,0,1,1,0},{1,0,1,1,0},{2,0,1,1,0},{0,1,2,1,0},{0,1,1,1,0},{1,1,1,1,0},{2,1,1,1,0},{1,1,2,1,0},    // T = 48..55
    {0,2,1,1,0},{1,2,1,1,0},{2,2,1,1,0},{2,1,2,1,0},{0,1,0,2,2},{1,1,0,2,2},{2,1,0,2,2},{1,0,2,2,2},    // T = 56..63
    {0,0,0,2,0},{1,0,0,2,0},{2,0,0,2,0},{0,0,2,2,0},{0,1,0,2,0},{1,1,0,2,0},{2,1,0,2,0},{1,0,2,2,0},    // T = 64..71
    {0,2,0,2,0},{1,2,0,2,0},{2,2,0,2,0},{2,0,2,2,0},{0,2,2,2,0},{1,2,2,2,0},{2,2,2,2,0},{2,0,2,2,0},    // T = 72..79
    {0,0,1,2,0},{1,0,1,2,0},{2,0,1,2,0},{0,1,2,2,0},{0,1,1,2,0},{1,1,1,2,0},{2,1,1,2,0},{1,1,2,2,0},    // T = 80..87
    {0,2,1,2,0},{1,2,1,2,0},{2,2,1,2,0},{2,1,2,2,0},{0,2,0,2,2},{1,2,0,2,2},{2,2,0,2,2},{2,0,2,2,2},    // T = 88..95
    {0,0,0,0,2},{1,0,0,0,2},{2,0,0,0,2},{0,0,2,0,2},{0,1,0,0,2},{1,1,0,0,2},{2,1,0,0,2},{1,0,2,0,2},    // T = 96..103
    {0,2,0,0,2},{1,2,0,0,2},{2,2,0,0,2},{2,0,2,0,2},{0,2,2,0,2},{1,2,2,0,2},{2,2,2,0,2},{2,0,2,0,2},    // T = 104..111
    {0,0,1,0,2},{1,0,1,0,2},{2,0,1,0,2},{0,1,2,0,2},{0,1,1,0,2},{1,1,1,0,2},{2,1,1,0,2},{1,1,2,0,2},    // T = 112..119
    {0,2,1,0,2},{1,2,1,0,2},{2,2,1,0,2},{2,1,2,0,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,0,2,2,2},    // T = 120..127
    {0,0,0,0,1},{1,0,0,0,1},{2,0,0,0,1},{0,0,2,0,1},{0,1,0,0,1},{1,1,0,0,1},{2,1,0,0,1},{1,0,2,0,1},    // T = 128..135
    {0,2,0,0,1},{1,2,0,0,1},{2,2,0,0,1},{2,0,2,0,1},{0,2,2,0,1},{1,2,2,0,1},{2,2,2,0,1},{2,0,2,0,1},    // T = 136..143
    {0,0,1,0,1},{1,0,1,0,1},{2,0,1,0,1},{0,1,2,0,1},{0,1,1,0,1},{1,1,1,0,1},{2,1,1,0,1},{1,1,2,0,1},    // T = 144..151
    {0,2,1,0,1},{1,2,1,0,1},{2,2,1,0,1},{2,1,2,0,1},{0,0,1,2,2},{1,0,1,2,2},{2,0,1,2,2},{0,1,2,2,2},    // T = 152..159
    {0,0,0,1,1},{1,0,0,1,1},{2,0,0,1,1},{0,0,2,1,1},{0,1,0,1,1},{1,1,0,1,1},{2,1,0,1,1},{1,0,2,1,1},    // T = 160..167
    {0,2,0,1,1},{1,2,0,1,1},{2,2,0,1,1},{2,0,2,1,1},{0,2,2,1,1},{1,2,2,1,1},{2,2,2,1,1},{2,0,2,1,1},    // T = 168..175
    {0,0,1,1,1},{1,0,1,1,1},{2,0,1,1,1},{0,1,2,1,1},{0,1,1,1,1},{1,1,1,1,1},{2,1,1,1,1},{1,1,2,1,1},    // T = 176..183
    {0,2,1,1,1},{1,2,1,1,1},{2,2,1,1,1},{2,1,2,1,1},{0,1,1,2,2},{1,1,1,2,2},{2,1,1,2,2},{1,1,2,2,2},    // T = 184..191
    {0,0,0,2,1},{1,0,0,2,1},{2,0,0,2,1},{0,0,2,2,1},{0,1,0,2,1},{1,1,0,2,1},{2,1,0,2,1},{1,0,2,2,1},    // T = 192..199
    {0,2,0,2,1},{1,2,0,2,1},{2,2,0,2,1},{2,0,2,2,1},{0,2,2,2,1},{1,2,2,2,1},{2,2,2,2,1},{2,0,2,2,1},    // T = 200..207
    {0,0,1,2,1},{1,0,1,2,1},{2,0,1,2,1},{0,1,2,2,1},{0,1,1,2,1},{1,1,1,2,1},{2,1,1,2,1},{1,1,2,2,1},    // T = 208..215
    {0,2,1,2,1},{1,2,1,2,1},{2,2,1,2,1},{2,1,2,2,1},{0,2,1,2,2},{1,2,1,2,2},{2,2,1,2,2},{2,1,2,2,2},    // T = 216..223
    {0,0,0,1,2},{1,0,0,1,2},{2,0,0,1,2},{0,0,2,1,2},{0,1,0,1,2},{1,1,0,1,2},{2,1,0,1,2},{1,0,2,1,2},    // T = 224..231
    {0,2,0,1,2},{1,2,0,1,2},{2,2,0,1,2},{2,0,2,1,2},{0,2,2,1,2},{1,2,2,1,2},{2,2,2,1,2},{2,0,2,1,2},    // T = 232..239
    {0,0,1,1,2},{1,0,1,1,2},{2,0,1,1,2},{0,1,2,1,2},{0,1,1,1,2},{1,1,1,1,2},{2,1,1,1,2},{1,1,2,1,2},    // T = 240..247
    {0,2,1,1,2},{1,2,1,1,2},{2,2,1,1,2},{2,1,2,1,2},{0,2,2,2,2},{1,2,2,2,2},{2,2,2,2,2},{2,1,2,2,2}     // T = 248..255
};
|
|
|
|
// Quint block decode table: maps the packed 7-bit T value of a BISE quint block to its three quints.
static const uint8_t s_quint_decode[128][3] =
{
    {0,0,0},{1,0,0},{2,0,0},{3,0,0},{4,0,0},{0,4,0},{4,4,0},{4,4,4},    // T = 0..7
    {0,1,0},{1,1,0},{2,1,0},{3,1,0},{4,1,0},{1,4,0},{4,4,1},{4,4,4},    // T = 8..15
    {0,2,0},{1,2,0},{2,2,0},{3,2,0},{4,2,0},{2,4,0},{4,4,2},{4,4,4},    // T = 16..23
    {0,3,0},{1,3,0},{2,3,0},{3,3,0},{4,3,0},{3,4,0},{4,4,3},{4,4,4},    // T = 24..31
    {0,0,1},{1,0,1},{2,0,1},{3,0,1},{4,0,1},{0,4,1},{4,0,4},{0,4,4},    // T = 32..39
    {0,1,1},{1,1,1},{2,1,1},{3,1,1},{4,1,1},{1,4,1},{4,1,4},{1,4,4},    // T = 40..47
    {0,2,1},{1,2,1},{2,2,1},{3,2,1},{4,2,1},{2,4,1},{4,2,4},{2,4,4},    // T = 48..55
    {0,3,1},{1,3,1},{2,3,1},{3,3,1},{4,3,1},{3,4,1},{4,3,4},{3,4,4},    // T = 56..63
    {0,0,2},{1,0,2},{2,0,2},{3,0,2},{4,0,2},{0,4,2},{2,0,4},{3,0,4},    // T = 64..71
    {0,1,2},{1,1,2},{2,1,2},{3,1,2},{4,1,2},{1,4,2},{2,1,4},{3,1,4},    // T = 72..79
    {0,2,2},{1,2,2},{2,2,2},{3,2,2},{4,2,2},{2,4,2},{2,2,4},{3,2,4},    // T = 80..87
    {0,3,2},{1,3,2},{2,3,2},{3,3,2},{4,3,2},{3,4,2},{2,3,4},{3,3,4},    // T = 88..95
    {0,0,3},{1,0,3},{2,0,3},{3,0,3},{4,0,3},{0,4,3},{0,0,4},{1,0,4},    // T = 96..103
    {0,1,3},{1,1,3},{2,1,3},{3,1,3},{4,1,3},{1,4,3},{0,1,4},{1,1,4},    // T = 104..111
    {0,2,3},{1,2,3},{2,2,3},{3,2,3},{4,2,3},{2,4,3},{0,2,4},{1,2,4},    // T = 112..119
    {0,3,3},{1,3,3},{2,3,3},{3,3,3},{4,3,3},{3,4,3},{0,3,4},{1,3,4}     // T = 120..127
};
|
|
|
|
static void decode_trit_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
|
|
{
|
|
assert((num_vals >= 1) && (num_vals <= 5));
|
|
uint32_t m[5] = { 0 }, T = 0;
|
|
|
|
static const uint8_t s_t_bits[5] = { 2, 2, 1, 2, 1 };
|
|
|
|
for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
|
|
{
|
|
if (bits_per_val)
|
|
m[c] = bits.next_bits(bit_ofs, bits_per_val);
|
|
T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
|
|
T_ofs += s_t_bits[c];
|
|
}
|
|
|
|
const uint8_t (&p_trits)[5] = s_trit_decode[T];
|
|
|
|
for (uint32_t i = 0; i < num_vals; i++)
|
|
pVals[i] = (uint8_t)((p_trits[i] << bits_per_val) | m[i]);
|
|
}
|
|
|
|
static void decode_quint_block(uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t& bit_ofs, uint32_t bits_per_val)
|
|
{
|
|
assert((num_vals >= 1) && (num_vals <= 3));
|
|
uint32_t m[3] = { 0 }, T = 0;
|
|
|
|
static const uint8_t s_t_bits[3] = { 3, 2, 2 };
|
|
|
|
for (uint32_t T_ofs = 0, c = 0; c < num_vals; c++)
|
|
{
|
|
if (bits_per_val)
|
|
m[c] = bits.next_bits(bit_ofs, bits_per_val);
|
|
T |= (bits.next_bits(bit_ofs, s_t_bits[c]) << T_ofs);
|
|
T_ofs += s_t_bits[c];
|
|
}
|
|
|
|
const uint8_t (&p_quints)[3] = s_quint_decode[T];
|
|
|
|
for (uint32_t i = 0; i < num_vals; i++)
|
|
pVals[i] = (uint8_t)((p_quints[i] << bits_per_val) | m[i]);
|
|
}
|
|
|
|
static void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint128& bits, uint32_t bit_ofs)
|
|
{
|
|
assert(num_vals && (ise_range < TOTAL_ISE_RANGES));
|
|
|
|
const uint32_t bits_per_val = g_ise_range_table[ise_range][0];
|
|
|
|
if (g_ise_range_table[ise_range][1])
|
|
{
|
|
// Trits+bits, 5 vals per block, 7 bits extra per block
|
|
const uint32_t total_blocks = (num_vals + 4) / 5;
|
|
for (uint32_t b = 0; b < total_blocks; b++)
|
|
{
|
|
const uint32_t num_vals_in_block = std::min<int>(num_vals - 5 * b, 5);
|
|
decode_trit_block(pVals + 5 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
|
|
}
|
|
}
|
|
else if (g_ise_range_table[ise_range][2])
|
|
{
|
|
// Quints+bits, 3 vals per block, 8 bits extra per block
|
|
const uint32_t total_blocks = (num_vals + 2) / 3;
|
|
for (uint32_t b = 0; b < total_blocks; b++)
|
|
{
|
|
const uint32_t num_vals_in_block = std::min<int>(num_vals - 3 * b, 3);
|
|
decode_quint_block(pVals + 3 * b, num_vals_in_block, bits, bit_ofs, bits_per_val);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
assert(bits_per_val);
|
|
|
|
// Only bits
|
|
for (uint32_t i = 0; i < num_vals; i++)
|
|
pVals[i] = (uint8_t)bits.next_bits(bit_ofs, bits_per_val);
|
|
}
|
|
}
|
|
|
|
void decode_bise(uint32_t ise_range, uint8_t* pVals, uint32_t num_vals, const uint8_t* pBits128, uint32_t bit_ofs)
|
|
{
|
|
const uint128 bits(
|
|
(uint64_t)read_le_dword(pBits128) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t))) << 32),
|
|
(uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pBits128 + sizeof(uint32_t) * 3)) << 32));
|
|
|
|
return decode_bise(ise_range, pVals, num_vals, bits, bit_ofs);
|
|
}
|
|
|
|
// Decodes a physical ASTC block to a logical ASTC block.
|
|
// blk_width/blk_height are only used to validate the weight grid's dimensions.
|
|
bool unpack_block(const void* pASTC_block, log_astc_block& log_blk, uint32_t blk_width, uint32_t blk_height)
|
|
{
|
|
assert(is_valid_block_size(blk_width, blk_height));
|
|
|
|
const uint8_t* pS = (uint8_t*)pASTC_block;
|
|
|
|
log_blk.clear();
|
|
log_blk.m_error_flag = true;
|
|
|
|
const uint128 bits(
|
|
(uint64_t)read_le_dword(pS) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t))) << 32),
|
|
(uint64_t)read_le_dword(pS + sizeof(uint32_t) * 2) | (((uint64_t)read_le_dword(pS + sizeof(uint32_t) * 3)) << 32));
|
|
|
|
const uint128 rev_bits(bits.get_reversed_bits());
|
|
|
|
if (!decode_config(bits, log_blk))
|
|
return false;
|
|
|
|
if (log_blk.m_solid_color_flag_hdr || log_blk.m_solid_color_flag_ldr)
|
|
{
|
|
// Void extent
|
|
log_blk.m_error_flag = false;
|
|
return true;
|
|
}
|
|
|
|
// Check grid dimensions
|
|
if ((log_blk.m_grid_width > blk_width) || (log_blk.m_grid_height > blk_height))
|
|
return false;
|
|
|
|
// Now we have the grid width/height, dual plane, weight ISE range
|
|
|
|
const uint32_t total_grid_weights = (log_blk.m_dual_plane ? 2 : 1) * (log_blk.m_grid_width * log_blk.m_grid_height);
|
|
const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_blk.m_weight_ise_range);
|
|
|
|
// 18.24 Illegal Encodings
|
|
if ((!total_grid_weights) || (total_grid_weights > MAX_GRID_WEIGHTS) || (total_weight_bits < 24) || (total_weight_bits > 96))
|
|
return false;
|
|
|
|
const uint32_t end_of_weight_bit_ofs = 128 - total_weight_bits;
|
|
|
|
uint32_t total_extra_bits = 0;
|
|
|
|
// Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.
|
|
|
|
log_blk.m_num_partitions = (uint8_t)(bits.get_bits(11, 2) + 1);
|
|
if (log_blk.m_num_partitions == 1)
|
|
log_blk.m_color_endpoint_modes[0] = (uint8_t)(bits.get_bits(13, 4)); // read CEM bits
|
|
else
|
|
{
|
|
// 2 or more partitions
|
|
if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))
|
|
return false;
|
|
|
|
log_blk.m_partition_id = (uint16_t)bits.get_bits(13, 10);
|
|
|
|
uint32_t cem_bits = bits.get_bits(23, 6);
|
|
|
|
if ((cem_bits & 3) == 0)
|
|
{
|
|
// All CEM's the same
|
|
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
|
|
log_blk.m_color_endpoint_modes[i] = (uint8_t)(cem_bits >> 2);
|
|
}
|
|
else
|
|
{
|
|
// CEM's different, but within up to 2 adjacent classes
|
|
const uint32_t first_cem_index = ((cem_bits & 3) - 1) * 4;
|
|
|
|
total_extra_bits = 3 * log_blk.m_num_partitions - 4;
|
|
|
|
if ((total_weight_bits + total_extra_bits) > 128)
|
|
return false;
|
|
|
|
uint32_t cem_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
|
|
|
|
uint32_t c[4] = { 0 }, m[4] = { 0 };
|
|
|
|
cem_bits >>= 2;
|
|
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++, cem_bits >>= 1)
|
|
c[i] = cem_bits & 1;
|
|
|
|
switch (log_blk.m_num_partitions)
|
|
{
|
|
case 2:
|
|
{
|
|
m[0] = cem_bits & 3;
|
|
m[1] = bits.next_bits(cem_bit_pos, 2);
|
|
break;
|
|
}
|
|
case 3:
|
|
{
|
|
m[0] = cem_bits & 1;
|
|
m[0] |= (bits.next_bits(cem_bit_pos, 1) << 1);
|
|
m[1] = bits.next_bits(cem_bit_pos, 2);
|
|
m[2] = bits.next_bits(cem_bit_pos, 2);
|
|
break;
|
|
}
|
|
case 4:
|
|
{
|
|
for (uint32_t i = 0; i < 4; i++)
|
|
m[i] = bits.next_bits(cem_bit_pos, 2);
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
assert(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
assert(cem_bit_pos == end_of_weight_bit_ofs);
|
|
|
|
for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
|
|
{
|
|
log_blk.m_color_endpoint_modes[i] = (uint8_t)(first_cem_index + (c[i] * 4) + m[i]);
|
|
assert(log_blk.m_color_endpoint_modes[i] <= 15);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Now we have all the CEM indices.
|
|
|
|
if (log_blk.m_dual_plane)
|
|
{
|
|
// Read CCS bits, beneath any CEM bits
|
|
total_extra_bits += 2;
|
|
|
|
if (total_extra_bits > end_of_weight_bit_ofs)
|
|
return false;
|
|
|
|
uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
|
|
log_blk.m_color_component_selector = (uint8_t)(bits.get_bits(ccs_bit_pos, 2));
|
|
}
|
|
|
|
uint32_t config_bit_pos = 11 + 2; // config+num_parts
|
|
if (log_blk.m_num_partitions == 1)
|
|
config_bit_pos += 4; // CEM bits
|
|
else
|
|
config_bit_pos += 10 + 6; // part_id+CEM bits
|
|
|
|
// config+num_parts+total_extra_bits (CEM extra+CCS)
|
|
uint32_t total_config_bits = config_bit_pos + total_extra_bits;
|
|
|
|
// Compute number of remaining bits in block
|
|
const int num_remaining_bits = 128 - (int)total_config_bits - (int)total_weight_bits;
|
|
if (num_remaining_bits < 0)
|
|
return false;
|
|
|
|
// Compute total number of ISE encoded color endpoint mode values
|
|
uint32_t total_cem_vals = 0;
|
|
for (uint32_t j = 0; j < log_blk.m_num_partitions; j++)
|
|
total_cem_vals += get_num_cem_values(log_blk.m_color_endpoint_modes[j]);
|
|
|
|
if (total_cem_vals > MAX_ENDPOINTS)
|
|
return false;
|
|
|
|
// Infer endpoint ISE range based off the # of values we need to encode, and the # of remaining bits in the block
|
|
// TODO: Optimize
|
|
int endpoint_ise_range = -1;
|
|
for (int k = 20; k > 0; k--)
|
|
{
|
|
int b = get_ise_sequence_bits(total_cem_vals, k);
|
|
if (b <= num_remaining_bits)
|
|
{
|
|
endpoint_ise_range = k;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// See 23.24 Illegal Encodings, [0,5] is the minimum ISE encoding for endpoints
|
|
if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
|
|
return false;
|
|
|
|
log_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range;
|
|
|
|
// Decode endpoints forwards in block
|
|
decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);
|
|
|
|
// Decode grid weights backwards in block
|
|
decode_bise(log_blk.m_weight_ise_range, log_blk.m_weights, total_grid_weights, rev_bits, 0);
|
|
|
|
log_blk.m_error_flag = false;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Misc. helpers
|
|
|
|
uint8_t get_weight(const log_astc_block& log_block, uint32_t plane_index, uint32_t i)
|
|
{
|
|
const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1;
|
|
assert(plane_index < num_planes);
|
|
assert(i < (uint32_t)(log_block.m_grid_width * log_block.m_grid_height));
|
|
|
|
const uint32_t idx = i * num_planes + plane_index;
|
|
assert(idx < MAX_GRID_WEIGHTS);
|
|
|
|
return log_block.m_weights[idx];
|
|
}
|
|
|
|
// Mutable variant: returns a reference to weight i of the given plane
// (element m_weights[i * num_planes + plane_index] of the interleaved weight array).
uint8_t &get_weight(log_astc_block& log_block, uint32_t plane_index, uint32_t i)
{
    const uint32_t plane_count = log_block.m_dual_plane ? 2 : 1;
    assert(plane_index < plane_count);
    assert(i < (uint32_t)(log_block.m_grid_width * log_block.m_grid_height));

    const uint32_t weight_ofs = i * plane_count + plane_index;
    assert(weight_ofs < MAX_GRID_WEIGHTS);

    return log_block.m_weights[weight_ofs];
}
|
|
|
|
void extract_weights(const log_astc_block& log_block, uint8_t* pWeights, uint32_t plane_index)
|
|
{
|
|
const uint32_t num_planes = log_block.m_dual_plane ? 2 : 1;
|
|
assert(plane_index < num_planes);
|
|
|
|
const uint32_t num_weights = log_block.m_grid_width * log_block.m_grid_height;
|
|
for (uint32_t i = 0; i < num_weights; i++)
|
|
pWeights[i] = log_block.m_weights[i * num_planes + plane_index];
|
|
}
|
|
|
|
// Stores grid_width * grid_height weights from pWeights as the given plane of the block's
// interleaved weight array. Entries of the other plane are left untouched.
void set_weights(log_astc_block& log_block, const uint8_t* pWeights, uint32_t plane_index)
{
    const uint32_t plane_count = log_block.m_dual_plane ? 2 : 1;
    assert(plane_index < plane_count);

    const uint32_t weights_per_plane = log_block.m_grid_width * log_block.m_grid_height;

    // Step through the interleaved array with a stride of plane_count.
    uint32_t dst_ofs = plane_index;
    for (uint32_t src_ofs = 0; src_ofs < weights_per_plane; src_ofs++)
    {
        log_block.m_weights[dst_ofs] = pWeights[src_ofs];
        dst_ofs += plane_count;
    }
}
|
|
|
|
uint32_t get_total_weights(const log_astc_block& log_block)
|
|
{
|
|
return (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
|
|
}
|
|
|
|
// Returns a pointer to the beginning of a partition's/subset's endpoint values.
|
|
uint8_t *get_endpoints(log_astc_block& log_block, uint32_t partition_index)
|
|
{
|
|
assert(partition_index < log_block.m_num_partitions);
|
|
|
|
uint32_t ofs = 0;
|
|
|
|
for (uint32_t i = 0; i != partition_index; ++i)
|
|
ofs += get_num_cem_values(log_block.m_color_endpoint_modes[i]);
|
|
|
|
assert(ofs < MAX_ENDPOINTS);
|
|
|
|
return log_block.m_endpoints + ofs;
|
|
}
|
|
|
|
const uint8_t* get_endpoints(const log_astc_block& log_block, uint32_t partition_index)
|
|
{
|
|
assert(partition_index < log_block.m_num_partitions);
|
|
|
|
uint32_t ofs = 0;
|
|
|
|
for (uint32_t i = 0; i != partition_index; ++i)
|
|
ofs += get_num_cem_values(log_block.m_color_endpoint_modes[i]);
|
|
|
|
assert(ofs < MAX_ENDPOINTS);
|
|
|
|
return log_block.m_endpoints + ofs;
|
|
}
|
|
|
|
// Returns a human readable name (including the numeric index) for color endpoint mode cem_index.
// cem_index must be in [0,15]; this is only checked by an assert.
const char* get_cem_name(uint32_t cem_index)
{
    static const char *s_names[16] =
    {
        "CEM_LDR_LUM_DIRECT (0)",
        "CEM_LDR_LUM_BASE_PLUS_OFS (1)",
        "CEM_HDR_LUM_LARGE_RANGE (2)",
        "CEM_HDR_LUM_SMALL_RANGE (3)",
        "CEM_LDR_LUM_ALPHA_DIRECT (4)",
        "CEM_LDR_LUM_ALPHA_BASE_PLUS_OFS (5)",
        "CEM_LDR_RGB_BASE_SCALE (6)",
        "CEM_HDR_RGB_BASE_SCALE (7)",
        "CEM_LDR_RGB_DIRECT (8)",
        "CEM_LDR_RGB_BASE_PLUS_OFFSET (9)",
        "CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A (10)",
        "CEM_HDR_RGB (11)",
        "CEM_LDR_RGBA_DIRECT (12)",
        "CEM_LDR_RGBA_BASE_PLUS_OFFSET (13)",
        "CEM_HDR_RGB_LDR_ALPHA (14)",
        "CEM_HDR_RGB_HDR_ALPHA (15)"
    };

    assert(cem_index < std::size(s_names));

    const char* const pName = s_names[cem_index];
    assert(pName);

    return pName;
}
|
|
|
|
bool cem_is_ldr_direct(uint32_t cem_index)
|
|
{
|
|
return (cem_index == CEM_LDR_RGB_DIRECT) || (cem_index == CEM_LDR_RGBA_DIRECT);
|
|
}
|
|
|
|
bool cem_is_ldr_base_scale(uint32_t cem_index)
|
|
{
|
|
return (cem_index == CEM_LDR_RGB_BASE_SCALE) || (cem_index == CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A);
|
|
}
|
|
|
|
bool cem_is_ldr_base_plus_ofs(uint32_t cem_index)
|
|
{
|
|
return (cem_index == CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == CEM_LDR_RGBA_BASE_PLUS_OFFSET);
|
|
}
|
|
|
|
bool cem_supports_bc(uint32_t cem)
|
|
{
|
|
switch (cem)
|
|
{
|
|
case CEM_LDR_RGB_DIRECT:
|
|
case CEM_LDR_RGBA_DIRECT:
|
|
case CEM_LDR_RGB_BASE_PLUS_OFFSET:
|
|
case CEM_LDR_RGBA_BASE_PLUS_OFFSET:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Decoder-side bit transfer: moves the top bit of a into the top bit of b and sign extends what is left of a.
// input:
//  a=[0,255]
//  b=[0,255]
// output:
//  a=bits [1,6] of the input a, interpreted as a 6-bit two's complement value, so [-32,31]
//  b=input b shifted right by 1, with bit 7 of the input a as its new MSB, so [0,255]
void bit_transfer_signed_dec(int& a, int& b)
{
    assert((a >= 0) && (a <= 255));
    assert((b >= 0) && (b <= 255));

    // b drops its LSB and receives a's MSB.
    b >>= 1;
    b |= (a & 0x80);

    // a keeps bits [1,6] as a 6-bit value...
    a = (a >> 1) & 0x3F;

    // ...which is then sign extended: 32..63 become -32..-1.
    if (a >= 0x20)
        a -= 0x40;
}
|
|
|
|
// Encoder-side bit transfer: moves the top bit of b into the top bit of a, preparing a for encoding.
// input:
//  a=[-32,31] (6-bits, 2's complement)
//  b=[0,255] (8-bits)
// output:
//  a=[0,255]: the 6-bit pattern of the input a in bits [1,6], the input b's MSB in bit 7, bit 0 clear
//  b=[0,255]: the input b shifted left by 1 (its former MSB is dropped)
void bit_transfer_signed_enc(int& a, int& b)
{
    assert((a >= -32) && (a <= 31));
    assert((b >= 0) && (b <= 255));

    // Remember b's MSB (already in the position it will occupy inside a), then shift it out of b.
    const int carried_msb = b & 0x80;
    b = (b << 1) & 0xFF;

    // Keep a's 6-bit pattern, make room for the (zero) LSB, and place the carried bit on top.
    a = ((a & 0x3F) << 1) | carried_msb;
}
|
|
|
|
// RGB or RGBA direct
|
|
bool cem8_or_12_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index)
|
|
{
|
|
assert((cem_index == CEM_LDR_RGB_DIRECT) || (cem_index == CEM_LDR_RGBA_DIRECT));
|
|
(void)(cem_index);
|
|
|
|
const auto& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val;
|
|
|
|
uint8_t dequantized_endpoints[6];
|
|
for (uint32_t i = 0; i < 6; i++)
|
|
dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]];
|
|
|
|
uint32_t s0 = dequantized_endpoints[0] + dequantized_endpoints[2] + dequantized_endpoints[4];
|
|
uint32_t s1 = dequantized_endpoints[1] + dequantized_endpoints[3] + dequantized_endpoints[5];
|
|
|
|
return s1 < s0;
|
|
}
|
|
|
|
// RGB or RGBA base plus offset
|
|
bool cem9_or_13_used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index)
|
|
{
|
|
assert((cem_index == CEM_LDR_RGB_BASE_PLUS_OFFSET) || (cem_index == CEM_LDR_RGBA_BASE_PLUS_OFFSET));
|
|
(void)(cem_index);
|
|
|
|
const auto& endpoint_dequant_tab = g_dequant_tables.get_endpoint_tab(endpoint_ise_index).m_ISE_to_val;
|
|
|
|
int dequantized_endpoints[6];
|
|
for (uint32_t i = 0; i < 6; i++)
|
|
dequantized_endpoints[i] = endpoint_dequant_tab[pEndpoint_vals[i]];
|
|
|
|
bit_transfer_signed_dec(dequantized_endpoints[1], dequantized_endpoints[0]);
|
|
bit_transfer_signed_dec(dequantized_endpoints[3], dequantized_endpoints[2]);
|
|
bit_transfer_signed_dec(dequantized_endpoints[5], dequantized_endpoints[4]);
|
|
|
|
int s = dequantized_endpoints[1] + dequantized_endpoints[3] + dequantized_endpoints[5];
|
|
|
|
return s < 0;
|
|
}
|
|
|
|
bool used_blue_contraction(uint32_t cem_index, const uint8_t* pEndpoint_vals, uint32_t endpoint_ise_index)
|
|
{
|
|
assert(is_cem_ldr(cem_index));
|
|
|
|
bool used_blue_contraction_flag = false;
|
|
|
|
if ((cem_index == 8) || (cem_index == 12))
|
|
used_blue_contraction_flag = cem8_or_12_used_blue_contraction(cem_index, pEndpoint_vals, endpoint_ise_index);
|
|
else if ((cem_index == 9) || (cem_index == 13))
|
|
used_blue_contraction_flag = cem9_or_13_used_blue_contraction(cem_index, pEndpoint_vals, endpoint_ise_index);
|
|
|
|
return used_blue_contraction_flag;
|
|
}
|
|
|
|
uint32_t get_base_cem_without_alpha(uint32_t cem)
|
|
{
|
|
assert(is_cem_ldr(cem));
|
|
|
|
switch (cem)
|
|
{
|
|
case CEM_LDR_LUM_ALPHA_DIRECT: return CEM_LDR_LUM_DIRECT;
|
|
case CEM_LDR_RGBA_DIRECT: return CEM_LDR_RGB_DIRECT;
|
|
case CEM_LDR_RGB_BASE_SCALE_PLUS_TWO_A: return CEM_LDR_RGB_BASE_SCALE;
|
|
case CEM_LDR_RGBA_BASE_PLUS_OFFSET: return CEM_LDR_RGB_BASE_PLUS_OFFSET;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return cem;
|
|
}
|
|
|
|
int apply_delta_to_bise_endpoint_val(uint32_t endpoint_ise_range, int ise_val, int delta)
|
|
{
|
|
if (delta == 0)
|
|
return ise_val;
|
|
|
|
uint32_t num_ise_levels = astc_helpers::get_ise_levels(endpoint_ise_range);
|
|
|
|
const auto& ISE_to_rank = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_ISE_to_rank;
|
|
const auto& rank_to_ISE = astc_helpers::g_dequant_tables.get_endpoint_tab(endpoint_ise_range).m_rank_to_ISE;
|
|
|
|
int cur_rank = ISE_to_rank[ise_val];
|
|
int new_rank = basisu::clamp<int>(cur_rank + delta, 0, (int)num_ise_levels - 1);
|
|
|
|
return rank_to_ISE[new_rank];
|
|
}
|
|
|
|
void get_astc_block_size_by_index(uint32_t index, uint32_t& width, uint32_t& height)
|
|
{
|
|
assert(index < NUM_ASTC_BLOCK_SIZES);
|
|
|
|
width = g_astc_block_sizes[index][0];
|
|
height = g_astc_block_sizes[index][1];
|
|
}
|
|
|
|
int find_astc_block_size_index(uint32_t width, uint32_t height)
|
|
{
|
|
for (uint32_t i = 0; i < NUM_ASTC_BLOCK_SIZES; i++)
|
|
if ((width == g_astc_block_sizes[i][0]) && (height == g_astc_block_sizes[i][1]))
|
|
return i;
|
|
|
|
return -1;
|
|
}
|
|
|
|
} // namespace astc_helpers
|
|
|
|
#endif //BASISU_ASTC_HELPERS_IMPLEMENTATION
|