Removing detex dependency

Adding new GPU texture format decoders for BC1/4/5, ETC2 EAC alpha, and BC7 mode 6
Adding GPU texture unpack failure checks and warnings
Adding support for 3-color blocks to the transcoder, and making sure that BC3 blocks never use 3-color (punchthrough alpha) mode.
This commit is contained in:
Rich Geldreich
2019-04-30 13:47:11 -07:00
parent c35e288c28
commit 16c9e35c45
16 changed files with 681 additions and 1710 deletions

View File

@@ -61,9 +61,6 @@ set(BASISU_SRC_LIST ${COMMON_SRC_LIST}
basisu_ssim.cpp
basisu_tool.cpp
lodepng.cpp
detex/decompress_bc.c
detex/decompress_bc7.c
detex/decompress_eac.c
transcoder/basisu_transcoder.cpp
)

View File

@@ -169,10 +169,6 @@
<ClCompile Include="basisu_resampler.cpp" />
<ClCompile Include="basisu_resample_filters.cpp" />
<ClInclude Include="basisu_ssim.h" />
<ClInclude Include="detex\decompress_bc.h" />
<ClInclude Include="detex\decompress_bc7.h" />
<ClInclude Include="detex\decompress_eac.h" />
<ClInclude Include="detex\detex_common.h" />
<ClInclude Include="lodepng.h" />
<ClInclude Include="transcoder\basisu.h" />
<ClInclude Include="transcoder\basisu_transcoder.h" />
@@ -191,9 +187,6 @@
<ClCompile Include="basisu_pvrtc1_4.cpp" />
<ClCompile Include="basisu_global_selector_palette_helpers.cpp" />
<ClCompile Include="basisu_gpu_texture.cpp" />
<ClCompile Include="detex\decompress_bc.c" />
<ClCompile Include="detex\decompress_bc7.c" />
<ClCompile Include="detex\decompress_eac.c" />
<ClCompile Include="lodepng.cpp" />
<ClCompile Include="basisu_tool.cpp" />
</ItemGroup>

View File

@@ -14,18 +14,10 @@
<ClCompile Include="basisu_gpu_texture.cpp" />
<ClCompile Include="lodepng.cpp" />
<ClCompile Include="basisu_tool.cpp" />
<ClCompile Include="detex\decompress_bc.c">
<Filter>detex</Filter>
</ClCompile>
<ClCompile Include="detex\decompress_bc7.c">
<Filter>detex</Filter>
</ClCompile>
<ClCompile Include="detex\decompress_eac.c">
<Filter>detex</Filter>
</ClCompile>
<ClCompile Include="transcoder\basisu_transcoder.cpp">
<Filter>transcoder</Filter>
</ClCompile>
<ClCompile Include="basisu_ssim.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="basisu_basis_file.h" />
@@ -42,18 +34,6 @@
<ClInclude Include="lodepng.h" />
<ClInclude Include="transcoder\basisu_file_headers.h" />
<ClInclude Include="transcoder\basisu_global_selector_cb.h" />
<ClInclude Include="detex\decompress_eac.h">
<Filter>detex</Filter>
</ClInclude>
<ClInclude Include="detex\decompress_bc.h">
<Filter>detex</Filter>
</ClInclude>
<ClInclude Include="detex\decompress_bc7.h">
<Filter>detex</Filter>
</ClInclude>
<ClInclude Include="detex\detex_common.h">
<Filter>detex</Filter>
</ClInclude>
<ClInclude Include="transcoder\basisu_global_selector_palette.h">
<Filter>transcoder</Filter>
</ClInclude>
@@ -66,6 +46,7 @@
<ClInclude Include="transcoder\basisu_transcoder.h">
<Filter>transcoder</Filter>
</ClInclude>
<ClInclude Include="basisu_ssim.h" />
</ItemGroup>
<ItemGroup>
<None Include="transcoder\basisu_transcoder_tables_dxt1_6.inc">
@@ -82,8 +63,5 @@
<Filter Include="transcoder">
<UniqueIdentifier>{7a54aaad-1d10-4bdf-b8e9-c14ed2263ed8}</UniqueIdentifier>
</Filter>
<Filter Include="detex">
<UniqueIdentifier>{977e9455-f354-422a-b698-08778483328c}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

View File

@@ -28,6 +28,13 @@ namespace basisu
void error_printf(const char *pFmt, ...);
// Helpers
// Saturates a signed 32-bit value to the unsigned 8-bit range: negative inputs
// give 0, inputs above 255 give 255, everything else passes through unchanged.
inline uint8_t clamp255(int32_t i)
{
	if (i < 0)
		return 0;
	if (i > 255)
		return 255;
	return static_cast<uint8_t>(i);
}
// Linear algebra
template <uint32_t N, typename T>
@@ -753,6 +760,8 @@ namespace basisu
bool bit() { return irand(0, 1) == 1; }
uint8_t byte() { return static_cast<uint8_t>(urand32()); }
// between [l,h)
float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); }

View File

@@ -13,77 +13,417 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "basisu_gpu_texture.h"
#include "detex/decompress_bc.h"
#include "detex/decompress_bc7.h"
#include "detex/decompress_eac.h"
#include "basisu_enc.h"
#include "basisu_pvrtc1_4.h"
namespace basisu
{
// ETC2 EAC modifier tables: 16 rows of 8 signed modifiers.
// unpack_etc2_eac() selects a row with the block's 4-bit table index and a column with each pixel's
// 3-bit selector, then computes clamp255(base + modifier * multiplier).
const int8_t g_etc2_eac_tables[16][8] =
{
{ -3, -6, -9, -15, 2, 5, 8, 14 }, { -3, -7, -10, -13, 2, 6, 9, 12 }, { -2, -5, -8, -13, 1, 4, 7, 12 }, { -2, -4, -6, -13, 1, 3, 5, 12 },
{ -3, -6, -8, -12, 2, 5, 7, 11 }, { -3, -7, -9, -11, 2, 6, 8, 10 }, { -4, -7, -8, -11, 3, 6, 7, 10 }, { -3, -5, -8, -11, 2, 4, 7, 10 },
{ -2, -6, -8, -10, 1, 5, 7, 9 }, { -2, -5, -8, -10, 1, 4, 7, 9 }, { -2, -4, -8, -10, 1, 3, 7, 9 }, { -2, -5, -7, -10, 1, 4, 6, 9 },
{ -3, -4, -7, -10, 2, 3, 6, 9 }, { -1, -2, -3, -10, 0, 1, 2, 9 }, { -4, -6, -8, -9, 3, 5, 7, 8 }, { -3, -5, -7, -9, 2, 4, 6, 8 }
};
// One 8-byte ETC2 EAC alpha block: an 8-bit base value, a 4-bit modifier table index,
// a 4-bit multiplier, and sixteen 3-bit selectors packed into the trailing 6 bytes.
struct eac_a8_block
{
	uint16_t m_base : 8;
	uint16_t m_table : 4;
	uint16_t m_multiplier : 4;

	uint8_t m_selectors[6];

	// Returns the 3-bit selector of pixel (x, y) from the 48-bit word produced by get_selector_bits().
	// Selectors are ordered column-first (index = x * 4 + y), with index 0 in the topmost 3 bits.
	inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
	{
		assert((x < 4) && (y < 4));

		const uint32_t selector_index = y + x * 4;
		const uint32_t shift = 45 - selector_index * 3;

		return static_cast<uint32_t>((selector_bits >> shift) & 7);
	}

	// Concatenates the six selector bytes into one 48-bit value, m_selectors[0] being the most significant byte.
	inline uint64_t get_selector_bits() const
	{
		uint64_t packed = 0;
		for (uint32_t i = 0; i < 6; i++)
			packed = (packed << 8) | m_selectors[i];
		return packed;
	}
};
void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels)
{
static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8");
const eac_a8_block *pBlock = static_cast<const eac_a8_block *>(pBlock_bits);
const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table];
const uint64_t selector_bits = pBlock->get_selector_bits();
const int32_t base = pBlock->m_base;
const int32_t mul = pBlock->m_multiplier;
pPixels[0].a = clamp255(base + pTable[pBlock->get_selector(0, 0, selector_bits)] * mul);
pPixels[1].a = clamp255(base + pTable[pBlock->get_selector(1, 0, selector_bits)] * mul);
pPixels[2].a = clamp255(base + pTable[pBlock->get_selector(2, 0, selector_bits)] * mul);
pPixels[3].a = clamp255(base + pTable[pBlock->get_selector(3, 0, selector_bits)] * mul);
pPixels[4].a = clamp255(base + pTable[pBlock->get_selector(0, 1, selector_bits)] * mul);
pPixels[5].a = clamp255(base + pTable[pBlock->get_selector(1, 1, selector_bits)] * mul);
pPixels[6].a = clamp255(base + pTable[pBlock->get_selector(2, 1, selector_bits)] * mul);
pPixels[7].a = clamp255(base + pTable[pBlock->get_selector(3, 1, selector_bits)] * mul);
pPixels[8].a = clamp255(base + pTable[pBlock->get_selector(0, 2, selector_bits)] * mul);
pPixels[9].a = clamp255(base + pTable[pBlock->get_selector(1, 2, selector_bits)] * mul);
pPixels[10].a = clamp255(base + pTable[pBlock->get_selector(2, 2, selector_bits)] * mul);
pPixels[11].a = clamp255(base + pTable[pBlock->get_selector(3, 2, selector_bits)] * mul);
pPixels[12].a = clamp255(base + pTable[pBlock->get_selector(0, 3, selector_bits)] * mul);
pPixels[13].a = clamp255(base + pTable[pBlock->get_selector(1, 3, selector_bits)] * mul);
pPixels[14].a = clamp255(base + pTable[pBlock->get_selector(2, 3, selector_bits)] * mul);
pPixels[15].a = clamp255(base + pTable[pBlock->get_selector(3, 3, selector_bits)] * mul);
}
// One 8-byte BC1 block: two 16-bit endpoint colors (low byte first) followed by
// four selector bytes, one per row, holding four 2-bit selectors each.
struct bc1_block
{
	enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };

	uint8_t m_low_color[cTotalEndpointBytes];
	uint8_t m_high_color[cTotalEndpointBytes];
	uint8_t m_selectors[cTotalSelectorBytes];

	// Second endpoint as a packed 16-bit 5:6:5 value.
	inline uint32_t get_high_color() const
	{
		const uint32_t lsb = m_high_color[0];
		const uint32_t msb = m_high_color[1];
		return (msb << 8U) | lsb;
	}

	// First endpoint as a packed 16-bit 5:6:5 value.
	inline uint32_t get_low_color() const
	{
		const uint32_t lsb = m_low_color[0];
		const uint32_t msb = m_low_color[1];
		return (msb << 8U) | lsb;
	}

	// Expands a packed 5:6:5 color to 8 bits per channel by replicating the top bits into the low bits.
	static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b)
	{
		const uint32_t r5 = (c >> 11) & 31;
		const uint32_t g6 = (c >> 5) & 63;
		const uint32_t b5 = c & 31;

		r = (r5 << 3) | (r5 >> 2);
		g = (g6 << 2) | (g6 >> 4);
		b = (b5 << 3) | (b5 >> 2);
	}

	// 2-bit selector of pixel (x, y): row y lives in m_selectors[y], column 0 in its lowest two bits.
	inline uint32_t get_selector(uint32_t x, uint32_t y) const
	{
		assert((x < 4U) && (y < 4U));
		const uint32_t row_bits = m_selectors[y];
		return (row_bits >> (x * 2)) & 3;
	}
};
// Returns true if the block uses 3 color punchthrough alpha mode.
bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha)
{
static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8");
const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits);
const uint32_t l = pBlock->get_low_color();
const uint32_t h = pBlock->get_high_color();
color_rgba c[4];
uint32_t r0, g0, b0, r1, g1, b1;
bc1_block::unpack_color(l, r0, g0, b0);
bc1_block::unpack_color(h, r1, g1, b1);
bool used_punchthrough = false;
if (l > h)
{
c[0].set_noclamp_rgba(r0, g0, b0, 255);
c[1].set_noclamp_rgba(r1, g1, b1, 255);
c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
}
else
{
c[0].set_noclamp_rgba(r0, g0, b0, 255);
c[1].set_noclamp_rgba(r1, g1, b1, 255);
c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
c[3].set_noclamp_rgba(0, 0, 0, 0);
used_punchthrough = true;
}
if (set_alpha)
{
for (uint32_t y = 0; y < 4; y++, pPixels += 4)
{
pPixels[0] = c[pBlock->get_selector(0, y)];
pPixels[1] = c[pBlock->get_selector(1, y)];
pPixels[2] = c[pBlock->get_selector(2, y)];
pPixels[3] = c[pBlock->get_selector(3, y)];
}
}
else
{
for (uint32_t y = 0; y < 4; y++, pPixels += 4)
{
pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]);
pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]);
pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]);
pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]);
}
}
return used_punchthrough;
}
// One 8-byte BC4 block: two 8-bit endpoints followed by sixteen 3-bit selectors packed into 6 bytes.
struct bc4_block
{
	enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };

	uint8_t m_endpoints[2];
	uint8_t m_selectors[cTotalSelectorBytes];

	inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
	inline uint32_t get_high_alpha() const { return m_endpoints[1]; }

	// True when the block uses the 6-value palette (first endpoint not greater than the second).
	inline bool is_alpha6_block() const { return !(get_low_alpha() > get_high_alpha()); }

	// Concatenates the six selector bytes into one 48-bit value, m_selectors[0] being the least significant byte.
	inline uint64_t get_selector_bits() const
	{
		uint64_t packed = 0;
		for (uint32_t i = 0; i < static_cast<uint32_t>(cTotalSelectorBytes); i++)
			packed |= static_cast<uint64_t>(m_selectors[i]) << (i * 8U);
		return packed;
	}

	// Returns the 3-bit selector of pixel (x, y) from the value produced by get_selector_bits().
	// Selectors are ordered row-first (index = y * 4 + x), with index 0 in the lowest 3 bits.
	inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const
	{
		assert((x < 4U) && (y < 4U));
		const uint32_t bit_offset = ((y * 4) + x) * cBC4SelectorBits;
		return static_cast<uint32_t>((selector_bits >> bit_offset) & (cMaxSelectorValues - 1));
	}

	// Fills pDst[0..7] with the 6-value palette: both endpoints, four interpolated values, then 0 and 255.
	// Returns 6, the number of endpoint-derived entries.
	static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h)
	{
		pDst[0] = static_cast<uint8_t>(l);
		pDst[1] = static_cast<uint8_t>(h);
		for (uint32_t i = 1; i <= 4; i++)
			pDst[1 + i] = static_cast<uint8_t>((l * (5 - i) + h * i) / 5);
		pDst[6] = 0;
		pDst[7] = 255;
		return 6;
	}

	// Fills pDst[0..7] with the 8-value palette: both endpoints followed by six interpolated values.
	// Returns 8.
	static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h)
	{
		pDst[0] = static_cast<uint8_t>(l);
		pDst[1] = static_cast<uint8_t>(h);
		for (uint32_t i = 1; i <= 6; i++)
			pDst[1 + i] = static_cast<uint8_t>((l * (7 - i) + h * i) / 7);
		return 8;
	}

	// Picks the 8-value palette when l > h and the 6-value palette otherwise.
	static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h)
	{
		return (l > h) ? get_block_values8(pDst, l, h) : get_block_values6(pDst, l, h);
	}
};
void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride)
{
static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8");
const bc4_block *pBlock = static_cast<const bc4_block *>(pBlock_bits);
uint8_t sel_values[8];
bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());
const uint64_t selector_bits = pBlock->get_selector_bits();
for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U))
{
pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)];
pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)];
pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)];
pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)];
}
}
// Decodes one BC3 block: a BC4 block for alpha followed by a BC1 block for color.
// Returns false if the color block uses 3-color punchthrough alpha mode, which isn't supported on some GPUs for BC3.
// The pixels are fully written either way.
bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels)
{
	const uint8_t *pBytes = static_cast<const uint8_t *>(pBlock_bits);

	// Color first (it overwrites alpha too), then the real alpha values on top.
	const bool color_is_3color = unpack_bc1(pBytes + sizeof(bc4_block), pPixels, true);

	unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba));

	return !color_is_3color;
}
// Decodes one BC5 block, i.e. two consecutive BC4 blocks: the first is written to the
// red channel and the second to the green channel. Blue and alpha are not touched.
void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels)
{
	const uint8_t *pRed_block = static_cast<const uint8_t *>(pBlock_bits);
	const uint8_t *pGreen_block = pRed_block + sizeof(bc4_block);

	unpack_bc4(pRed_block, &pPixels[0].r, sizeof(color_rgba));
	unpack_bc4(pGreen_block, &pPixels[0].g, sizeof(color_rgba));
}
// Bitfield overlay of a 16-byte BC7 mode 6 block, split into two 64-bit halves (unpack_bc7_mode6()
// static_asserts that the total size is 16 bytes).
// NOTE(review): the field order only matches the bitstream if the compiler allocates bitfields starting
// at the least significant bit on a little-endian target -- confirm for every supported toolchain.
struct bc7_mode_6
{
struct
{
// unpack_bc7_mode6() only accepts blocks where this equals (1 << 6).
uint64_t m_mode : 7;
// 7-bit endpoint components; the *0 fields belong to the first endpoint, the *1 fields to the second.
uint64_t m_r0 : 7;
uint64_t m_r1 : 7;
uint64_t m_g0 : 7;
uint64_t m_g1 : 7;
uint64_t m_b0 : 7;
uint64_t m_b1 : 7;
uint64_t m_a0 : 7;
uint64_t m_a1 : 7;
// Extra low bit appended to every component of the first endpoint.
uint64_t m_p0 : 1;
} m_lo;
union
{
struct
{
// Extra low bit appended to every component of the second endpoint.
uint64_t m_p1 : 1;
// Per-pixel selectors, named m_s<x><y>. The first one is only 3 bits wide; all others are 4 bits.
uint64_t m_s00 : 3;
uint64_t m_s10 : 4;
uint64_t m_s20 : 4;
uint64_t m_s30 : 4;
uint64_t m_s01 : 4;
uint64_t m_s11 : 4;
uint64_t m_s21 : 4;
uint64_t m_s31 : 4;
uint64_t m_s02 : 4;
uint64_t m_s12 : 4;
uint64_t m_s22 : 4;
uint64_t m_s32 : 4;
uint64_t m_s03 : 4;
uint64_t m_s13 : 4;
uint64_t m_s23 : 4;
uint64_t m_s33 : 4;
} m_hi;
// Raw 64-bit view of the same storage as m_hi.
uint64_t m_hi_bits;
};
};
// Interpolation weights (out of 64) indexed by a 4-bit selector; unpack_bc7_mode6() blends each
// endpoint component as (e0 * (64 - w) + e1 * w + 32) >> 6.
static const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
// The transcoder only outputs mode 6 at the moment, so this is easy.
bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels)
{
static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16");
const bc7_mode_6 &block = *static_cast<const bc7_mode_6 *>(pBlock_bits);
if (block.m_lo.m_mode != (1 << 6))
return false;
const uint32_t r0 = (block.m_lo.m_r0 << 1) | block.m_lo.m_p0;
const uint32_t g0 = (block.m_lo.m_g0 << 1) | block.m_lo.m_p0;
const uint32_t b0 = (block.m_lo.m_b0 << 1) | block.m_lo.m_p0;
const uint32_t a0 = (block.m_lo.m_a0 << 1) | block.m_lo.m_p0;
const uint32_t r1 = (block.m_lo.m_r1 << 1) | block.m_hi.m_p1;
const uint32_t g1 = (block.m_lo.m_g1 << 1) | block.m_hi.m_p1;
const uint32_t b1 = (block.m_lo.m_b1 << 1) | block.m_hi.m_p1;
const uint32_t a1 = (block.m_lo.m_a1 << 1) | block.m_hi.m_p1;
color_rgba vals[16];
for (uint32_t i = 0; i < 16; i++)
{
const uint32_t w = g_bc7_weights4[i];
const uint32_t iw = 64 - w;
vals[i].set_noclamp_rgba(
(r0 * iw + r1 * w + 32) >> 6,
(g0 * iw + g1 * w + 32) >> 6,
(b0 * iw + b1 * w + 32) >> 6,
(a0 * iw + a1 * w + 32) >> 6);
}
pPixels[0] = vals[block.m_hi.m_s00];
pPixels[1] = vals[block.m_hi.m_s10];
pPixels[2] = vals[block.m_hi.m_s20];
pPixels[3] = vals[block.m_hi.m_s30];
pPixels[4] = vals[block.m_hi.m_s01];
pPixels[5] = vals[block.m_hi.m_s11];
pPixels[6] = vals[block.m_hi.m_s21];
pPixels[7] = vals[block.m_hi.m_s31];
pPixels[8] = vals[block.m_hi.m_s02];
pPixels[9] = vals[block.m_hi.m_s12];
pPixels[10] = vals[block.m_hi.m_s22];
pPixels[11] = vals[block.m_hi.m_s32];
pPixels[12] = vals[block.m_hi.m_s03];
pPixels[13] = vals[block.m_hi.m_s13];
pPixels[14] = vals[block.m_hi.m_s23];
pPixels[15] = vals[block.m_hi.m_s33];
return true;
}
// Unpacks a single block of the given format to pPixels: RGBA, R, RG, or A depending on the format.
// The bool-returning form yields false when the format isn't handled here or when the per-format
// unpacker reports a failure (unpack_bc3, unpack_bc7_mode6, unpack_etc1), and true otherwise.
void unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels)
{
switch (fmt)
{
case cBC1:
{
if (detexGetModeBC1((uint8_t*)pBlock))
detexDecompressBlockBC1A((uint8_t*)pBlock, 0, (uint8_t*)pPixels);
else
detexDecompressBlockBC1((uint8_t*)pBlock, 0, (uint8_t*)pPixels);
unpack_bc1(pBlock, pPixels, true);
break;
}
case cBC3:
{
detexDecompressBlockBC3((uint8_t*)pBlock, 0, (uint8_t*)pPixels);
break;
return unpack_bc3(pBlock, pPixels);
}
case cBC4:
{
// Unpack to R
detexDecompressBlockBC4((uint8_t*)pBlock, 0, (uint8_t*)pPixels, sizeof(color_rgba));
unpack_bc4(pBlock, &pPixels[0].r, sizeof(color_rgba));
break;
}
case cBC5:
{
// Unpack to RG
detexDecompressBlockBC4((uint8_t*)pBlock, 0, (uint8_t*)pPixels, sizeof(color_rgba));
detexDecompressBlockBC4((uint8_t*)pBlock + sizeof(uint64_t), 0, (uint8_t*)pPixels + 1, sizeof(color_rgba));
unpack_bc5(pBlock, pPixels);
break;
}
case cBC7:
{
detexDecompressBlockBPTC((const uint8_t*)pBlock, UINT32_MAX, 0, (uint8_t*)pPixels);
break;
return unpack_bc7_mode6(pBlock, pPixels);
}
// Full ETC2 color blocks (planar/T/H modes) are currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color)
case cETC2_RGB:
case cETC1:
case cETC1S:
{
unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);
return unpack_etc1(*static_cast<const etc_block*>(pBlock), pPixels);
break;
}
case cETC2_RGBA:
{
// The EAC alpha block comes first; the ETC1 color block is the second 8-byte block (index 1).
unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels);
detexDecompressBlockETC2_EAC((const uint8_t*)pBlock, (uint8_t*)pPixels + 3, sizeof(color_rgba));
if (!unpack_etc1(static_cast<const etc_block*>(pBlock)[1], pPixels))
return false;
unpack_etc2_eac(pBlock, pPixels);
break;
}
case cETC2_ALPHA:
{
// Unpack to A
detexDecompressBlockETC2_EAC((const uint8_t*)pBlock, (uint8_t*)pPixels + 3, sizeof(color_rgba));
unpack_etc2_eac(pBlock, pPixels);
break;
}
default:
{
assert(0);
// TODO: remaining texture formats are not handled here yet
break;
return false;
}
}
return true;
}
bool gpu_image::unpack(image& img, bool pvrtc_wrap_addressing) const
@@ -120,19 +460,22 @@ namespace basisu
for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++)
pixels[i] = g_black_color;
bool success = true;
for (uint32_t by = 0; by < m_blocks_y; by++)
{
for (uint32_t bx = 0; bx < m_blocks_x; bx++)
{
const void* pBlock = get_block_ptr(bx, by);
unpack_block(m_fmt, pBlock, pixels);
if (!unpack_block(m_fmt, pBlock, pixels))
success = false;
img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height);
} // bx
} // by
return true;
return success;
}
static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A };

View File

@@ -120,6 +120,13 @@ namespace basisu
// GPU texture block unpacking
void unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels);
void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels);
bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha);
void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride);
bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels);
void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels);
bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels);
bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels);
} // namespace basisu

View File

@@ -471,6 +471,9 @@ static bool unpack_and_validate_mode(command_line_params &opts, bool validate_fl
return false;
}
uint32_t total_unpack_warnings = 0;
uint32_t total_pvrtc_nonpow2_warnings = 0;
for (uint32_t file_index = 0; file_index < opts.m_input_filenames.size(); file_index++)
{
const char *pInput_filename = opts.m_input_filenames[file_index].c_str();
@@ -554,9 +557,7 @@ static bool unpack_and_validate_mode(command_line_params &opts, bool validate_fl
for (uint32_t image_index = 0; image_index < fileinfo.m_total_images; image_index++)
gpu_images[tex_fmt][image_index].resize(fileinfo.m_image_mipmap_levels[image_index]);
}
bool pvrtc_nonpow2_warning = false;
// Now transcode the file to all supported texture formats and save mipmapped KTX files
for (uint32_t image_index = 0; image_index < fileinfo.m_total_images; image_index++)
{
@@ -578,13 +579,10 @@ static bool unpack_and_validate_mode(command_line_params &opts, bool validate_fl
{
if (!is_pow2(level_info.m_width) || !is_pow2(level_info.m_height))
{
if (!pvrtc_nonpow2_warning)
{
pvrtc_nonpow2_warning = true;
total_pvrtc_nonpow2_warnings++;
printf("Warning: Will not transcode image %u level %u res %ux%u to PVRTC1 (one or more dimension is not a power of 2)\n", image_index, level_index, level_info.m_width, level_info.m_height);
}
printf("Warning: Will not transcode image %u level %u res %ux%u to PVRTC1 (one or more dimension is not a power of 2)\n", image_index, level_index, level_info.m_width, level_info.m_height);
// Can't transcode this image level to PVRTC because it's not a pow2 (we're going to support transcoding non-pow2 to the next larger pow2 soon)
continue;
}
@@ -653,8 +651,8 @@ static bool unpack_and_validate_mode(command_line_params &opts, bool validate_fl
image u;
if (!gi[level_index].unpack(u))
{
error_printf("Failed unpacking GPU texture data (%u %u %u)\n", format_iter, image_index, level_index);
return false;
printf("Warning: Failed unpacking GPU texture data (%u %u %u). Unpacking as much as possible.\n", format_iter, image_index, level_index);
total_unpack_warnings++;
}
//u.crop(level_info.m_orig_width, level_info.m_orig_height);
@@ -682,11 +680,18 @@ static bool unpack_and_validate_mode(command_line_params &opts, bool validate_fl
} // image_index
} // format_iter
} // if (!validate_flag)
} // image_index
printf("Success\n");
if (total_pvrtc_nonpow2_warnings)
printf("Warning: %u images could not be transcoded to PVRTC1 because one or both dimensions were not a power of 2\n", total_pvrtc_nonpow2_warnings);
if (total_unpack_warnings)
printf("ATTENTION: %u total images had invalid GPU texture data!\n", total_unpack_warnings);
else
printf("Success\n");
return true;
}
@@ -792,7 +797,7 @@ static bool compare_mode(command_line_params &opts)
int main(int argc, const char **argv)
{
basisu_encoder_init();
printf("Basis Universal GPU Texture Compressor v" BASISU_TOOL_VERSION ", Copyright (C) 2017-2019 Binomial LLC, All rights reserved\n");
if (argc == 1)

View File

@@ -1,413 +0,0 @@
/*
Copyright (c) 2015 Harm Hanemaaijer <fgenfb@yahoo.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// RG: 4/27/19 - Lots of fixes to BC1-5 decompression code.
#include "decompress_bc.h"
/* Returns x / 3 using integer division. Used for the 1/3 and 2/3 */
/* interpolated colors in the BC1, BC2 and BC3 decoders. */
static inline int detexDivideBy3(int x)
{
return x / 3;
}
/* Returns x / 5 using integer division. Used for the interpolated alpha */
/* values of the 6-value palette (alpha0 <= alpha1) in the BC3/BC4 decoders. */
static inline int detexDivideBy5(int x)
{
return x / 5;
}
/* Returns x / 7 using integer division. Used for the interpolated alpha */
/* values of the 8-value palette (alpha0 > alpha1) in the BC3/BC4 decoders. */
static inline int detexDivideBy7(int x)
{
return x / 7;
}
/* Decompress a 64-bit 4x4 pixel texture block compressed using the BC1 */
/* format. Sixteen pixels are written consecutively to pixel_buffer as */
/* R, G, B, A bytes. Alpha is always 0xFF, so index 3 of a three-color */
/* block comes out as opaque black. flags is ignored. Always returns true. */
/* The block is read byte by byte in little-endian order and the pixels */
/* are stored byte by byte, so the result depends neither on the host byte */
/* order nor on the alignment of either buffer. */
bool detexDecompressBlockBC1(const uint8_t* DETEX_RESTRICT bitstring,
	uint32_t flags, uint8_t* DETEX_RESTRICT pixel_buffer)
{
	(void)flags;
	const uint32_t color0 = (uint32_t)bitstring[0] | ((uint32_t)bitstring[1] << 8);
	const uint32_t color1 = (uint32_t)bitstring[2] | ((uint32_t)bitstring[3] << 8);
	// Decode the two 5-6-5 RGB colors.
	int color_r[4], color_g[4], color_b[4];
	color_b[0] = (int)((color0 & 0x001F) << 3);
	color_g[0] = (int)((color0 & 0x07E0) >> (5 - 2));
	color_r[0] = (int)((color0 & 0xF800) >> (11 - 3));
	color_b[1] = (int)((color1 & 0x001F) << 3);
	color_g[1] = (int)((color1 & 0x07E0) >> (5 - 2));
	color_r[1] = (int)((color1 & 0xF800) >> (11 - 3));
	// Replicate the top bits into the low bits to fill the 8-bit range.
	for (int e = 0; e < 2; e++)
	{
		color_b[e] |= (color_b[e] >> 5);
		color_g[e] |= (color_g[e] >> 6);
		color_r[e] |= (color_r[e] >> 5);
	}
	if (color0 > color1)
	{
		// Four-color mode: two extra colors at 1/3 and 2/3 between the endpoints.
		color_r[2] = detexDivideBy3(2 * color_r[0] + color_r[1]);
		color_g[2] = detexDivideBy3(2 * color_g[0] + color_g[1]);
		color_b[2] = detexDivideBy3(2 * color_b[0] + color_b[1]);
		color_r[3] = detexDivideBy3(color_r[0] + 2 * color_r[1]);
		color_g[3] = detexDivideBy3(color_g[0] + 2 * color_g[1]);
		color_b[3] = detexDivideBy3(color_b[0] + 2 * color_b[1]);
	}
	else
	{
		// Three-color mode: midpoint plus black.
		color_r[2] = (color_r[0] + color_r[1]) / 2;
		color_g[2] = (color_g[0] + color_g[1]) / 2;
		color_b[2] = (color_b[0] + color_b[1]) / 2;
		color_r[3] = color_g[3] = color_b[3] = 0;
	}
	const uint32_t pixels = (uint32_t)bitstring[4] | ((uint32_t)bitstring[5] << 8) |
		((uint32_t)bitstring[6] << 16) | ((uint32_t)bitstring[7] << 24);
	for (int i = 0; i < 16; i++)
	{
		const int pixel = (int)((pixels >> (i * 2)) & 0x3);
		uint8_t* out = pixel_buffer + i * 4;
		out[0] = (uint8_t)color_r[pixel];
		out[1] = (uint8_t)color_g[pixel];
		out[2] = (uint8_t)color_b[pixel];
		out[3] = 0xFF;
	}
	return true;
}
/* Returns the mode of a BC1 block: 0 when the first stored color is greater */
/* than the second, 1 otherwise (including when the two are equal). */
/* Both 16-bit colors are assembled from individual bytes in little-endian */
/* order, so the result depends neither on the host byte order nor on the */
/* alignment of bitstring. Only the first four bytes are read. */
uint32_t detexGetModeBC1(const uint8_t * bitstring)
{
	const uint32_t color0 = (uint32_t)bitstring[0] | ((uint32_t)bitstring[1] << 8);
	const uint32_t color1 = (uint32_t)bitstring[2] | ((uint32_t)bitstring[3] << 8);
	if (color0 > color1)
		return 0;
	else
		return 1;
}
/* Decompress a 64-bit 4x4 pixel texture block compressed using the BC1A */
/* format. Sixteen pixels are written consecutively to pixel_buffer as */
/* R, G, B, A bytes. Index 3 of a three-color block is written as */
/* transparent black; every other pixel gets alpha 0xFF. */
/* Returns false without writing anything when the block is opaque (first */
/* color greater than the second) and DETEX_DECOMPRESS_FLAG_NON_OPAQUE_ONLY */
/* is set, or when it is not opaque and DETEX_DECOMPRESS_FLAG_OPAQUE_ONLY is */
/* set. Returns true otherwise. */
/* The block is read byte by byte in little-endian order and the pixels */
/* are stored byte by byte, so the result depends neither on the host byte */
/* order nor on the alignment of either buffer. */
bool detexDecompressBlockBC1A(const uint8_t * DETEX_RESTRICT bitstring,
	uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer)
{
	const uint32_t color0 = (uint32_t)bitstring[0] | ((uint32_t)bitstring[1] << 8);
	const uint32_t color1 = (uint32_t)bitstring[2] | ((uint32_t)bitstring[3] << 8);
	bool opaque = (color0 > color1);
	if (opaque && (flags & DETEX_DECOMPRESS_FLAG_NON_OPAQUE_ONLY))
		return false;
	if (!opaque && (flags & DETEX_DECOMPRESS_FLAG_OPAQUE_ONLY))
		return false;
	// Decode the two 5-6-5 RGB colors.
	int color_r[4], color_g[4], color_b[4], color_a[4];
	color_b[0] = (int)((color0 & 0x001F) << 3);
	color_g[0] = (int)((color0 & 0x07E0) >> (5 - 2));
	color_r[0] = (int)((color0 & 0xF800) >> (11 - 3));
	color_b[1] = (int)((color1 & 0x001F) << 3);
	color_g[1] = (int)((color1 & 0x07E0) >> (5 - 2));
	color_r[1] = (int)((color1 & 0xF800) >> (11 - 3));
	// Replicate the top bits into the low bits to fill the 8-bit range.
	for (int e = 0; e < 2; e++)
	{
		color_b[e] |= (color_b[e] >> 5);
		color_g[e] |= (color_g[e] >> 6);
		color_r[e] |= (color_r[e] >> 5);
	}
	color_a[0] = color_a[1] = color_a[2] = color_a[3] = 0xFF;
	if (opaque)
	{
		// Four-color mode: two extra colors at 1/3 and 2/3 between the endpoints.
		color_r[2] = detexDivideBy3(2 * color_r[0] + color_r[1]);
		color_g[2] = detexDivideBy3(2 * color_g[0] + color_g[1]);
		color_b[2] = detexDivideBy3(2 * color_b[0] + color_b[1]);
		color_r[3] = detexDivideBy3(color_r[0] + 2 * color_r[1]);
		color_g[3] = detexDivideBy3(color_g[0] + 2 * color_g[1]);
		color_b[3] = detexDivideBy3(color_b[0] + 2 * color_b[1]);
	}
	else
	{
		// Three-color mode: midpoint plus transparent black.
		color_r[2] = (color_r[0] + color_r[1]) / 2;
		color_g[2] = (color_g[0] + color_g[1]) / 2;
		color_b[2] = (color_b[0] + color_b[1]) / 2;
		color_r[3] = color_g[3] = color_b[3] = color_a[3] = 0;
	}
	const uint32_t pixels = (uint32_t)bitstring[4] | ((uint32_t)bitstring[5] << 8) |
		((uint32_t)bitstring[6] << 16) | ((uint32_t)bitstring[7] << 24);
	for (int i = 0; i < 16; i++)
	{
		const int pixel = (int)((pixels >> (i * 2)) & 0x3);
		uint8_t* out = pixel_buffer + i * 4;
		out[0] = (uint8_t)color_r[pixel];
		out[1] = (uint8_t)color_g[pixel];
		out[2] = (uint8_t)color_b[pixel];
		out[3] = (uint8_t)color_a[pixel];
	}
	return true;
}
/* Decompress a 128-bit 4x4 pixel texture block compressed using the BC2 */
/* format: bytes 0-7 hold sixteen 4-bit alpha values, bytes 8-11 the two */
/* 5-6-5 colors and bytes 12-15 the sixteen 2-bit color indices. Sixteen */
/* pixels are written consecutively to pixel_buffer as R, G, B, A bytes. */
/* Returns false without writing anything when DETEX_DECOMPRESS_FLAG_ENCODE */
/* is set and the first color is not greater than the second; returns true */
/* otherwise. */
/* The block is read byte by byte in little-endian order and the pixels */
/* are stored byte by byte, so the result depends neither on the host byte */
/* order nor on the alignment of either buffer. */
bool detexDecompressBlockBC2(const uint8_t * DETEX_RESTRICT bitstring,
	uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer)
{
	const uint32_t color0 = (uint32_t)bitstring[8] | ((uint32_t)bitstring[9] << 8);
	const uint32_t color1 = (uint32_t)bitstring[10] | ((uint32_t)bitstring[11] << 8);
	if (color0 <= color1 &&
		(flags & DETEX_DECOMPRESS_FLAG_ENCODE))
		// GeForce 6 and 7 series produce wrong result in this case.
		return false;
	int color_r[4], color_g[4], color_b[4];
	color_b[0] = (int)((color0 & 0x001F) << 3);
	color_g[0] = (int)((color0 & 0x07E0) >> (5 - 2));
	color_r[0] = (int)((color0 & 0xF800) >> (11 - 3));
	color_b[1] = (int)((color1 & 0x001F) << 3);
	color_g[1] = (int)((color1 & 0x07E0) >> (5 - 2));
	color_r[1] = (int)((color1 & 0xF800) >> (11 - 3));
	// Replicate the top bits into the low bits to fill the 8-bit range.
	for (int e = 0; e < 2; e++)
	{
		color_b[e] |= (color_b[e] >> 5);
		color_g[e] |= (color_g[e] >> 6);
		color_r[e] |= (color_r[e] >> 5);
	}
	// The color part is always decoded in four-color mode.
	color_r[2] = detexDivideBy3(2 * color_r[0] + color_r[1]);
	color_g[2] = detexDivideBy3(2 * color_g[0] + color_g[1]);
	color_b[2] = detexDivideBy3(2 * color_b[0] + color_b[1]);
	color_r[3] = detexDivideBy3(color_r[0] + 2 * color_r[1]);
	color_g[3] = detexDivideBy3(color_g[0] + 2 * color_g[1]);
	color_b[3] = detexDivideBy3(color_b[0] + 2 * color_b[1]);
	const uint32_t pixels = (uint32_t)bitstring[12] | ((uint32_t)bitstring[13] << 8) |
		((uint32_t)bitstring[14] << 16) | ((uint32_t)bitstring[15] << 24);
	for (int i = 0; i < 16; i++)
	{
		const int pixel = (int)((pixels >> (i * 2)) & 0x3);
		// Two alpha nibbles per byte, the even pixel in the low nibble.
		const int nibble = (bitstring[i >> 1] >> ((i & 1) * 4)) & 0xF;
		const int alpha = nibble * 255 / 15;
		uint8_t* out = pixel_buffer + i * 4;
		out[0] = (uint8_t)color_r[pixel];
		out[1] = (uint8_t)color_g[pixel];
		out[2] = (uint8_t)color_b[pixel];
		out[3] = (uint8_t)alpha;
	}
	return true;
}
/* Decompress a 128-bit 4x4 pixel texture block compressed using the BC3 */
/* format: bytes 0-1 hold the two alpha endpoints, bytes 2-7 sixteen 3-bit */
/* alpha codes, bytes 8-11 the two 5-6-5 colors and bytes 12-15 the sixteen */
/* 2-bit color indices. Sixteen pixels are written consecutively to */
/* pixel_buffer as R, G, B, A bytes. */
/* Returns false without writing anything when DETEX_DECOMPRESS_FLAG_ENCODE */
/* is set and the first color is not greater than the second; returns true */
/* otherwise. */
/* The block is read byte by byte in little-endian order and the pixels */
/* are stored byte by byte, so the result depends neither on the host byte */
/* order nor on the alignment of either buffer. */
bool detexDecompressBlockBC3(const uint8_t * DETEX_RESTRICT bitstring,
	uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer)
{
	const int alpha0 = bitstring[0];
	const int alpha1 = bitstring[1];
	const uint32_t color0 = (uint32_t)bitstring[8] | ((uint32_t)bitstring[9] << 8);
	const uint32_t color1 = (uint32_t)bitstring[10] | ((uint32_t)bitstring[11] << 8);
	if (color0 <= color1 &&
		(flags & DETEX_DECOMPRESS_FLAG_ENCODE))
		// GeForce 6 and 7 series produce wrong result in this case.
		return false;
	int color_r[4], color_g[4], color_b[4];
	color_b[0] = (int)((color0 & 0x001F) << 3);
	color_g[0] = (int)((color0 & 0x07E0) >> (5 - 2));
	color_r[0] = (int)((color0 & 0xF800) >> (11 - 3));
	color_b[1] = (int)((color1 & 0x001F) << 3);
	color_g[1] = (int)((color1 & 0x07E0) >> (5 - 2));
	color_r[1] = (int)((color1 & 0xF800) >> (11 - 3));
	// Replicate the top bits into the low bits to fill the 8-bit range.
	for (int e = 0; e < 2; e++)
	{
		color_b[e] |= (color_b[e] >> 5);
		color_g[e] |= (color_g[e] >> 6);
		color_r[e] |= (color_r[e] >> 5);
	}
	// The color part is always decoded in four-color mode.
	color_r[2] = detexDivideBy3(2 * color_r[0] + color_r[1]);
	color_g[2] = detexDivideBy3(2 * color_g[0] + color_g[1]);
	color_b[2] = detexDivideBy3(2 * color_b[0] + color_b[1]);
	color_r[3] = detexDivideBy3(color_r[0] + 2 * color_r[1]);
	color_g[3] = detexDivideBy3(color_g[0] + 2 * color_g[1]);
	color_b[3] = detexDivideBy3(color_b[0] + 2 * color_b[1]);
	// Build the 8-entry alpha palette once instead of re-deriving it per pixel.
	int alpha_table[8];
	alpha_table[0] = alpha0;
	alpha_table[1] = alpha1;
	if (alpha0 > alpha1)
	{
		// Six interpolated values between the endpoints.
		for (int k = 2; k < 8; k++)
			alpha_table[k] = detexDivideBy7((8 - k) * alpha0 + (k - 1) * alpha1);
	}
	else
	{
		// Four interpolated values, then explicit 0 and 0xFF.
		for (int k = 2; k < 6; k++)
			alpha_table[k] = detexDivideBy5((6 - k) * alpha0 + (k - 1) * alpha1);
		alpha_table[6] = 0;
		alpha_table[7] = 0xFF;
	}
	const uint32_t pixels = (uint32_t)bitstring[12] | ((uint32_t)bitstring[13] << 8) |
		((uint32_t)bitstring[14] << 16) | ((uint32_t)bitstring[15] << 24);
	// 48 bits of alpha codes, lowest byte first.
	uint64_t alpha_bits = 0;
	for (int k = 0; k < 6; k++)
		alpha_bits |= (uint64_t)bitstring[2 + k] << (8 * k);
	for (int i = 0; i < 16; i++)
	{
		const int pixel = (int)((pixels >> (i * 2)) & 0x3);
		const int code = (int)((alpha_bits >> (i * 3)) & 0x7);
		uint8_t* out = pixel_buffer + i * 4;
		out[0] = (uint8_t)color_r[pixel];
		out[1] = (uint8_t)color_g[pixel];
		out[2] = (uint8_t)color_b[pixel];
		out[3] = (uint8_t)alpha_table[code];
	}
	return true;
}
/* Decompress a 64-bit 4x4 pixel texture block compressed using the BC4 */
/* format: bytes 0-1 hold the two endpoints and bytes 2-7 sixteen 3-bit */
/* codes. One byte is written per pixel; pixel i goes to */
/* pixel_buffer[i * stride]. flags is ignored. Always returns true. */
/* The codes are assembled byte by byte in little-endian order, so the */
/* result depends neither on the host byte order nor on the alignment of */
/* bitstring. */
bool detexDecompressBlockBC4(const uint8_t * DETEX_RESTRICT bitstring, uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer, int stride)
{
	(void)flags;
	const int alpha0 = bitstring[0];
	const int alpha1 = bitstring[1];
	// Build the 8-entry palette once instead of re-deriving it per pixel.
	int alpha_table[8];
	alpha_table[0] = alpha0;
	alpha_table[1] = alpha1;
	if (alpha0 > alpha1)
	{
		// Six interpolated values between the endpoints.
		for (int k = 2; k < 8; k++)
			alpha_table[k] = detexDivideBy7((8 - k) * alpha0 + (k - 1) * alpha1);
	}
	else
	{
		// Four interpolated values, then explicit 0 and 0xFF.
		for (int k = 2; k < 6; k++)
			alpha_table[k] = detexDivideBy5((6 - k) * alpha0 + (k - 1) * alpha1);
		alpha_table[6] = 0;
		alpha_table[7] = 0xFF;
	}
	// 48 bits of codes, lowest byte first.
	uint64_t alpha_bits = 0;
	for (int k = 0; k < 6; k++)
		alpha_bits |= (uint64_t)bitstring[2 + k] << (8 * k);
	for (int i = 0; i < 16; i++)
	{
		const int code = (int)((alpha_bits >> (i * 3)) & 0x7);
		*(pixel_buffer + i * stride) = (uint8_t)alpha_table[code];
	}
	return true;
}

View File

@@ -1,35 +0,0 @@
/*
Copyright (c) 2015 Harm Hanemaaijer <fgenfb@yahoo.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#pragma once
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include "detex_common.h"
#ifdef __cplusplus
extern "C" {
#endif
// Block decoders for the BC1..BC4 formats. All of them take a pointer to one
// compressed block (bitstring) and a destination buffer (pixel_buffer).
// NOTE(review): only detexDecompressBlockBC4 and the end of the BC3 decoder
// are visible alongside this header; the remaining contracts should be
// confirmed against decompress_bc.c.
uint32_t detexGetModeBC1(const uint8_t* bitstring);
bool detexDecompressBlockBC1(const uint8_t* DETEX_RESTRICT bitstring, uint32_t flags, uint8_t* DETEX_RESTRICT pixel_buffer);
bool detexDecompressBlockBC1A(const uint8_t* DETEX_RESTRICT bitstring, uint32_t flags, uint8_t* DETEX_RESTRICT pixel_buffer);
bool detexDecompressBlockBC2(const uint8_t* DETEX_RESTRICT bitstring, uint32_t flags, uint8_t* DETEX_RESTRICT pixel_buffer);
bool detexDecompressBlockBC3(const uint8_t* DETEX_RESTRICT bitstring, uint32_t flags, uint8_t* DETEX_RESTRICT pixel_buffer);
// Decodes 16 single-channel 8-bit values; value i is stored at
// pixel_buffer[i * stride]. The flags argument is ignored.
bool detexDecompressBlockBC4(const uint8_t* DETEX_RESTRICT bitstring, uint32_t flags, uint8_t* DETEX_RESTRICT pixel_buffer, int stride);
#ifdef __cplusplus
}
#endif

View File

@@ -1,816 +0,0 @@
/*
Copyright (c) 2015 Harm Hanemaaijer <fgenfb@yahoo.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// RG: 4/27/19 - Lots of fixes to BC7 decompression code.
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include "decompress_bc7.h"
// A 128-bit compressed block viewed as a bit stream.
//   data0 - bits 0..63 of the block
//   data1 - bits 64..127 of the block
//   index - position of the next bit to be read
typedef struct {
	uint64_t data0;
	uint64_t data1;
	int index;
} detexBlock128;

// Read nu_bits bits from the block starting at block->index, least
// significant bit first, and advance block->index by nu_bits.
// The first bit read becomes bit 0 of the returned value.
// The caller must keep the read inside the 128-bit block; no bounds check
// is performed.
uint32_t detexBlock128ExtractBits(detexBlock128 *block, int nu_bits) {
	uint32_t result = 0;
	for (int out_pos = 0; out_pos < nu_bits; out_pos++, block->index++) {
		const int pos = block->index;
		// Pick the 64-bit half that holds the bit and its offset inside it.
		const uint64_t word = (pos < 64) ? block->data0 : block->data1;
		const int offset = (pos < 64) ? pos : pos - 64;
		const uint64_t bit = (word >> offset) & 1u;
		result |= (uint32_t)(bit << out_pos);
	}
	return result;
}
// Channel accessors for a packed 32-bit pixel: red occupies bits 0..7,
// green bits 8..15, blue bits 16..23 and alpha bits 24..31.
static DETEX_INLINE_ONLY uint32_t detexPixel32GetR8(uint32_t pixel) {
	const uint32_t red = pixel & 0xFF;
	return red;
}
static DETEX_INLINE_ONLY uint32_t detexPixel32GetG8(uint32_t pixel) {
	const uint32_t green = (pixel >> 8) & 0xFF;
	return green;
}
static DETEX_INLINE_ONLY uint32_t detexPixel32GetB8(uint32_t pixel) {
	const uint32_t blue = (pixel >> 16) & 0xFF;
	return blue;
}
static DETEX_INLINE_ONLY uint32_t detexPixel32GetA8(uint32_t pixel) {
	const uint32_t alpha = (pixel >> 24) & 0xFF;
	return alpha;
}
// Helpers that move a channel value into its position inside a packed
// 32-bit pixel (red in bits 0..7, green 8..15, blue 16..23, alpha 24..31).
// The argument is converted to uint32_t and shifted; it is not masked.
static DETEX_INLINE_ONLY uint32_t detexPack32R8(int r) {
	const uint32_t channel = (uint32_t)r;
	return channel;
}
static DETEX_INLINE_ONLY uint32_t detexPack32G8(int g) {
	const uint32_t channel = (uint32_t)g;
	return channel << 8;
}
static DETEX_INLINE_ONLY uint32_t detexPack32B8(int b) {
	const uint32_t channel = (uint32_t)b;
	return channel << 16;
}
static DETEX_INLINE_ONLY uint32_t detexPack32A8(int a) {
	const uint32_t channel = (uint32_t)a;
	return channel << 24;
}
// Combine all four channels into one packed pixel.
static DETEX_INLINE_ONLY uint32_t detexPack32RGBA8(int r, int g, int b, int a) {
	uint32_t packed = (uint32_t)r;
	packed |= (uint32_t)g << 8;
	packed |= (uint32_t)b << 16;
	packed |= (uint32_t)a << 24;
	return packed;
}
uint32_t detexBlock128ExtractBits(detexBlock128 *block, int nu_bits);
/* Extract the bit field bit0..bit1 (both inclusive, bit0 being the lowest) */
/* from a 64-bit word and return it right-aligned. */
static DETEX_INLINE_ONLY uint32_t detexGetBits64(uint64_t data, int bit0, int bit1) {
	// A shift by 64 is not allowed, so the full-width mask is special-cased.
	const uint64_t keep = (bit1 == 63) ? UINT64_MAX : (((uint64_t)1 << (bit1 + 1)) - 1);
	const uint64_t field = (data & keep) >> bit0;
	return (uint32_t)field;
}
/* Return reversed bitfield (bit1 to bit0) from 64-bit bitstring. */
// Assumes bit0 > bit1.
// Reverse the bits.
/* Clear bit0 to bit1 of 64-bit bitstring. */
/* Set bit0 to bit1 of 64-bit bitstring. */
// Partition table for blocks with two subsets: 64 partition patterns of
// 16 entries each. Entry [partition_set_id * 16 + i] is the subset (0 or 1)
// that pixel i of the 4x4 block belongs to.
const uint8_t detex_bptc_table_P2[64 * 16] = {
0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,
0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,
0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,
0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1,
0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,
0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1,
0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,
0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1,
0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,
0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0,
0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0,
0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,
0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,
0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,
0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,
0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0,
0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0,
0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,
0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1,
0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0,
0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0,
0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0,
0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0,
0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,
0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1,
0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,
0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0,
0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0,
0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,
0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1,
0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1,
0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,
0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0,
0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,
0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0,
0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1,
0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1,
0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0,
0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0,
0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1,
0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,
0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,
0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1,
0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,
0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0,
0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0,
0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1
};
// Partition table for blocks with three subsets: 64 partition patterns of
// 16 entries each. Entry [partition_set_id * 16 + i] is the subset (0, 1 or
// 2) that pixel i of the 4x4 block belongs to.
const uint8_t detex_bptc_table_P3[64 * 16] = {
0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2,
0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1,
0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1,
0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1,
0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2,
0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2,
0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1,
0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1,
0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,
0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,
0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,
0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,
0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,
0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2,
0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2,
0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0,
0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2,
0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0,
0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2,
0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1,
0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2,
0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1,
0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2,
0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0,
0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0,
0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2,
0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0,
0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1,
0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2,
0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2,
0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1,
0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1,
0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2,
0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1,
0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2,
0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0,
0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0,
0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,
0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0,
0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1,
0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1,
0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2,
0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1,
0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2,
0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1,
0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1,
0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1,
0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1,
0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2,
0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1,
0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2,
0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2,
0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2,
0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2,
0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2,
0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2,
0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2,
0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2,
0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2,
0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2,
0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1,
0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2,
0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2,
0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0,
};
// Anchor pixel of subset 1 in two-subset blocks, indexed by partition set id.
// The anchor pixel of a subset stores one index bit fewer than the other
// pixels (its highest index bit is implicitly zero).
const uint8_t detex_bptc_table_anchor_index_second_subset[64] = {
15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,
15, 2, 8, 2, 2, 8, 8,15,
2, 8, 2, 2, 8, 8, 2, 2,
15,15, 6, 8, 2, 8,15,15,
2, 8, 2, 2, 2,15,15, 6,
6, 2, 6, 8,15,15, 2, 2,
15,15,15,15,15, 2, 2,15
};
// Anchor pixel of subset 1 in three-subset blocks, indexed by partition set id.
const uint8_t detex_bptc_table_anchor_index_second_subset_of_three[64] = {
3, 3,15,15, 8, 3,15,15,
8, 8, 6, 6, 6, 5, 3, 3,
3, 3, 8,15, 3, 3, 6,10,
5, 8, 8, 6, 8, 5,15,15,
8,15, 3, 5, 6,10, 8,15,
15, 3,15, 5,15,15,15,15,
3,15, 5, 5, 5, 8, 5,10,
5,10, 8,13,15,12, 3, 3
};
// Anchor pixel of subset 2 in three-subset blocks, indexed by partition set id.
const uint8_t detex_bptc_table_anchor_index_third_subset[64] = {
15, 8, 8, 3,15,15, 3, 8,
15,15,15,15,15,15,15, 8,
15, 8,15, 3,15, 8,15, 8,
3,15, 6,10,15,15,10, 8,
15, 3,15,10,10, 8, 9,10,
6,15, 8,15, 3, 6, 6, 8,
15, 3,15,15,15,15,15,15,
15,15,15,15, 3,15,15, 8
};
// Interpolation weights (out of 64) for 2-bit, 3-bit and 4-bit indices.
// A weight w blends two endpoints as ((64 - w) * e0 + w * e1 + 32) >> 6.
const uint16_t detex_bptc_table_aWeight2[4] = {
0, 21, 43, 64
};
const uint16_t detex_bptc_table_aWeight3[8] = {
0, 9, 18, 27, 37, 46, 55, 64
};
const uint16_t detex_bptc_table_aWeight4[16] = {
0, 4, 9, 13, 17, 21, 26, 30,
34, 38, 43, 47, 51, 55, 60, 64
};
// BPTC mode layout:
//
// Number of subsets = { 3, 2, 3, 2, 1, 1, 1, 2 };
// Partition bits = { 4, 6, 6, 6, 0, 0, 0, 6 };
// Rotation bits = { 0, 0, 0, 0, 2, 2, 0, 0 };
// Mode 4 has one index selection bit.
//
// #subsets color alpha before color index after color index after After Index
// alpha pbits bits (*)
// Mode 0 3 4 0 1 + 4 = 5 5 + 6 * 3 * 4 = 77 77 + 6 = 83 + 48 - 3 = 128
// Mode 1 2 6 0 2 + 6 = 8 8 + 4 * 3 * 6 = 80 80 + 2 = 82 + 48 - 2 = 128
// Mode 2 3 5 0 3 + 6 = 9 9 + 6 * 3 * 5 = 99 99 99 + 32 - 3 = 128
// Mode 3 2 7 0 4 + 6 = 10 10 + 4 * 3 * 7 = 94 94 + 4 = 98 + 32 - 2 = 128
// Mode 4 1 5 6 5 + 2 + 1 = 8 8 + 2 * 3 * 5 = 38 38 + 2 * 6 = 50 50 + 80 - 2 = 128
// Mode 5 1 7 8 6 + 2 = 8 8 + 2 * 3 * 7 = 50 50 + 2 * 8 = 66 66 + 64 - 2 = 128
// Mode 6 1 7 7 7 7 + 2 * 3 * 7 = 49 49 + 2 * 7 = 63 + 2 = 65 + 64 - 1 = 128
// Mode 7 2 5 5 8 + 6 = 14 14 + 4 * 3 * 5 = 74 74 + 4 * 5 = 94 + 4 = 98 + 32 - 2 = 128
//
// (*) For formats without alpha, the number of index bits is reduced by #subsets anchor bits.
// For formats with alpha, the number of index bits is reduced by 2 * #subsets by the anchor bits.
// Stored bits per colour endpoint component for each mode (P-bit excluded).
static const uint8_t color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 };
// Note: precision includes P-bits!
static const uint8_t color_precision_plus_pbit_table[8] = { 5, 7, 5, 8, 5, 7, 8, 6 };
// Table look-up; mode must be in 0..7 (not checked).
static DETEX_INLINE_ONLY uint8_t GetColorComponentPrecision(int mode) {
return color_precision_table[mode];
}
// Table look-up; mode must be in 0..7 (not checked).
static DETEX_INLINE_ONLY uint8_t GetColorComponentPrecisionPlusPbit(int mode) {
return color_precision_plus_pbit_table[mode];
}
// Stored bits per alpha endpoint for each mode (P-bit excluded); 0 means the
// mode carries no alpha endpoints.
static const int8_t alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 };
// Note: precision include P-bits!
static const uint8_t alpha_precision_plus_pbit_table[8] = { 0, 0, 0, 0, 6, 8, 8, 6 };
// Table look-up; mode must be in 0..7 (not checked).
static DETEX_INLINE_ONLY uint8_t GetAlphaComponentPrecision(int mode) {
return alpha_precision_table[mode];
}
// Table look-up; mode must be in 0..7 (not checked).
static DETEX_INLINE_ONLY uint8_t GetAlphaComponentPrecisionPlusPbit(int mode) {
return alpha_precision_plus_pbit_table[mode];
}
// Per mode: how many colour components have all of their endpoint bits inside
// the first 64-bit word (data0). The -1 entry belongs to mode 1, which is
// decoded by DecompressBlockBPTCMode1 instead of this path.
static const int8_t components_in_qword0_table[8] = { 2, -1, 1, 1, 3, 3, 3, 2 };
/* Extract endpoint colors. */
// Reads the raw (not yet expanded) endpoint values starting at block->index
// and advances block->index past them.
// Output layout: endpoint_array[subset * 8 + endpoint * 4 + component], where
// component 0..2 are the colour channels and 3 is alpha. In the bit stream
// the values are grouped per component, then per subset, then per endpoint.
static void ExtractEndpoints(int mode, int nu_subsets, detexBlock128 * DETEX_RESTRICT block,
uint8_t * DETEX_RESTRICT endpoint_array) {
// Optimized version avoiding the use of block_extract_bits().
int components_in_qword0 = components_in_qword0_table[mode];
uint64_t data = block->data0 >> block->index;
uint8_t precision = GetColorComponentPrecision(mode);
uint8_t mask = (1 << precision) - 1;
// Bits occupied by one colour component across all subsets and endpoints.
int total_bits_per_component = nu_subsets * 2 * precision;
for (int i = 0; i < components_in_qword0; i++) // For each color component.
for (int j = 0; j < nu_subsets; j++) // For each subset.
for (int k = 0; k < 2; k++) { // For each endpoint.
endpoint_array[j * 8 + k * 4 + i] = data & mask;
data >>= precision;
}
block->index += components_in_qword0 * total_bits_per_component;
if (components_in_qword0 < 3) {
// Handle the color component that crosses the boundary between data0 and data1
// The remaining bits of data0 are combined with the low bits of data1.
data = block->data0 >> block->index;
data |= block->data1 << (64 - block->index);
int i = components_in_qword0;
for (int j = 0; j < nu_subsets; j++) // For each subset.
for (int k = 0; k < 2; k++) { // For each endpoint.
endpoint_array[j * 8 + k * 4 + i] = data & mask;
data >>= precision;
}
block->index += total_bits_per_component;
}
if (components_in_qword0 < 2) {
// Handle the color component that is wholly in data1.
data = block->data1 >> (block->index - 64);
int i = 2;
for (int j = 0; j < nu_subsets; j++) // For each subset.
for (int k = 0; k < 2; k++) { // For each endpoint.
endpoint_array[j * 8 + k * 4 + i] = data & mask;
data >>= precision;
}
block->index += total_bits_per_component;
}
// Alpha component.
if (GetAlphaComponentPrecision(mode) > 0) {
// For mode 7, the alpha data is wholly in data1.
// For modes 4 and 6, the alpha data is wholly in data0.
// For mode 5, the alpha data is in data0 and data1.
if (mode == 7)
data = block->data1 >> (block->index - 64);
else if (mode == 5)
// The low two bits of data1 are appended above the bits left in data0.
data = (block->data0 >> block->index) | ((block->data1 & 0x3) << 14);
else
data = block->data0 >> block->index;
uint8_t alpha_precision = GetAlphaComponentPrecision(mode);
uint8_t msk = (1 << alpha_precision) - 1;
for (int j = 0; j < nu_subsets; j++)
for (int k = 0; k < 2; k++) { // For each endpoint.
endpoint_array[j * 8 + k * 4 + 3] = data & msk;
data >>= alpha_precision;
}
block->index += nu_subsets * 2 * alpha_precision;
}
}
// Per mode: non-zero when endpoint P-bits follow the endpoint data.
static const uint8_t mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 };
// Expands the raw endpoint values produced by ExtractEndpoints to full 8-bit
// components, in place. Reads one P-bit per endpoint from block->index when
// the mode has them (advancing block->index), then shifts each component so
// its most significant bit lands in bit 7 and fills the vacated low bits from
// the high bits. For modes 0..3 alpha is forced to 0xFF.
static void FullyDecodeEndpoints(uint8_t * DETEX_RESTRICT endpoint_array, int nu_subsets,
int mode, detexBlock128 * DETEX_RESTRICT block) {
if (mode_has_p_bits[mode]) {
// Mode 1 (shared P-bits) handled elsewhere.
// Extract end-point P-bits.
uint32_t bits;
if (block->index < 64)
{
bits = (uint32_t)(block->data0 >> block->index);
// The P-bits may straddle the data0/data1 boundary.
if ((block->index + nu_subsets * 2) > 64)
{
bits |= (block->data1 << (64 - block->index));
}
}
else
bits = (uint32_t)(block->data1 >> (block->index - 64));
// Append the endpoint's P-bit as the new least significant bit of
// every component of that endpoint.
for (int i = 0; i < nu_subsets * 2; i++) {
endpoint_array[i * 4 + 0] <<= 1;
endpoint_array[i * 4 + 1] <<= 1;
endpoint_array[i * 4 + 2] <<= 1;
endpoint_array[i * 4 + 3] <<= 1;
endpoint_array[i * 4 + 0] |= (bits & 1);
endpoint_array[i * 4 + 1] |= (bits & 1);
endpoint_array[i * 4 + 2] |= (bits & 1);
endpoint_array[i * 4 + 3] |= (bits & 1);
bits >>= 1;
}
block->index += nu_subsets * 2;
}
int color_prec = GetColorComponentPrecisionPlusPbit(mode);
int alpha_prec = GetAlphaComponentPrecisionPlusPbit(mode);
for (int i = 0; i < nu_subsets * 2; i++) {
// Color_component_precision & alpha_component_precision includes pbit
// left shift endpoint components so that their MSB lies in bit 7
endpoint_array[i * 4 + 0] <<= (8 - color_prec);
endpoint_array[i * 4 + 1] <<= (8 - color_prec);
endpoint_array[i * 4 + 2] <<= (8 - color_prec);
endpoint_array[i * 4 + 3] <<= (8 - alpha_prec);
// Replicate each component's MSB into the LSBs revealed by the left-shift operation above.
endpoint_array[i * 4 + 0] |= (endpoint_array[i * 4 + 0] >> color_prec);
endpoint_array[i * 4 + 1] |= (endpoint_array[i * 4 + 1] >> color_prec);
endpoint_array[i * 4 + 2] |= (endpoint_array[i * 4 + 2] >> color_prec);
endpoint_array[i * 4 + 3] |= (endpoint_array[i * 4 + 3] >> alpha_prec);
}
// Modes 0..3 have no alpha endpoints: make every endpoint fully opaque.
if (mode <= 3) {
for (int i = 0; i < nu_subsets * 2; i++)
endpoint_array[i * 4 + 3] = 0xFF;
}
}
// Blend two 8-bit endpoint components using the weight selected by index.
// indexprecision is the index width in bits: 2 and 3 use their own weight
// tables, any other value uses the 4-bit table. Weights are out of 64 and
// the result is rounded to nearest.
static uint8_t Interpolate(uint8_t e0, uint8_t e1, uint8_t index, uint8_t indexprecision) {
	const uint16_t *weights;
	switch (indexprecision) {
	case 2:
		weights = detex_bptc_table_aWeight2;
		break;
	case 3:
		weights = detex_bptc_table_aWeight3;
		break;
	default: // indexprecision == 4
		weights = detex_bptc_table_aWeight4;
		break;
	}
	const int w = weights[index];
	const int blended = (64 - w) * (uint16_t)e0 + w * (uint16_t)e1 + 32;
	return (uint8_t)(blended >> 6);
}
// Bits per colour index for each mode, before the index selection bit is
// taken into account.
static const uint8_t bptc_color_index_bitcount[8] = { 3, 3, 2, 2, 2, 2, 4, 2 };
// Returns the table value plus index_selection_bit; mode must be in 0..7.
static DETEX_INLINE_ONLY int GetColorIndexBitcount(int mode, int index_selection_bit) {
// If the index selection bit is set for mode 4, return 3, otherwise 2.
return bptc_color_index_bitcount[mode] + index_selection_bit;
}
// Bits per alpha index for each mode, before the index selection bit is
// taken into account. Declared const like the other mode tables; it is
// only ever read.
static const uint8_t bptc_alpha_index_bitcount[8] = { 3, 3, 2, 2, 3, 2, 4, 2 };
// Returns the table value minus index_selection_bit; mode must be in 0..7.
static DETEX_INLINE_ONLY int GetAlphaIndexBitcount(int mode, int index_selection_bit) {
	// If the index selection bit is set for mode 4, return 2, otherwise 3.
	return bptc_alpha_index_bitcount[mode] - index_selection_bit;
}
// Number of subsets for each mode.
static const uint8_t bptc_NS[8] = { 3, 2, 3, 2, 1, 1, 1, 2 };
// Table look-up; mode must be in 0..7 (not checked).
static DETEX_INLINE_ONLY int GetNumberOfSubsets(int mode) {
return bptc_NS[mode];
}
// Number of partition-selection bits for each mode.
static const uint8_t PB[8] = { 4, 6, 6, 6, 0, 0, 0, 6 };
// Table look-up; mode must be in 0..7 (not checked).
static DETEX_INLINE_ONLY int GetNumberOfPartitionBits(int mode) {
return PB[mode];
}
// Number of rotation bits for each mode.
static const uint8_t RB[8] = { 0, 0, 0, 0, 2, 2, 0, 0 };
// Table look-up; mode must be in 0..7 (not checked).
static DETEX_INLINE_ONLY int GetNumberOfRotationBits(int mode) {
return RB[mode];
}
// Functions to extract parameters.
// Determine the block mode: the position of the lowest set bit among the
// first eight bits of the block. On success block->index is set to the bit
// following the mode bits and the mode (0..7) is returned. Returns -1, and
// leaves block->index unchanged, when none of the eight bits is set.
static int ExtractMode(detexBlock128 *block) {
	int bit = 0;
	while (bit < 8) {
		if ((block->data0 >> bit) & 1) {
			block->index = bit + 1;
			return bit;
		}
		bit++;
	}
	// Illegal.
	return -1;
}
// Read the partition set id for the given mode from the block, advancing
// block->index by the number of partition bits of that mode.
static DETEX_INLINE_ONLY int ExtractPartitionSetID(detexBlock128 *block, int mode) {
	const int bit_count = GetNumberOfPartitionBits(mode);
	return detexBlock128ExtractBits(block, bit_count);
}
// Return the subset that pixel i belongs to for the given partition pattern.
// Single-subset blocks always yield 0; two-subset blocks use the P2 table and
// everything else uses the P3 table.
static DETEX_INLINE_ONLY int GetPartitionIndex(int nu_subsets, int partition_set_id, int i) {
	const int entry = partition_set_id * 16 + i;
	switch (nu_subsets) {
	case 1:
		return 0;
	case 2:
		return detex_bptc_table_P2[entry];
	default:
		return detex_bptc_table_P3[entry];
	}
}
// Read the rotation field for the given mode from the block, advancing
// block->index by the number of rotation bits of that mode (possibly zero).
static DETEX_INLINE_ONLY int ExtractRotationBits(detexBlock128 *block, int mode) {
	const int bit_count = GetNumberOfRotationBits(mode);
	return detexBlock128ExtractBits(block, bit_count);
}
// Return the pixel number of the anchor for the given subset ("partition").
// Subset 0 is always anchored at pixel 0; the other subsets are looked up by
// partition set id in the table matching the subset count and subset number.
static DETEX_INLINE_ONLY int GetAnchorIndex(int partition_set_id, int partition, int nu_subsets) {
	const uint8_t *anchors;
	if (partition == 0)
		return 0;
	if (nu_subsets == 2)
		anchors = detex_bptc_table_anchor_index_second_subset;
	else if (partition == 1)
		anchors = detex_bptc_table_anchor_index_second_subset_of_three;
	else
		anchors = detex_bptc_table_anchor_index_third_subset;
	return anchors[partition_set_id];
}
// Bits per primary index for each mode.
static const uint8_t IB[8] = { 3, 3, 2, 2, 2, 2, 4, 2 };
// Bits per secondary index for each mode; 0 means the mode has no secondary
// index set.
static const uint8_t IB2[8] = { 0, 0, 0, 0, 3, 2, 0, 0 };
// Per mode: non-zero when the block carries a partition set id.
static const uint8_t mode_has_partition_bits[8] = { 1, 1, 1, 1, 0, 0, 0, 1 };
/* Decompress a 128-bit 4x4 pixel texture block compressed using BPTC mode 1. */
// Dedicated decoder for mode 1: two subsets, 6-bit RGB endpoints, one P-bit
// shared by both endpoints of a subset, 3-bit indices, no alpha.
// Reads fixed bit positions from block->data0/data1 (block->index is not
// used) and writes 16 packed 32-bit pixels with alpha 0xFF to pixel_buffer.
// Always returns true.
static bool DecompressBlockBPTCMode1(detexBlock128 * DETEX_RESTRICT block,
uint8_t * DETEX_RESTRICT pixel_buffer) {
uint64_t data0 = block->data0;
uint64_t data1 = block->data1;
int partition_set_id = detexGetBits64(data0, 2, 7);
// Layout: endpoint[(2 * subset + endpoint_number) * 3 + component].
uint8_t endpoint[2 * 2 * 3]; // 2 subsets.
endpoint[0] = (uint8_t)detexGetBits64(data0, 8, 13); // red, subset 0, endpoint 0
endpoint[3] = (uint8_t)detexGetBits64(data0, 14, 19); // red, subset 0, endpoint 1
endpoint[6] = (uint8_t)detexGetBits64(data0, 20, 25); // red, subset 1, endpoint 0
endpoint[9] = (uint8_t)detexGetBits64(data0, 26, 31); // red, subset 1, endpoint 1
endpoint[1] = (uint8_t)detexGetBits64(data0, 32, 37); // green, subset 0, endpoint 0
endpoint[4] = (uint8_t)detexGetBits64(data0, 38, 43); // green, subset 0, endpoint 1
endpoint[7] = (uint8_t)detexGetBits64(data0, 44, 49); // green, subset 1, endpoint 0
endpoint[10] = (uint8_t)detexGetBits64(data0, 50, 55); // green, subset 1, endpoint 1
endpoint[2] = (uint8_t)detexGetBits64(data0, 56, 61); // blue, subset 0, endpoint 0
// This value straddles the two words: 2 bits from data0, 4 bits from data1.
endpoint[5] = (uint8_t)(detexGetBits64(data0, 62, 63) // blue, subset 0, endpoint 1
| (detexGetBits64(data1, 0, 3) << 2));
endpoint[8] = (uint8_t)detexGetBits64(data1, 4, 9); // blue, subset 1, endpoint 0
endpoint[11] = (uint8_t)detexGetBits64(data1, 10, 15); // blue, subset 1, endpoint 1
// Decode endpoints.
for (int i = 0; i < 2 * 2; i++) {
//component-wise left-shift
endpoint[i * 3 + 0] <<= 2;
endpoint[i * 3 + 1] <<= 2;
endpoint[i * 3 + 2] <<= 2;
}
// P-bit is shared.
// Each P-bit is pre-shifted into bit 1, directly below the 6 endpoint bits.
uint8_t pbit_zero = (uint8_t)detexGetBits64(data1, 16, 16) << 1;
uint8_t pbit_one = (uint8_t)detexGetBits64(data1, 17, 17) << 1;
// RGB only pbits for mode 1, one for each subset.
for (int j = 0; j < 3; j++) {
endpoint[0 * 3 + j] |= pbit_zero;
endpoint[1 * 3 + j] |= pbit_zero;
endpoint[2 * 3 + j] |= pbit_one;
endpoint[3 * 3 + j] |= pbit_one;
}
for (int i = 0; i < 2 * 2; i++) {
// Replicate each component's MSB into the LSB.
endpoint[i * 3 + 0] |= endpoint[i * 3 + 0] >> 7;
endpoint[i * 3 + 1] |= endpoint[i * 3 + 1] >> 7;
endpoint[i * 3 + 2] |= endpoint[i * 3 + 2] >> 7;
}
uint8_t subset_index[16];
for (int i = 0; i < 16; i++)
// subset_index[i] is a number from 0 to 1.
subset_index[i] = detex_bptc_table_P2[partition_set_id * 16 + i];
uint8_t anchor_index[2];
anchor_index[0] = 0;
anchor_index[1] = detex_bptc_table_anchor_index_second_subset[partition_set_id];
uint8_t color_index[16];
// Extract primary index bits.
// The indices start at bit 18 of data1 (bit 82 of the block).
data1 >>= 18;
for (int i = 0; i < 16; i++)
if (i == anchor_index[subset_index[i]]) {
// Highest bit is zero.
color_index[i] = data1 & 3; // Get two bits.
data1 >>= 2;
}
else {
color_index[i] = data1 & 7; // Get three bits.
data1 >>= 3;
}
uint32_t *pixel32_buffer = (uint32_t *)pixel_buffer;
for (int i = 0; i < 16; i++) {
// Pick the endpoint pair of the subset this pixel belongs to.
uint8_t endpoint_start[3];
uint8_t endpoint_end[3];
for (int j = 0; j < 3; j++) {
endpoint_start[j] = endpoint[2 * subset_index[i] * 3 + j];
endpoint_end[j] = endpoint[(2 * subset_index[i] + 1) * 3 + j];
}
uint32_t output;
output = detexPack32R8(Interpolate(endpoint_start[0], endpoint_end[0], color_index[i], 3));
output |= detexPack32G8(Interpolate(endpoint_start[1], endpoint_end[1], color_index[i], 3));
output |= detexPack32B8(Interpolate(endpoint_start[2], endpoint_end[2], color_index[i], 3));
output |= detexPack32A8(0xFF);
pixel32_buffer[i] = output;
}
return true;
}
/* Decompress a 128-bit 4x4 pixel texture block compressed using the BPTC */
/* (BC7) format. */
//
// bitstring    - the 16 bytes of the compressed block.
// mode_mask    - bit n must be set for a block of mode n to be accepted.
// flags        - DETEX_DECOMPRESS_FLAG_OPAQUE_ONLY rejects modes 4..7,
//                DETEX_DECOMPRESS_FLAG_NON_OPAQUE_ONLY rejects modes 0..3.
// pixel_buffer - receives 16 packed 32-bit pixels (red in bits 0..7, green
//                8..15, blue 16..23, alpha 24..31), stored as uint32_t.
//
// Returns false without writing any pixel when the block has no valid mode
// or the mode is rejected by mode_mask/flags; returns true otherwise.
bool detexDecompressBlockBPTC(const uint8_t * DETEX_RESTRICT bitstring, uint32_t mode_mask,
	uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer) {
	detexBlock128 block;
	// Assemble the two 64-bit halves byte by byte, least significant byte
	// first. Loading them through a casted uint64_t pointer would discard
	// const, depend on the host byte order and require an aligned buffer.
	block.data0 = 0;
	block.data1 = 0;
	for (int b = 0; b < 8; b++) {
		block.data0 |= (uint64_t)bitstring[b] << (8 * b);
		block.data1 |= (uint64_t)bitstring[8 + b] << (8 * b);
	}
	block.index = 0;
	int mode = ExtractMode(&block);
	if (mode == -1)
		return false;
	// Allow compression tied to specific modes (according to mode_mask).
	if (!(mode_mask & ((int)1 << mode)))
		return false;
	if (mode >= 4 && (flags & DETEX_DECOMPRESS_FLAG_OPAQUE_ONLY))
		return false;
	if (mode < 4 && (flags & DETEX_DECOMPRESS_FLAG_NON_OPAQUE_ONLY))
		return false;
	// Mode 1 has shared P-bits and is handled by a dedicated decoder.
	if (mode == 1)
		return DecompressBlockBPTCMode1(&block, pixel_buffer);
	int nu_subsets = 1;
	int partition_set_id = 0;
	if (mode_has_partition_bits[mode]) {
		nu_subsets = GetNumberOfSubsets(mode);
		partition_set_id = ExtractPartitionSetID(&block, mode);
	}
	int rotation = ExtractRotationBits(&block, mode);
	int index_selection_bit = 0;
	if (mode == 4)
		index_selection_bit = detexBlock128ExtractBits(&block, 1);
	int alpha_index_bitcount = GetAlphaIndexBitcount(mode, index_selection_bit);
	int color_index_bitcount = GetColorIndexBitcount(mode, index_selection_bit);
	uint8_t endpoint_array[3 * 2 * 4];	// Max. 3 subsets.
	ExtractEndpoints(mode, nu_subsets, &block, endpoint_array);
	FullyDecodeEndpoints(endpoint_array, nu_subsets, mode, &block);
	uint8_t subset_index[16];
	for (int i = 0; i < 16; i++)
		// subset_index[i] is a number from 0 to 2, or 0 to 1, or 0 depending on the number of subsets.
		subset_index[i] = (uint8_t)GetPartitionIndex(nu_subsets, partition_set_id, i);
	uint8_t anchor_index[4] = {0,0,0,0};	// Only need max. 3 elements.
	for (int i = 0; i < nu_subsets; i++)
		anchor_index[i] = (uint8_t)GetAnchorIndex(partition_set_id, i, nu_subsets);
	uint8_t color_index[16];
	uint8_t alpha_index[16]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
	// Extract primary index bits.
	uint64_t data1;
	if (block.index >= 64) {
		// Because the index bits are all in the second 64-bit word, there is no need to use
		// block_extract_bits().
		// This implies the mode is not 4.
		data1 = block.data1 >> (block.index - 64);
		uint8_t mask1 = (1 << IB[mode]) - 1;
		uint8_t mask2 = (1 << (IB[mode] - 1)) - 1;
		for (int i = 0; i < 16; i++)
			if (i == anchor_index[subset_index[i]]) {
				// Highest bit is zero.
				color_index[i] = data1 & mask2;
				data1 >>= IB[mode] - 1;
				alpha_index[i] = color_index[i];
			}
			else {
				color_index[i] = data1 & mask1;
				data1 >>= IB[mode];
				alpha_index[i] = color_index[i];
			}
	}
	else {	// Implies mode 4.
		// Because the bits cross the 64-bit word boundary, we have to be careful.
		// Block index is 50 at this point.
		uint64_t data = block.data0 >> 50;
		data |= block.data1 << 14;
		// The 2-bit index set feeds alpha when the index selection bit is
		// set and colour otherwise.
		for (int i = 0; i < 16; i++)
			if (i == anchor_index[subset_index[i]]) {
				// Highest bit is zero.
				if (index_selection_bit) {	// Implies mode == 4.
					alpha_index[i] = data & 0x1;
					data >>= 1;
				}
				else {
					color_index[i] = data & 0x1;
					data >>= 1;
				}
			}
			else {
				if (index_selection_bit) {	// Implies mode == 4.
					alpha_index[i] = data & 0x3;
					data >>= 2;
				}
				else {
					color_index[i] = data & 0x3;
					data >>= 2;
				}
			}
		// Block index is 81 at this point.
		data1 = block.data1 >> (81 - 64);
	}
	// Extract secondary index bits.
	if (IB2[mode] > 0) {
		uint8_t mask1 = (1 << IB2[mode]) - 1;
		uint8_t mask2 = (1 << (IB2[mode] - 1)) - 1;
		for (int i = 0; i < 16; i++)
			if (i == anchor_index[subset_index[i]]) {
				// Highest bit is zero.
				if (index_selection_bit) {
					color_index[i] = data1 & 0x3;
					data1 >>= 2;
				}
				else {
					// alpha_index[i] = block_extract_bits(&block, IB2[mode] - 1);
					alpha_index[i] = data1 & mask2;
					data1 >>= IB2[mode] - 1;
				}
			}
			else {
				if (index_selection_bit) {
					color_index[i] = data1 & 0x7;
					data1 >>= 3;
				}
				else {
					// alpha_index[i] = block_extract_bits(&block, IB2[mode]);
					alpha_index[i] = data1 & mask1;
					data1 >>= IB2[mode];
				}
			}
	}
	uint32_t *pixel32_buffer = (uint32_t *)pixel_buffer;
	for (int i = 0; i < 16; i++) {
		// Pick the endpoint pair of the subset this pixel belongs to.
		uint8_t endpoint_start[4];
		uint8_t endpoint_end[4];
		for (int j = 0; j < 4; j++) {
			endpoint_start[j] = endpoint_array[2 * subset_index[i] * 4 + j];
			endpoint_end[j] = endpoint_array[(2 * subset_index[i] + 1) * 4 + j];
		}
		uint32_t output = 0;
		output = detexPack32R8(Interpolate((uint8_t)endpoint_start[0], (uint8_t)endpoint_end[0], (uint8_t)color_index[i], (uint8_t)color_index_bitcount));
		output |= detexPack32G8(Interpolate((uint8_t)endpoint_start[1], (uint8_t)endpoint_end[1], (uint8_t)color_index[i], (uint8_t)color_index_bitcount));
		output |= detexPack32B8(Interpolate((uint8_t)endpoint_start[2], (uint8_t)endpoint_end[2], (uint8_t)color_index[i], (uint8_t)color_index_bitcount));
		output |= detexPack32A8(Interpolate((uint8_t)endpoint_start[3], (uint8_t)endpoint_end[3], (uint8_t)alpha_index[i], (uint8_t)alpha_index_bitcount));
		// A non-zero rotation swaps the alpha channel with one colour channel.
		if (rotation > 0) {
			if (rotation == 1)
				output = detexPack32RGBA8(detexPixel32GetA8(output), detexPixel32GetG8(output),
					detexPixel32GetB8(output), detexPixel32GetR8(output));
			else
			if (rotation == 2)
				output = detexPack32RGBA8(detexPixel32GetR8(output), detexPixel32GetA8(output),
					detexPixel32GetB8(output), detexPixel32GetG8(output));
			else // rotation == 3
				output = detexPack32RGBA8(detexPixel32GetR8(output), detexPixel32GetG8(output),
					detexPixel32GetA8(output), detexPixel32GetB8(output));
		}
		pixel32_buffer[i] = output;
	}
	return true;
}
/* Modify compressed block to use specific colors. For later use. */
// Mode 3, 7 color bits.
// Color bits at index: 10
// Color bits end before index: 10 + 4 * 3 * 7 = 94
// printf("bptc_set_block_colors: Colors set for mode 3.\n");
// Mode 5, 7 color bits, 8 alpha bits.
// Color bits at index: 6 + 2 = 8
// Alpha bits at index: 8 + 2 * 3 * 7 = 50
// Alpha bits end before index: 50 + 2 * 8 = 66
// printf("bptc_set_block_colors: Colors set for mode 5.\n");
// Mode 6, 7 color bits, 7 alpha bits.
// Color bits at index 7.
// Alpha bits at index: 7 + 2 * 3 * 7 = 49
// Alpha bits end before index: 49 + 2 * 7 = 63
// printf("bptc_set_block_colors: Colors set for mode 6.\n");
/* Return the internal mode of the BPTC block. */
// bitstring points to the 16 bytes of the compressed block. The result is
// the mode number 0..7; when the block has no valid mode the -1 produced by
// ExtractMode is converted to uint32_t (i.e. 0xFFFFFFFF).
uint32_t detexGetModeBPTC(const uint8_t *bitstring) {
	detexBlock128 block;
	// Assemble the two 64-bit halves byte by byte, least significant byte
	// first, instead of loading them through a casted uint64_t pointer
	// (which discards const and depends on host byte order and alignment).
	block.data0 = 0;
	block.data1 = 0;
	for (int b = 0; b < 8; b++) {
		block.data0 |= (uint64_t)bitstring[b] << (8 * b);
		block.data1 |= (uint64_t)bitstring[8 + b] << (8 * b);
	}
	block.index = 0;
	int mode = ExtractMode(&block);
	return mode;
}

View File

@@ -1,27 +0,0 @@
/*
Copyright (c) 2015 Harm Hanemaaijer <fgenfb@yahoo.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#pragma once
#include "detex_common.h"
#ifdef __cplusplus
extern "C" {
#endif
// Decompress one 128-bit BPTC (BC7) block into 16 packed 32-bit pixels.
// Bit n of mode_mask must be set for a block of mode n to be decoded.
// Returns false when the block has no valid mode, or when its mode is
// excluded by mode_mask or by the opaque-only / non-opaque-only flags.
bool detexDecompressBlockBPTC(const uint8_t * DETEX_RESTRICT bitstring, uint32_t mode_mask,
uint32_t flags, uint8_t * DETEX_RESTRICT pixel_buffer);
#ifdef __cplusplus
}
#endif

View File

@@ -1,96 +0,0 @@
/*
Copyright (c) 2015 Harm Hanemaaijer <fgenfb@yahoo.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "decompress_eac.h"
#define DETEX_PIXEL32_ALPHA_BYTE_OFFSET 0
/* Saturates x to the 8-bit range: negative inputs give 0, inputs above 255 give */
/* 255, and anything in between is returned unchanged. */
static DETEX_INLINE_ONLY uint8_t detexClamp0To255(int x) {
	if (x < 0)
		return (uint8_t)0;
	if (x > 255)
		return (uint8_t)255;
	return (uint8_t)x;
}
/* EAC modifier tables: 16 rows of 8 signed modifiers each. */
/* detexDecompressBlockETC2_EAC() selects the row with the low nibble of */
/* bitstring[1]; ProcessPixelEAC() selects the column with a pixel's 3-bit index. */
/* Not static: decompress_eac.h also declares this table extern. */
const int8_t eac_modifier_table[16][8] = {
{ -3, -6, -9, -15, 2, 5, 8, 14 },
{ -3, -7, -10, -13, 2, 6, 9, 12 },
{ -2, -5, -8, -13, 1, 4, 7, 12 },
{ -2, -4, -6, -13, 1, 3, 5, 12 },
{ -3, -6, -8, -12, 2, 5, 7, 11 },
{ -3, -7, -9, -11, 2, 6, 8, 10 },
{ -4, -7, -8, -11, 3, 6, 7, 10 },
{ -3, -5, -8, -11, 2, 4, 7, 10 },
{ -2, -6, -8, -10, 1, 5, 7, 9 },
{ -2, -5, -8, -10, 1, 4, 7, 9 },
{ -2, -4, -8, -10, 1, 3, 7, 9 },
{ -2, -5, -7, -10, 1, 4, 6, 9 },
{ -3, -4, -7, -10, 2, 3, 6, 9 },
{ -1, -2, -3, -10, 0, 1, 2, 9 },
{ -4, -6, -8, -9, 3, 5, 7, 8 },
{ -3, -5, -7, -9, 2, 4, 6, 8 }
};
/* Scales a table modifier by the block multiplier. This is a plain integer */
/* product: no multiplier value (including 0) is special-cased here. */
static DETEX_INLINE_ONLY int modifier_times_multiplier(int modifier, int multiplier)
{
return modifier * multiplier;
}
/* Decodes pixel i (0..15) of an EAC block and stores the result as one byte. */
/*   i               - pixel number in bitstream order */
/*   pixels          - the 48 selector bits; pixel i uses the 3 bits at shift 45 - 3*i */
/*   modifier_table  - the 8-entry row of eac_modifier_table chosen for this block */
/*   base_codeword,  */
/*   multiplier      - block parameters; result is clamp(base + modifier * multiplier) */
/*   pixel_buffer,   */
/*   bytes_per_pixel - destination and its per-texel stride in bytes */
/* The destination texel is (i & 3) * 4 + (i >> 2): consecutive pixel numbers step */
/* by four texels, so the bitstream order is transposed relative to the buffer. */
static DETEX_INLINE_ONLY void ProcessPixelEAC(uint8_t i, uint64_t pixels,
	const int8_t * DETEX_RESTRICT modifier_table, int base_codeword, int multiplier,
	uint8_t * DETEX_RESTRICT pixel_buffer, int bytes_per_pixel)
{
	const int selector_shift = 45 - i * 3;
	const int modifier = modifier_table[(pixels >> selector_shift) & 7];
	const int texel_major = (i & 3) * 4;
	const int texel_minor = (i & 12) >> 2;
	const int decoded = base_codeword + modifier_times_multiplier(modifier, multiplier);

	pixel_buffer[(texel_major + texel_minor) * bytes_per_pixel + DETEX_PIXEL32_ALPHA_BYTE_OFFSET] =
		detexClamp0To255(decoded);
}
/* Decodes the EAC alpha half of an ETC2_EAC block. */
/* Only the first 8 bytes of bitstring are read: */
/*   byte 0     - base codeword */
/*   byte 1     - multiplier (high nibble) and eac_modifier_table row (low nibble) */
/*   bytes 2..7 - 48 bits of 3-bit selectors, most significant byte first */
/* One byte is written per texel of the 4x4 block, bytes_per_pixel apart, starting */
/* at pixel_buffer + DETEX_PIXEL32_ALPHA_BYTE_OFFSET. The colour half of the block */
/* (bitstring[8..15]) is not decoded here. Always returns true. */
bool detexDecompressBlockETC2_EAC(const uint8_t * DETEX_RESTRICT bitstring, uint8_t * DETEX_RESTRICT pixel_buffer, int bytes_per_pixel)
{
	const int base_codeword = bitstring[0];
	const int table_row = bitstring[1] & 0x0F;
	const int multiplier = (bitstring[1] & 0xF0) >> 4;
	const int8_t *modifier_table = eac_modifier_table[table_row];
	uint64_t pixels = 0;
	int byte_index;
	uint8_t pixel_index;

	/* Gather the selector bits: bitstring[2] lands in bits 47..40, bitstring[7] in bits 7..0. */
	for (byte_index = 2; byte_index <= 7; byte_index++)
		pixels = (pixels << 8) | (uint64_t)bitstring[byte_index];

	/* Decode the sixteen texels in bitstream order. */
	for (pixel_index = 0; pixel_index < 16; pixel_index++)
		ProcessPixelEAC(pixel_index, pixels, modifier_table, base_codeword, multiplier, pixel_buffer, bytes_per_pixel);

	return true;
}

View File

@@ -1,37 +0,0 @@
/*
Copyright (c) 2015 Harm Hanemaaijer <fgenfb@yahoo.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#pragma once
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include "detex_common.h"
/* Give the declarations below C linkage when this header is included from C++. */
#ifdef __cplusplus
extern "C"
{
#endif
/* Fallback only: takes effect when DETEX_RESTRICT has not been defined already. */
#ifndef DETEX_RESTRICT
#define DETEX_RESTRICT __restrict
#endif
/* EAC modifier tables, 16 rows of 8 signed modifiers (defined in decompress_eac.c). */
extern const int8_t eac_modifier_table[16][8];
/* Decodes the 8-byte EAC alpha block at bitstring and writes one byte per texel of */
/* the 4x4 block into pixel_buffer, bytes_per_pixel apart. Always returns true. */
bool detexDecompressBlockETC2_EAC(const uint8_t * DETEX_RESTRICT bitstring, uint8_t * DETEX_RESTRICT pixel_buffer, int bytes_per_pixel);
#ifdef __cplusplus
}
#endif

View File

@@ -1,33 +0,0 @@
#pragma once
/* Compiler-portability macros. Each one is defined only if the includer has not */
/* already defined it, so a build can override either of them. */
#ifdef _MSC_VER
/* MSVC: force inlining and enable the restrict qualifier. */
#ifndef DETEX_INLINE_ONLY
#define DETEX_INLINE_ONLY __forceinline
#endif
#ifndef DETEX_RESTRICT
#define DETEX_RESTRICT __restrict
#endif
#else
/* Every other compiler: both macros expand to nothing, so functions marked */
/* DETEX_INLINE_ONLY are ordinary functions and no restrict qualifier is applied. */
#ifndef DETEX_INLINE_ONLY
#define DETEX_INLINE_ONLY
#endif
#ifndef DETEX_RESTRICT
#define DETEX_RESTRICT
#endif
#endif
/* Decompression flags. The values are distinct bits (0x1, 0x2, 0x4). */
/* NOTE(review): presumably OR'ed together and passed as the flags argument of the */
/* block decompressors - confirm against the callers. */
enum {
/* Function returns false (invalid block) when the compressed block */
/* is in a format not allowed to be generated by an encoder. */
DETEX_DECOMPRESS_FLAG_ENCODE = 0x1,
/* For compression formats that have opaque and non-opaque modes, */
/* return false (invalid block) when the compressed block is encoded */
/* using a non-opaque mode. */
DETEX_DECOMPRESS_FLAG_OPAQUE_ONLY = 0x2,
/* For compression formats that have opaque and non-opaque modes, */
/* return false (invalid block) when the compressed block is encoded */
/* using an opaque mode. */
DETEX_DECOMPRESS_FLAG_NON_OPAQUE_ONLY = 0x4,
};

View File

@@ -947,153 +947,6 @@ namespace basist
uint16_t m_err;
};
#if BASISD_WRITE_NEW_DXT1_TABLES
// Offline generator for the 5-bit ETC1 -> DXT1 endpoint table; writes
// "basisu_decoder_tables_dxt1_5.inc" into the current directory.
//
// For every intensity table (0..7) and every 5-bit gray base value (0..31) the
// four ETC1 subblock colors are computed. Then, for each selector range and each
// selector mapping, all 32x32 (lo, hi) 5-bit endpoint pairs are tried and the pair
// with the smallest summed squared error (green channel, selectors low..high) is
// emitted as "{lo,hi,err},".
//
// Returns without writing anything if the output file cannot be opened.
static void create_etc1_to_dxt1_5_conversion_table()
{
	const char* const pFilename = "basisu_decoder_tables_dxt1_5.inc";

	FILE* pFile = fopen(pFilename, "w");
	if (!pFile)
	{
		// Without this check every fprintf() below would be handed a null stream.
		fprintf(stderr, "Unable to open \"%s\" for writing\n", pFilename);
		return;
	}

	uint32_t n = 0;

	for (int inten = 0; inten < 8; inten++)
	{
		for (uint32_t g = 0; g < 32; g++)
		{
			color32 block_colors[4];
			decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

			for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
			{
				const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
				const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;

				for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
				{
					uint32_t best_lo = 0;
					uint32_t best_hi = 0;
					uint64_t best_err = UINT64_MAX;

					for (uint32_t hi = 0; hi <= 31; hi++)
					{
						for (uint32_t lo = 0; lo <= 31; lo++)
						{
							// Expand the 5-bit endpoints to 8 bits and derive the two interpolants.
							uint32_t colors[4];
							colors[0] = (lo << 3) | (lo >> 2);
							colors[3] = (hi << 3) | (hi >> 2);
							colors[1] = (colors[0] * 2 + colors[3]) / 3;
							colors[2] = (colors[3] * 2 + colors[0]) / 3;

							uint64_t total_err = 0;
							for (uint32_t s = low_selector; s <= high_selector; s++)
							{
								// Signed difference, computed in int so it cannot wrap as unsigned.
								const int err = static_cast<int>(block_colors[s].g) - static_cast<int>(colors[g_etc1_to_dxt1_selector_mappings[m][s]]);
								total_err += static_cast<uint64_t>(err * err);
							}

							// Strict '<' keeps the first best pair found in (hi, lo) iteration order.
							if (total_err < best_err)
							{
								best_err = total_err;
								best_lo = lo;
								best_hi = hi;
							}
						}
					}

					// The error has to fit the 16-bit error field of the table entries.
					assert(best_err <= 0xFFFF);

					// best_err is 64 bits wide; "%u" consumes an unsigned int, so narrow it explicitly.
					fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, static_cast<uint32_t>(best_err));

					n++;
					if ((n & 31) == 31)
						fprintf(pFile, "\n");
				} // m
			} // sr
		} // g
	} // inten

	fclose(pFile);
}
// Offline generator for the 6-bit ETC1 -> DXT1 endpoint table; writes
// "basisu_decoder_tables_dxt1_6.inc" into the current directory.
//
// For every intensity table (0..7) and every 5-bit gray base value (0..31) the
// four ETC1 subblock colors are computed. Then, for each selector range and each
// selector mapping, all 64x64 (lo, hi) 6-bit endpoint pairs are tried and the pair
// with the smallest summed squared error (green channel, selectors low..high) is
// emitted as "{lo,hi,err},".
//
// Returns without writing anything if the output file cannot be opened.
static void create_etc1_to_dxt1_6_conversion_table()
{
	const char* const pFilename = "basisu_decoder_tables_dxt1_6.inc";

	FILE* pFile = fopen(pFilename, "w");
	if (!pFile)
	{
		// Without this check every fprintf() below would be handed a null stream.
		fprintf(stderr, "Unable to open \"%s\" for writing\n", pFilename);
		return;
	}

	uint32_t n = 0;

	for (int inten = 0; inten < 8; inten++)
	{
		for (uint32_t g = 0; g < 32; g++)
		{
			color32 block_colors[4];
			decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

			for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
			{
				const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
				const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;

				for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
				{
					uint32_t best_lo = 0;
					uint32_t best_hi = 0;
					uint64_t best_err = UINT64_MAX;

					for (uint32_t hi = 0; hi <= 63; hi++)
					{
						for (uint32_t lo = 0; lo <= 63; lo++)
						{
							// Expand the 6-bit endpoints to 8 bits and derive the two interpolants.
							uint32_t colors[4];
							colors[0] = (lo << 2) | (lo >> 4);
							colors[3] = (hi << 2) | (hi >> 4);
							colors[1] = (colors[0] * 2 + colors[3]) / 3;
							colors[2] = (colors[3] * 2 + colors[0]) / 3;

							uint64_t total_err = 0;
							for (uint32_t s = low_selector; s <= high_selector; s++)
							{
								// Signed difference, computed in int so it cannot wrap as unsigned.
								const int err = static_cast<int>(block_colors[s].g) - static_cast<int>(colors[g_etc1_to_dxt1_selector_mappings[m][s]]);
								total_err += static_cast<uint64_t>(err * err);
							}

							// Strict '<' keeps the first best pair found in (hi, lo) iteration order.
							if (total_err < best_err)
							{
								best_err = total_err;
								best_lo = lo;
								best_hi = hi;
							}
						}
					}

					// The error has to fit the 16-bit error field of the table entries.
					assert(best_err <= 0xFFFF);

					// best_err is 64 bits wide; "%u" consumes an unsigned int, so narrow it explicitly.
					fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, static_cast<uint32_t>(best_err));

					n++;
					if ((n & 31) == 31)
						fprintf(pFile, "\n");
				} // m
			} // sr
		} // g
	} // inten

	fclose(pFile);
}
#endif
#if BASISD_SUPPORT_DXT1
static dxt_selector_range g_etc1_to_dxt1_selector_ranges[] =
{
@@ -1138,32 +991,223 @@ namespace basist
#include "basisu_transcoder_tables_dxt1_5.inc"
};
// The idea for optimal BC1 single-color block encoding was first from ryg_dxt's real-time DXT1 encoder
static uint8_t g_bc1_match5[256][2], g_bc1_match6[256][2];
static void prepare_bc1_single_color_table(uint8_t *pTable, const uint8_t *pExpand, int size)
// First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt.
// One entry of a BC1 single-color lookup table: the endpoint pair selected for a
// given 8-bit component value.
struct bc1_match_entry
{
// Endpoint index stored as the "hi" value; convert_etc1s_to_dxt1() packs it into max16.
uint8_t m_hi;
// Endpoint index stored as the "lo" value; convert_etc1s_to_dxt1() packs it into min16.
uint8_t m_lo;
};
static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; // selector 1, allow equals hi/lo
static void prepare_bc1_single_color_table(bc1_match_entry *pTable, const uint8_t *pExpand, int size, int sel, bool allow_equals)
{
int total_e = 0;
for (int i = 0; i < 256; i++)
{
int lowest_e = 256;
for (int min = 0; min < size; min++)
for (int lo = 0; lo < size; lo++)
{
for (int max = 0; max < size; max++)
for (int hi = 0; hi < size; hi++)
{
int min_e = pExpand[min], max_e = pExpand[max];
int e = abs(((max_e * 2 + min_e) / 3) - i) + ((abs(max_e - min_e) >> 5));
const int lo_e = pExpand[lo], hi_e = pExpand[hi];
int e;
if (!allow_equals)
{
if (lo == hi)
continue;
}
if (sel == 1)
{
// Selector 1
e = abs(((hi_e * 2 + lo_e) / 3) - i) + ((abs(hi_e - lo_e) >> 5));
}
else
{
assert(sel == 0);
// Selector 0
e = abs(hi_e - i) + ((abs(hi_e - lo_e) >> 5));
}
if (e < lowest_e)
{
pTable[i * 2 + 0] = static_cast<uint8_t>(max);
pTable[i * 2 + 1] = static_cast<uint8_t>(min);
pTable[i].m_hi = static_cast<uint8_t>(hi);
pTable[i].m_lo = static_cast<uint8_t>(lo);
lowest_e = e;
}
}
} // hi
} // lo
if (!allow_equals)
{
assert(pTable[i].m_lo != pTable[i].m_hi);
}
total_e += lowest_e;
}
}
#endif // BASISD_SUPPORT_DXT1
#if BASISD_WRITE_NEW_DXT1_TABLES
// Offline generator for the 5-bit ETC1 -> DXT1 endpoint table; writes
// "basisu_transcoder_tables_dxt1_5.inc" into the current directory.
//
// For every intensity table (0..7) and every 5-bit gray base value (0..31) the
// four ETC1 subblock colors are computed. Then, for each selector range and each
// selector mapping, all 32x32 (lo, hi) 5-bit endpoint pairs are tried and the pair
// with the smallest summed squared error (green channel, selectors low..high) is
// emitted as "{lo,hi,err},". Pairs with lo == hi are allowed.
//
// Returns without writing anything if the output file cannot be opened.
static void create_etc1_to_dxt1_5_conversion_table()
{
	const char* const pFilename = "basisu_transcoder_tables_dxt1_5.inc";

	FILE* pFile = nullptr;
#ifdef _MSC_VER
	if (fopen_s(&pFile, pFilename, "w") != 0)
		pFile = nullptr;
#else
	// fopen_s() is not available on every toolchain; fall back to fopen() elsewhere.
	pFile = fopen(pFilename, "w");
#endif
	if (!pFile)
	{
		// Without this check every fprintf() below would be handed a null stream.
		fprintf(stderr, "Unable to open \"%s\" for writing\n", pFilename);
		return;
	}

	uint32_t n = 0;

	for (int inten = 0; inten < 8; inten++)
	{
		for (uint32_t g = 0; g < 32; g++)
		{
			color32 block_colors[4];
			decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

			for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
			{
				const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
				const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;

				for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
				{
					uint32_t best_lo = 0;
					uint32_t best_hi = 0;
					uint64_t best_err = UINT64_MAX;

					for (uint32_t hi = 0; hi <= 31; hi++)
					{
						for (uint32_t lo = 0; lo <= 31; lo++)
						{
							//if (lo == hi) continue;

							// Expand the 5-bit endpoints to 8 bits and derive the two interpolants.
							uint32_t colors[4];
							colors[0] = (lo << 3) | (lo >> 2);
							colors[3] = (hi << 3) | (hi >> 2);
							colors[1] = (colors[0] * 2 + colors[3]) / 3;
							colors[2] = (colors[3] * 2 + colors[0]) / 3;

							uint64_t total_err = 0;
							for (uint32_t s = low_selector; s <= high_selector; s++)
							{
								// Signed difference, computed in int so it cannot wrap as unsigned.
								const int err = static_cast<int>(block_colors[s].g) - static_cast<int>(colors[g_etc1_to_dxt1_selector_mappings[m][s]]);
								total_err += static_cast<uint64_t>(err * err);
							}

							// Strict '<' keeps the first best pair found in (hi, lo) iteration order.
							if (total_err < best_err)
							{
								best_err = total_err;
								best_lo = lo;
								best_hi = hi;
							}
						}
					}

					// The error has to fit the 16-bit error field of the table entries.
					assert(best_err <= 0xFFFF);

					//assert(best_lo != best_hi);

					fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, static_cast<uint32_t>(best_err));

					n++;
					if ((n & 31) == 31)
						fprintf(pFile, "\n");
				} // m
			} // sr
		} // g
	} // inten

	fclose(pFile);
}
// Offline generator for the 6-bit ETC1 -> DXT1 endpoint table; writes
// "basisu_transcoder_tables_dxt1_6.inc" into the current directory.
//
// For every intensity table (0..7) and every 5-bit gray base value (0..31) the
// four ETC1 subblock colors are computed. Then, for each selector range and each
// selector mapping, all 64x64 (lo, hi) 6-bit endpoint pairs are tried and the pair
// with the smallest summed squared error (green channel, selectors low..high) is
// emitted as "{lo,hi,err},". Pairs with lo == hi are allowed.
//
// Returns without writing anything if the output file cannot be opened.
static void create_etc1_to_dxt1_6_conversion_table()
{
	const char* const pFilename = "basisu_transcoder_tables_dxt1_6.inc";

	FILE* pFile = nullptr;
#ifdef _MSC_VER
	if (fopen_s(&pFile, pFilename, "w") != 0)
		pFile = nullptr;
#else
	// fopen_s() is not available on every toolchain; fall back to fopen() elsewhere.
	pFile = fopen(pFilename, "w");
#endif
	if (!pFile)
	{
		// Without this check every fprintf() below would be handed a null stream.
		fprintf(stderr, "Unable to open \"%s\" for writing\n", pFilename);
		return;
	}

	uint32_t n = 0;

	for (int inten = 0; inten < 8; inten++)
	{
		for (uint32_t g = 0; g < 32; g++)
		{
			color32 block_colors[4];
			decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten);

			for (uint32_t sr = 0; sr < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; sr++)
			{
				const uint32_t low_selector = g_etc1_to_dxt1_selector_ranges[sr].m_low;
				const uint32_t high_selector = g_etc1_to_dxt1_selector_ranges[sr].m_high;

				for (uint32_t m = 0; m < NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS; m++)
				{
					uint32_t best_lo = 0;
					uint32_t best_hi = 0;
					uint64_t best_err = UINT64_MAX;

					for (uint32_t hi = 0; hi <= 63; hi++)
					{
						for (uint32_t lo = 0; lo <= 63; lo++)
						{
							//if (lo == hi) continue;

							// Expand the 6-bit endpoints to 8 bits and derive the two interpolants.
							uint32_t colors[4];
							colors[0] = (lo << 2) | (lo >> 4);
							colors[3] = (hi << 2) | (hi >> 4);
							colors[1] = (colors[0] * 2 + colors[3]) / 3;
							colors[2] = (colors[3] * 2 + colors[0]) / 3;

							uint64_t total_err = 0;
							for (uint32_t s = low_selector; s <= high_selector; s++)
							{
								// Signed difference, computed in int so it cannot wrap as unsigned.
								const int err = static_cast<int>(block_colors[s].g) - static_cast<int>(colors[g_etc1_to_dxt1_selector_mappings[m][s]]);
								total_err += static_cast<uint64_t>(err * err);
							}

							// Strict '<' keeps the first best pair found in (hi, lo) iteration order.
							if (total_err < best_err)
							{
								best_err = total_err;
								best_lo = lo;
								best_hi = hi;
							}
						}
					}

					// The error has to fit the 16-bit error field of the table entries.
					assert(best_err <= 0xFFFF);

					//assert(best_lo != best_hi);

					fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, static_cast<uint32_t>(best_err));

					n++;
					if ((n & 31) == 31)
						fprintf(pFile, "\n");
				} // m
			} // sr
		} // g
	} // inten

	fclose(pFile);
}
#endif
#if BASISD_SUPPORT_ETC2_EAC_A8
enum
{
@@ -1693,13 +1737,13 @@ namespace basist
uint8_t bc1_expand5[32];
for (int i = 0; i < 32; i++)
bc1_expand5[i] = static_cast<uint8_t>((i << 3) | (i >> 2));
prepare_bc1_single_color_table(&g_bc1_match5[0][0], bc1_expand5, 32);
prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, 1, true);
uint8_t bc1_expand6[64];
for (int i = 0; i < 64; i++)
bc1_expand6[i] = static_cast<uint8_t>((i << 2) | (i >> 4));
prepare_bc1_single_color_table(&g_bc1_match6[0][0], bc1_expand6, 64);
prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, 1, true);
for (uint32_t i = 0; i < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; i++)
{
uint32_t l = g_etc1_to_dxt1_selector_ranges[i].m_low;
@@ -1759,7 +1803,7 @@ namespace basist
}
#if BASISD_SUPPORT_DXT1
static void convert_etc1s_to_dxt1(dxt1_block * pDst_block, const decoder_etc_block * pSrc_block, const selector * pSelector)
static void convert_etc1s_to_dxt1(dxt1_block * pDst_block, const decoder_etc_block *pSrc_block, const selector * pSelector, bool use_threecolor_blocks)
{
#if !BASISD_WRITE_NEW_DXT1_TABLES
const uint32_t low_selector = pSelector->m_lo_selector;
@@ -1779,30 +1823,53 @@ namespace basist
const uint32_t b = block_colors[low_selector].b;
uint32_t mask = 0xAA;
uint32_t max16 = (g_bc1_match5[r][0] << 11) | (g_bc1_match6[g][0] << 5) | g_bc1_match5[b][0];
uint32_t min16 = (g_bc1_match5[r][1] << 11) | (g_bc1_match6[g][1] << 5) | g_bc1_match5[b][1];
uint32_t max16 = (g_bc1_match5_equals_1[r].m_hi << 11) | (g_bc1_match6_equals_1[g].m_hi << 5) | g_bc1_match5_equals_1[b].m_hi;
uint32_t min16 = (g_bc1_match5_equals_1[r].m_lo << 11) | (g_bc1_match6_equals_1[g].m_lo << 5) | g_bc1_match5_equals_1[b].m_lo;
if ((!use_threecolor_blocks) && (min16 == max16))
{
// This is an annoying edge case that impacts BC3.
// This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPUs.
mask = 0;
// Make l > h
if (min16 > 0)
min16--;
else
{
// l = h = 0
assert(min16 == max16 && max16 == 0);
max16 = 1;
min16 = 0;
mask = 0x55;
}
assert(max16 > min16);
}
if (max16 < min16)
{
std::swap(max16, min16);
mask ^= 0x55;
}
pDst_block->set_low_color(static_cast<uint16_t>(max16));
pDst_block->set_high_color(static_cast<uint16_t>(min16));
pDst_block->m_selectors[0] = static_cast<uint8_t>(mask);
pDst_block->m_selectors[1] = static_cast<uint8_t>(mask);
pDst_block->m_selectors[2] = static_cast<uint8_t>(mask);
pDst_block->m_selectors[3] = static_cast<uint8_t>(mask);
return;
}
const uint32_t selector_range_table = g_etc1_to_dxt1_selector_range_index[low_selector][high_selector];
//[32][8][RANGES][MAPPING]
const etc1_to_dxt1_56_solution* pTable_r = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
const etc1_to_dxt1_56_solution * pTable_g = &g_etc1_to_dxt_6[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
const etc1_to_dxt1_56_solution * pTable_b = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
const etc1_to_dxt1_56_solution *pTable_r = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.r) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
const etc1_to_dxt1_56_solution *pTable_g = &g_etc1_to_dxt_6[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
const etc1_to_dxt1_56_solution *pTable_b = &g_etc1_to_dxt_5[(inten_table * 32 + base_color.b) * (NUM_ETC1_TO_DXT1_SELECTOR_RANGES * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS];
uint32_t best_err = UINT_MAX;
uint32_t best_mapping = 0;
@@ -1829,10 +1896,34 @@ namespace basist
if (l == h)
{
pDst_block->m_selectors[0] = 0;
pDst_block->m_selectors[1] = 0;
pDst_block->m_selectors[2] = 0;
pDst_block->m_selectors[3] = 0;
uint8_t mask = 0;
if (!use_threecolor_blocks)
{
// This is an annoying edge case that impacts BC3.
// Make l > h
if (h > 0)
h--;
else
{
// l = h = 0
assert(l == h && h == 0);
h = 0;
l = 1;
mask = 0x55;
}
assert(l > h);
pDst_block->set_low_color(static_cast<uint16_t>(l));
pDst_block->set_high_color(static_cast<uint16_t>(h));
}
pDst_block->m_selectors[0] = mask;
pDst_block->m_selectors[1] = mask;
pDst_block->m_selectors[2] = mask;
pDst_block->m_selectors[3] = mask;
return;
}
@@ -3402,7 +3493,8 @@ namespace basist
return true;
}
bool basisu_lowlevel_transcoder::transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_stride, bool pvrtc_wrap_addressing)
bool basisu_lowlevel_transcoder::transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt,
uint32_t output_stride, bool pvrtc_wrap_addressing, bool bc1_allow_threecolor_blocks)
{
const uint32_t num_macroblocks_x = (num_blocks_x + 1) >> 1;
const uint32_t num_macroblocks_y = (num_blocks_y + 1) >> 1;
@@ -3598,7 +3690,7 @@ namespace basist
void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (bx + by * num_blocks_x) * output_stride;
#if BASISD_SUPPORT_DXT1
convert_etc1s_to_dxt1(static_cast<dxt1_block*>(pDst_block), &block, pSelector);
convert_etc1s_to_dxt1(static_cast<dxt1_block*>(pDst_block), &block, pSelector, bc1_allow_threecolor_blocks);
#else
assert(0);
#endif
@@ -3931,7 +4023,8 @@ namespace basist
return true;
}
bool basisu_transcoder::transcode_slice(const void *pData, uint32_t data_size, uint32_t slice_index, void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks, block_format fmt, uint32_t output_stride, uint32_t decode_flags) const
bool basisu_transcoder::transcode_slice(const void *pData, uint32_t data_size, uint32_t slice_index, void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks, block_format fmt,
uint32_t output_stride, uint32_t decode_flags) const
{
if (decode_flags & cDecodeFlagsPVRTCDecodeToNextPow2)
{
@@ -3969,7 +4062,7 @@ namespace basist
return m_lowlevel_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y,
pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size,
fmt, output_stride, (decode_flags & cDecodeFlagsPVRTCWrapAddressing) != 0);
fmt, output_stride, (decode_flags & cDecodeFlagsPVRTCWrapAddressing) != 0, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0);
}
int basisu_transcoder::find_first_slice_index(const void *pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const
@@ -4264,8 +4357,8 @@ namespace basist
if (status)
{
// Now decode the color data
status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cBC1, 16, decode_flags);
// Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3.
status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks, cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks);
}
break;
}

View File

@@ -63,7 +63,7 @@ namespace basist
bool decode_tables(const uint8_t *pTable_data, uint32_t table_data_size);
bool transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_stride, bool wrap_addressing);
bool transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_stride, bool wrap_addressing, bool bc1_allow_threecolor_blocks);
private:
struct endpoint
@@ -212,12 +212,15 @@ namespace basist
{
// PVRTC1: texture will use wrap addressing vs. clamp (most PVRTC viewer tools assume wrap addressing, so we default to wrap although that can cause edge artifacts)
cDecodeFlagsPVRTCWrapAddressing = 1,
// PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2.
cDecodeFlagsPVRTCDecodeToNextPow2 = 2,
// When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format
cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4
cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4,
// Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet).
cDecodeFlagsBC1ForbidThreeColorBlocks = 8
};
// transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats.