diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..b13fa73
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,88 @@
+# Build script for the basisu command-line tool (GCC/Clang-style toolchains).
+#
+# Options:
+#   BUILD_X64 - ON builds 64-bit (default); OFF adds -m32.
+#   STATIC    - ON links the executable fully statically.
+#
+# GCC_LINK_FLAGS and COMMON_SRC_LIST are not set anywhere in this file; they
+# expand to nothing unless the caller defines them (e.g. with -D).
+
+# Must precede project() so policy defaults are in place when languages are enabled.
+cmake_minimum_required(VERSION 3.0)
+project(basisu)
+
+option(BUILD_X64 "build 64-bit" TRUE)
+option(STATIC "static linking" FALSE)
+
+message(STATUS "Initial BUILD_X64=${BUILD_X64}")
+message(STATUS "Initial CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")
+
+# Default to an optimized build when the user did not pick a build type.
+if (NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+message(STATUS "${PROJECT_NAME} build type: ${CMAKE_BUILD_TYPE}")
+
+if (BUILD_X64)
+  message(STATUS "Building 64-bit")
+else()
+  message(STATUS "Building 32-bit")
+endif()
+
+# Flags understood by both the C and the C++ compiler.
+set(GCC_COMPILE_FLAGS "-fvisibility=hidden -fPIC -fopenmp -fno-strict-aliasing -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 -Wall -Wextra -Wno-unused-local-typedefs -Wno-unused-value -Wno-unused-parameter -Wno-unused-but-set-variable -Wno-unused-variable")
+
+# C++-only flags, kept separate so they are not handed to the C compiler
+# (which warns that they are not valid for C).
+set(GCC_CXX_COMPILE_FLAGS "-std=c++11 -fvisibility-inlines-hidden -Wno-reorder")
+
+if (NOT BUILD_X64)
+  set(GCC_COMPILE_FLAGS "${GCC_COMPILE_FLAGS} -m32")
+endif()
+
+if (STATIC)
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -static-libgcc -static-libstdc++ -static")
+else()
+  # Comma form keeps "-rpath" and its argument together on the linker command line.
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -Wl,-rpath,.")
+endif()
+
+# Append to the existing flags instead of replacing them, so flags supplied by
+# the user or a toolchain file survive. The project flags go into the base
+# variables only; the per-config variables get just their config-specific
+# additions, so nothing is passed to the compiler twice.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COMPILE_FLAGS} ${GCC_CXX_COMPILE_FLAGS}")
+
+# Each language's Debug flags are derived from that language's own variable.
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -g -D_DEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -D_DEBUG")
+
+set(BASISU_SRC_LIST ${COMMON_SRC_LIST}
+  basisu_backend.cpp
+  basisu_basis_file.cpp
+  basisu_comp.cpp
+  basisu_enc.cpp
+  basisu_etc.cpp
+  basisu_frontend.cpp
+  basisu_global_selector_palette_helpers.cpp
+  basisu_gpu_texture.cpp
+  basisu_pvrtc1_4.cpp
+  basisu_resampler.cpp
+  basisu_resample_filters.cpp
+  basisu_ssim.cpp
+  basisu_tool.cpp
+  lodepng.cpp
+  detex/decompress_bc.c
+  detex/decompress_bc7.c
+  detex/decompress_eac.c
+  transcoder/basisu_transcoder.cpp
+  )
+
+if (APPLE)
+  set(BIN_DIRECTORY "bin_osx")
+else()
+  set(BIN_DIRECTORY "bin")
+endif()
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/${BIN_DIRECTORY}")
+
+add_executable(basisu ${BASISU_SRC_LIST})
+target_link_libraries(basisu PRIVATE m pthread)
+
+# Strip symbols from release binaries. $<TARGET_FILE:basisu> resolves to the
+# file that was actually linked, so the step stays correct if the output
+# directory or executable suffix changes.
+if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
+  if (APPLE)
+    add_custom_command(TARGET basisu POST_BUILD COMMAND strip -X -x $<TARGET_FILE:basisu> VERBATIM)
+  else()
+    add_custom_command(TARGET basisu POST_BUILD COMMAND strip -g -X -x $<TARGET_FILE:basisu> VERBATIM)
+  endif()
+endif()
+
diff --git a/basisu.sln b/basisu.sln
new file mode 100644
index 0000000..dda6ffb
--- /dev/null
+++ b/basisu.sln
@@ -0,0 +1,31 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 16
+VisualStudioVersion = 16.0.28803.202
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "basisu", "basisu.vcxproj", "{59586A07-8E7E-411D-BC3D-387E039AA423}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|x64 = Debug|x64
+ Debug|x86 = Debug|x86
+ Release|x64 = Release|x64
+ Release|x86 = Release|x86
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {59586A07-8E7E-411D-BC3D-387E039AA423}.Debug|x64.ActiveCfg = Debug|x64
+ {59586A07-8E7E-411D-BC3D-387E039AA423}.Debug|x64.Build.0 = Debug|x64
+ {59586A07-8E7E-411D-BC3D-387E039AA423}.Debug|x86.ActiveCfg = Debug|Win32
+ {59586A07-8E7E-411D-BC3D-387E039AA423}.Debug|x86.Build.0 = Debug|Win32
+ {59586A07-8E7E-411D-BC3D-387E039AA423}.Release|x64.ActiveCfg = Release|x64
+ {59586A07-8E7E-411D-BC3D-387E039AA423}.Release|x64.Build.0 = Release|x64
+ {59586A07-8E7E-411D-BC3D-387E039AA423}.Release|x86.ActiveCfg = Release|Win32
+ {59586A07-8E7E-411D-BC3D-387E039AA423}.Release|x86.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ SolutionGuid = {5E583429-7830-4B3A-9DDE-F01B115CE0D8}
+ EndGlobalSection
+EndGlobal
diff --git a/basisu.vcxproj b/basisu.vcxproj
new file mode 100644
index 0000000..b3a0b32
--- /dev/null
+++ b/basisu.vcxproj
@@ -0,0 +1,208 @@
+
+
+
+
+ Debug
+ Win32
+
+
+ Release
+ Win32
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ {59586A07-8E7E-411D-BC3D-387E039AA423}
+ basisu
+ 10.0
+
+
+
+ Application
+ true
+ v142
+ MultiByte
+
+
+ Application
+ false
+ v142
+ true
+ MultiByte
+
+
+ Application
+ true
+ v142
+ MultiByte
+
+
+ Application
+ false
+ v142
+ true
+ MultiByte
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ $(SolutionDir)\bin
+
+
+ $(SolutionDir)\bin
+
+
+ $(SolutionDir)\bin
+
+
+ $(SolutionDir)\bin
+
+
+
+ Level4
+ Disabled
+ true
+ false
+ true
+
+
+
+
+ Console
+
+
+
+
+ Level4
+ Disabled
+ true
+ false
+ true
+
+
+ true
+
+
+ Console
+
+
+
+
+ Level4
+ MaxSpeed
+ true
+ true
+ true
+ false
+ true
+
+
+ NDEBUG;_MBCS;%(PreprocessorDefinitions)
+ false
+ AnySuitable
+
+
+ true
+ true
+ Console
+
+
+
+
+ Level4
+ MaxSpeed
+ true
+ true
+ true
+ false
+ true
+
+
+ NDEBUG;_MBCS;%(PreprocessorDefinitions)
+ false
+ true
+ AnySuitable
+
+
+ true
+ true
+ Console
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/basisu.vcxproj.filters b/basisu.vcxproj.filters
new file mode 100644
index 0000000..c023f4b
--- /dev/null
+++ b/basisu.vcxproj.filters
@@ -0,0 +1,89 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ detex
+
+
+ detex
+
+
+ detex
+
+
+ transcoder
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ detex
+
+
+ detex
+
+
+ detex
+
+
+ detex
+
+
+ transcoder
+
+
+ transcoder
+
+
+ transcoder
+
+
+ transcoder
+
+
+
+
+ transcoder
+
+
+ transcoder
+
+
+ transcoder
+
+
+
+
+ {7a54aaad-1d10-4bdf-b8e9-c14ed2263ed8}
+
+
+ {977e9455-f354-422a-b698-08778483328c}
+
+
+
\ No newline at end of file
diff --git a/basisu_backend.cpp b/basisu_backend.cpp
new file mode 100644
index 0000000..36b6f18
--- /dev/null
+++ b/basisu_backend.cpp
@@ -0,0 +1,1424 @@
+// basisu_backend.cpp
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// TODO: This code originally supported full ETC1 and ETC1S, so there's some legacy stuff in here.
+//
+#include "basisu_backend.h"
+
+#define DISABLE_CODEBOOK_REORDERING (0) // When nonzero, create_macroblocks() fills the endpoint/selector remap tables with the identity mapping instead of running palette_index_reorderer.
+#define BASISU_BACKEND_VERIFY(c) verify(c, __LINE__); // Calls verify() with the invoking source line. NOTE(review): the expansion already ends in ';', so a call site that adds its own produces an extra empty statement.
+
+namespace basisu
+{
+ const uint32_t TOTAL_MACROBLOCK_DIFF_BITS = 4; // Width of a macroblock's packed diff-bit field: create_macroblocks() shifts in one diff bit for each of the 4 blocks.
+ const uint32_t TOTAL_MACROBLOCK_FLIP_BITS = 4; // Width of a macroblock's packed flip-bit field: create_macroblocks() shifts in one flip bit for each of the 4 blocks.
+
+ // TODO
+ // Aborts the process when a backend invariant does not hold.
+ //   condition - the invariant being checked; the call is a no-op when it is true.
+ //   line      - source line printed in the diagnostic (BASISU_BACKEND_VERIFY passes __LINE__).
+ static void verify(bool condition, int line)
+ {
+     if (condition)
+         return;
+
+     fprintf(stderr, "basisu_backend: verify() failed at line %i!\n", line);
+     abort();
+ }
+
+ // Default constructor: puts the new backend into the state produced by clear().
+ basisu_backend::basisu_backend()
+ {
+     this->clear();
+ }
+
+ // Returns the backend to its unconfigured state: forgets the frontend and global selector
+ // codebook pointers and clears the stored parameters and the output.
+ void basisu_backend::clear()
+ {
+     m_pFront_end = NULL;
+     // init() assigns this pointer; reset it here as well so it is never left
+     // indeterminate after construction (the constructor only calls clear()).
+     m_pGlobal_sel_codebook = NULL;
+     m_params.clear();
+     m_output.clear();
+ }
+
+ // Stores everything the later encoding stages read, then logs the configuration
+ // through debug_printf().
+ //   pFront_end           - frontend to read clusters and blocks from; the pointer is stored as-is.
+ //   params               - backend parameters; copied into m_params.
+ //   slice_descs          - slice descriptors; copied into m_slices.
+ //   pGlobal_sel_codebook - global selector codebook; the pointer is stored as-is.
+ void basisu_backend::init(basisu_frontend *pFront_end, basisu_backend_params &params, const basisu_backend_slice_desc_vec &slice_descs, const basist::etc1_global_selector_codebook *pGlobal_sel_codebook)
+ {
+     m_pFront_end = pFront_end;
+     m_params = params;
+     m_slices = slice_descs;
+     m_pGlobal_sel_codebook = pGlobal_sel_codebook;
+
+     // size() yields size_t but the format string uses %u: narrow explicitly so the
+     // variadic argument matches its conversion specifier on 64-bit targets.
+     const uint32_t total_slices = static_cast<uint32_t>(m_slices.size());
+
+     debug_printf("basisu_backend::Init: Slices: %u, ETC1S: %u, DeltaSelectorRDOQualityThresh: %f, UseGlobalSelCodebook: %u, GlobalSelCodebookPalBits: %u, GlobalSelCodebookModBits: %u, Use hybrid selector codebooks: %u\n",
+         total_slices,
+         params.m_etc1s,
+         params.m_delta_selector_rdo_quality_thresh,
+         params.m_use_global_sel_codebook,
+         params.m_global_sel_codebook_pal_bits,
+         params.m_global_sel_codebook_mod_bits,
+         params.m_use_hybrid_sel_codebooks);
+
+     for (uint32_t i = 0; i < total_slices; i++)
+     {
+         const auto &slice = m_slices[i];
+
+         debug_printf("Slice: %u, OrigWidth: %u, OrigHeight: %u, Width: %u, Height: %u, NumBlocksX: %u, NumBlocksY: %u, NumMacroBlocksX: %u, NumMacroBlocksY: %u, FirstBlockIndex: %u\n",
+             i,
+             slice.m_orig_width, slice.m_orig_height,
+             slice.m_width, slice.m_height,
+             slice.m_num_blocks_x, slice.m_num_blocks_y,
+             slice.m_num_macroblocks_x, slice.m_num_macroblocks_y,
+             slice.m_first_block_index);
+     }
+ }
+
+ // Builds m_endpoint_palette with one entry per frontend endpoint cluster, copying each
+ // cluster's unscaled color and intensity table. A cluster the frontend reports as unused
+ // fails BASISU_BACKEND_VERIFY.
+ void basisu_backend::create_endpoint_palette()
+ {
+     const basisu_frontend &frontend = *m_pFront_end;
+     const uint32_t total_clusters = frontend.get_total_endpoint_clusters();
+
+     m_endpoint_palette.resize(total_clusters);
+
+     for (uint32_t cluster = 0; cluster < total_clusters; cluster++)
+     {
+         etc1_endpoint_palette_entry &entry = m_endpoint_palette[cluster];
+
+         entry.m_color5_valid = frontend.get_endpoint_cluster_color_is_used(cluster, false);
+
+         if (!entry.m_color5_valid)
+         {
+             BASISU_BACKEND_VERIFY(false);
+         }
+         else
+         {
+             entry.m_color5 = frontend.get_endpoint_cluster_unscaled_color(cluster, false);
+             entry.m_inten5 = frontend.get_endpoint_cluster_inten_table(cluster, false);
+         }
+     }
+ }
+
+ void basisu_backend::create_selector_palette() // Fills m_selector_palette (and, when a global codebook is used, m_global_selector_palette_desc) from the frontend's selector clusters.
+ {
+ const basisu_frontend &r = *m_pFront_end;
+ // One palette entry per frontend selector cluster.
+ m_selector_palette.resize(r.get_total_selector_clusters());
+
+ if (m_params.m_use_global_sel_codebook) // Global-codebook path: also record, per cluster, which codebook entry it refers to.
+ {
+ m_global_selector_palette_desc.resize(r.get_total_selector_clusters());
+
+ for (int i = 0; i < static_cast(r.get_total_selector_clusters()); i++) // NOTE(review): static_cast has no target type here (template argument appears stripped) - confirm against the upstream file.
+ {
+ basist::etc1_selector_palette_entry &selector_pal_entry = m_selector_palette[i];
+
+ etc1_global_selector_cb_entry_desc &pal_entry_desc = m_global_selector_palette_desc[i];
+ pal_entry_desc.m_pal_index = r.get_selector_cluster_global_selector_entry_ids()[i].m_palette_index;
+ pal_entry_desc.m_mod_index = r.get_selector_cluster_global_selector_entry_ids()[i].m_modifier.get_index();
+ // Without hybrid codebooks every cluster is marked as using the global codebook; with them the frontend decides per cluster.
+ pal_entry_desc.m_was_used = true;
+ if (m_params.m_use_hybrid_sel_codebooks)
+ pal_entry_desc.m_was_used = r.get_selector_cluster_uses_global_cb_vec()[i];
+
+ if (pal_entry_desc.m_was_used) // Cluster uses the global codebook: copy the selectors of the referenced codebook entry.
+ {
+ const etc_block &selector_bits = r.get_selector_cluster_selector_bits(i);
+ (void)selector_bits; // Only read by the assert below; keeps the variable "used" when asserts are compiled out.
+
+ basist::etc1_selector_palette_entry global_pal_entry(m_pGlobal_sel_codebook->get_entry(r.get_selector_cluster_global_selector_entry_ids()[i]));
+
+ for (uint32_t y = 0; y < 4; y++)
+ {
+ for (uint32_t x = 0; x < 4; x++)
+ {
+ selector_pal_entry(x, y) = global_pal_entry(x, y);
+ // Sanity check: the frontend's selector for this cluster must equal the codebook entry's.
+ assert(selector_bits.get_selector(x, y) == global_pal_entry(x, y));
+ }
+ }
+ }
+ else // Cluster does not use the global codebook: take the frontend's selector bits directly.
+ {
+ const etc_block &selector_bits = r.get_selector_cluster_selector_bits(i);
+
+ for (uint32_t y = 0; y < 4; y++)
+ for (uint32_t x = 0; x < 4; x++)
+ selector_pal_entry[y * 4 + x] = static_cast(selector_bits.get_selector(x, y)); // NOTE(review): cast target type is missing here as well - confirm.
+ }
+ }
+ }
+ else // No global codebook: every entry comes straight from the frontend's selector bits, stored row-major (y * 4 + x).
+ {
+ for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)
+ {
+ basist::etc1_selector_palette_entry &s = m_selector_palette[i];
+
+ const etc_block &selector_bits = r.get_selector_cluster_selector_bits(i);
+
+ for (uint32_t y = 0; y < 4; y++)
+ {
+ for (uint32_t x = 0; x < 4; x++)
+ {
+ s[y * 4 + x] = static_cast(selector_bits.get_selector(x, y));
+ }
+ }
+ }
+ }
+ }
+
+ // endpoint palette
+ // 5:5:5 and predicted 4:4:4 colors, 1 or 2 3-bit intensity table indices
+ // selector palette
+ // 4x4 2-bit selectors
+
+ // per-macroblock:
+ // 4 diff bits
+ // 4 flip bits
+ // Endpoint template index, 1-8 endpoint indices
+ // Alternately, if no template applies, we can send 4 ETC1S bits followed by 4-8 endpoint indices
+ // 4 selector indices
+
+ // Similarity callback comparing two selector palette entries.
+ //   index_a, index_b - indices into the backend's m_selector_palette.
+ //   pContext         - opaque callback context; must point to the basisu_backend whose
+ //                      palette is being compared.
+ // Returns a weight in [0, 1]: 1.0 when calc_hamming_dist() reports 0, decreasing linearly
+ // to 0.0 at a distance of 32 or more.
+ float basisu_backend::selector_zeng_similarity_func(uint32_t index_a, uint32_t index_b, void *pContext)
+ {
+     // The cast target types were missing in this copy of the file; each is fixed by the
+     // type of the variable it initializes.
+     basisu_backend& backend = *static_cast<basisu_backend *>(pContext);
+
+     const basist::etc1_selector_palette_entry &a = backend.m_selector_palette[index_a];
+     const basist::etc1_selector_palette_entry &b = backend.m_selector_palette[index_b];
+
+     float total = static_cast<float>(a.calc_hamming_dist(b));
+
+     float weight = 1.0f - clamp(total * (1.0f / 32.0f), 0.0f, 1.0f);
+     return weight;
+ }
+
+ void basisu_backend::create_macroblocks() // Groups every slice's blocks into 2x2 macroblocks (m_slice_macroblocks), then builds the endpoint/selector index remap tables.
+ {
+ const basisu_frontend &r = *m_pFront_end;
+ // One 2D macroblock array per slice.
+ m_slice_macroblocks.resize(m_slices.size());
+
+ uint_vec all_endpoint_indices;
+ uint_vec all_selector_indices;
+ // The two vectors above collect indices in traversal order; they feed the palette reorderers after the slice loop.
+ uint32_t total_template_exceptions = 0;
+
+ for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
+ {
+ const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
+
+ const uint32_t width = m_slices[slice_index].m_width; // NOTE(review): width and height are not read anywhere in this function.
+ const uint32_t height = m_slices[slice_index].m_height;
+ const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
+ const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
+
+ const uint32_t num_macroblocks_x = m_slices[slice_index].m_num_macroblocks_x;
+ const uint32_t num_macroblocks_y = m_slices[slice_index].m_num_macroblocks_y;
+
+ m_slice_macroblocks[slice_index].resize(num_macroblocks_x, num_macroblocks_y);
+
+ for (uint32_t macroblock_y = 0; macroblock_y < num_macroblocks_y; macroblock_y++)
+ {
+ const uint32_t y = macroblock_y * 2;
+ // Serpentine traversal: even macroblock rows run left-to-right, odd rows right-to-left.
+ const int x_start = (macroblock_y & 1) ? (num_macroblocks_x - 1) : 0;
+ const int x_end = (macroblock_y & 1) ? -1 : num_macroblocks_x;
+ const int x_dir = (macroblock_y & 1) ? -1 : 1;
+
+ for (int macroblock_x = x_start; macroblock_x != x_end; macroblock_x += x_dir)
+ {
+ const uint32_t x = macroblock_x * 2;
+ // Global indices of the macroblock's 2x2 blocks; x+1 and y+1 are clamped, so a macroblock on the right/bottom edge repeats the last block.
+ uint32_t block_indices[4];
+ block_indices[0] = first_block_index + x + y * num_blocks_x;
+ block_indices[1] = first_block_index + minimum(x + 1, num_blocks_x - 1) + y * num_blocks_x;
+ block_indices[2] = first_block_index + x + minimum(y + 1, num_blocks_y - 1) * num_blocks_x;
+ block_indices[3] = first_block_index + minimum(x + 1, num_blocks_x - 1) + minimum(y + 1, num_blocks_y - 1) * num_blocks_x;
+
+ etc_block macroblock[4];
+ for (uint32_t i = 0; i < 4; i++)
+ macroblock[i] = r.get_output_block(block_indices[i]);
+ // Pack each block's flip/diff flag into a 4-bit field; block 0 ends up in the most significant of the 4 bits.
+ uint32_t flip_bits = 0;
+ uint32_t diff_bits = 0;
+ for (uint32_t k = 0; k < 4; k++)
+ {
+ flip_bits = (flip_bits << 1) | (macroblock[k].get_flip_bit() ? 1 : 0);
+ diff_bits = (diff_bits << 1) | (macroblock[k].get_diff_bit() ? 1 : 0);
+ }
+
+ etc1_macroblock m;
+
+ m.m_diff_bits = static_cast(diff_bits); // NOTE(review): static_cast has no target type here and on the next line (template argument appears stripped) - confirm against the upstream file.
+ m.m_flip_bits = static_cast(flip_bits);
+ // NOTE(review): endpoint_indices is appended to below but never read afterwards in this function.
+ uint_vec endpoint_indices;
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ endpoint_indices.push_back(r.get_subblock_endpoint_cluster_index(block_indices[i], 0));
+ endpoint_indices.push_back(r.get_subblock_endpoint_cluster_index(block_indices[i], 1));
+ // For diff-mode blocks, require set_block_color5_check() to accept the two cluster colors (presumably ETC1's differential range check - confirm).
+ if (macroblock[i].get_diff_bit())
+ {
+ uint32_t e0 = r.get_subblock_endpoint_cluster_index(block_indices[i], 0);
+ uint32_t e1 = r.get_subblock_endpoint_cluster_index(block_indices[i], 1);
+
+ color_rgba c0(r.get_endpoint_cluster_unscaled_color(e0, false));
+ color_rgba c1(r.get_endpoint_cluster_unscaled_color(e1, false));
+
+ etc_block test_block;
+ if (!test_block.set_block_color5_check(c0, c1))
+ {
+ BASISU_BACKEND_VERIFY(0);
+ }
+ }
+
+ m.m_selector_indices.push_back(r.get_block_selector_cluster_index(block_indices[i]));
+ }
+ // Build the macroblock-local endpoint palette (distinct cluster indices in first-seen order) plus, for each of the 8 subblocks, its index into that palette.
+ int_vec endpoint_palette;
+ uint8_t endpoint_palette_indices[8];
+ uint32_t n = 0;
+
+ for (uint32_t ty = 0; ty < 2; ty++)
+ {
+ for (uint32_t tx = 0; tx < 2; tx++)
+ {
+ for (uint32_t t = 0; t < 2; t++)
+ {
+ int endpoint_index = r.get_subblock_endpoint_cluster_index(block_indices[tx + ty * 2], t);
+ // Linear search for an existing palette slot holding this cluster index.
+ uint32_t p;
+ for (p = 0; p < endpoint_palette.size(); p++)
+ if (endpoint_palette[p] == endpoint_index)
+ break;
+ // Not found: p equals the old size, which is exactly the slot the push_back creates.
+ if (p >= endpoint_palette.size())
+ {
+ endpoint_palette.push_back(endpoint_index);
+ }
+
+ endpoint_palette_indices[n++] = static_cast(p);
+ }
+ }
+ }
+ // Search the predefined templates for one whose 8 local indices match this pattern exactly.
+ uint32_t t;
+ for (t = 0; t < basist::TOTAL_ENDPOINT_INDEX_TEMPLATES; t++)
+ {
+ if (memcmp(endpoint_palette_indices, basist::g_endpoint_index_templates[t].m_local_indices, 8) == 0)
+ break;
+ }
+
+ // TODO: There shouldn't be any exceptions in ETC1S
+ if (t == basist::TOTAL_ENDPOINT_INDEX_TEMPLATES)
+ {
+ endpoint_palette.resize(0);
+ n = 0;
+ clear_obj(endpoint_palette_indices);
+ // Fallback: rebuild the palette without sharing entries across blocks (a block's two subblocks share one entry only when their indices are equal), then search again.
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ uint32_t endpoint_index0 = r.get_subblock_endpoint_cluster_index(block_indices[i], 0);
+ uint32_t endpoint_index1 = r.get_subblock_endpoint_cluster_index(block_indices[i], 1);
+
+ endpoint_palette_indices[n++] = static_cast(endpoint_palette.size());
+ endpoint_palette.push_back(endpoint_index0);
+
+ if (endpoint_index0 != endpoint_index1)
+ {
+ endpoint_palette.push_back(endpoint_index1);
+ }
+ // size() - 1 is the slot of index1 when it was pushed, otherwise the slot of index0.
+ endpoint_palette_indices[n++] = static_cast(endpoint_palette.size() - 1);
+ }
+
+ for (t = 0; t < basist::TOTAL_ENDPOINT_INDEX_TEMPLATES; t++)
+ {
+ if (memcmp(endpoint_palette_indices, basist::g_endpoint_index_templates[t].m_local_indices, 8) == 0)
+ break;
+ }
+ // The rebuilt pattern is required to match some template.
+ BASISU_BACKEND_VERIFY(t != basist::TOTAL_ENDPOINT_INDEX_TEMPLATES);
+
+ total_template_exceptions++;
+ }
+
+ m.m_template_index = t;
+ m.m_endpoint_indices = endpoint_palette;
+ // Repeat the diff-mode color check, this time resolving each subblock's endpoint through the chosen template and the local palette.
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (!macroblock[i].get_diff_bit())
+ continue;
+
+ uint32_t l0 = basist::g_endpoint_index_templates[t].m_local_indices[i * 2 + 0];
+ uint32_t l1 = basist::g_endpoint_index_templates[t].m_local_indices[i * 2 + 1];
+
+ uint32_t e0 = endpoint_palette[l0];
+ uint32_t e1 = endpoint_palette[l1];
+
+ //uint32_t e0 = r.get_subblock_endpoint_cluster_index(block_indices[i], 0);
+ //uint32_t e1 = r.get_subblock_endpoint_cluster_index(block_indices[i], 1);
+
+ color_rgba c0(r.get_endpoint_cluster_unscaled_color(e0, false));
+ color_rgba c1(r.get_endpoint_cluster_unscaled_color(e1, false));
+
+ etc_block test_block;
+ if (!test_block.set_block_color5_check(c0, c1))
+ {
+ BASISU_BACKEND_VERIFY(0);
+ }
+ }
+
+ m_slice_macroblocks[slice_index](macroblock_x, macroblock_y) = m;
+ // Append this macroblock's palette and selector indices to the traversal-ordered streams.
+ for (uint32_t i = 0; i < endpoint_palette.size(); i++)
+ all_endpoint_indices.push_back(endpoint_palette[i]);
+
+ for (uint32_t i = 0; i < m.m_selector_indices.size(); i++)
+ all_selector_indices.push_back(m.m_selector_indices[i]);
+
+ } // macroblock_x
+
+ } // macroblock_y
+ } // slice
+
+ debug_printf("Total template exception: %u out of %u %3.1f%%\n", total_template_exceptions, get_total_macroblocks(), total_template_exceptions * 100.0f / get_total_macroblocks());
+
+#if DISABLE_CODEBOOK_REORDERING
+ m_endpoint_remap_table_old_to_new.resize(r.get_total_endpoint_clusters());
+ for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++)
+ m_endpoint_remap_table_old_to_new[i] = i;
+
+ m_selector_remap_table_old_to_new.resize(r.get_total_selector_clusters());
+ for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)
+ m_selector_remap_table_old_to_new[i] = i;
+#else
+ {
+ //create_zeng_reorder_table(r.get_total_endpoint_clusters(), all_endpoint_indices.size(), all_endpoint_indices.get_ptr(), m_endpoint_remap_table_old_to_new, NULL, NULL, 0.0f);
+ // Maps old to new endpoint indices. NOTE(review): &all_endpoint_indices[0] indexes element 0, so this assumes at least one macroblock was produced - confirm callers never pass zero slices.
+ palette_index_reorderer reorderer;
+ reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0);
+ m_endpoint_remap_table_old_to_new = reorderer.get_remap_table();
+ }
+
+ // Maps old to new selector indices
+ {
+ //const float selector_similarity_func_weight = 1.0f;
+ //create_zeng_reorder_table(r.get_total_selector_clusters(), all_selector_indices.size(), all_selector_indices.get_ptr(), m_selector_remap_table_old_to_new, selector_zeng_similarity_func, this, selector_similarity_func_weight);
+ //create_zeng_reorder_table(r.get_total_selector_clusters(), all_selector_indices.size(), all_selector_indices.get_ptr(), m_selector_remap_table_old_to_new, NULL, NULL, 0.0f);
+
+ palette_index_reorderer reorderer;
+ reorderer.init((uint32_t)all_selector_indices.size(), &all_selector_indices[0], r.get_total_selector_clusters(), nullptr, nullptr, 0);
+ m_selector_remap_table_old_to_new = reorderer.get_remap_table();
+ }
+
+#endif
+ m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters()); // Maps new to old endpoint indices (inverse of the old-to-new table).
+ for (uint32_t i = 0; i < m_endpoint_remap_table_old_to_new.size(); i++)
+ m_endpoint_remap_table_new_to_old[m_endpoint_remap_table_old_to_new[i]] = i;
+
+ // Maps new to old selector indices
+ m_selector_remap_table_new_to_old.resize(r.get_total_selector_clusters());
+ for (uint32_t i = 0; i < m_selector_remap_table_old_to_new.size(); i++)
+ m_selector_remap_table_new_to_old[m_selector_remap_table_old_to_new[i]] = i;
+ // The extra selector ordering pass only runs when no global selector codebook is in use.
+ if (!m_params.m_use_global_sel_codebook)
+ optimize_selector_palette_order(all_selector_indices);
+ }
+
+ void basisu_backend::optimize_selector_palette_order(const uint_vec &all_selector_indices) // Reorders selector palette entries inside fixed-size groups so neighbours are similar, then rewrites both selector remap tables. all_selector_indices holds "old" selector indices.
+ {
+ const basisu_frontend &r = *m_pFront_end;
+ // Usage count per selector, indexed by its current "new" index.
+ uint_vec new_selector_hist(r.get_total_selector_clusters());
+ for (uint32_t i = 0; i < all_selector_indices.size(); i++)
+ new_selector_hist[m_selector_remap_table_old_to_new[all_selector_indices[i]]]++;
+ // Locate the most used new index; the strict '>' keeps the first one on ties.
+ uint32_t max_hist_value = 0;
+ uint32_t max_hist_value_index = 0;
+ for (uint32_t i = 0; i < new_selector_hist.size(); i++)
+ {
+ if (new_selector_hist[i] > max_hist_value)
+ {
+ max_hist_value = new_selector_hist[i];
+ max_hist_value_index = i;
+ }
+ }
+ // Receives the permutation of new indices, appended group by group.
+ uint_vec optimized_selector_order;
+ // The palette is processed in groups of N consecutive new indices [i, e).
+ const uint32_t N = 32;
+ for (uint32_t i = 0; i < r.get_total_selector_clusters(); i += N)
+ {
+ const uint32_t e = minimum(i + N, r.get_total_selector_clusters());
+ // Groups overlapping the +/-16 window around the most used index keep their order. NOTE(review): static_cast has no target type on the next line (template argument appears stripped) - confirm.
+ if (do_excl_ranges_overlap(i, e, static_cast(max_hist_value_index) - 16, static_cast(max_hist_value_index) + 16))
+ {
+ for (uint32_t j = i; j < e; j++)
+ optimized_selector_order.push_back(j);
+ continue;
+ }
+ // Greedy chain: start from the group's first entry, then repeatedly append the remaining entry with the smallest calc_hamming_dist() to the entry appended last.
+ basist::etc1_selector_palette_entry prev_entry(m_selector_palette[m_selector_remap_table_new_to_old[i]]);
+
+ optimized_selector_order.push_back(i);
+
+ uint_vec remaining_entries;
+ for (uint32_t j = i + 1; j < e; j++)
+ remaining_entries.push_back(j);
+ // One iteration per entry still to be placed; j itself is only a counter.
+ for (uint32_t j = i + 1; j < e; j++)
+ {
+ uint32_t best_dist = UINT32_MAX;
+ uint32_t best_entry = 0;
+ // best_entry is a position within remaining_entries, not a selector index.
+ for (uint32_t k = 0; k < remaining_entries.size(); k++)
+ {
+ uint32_t dist = prev_entry.calc_hamming_dist(m_selector_palette[m_selector_remap_table_new_to_old[remaining_entries[k]]]);
+ if (dist < best_dist)
+ {
+ best_dist = dist;
+ best_entry = k;
+ }
+ }
+
+ optimized_selector_order.push_back(remaining_entries[best_entry]);
+
+ prev_entry = m_selector_palette[m_selector_remap_table_new_to_old[remaining_entries[best_entry]]];
+
+ remaining_entries.erase(remaining_entries.begin() + best_entry);
+ }
+ }
+ // Compose the permutation with the existing new-to-old table to get the final new-to-old mapping.
+ uint_vec temp(r.get_total_selector_clusters());
+ for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)
+ temp[i] = m_selector_remap_table_new_to_old[optimized_selector_order[i]];
+
+ m_selector_remap_table_new_to_old = temp;
+ // Rebuild old-to-new as the inverse of the updated table.
+ for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++)
+ m_selector_remap_table_old_to_new[m_selector_remap_table_new_to_old[i]] = i;
+ }
+
+ bool basisu_backend::encode_image()
+ {
+ const basisu_frontend &r = *m_pFront_end;
+
+ uint_vec endpoint_histogram(r.get_total_endpoint_clusters() * 2);
+ uint_vec selector_histogram(r.get_total_selector_clusters() * 2);
+ uint_vec actual_selector_histogram(r.get_total_selector_clusters());
+
+ // TODO: Choose the size in an intelligent way (try different sizes?)
+ const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64;
+ basist::approx_move_to_front selector_history_buf(MAX_SELECTOR_HISTORY_BUF_SIZE);
+ histogram selector_history_buf_histogram(MAX_SELECTOR_HISTORY_BUF_SIZE);
+
+ uint32_t total_used_selector_history_buf = 0;
+
+ histogram delta_endpoint_histogram(r.get_total_endpoint_clusters() * 2);
+ histogram delta_selector_histogram(MAX_SELECTOR_HISTORY_BUF_SIZE + r.get_total_selector_clusters() * 2 + 1);
+ histogram template_histogram(basist::TOTAL_ENDPOINT_INDEX_TEMPLATES);
+
+ const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3;
+ const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6;
+ const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);
+
+ const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = r.get_total_selector_clusters() * 2;
+ const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + MAX_SELECTOR_HISTORY_BUF_SIZE;
+
+ histogram selector_history_buf_rle_histogram(1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS);
+
+ uint32_t total_selector_indices_remapped = 0;
+
+ std::vector selector_syms(m_slices.size());
+
+ m_output.m_slice_image_crcs.resize(m_slices.size());
+
+ for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
+ {
+ const uint32_t first_block_index = m_slices[slice_index].m_first_block_index;
+ const uint32_t width = m_slices[slice_index].m_width;
+ const uint32_t height = m_slices[slice_index].m_height;
+ const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
+ const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
+ const uint32_t num_macroblocks_x = m_slices[slice_index].m_num_macroblocks_x;
+ const uint32_t num_macroblocks_y = m_slices[slice_index].m_num_macroblocks_y;
+
+ selector_history_buf.reset();
+
+ int prev_endpoint_index = 0;
+ int prev_selector_index = 0;
+ int selector_history_buf_rle_count = 0;
+
+ gpu_image gi;
+ gi.init(cETC1, width, height);
+
+ for (uint32_t macroblock_y = 0; macroblock_y < num_macroblocks_y; macroblock_y++)
+ {
+ const uint32_t y = macroblock_y * 2;
+
+ const int x_start = (macroblock_y & 1) ? (num_macroblocks_x - 1) : 0;
+ const int x_end = (macroblock_y & 1) ? -1 : num_macroblocks_x;
+ const int x_dir = (macroblock_y & 1) ? -1 : 1;
+
+ for (int macroblock_x = x_start; macroblock_x != x_end; macroblock_x += x_dir)
+ {
+ const uint32_t x = macroblock_x * 2;
+
+ uint32_t block_indices[4];
+ block_indices[0] = first_block_index + x + y * num_blocks_x;
+ block_indices[1] = first_block_index + minimum(x + 1, num_blocks_x - 1) + y * num_blocks_x;
+ block_indices[2] = first_block_index + x + minimum(y + 1, num_blocks_y - 1) * num_blocks_x;
+ block_indices[3] = first_block_index + minimum(x + 1, num_blocks_x - 1) + minimum(y + 1, num_blocks_y - 1) * num_blocks_x;
+
+ etc1_macroblock &m = m_slice_macroblocks[slice_index](macroblock_x, macroblock_y);
+
+ template_histogram.inc(m.m_template_index);
+
+ for (uint32_t i = 0; i < m.m_endpoint_indices.size(); i++)
+ {
+ int idx = m_endpoint_remap_table_old_to_new[m.m_endpoint_indices[i]];
+
+ int delta_idx = idx - prev_endpoint_index;
+ prev_endpoint_index = idx;
+
+ m.m_endpoint_indices[i] = idx;
+ m.m_endpoint_delta_indices.push_back(delta_idx);
+
+ delta_endpoint_histogram.inc(delta_idx + r.get_total_endpoint_clusters());
+
+ endpoint_histogram[r.get_total_endpoint_clusters() + delta_idx]++;
+ }
+
+ for (uint32_t i = 0; i < m.m_selector_indices.size(); i++)
+ {
+ int idx = m_selector_remap_table_old_to_new[m.m_selector_indices[i]];
+
+ int selector_history_buf_index = -1;
+
+#if 1
+ if (m_params.m_delta_selector_rdo_quality_thresh > 0.0f)
+ {
+ const pixel_block &src_pixels = r.get_source_pixel_block(block_indices[i]);
+
+ etc_block etc_blk(r.get_output_block(block_indices[i]));
+
+ color_rgba etc_blk_unpacked[16];
+ unpack_etc1(etc_blk, etc_blk_unpacked);
+
+ uint64_t cur_err = 0;
+ for (uint32_t p = 0; p < 16; p++)
+ cur_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false);
+
+ uint64_t best_trial_err = UINT64_MAX;
+ int best_trial_idx = 0;
+ uint32_t best_trial_history_buf_idx = 0;
+
+ //int cur_delta_idx = idx - prev_selector_index;
+
+ etc_block best_trial_etc_block;
+
+ const float SELECTOR_REMAP_THRESH = maximum(1.0f, m_params.m_delta_selector_rdo_quality_thresh); //2.5f;
+
+ for (uint32_t j = 0; j < selector_history_buf.size(); j++)
+ {
+ int trial_idx = selector_history_buf[j];
+
+ for (uint32_t sy = 0; sy < 4; sy++)
+ for (uint32_t sx = 0; sx < 4; sx++)
+ etc_blk.set_selector(sx, sy, m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](sx, sy));
+
+ unpack_etc1(etc_blk, etc_blk_unpacked);
+
+ uint64_t trial_err = 0;
+ for (uint32_t p = 0; p < 16; p++)
+ trial_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false);
+
+ if (trial_err <= cur_err * SELECTOR_REMAP_THRESH)
+ {
+ //int trial_delta_idx = trial_idx - prev_selector_index;
+
+ if (trial_err < best_trial_err)
+ {
+ best_trial_err = trial_err;
+ best_trial_idx = trial_idx;
+ best_trial_etc_block = etc_blk;
+ best_trial_history_buf_idx = j;
+ }
+ }
+ }
+
+ if (best_trial_err != UINT64_MAX)
+ {
+ idx = best_trial_idx;
+
+ //total_selector_indices_remapped++;
+
+ total_used_selector_history_buf++;
+
+ selector_history_buf_index = best_trial_history_buf_idx;
+
+ selector_history_buf_histogram.inc(best_trial_history_buf_idx);
+ }
+ }
+#endif
+
+#if 1
+ if ((selector_history_buf_index < 0) && (m_params.m_delta_selector_rdo_quality_thresh > 0.0f))
+ {
+ const pixel_block &src_pixels = r.get_source_pixel_block(block_indices[i]);
+
+ etc_block etc_blk(r.get_output_block(block_indices[i]));
+
+ color_rgba etc_blk_unpacked[16];
+ unpack_etc1(etc_blk, etc_blk_unpacked);
+
+ uint64_t cur_err = 0;
+ for (uint32_t p = 0; p < 16; p++)
+ cur_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false);
+
+ uint64_t best_trial_err = UINT64_MAX;
+ int best_trial_idx = 0;
+
+ int cur_delta_idx = idx - prev_selector_index;
+
+ etc_block best_trial_etc_block;
+
+ const float SELECTOR_REMAP_THRESH = maximum(1.0f, m_params.m_delta_selector_rdo_quality_thresh); //2.5f;
+
+ for (int d = -cur_delta_idx + 1; d < cur_delta_idx; d++)
+ {
+ int trial_idx = prev_selector_index + d;
+ if (trial_idx < 0)
+ continue;
+ else if (trial_idx >= static_cast(r.get_total_selector_clusters()))
+ continue;
+
+ if (trial_idx == idx)
+ continue;
+
+ //etc_blk.set_raw_selector_bits(r.get_selector_cluster_selector_bits(m_selector_remap_table_new_to_old[trial_idx]).get_raw_selector_bits());
+ for (uint32_t sy = 0; sy < 4; sy++)
+ for (uint32_t sx = 0; sx < 4; sx++)
+ etc_blk.set_selector(sx, sy, m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](sx, sy));
+
+ unpack_etc1(etc_blk, etc_blk_unpacked);
+
+ uint64_t trial_err = 0;
+ for (uint32_t p = 0; p < 16; p++)
+ trial_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false);
+
+ if (trial_err < cur_err * SELECTOR_REMAP_THRESH)
+ {
+ int trial_delta_idx = trial_idx - prev_selector_index;
+
+ const int N = r.get_total_selector_clusters() / 4;
+ if (iabs(trial_delta_idx) < (uint32_t)N)
+ {
+ float f = iabs(trial_delta_idx) / float(N);
+
+ f = powf(f, 2.0f);
+
+ trial_err = static_cast(trial_err * lerp(.4f, 1.0f, f));
+ }
+
+ if (trial_err < best_trial_err)
+ {
+ best_trial_err = trial_err;
+ best_trial_idx = trial_idx;
+ best_trial_etc_block = etc_blk;
+ }
+ }
+ }
+
+ if (best_trial_err != UINT64_MAX)
+ {
+ idx = best_trial_idx;
+
+ total_selector_indices_remapped++;
+ }
+ } // if (m_params.m_delta_selector_rdo_quality_thresh >= 1.0f)
+#endif
+
+ int delta_idx = idx - prev_selector_index;
+ prev_selector_index = idx;
+
+ m.m_selector_indices[i] = m_selector_remap_table_new_to_old[idx];
+
+ if ((selector_history_buf_rle_count) && (selector_history_buf_index != 0))
+ {
+ if (selector_history_buf_rle_count >= (int)SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH)
+ {
+ selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
+ selector_syms[slice_index].push_back(selector_history_buf_rle_count);
+
+ int run_sym = selector_history_buf_rle_count - SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
+ if (run_sym >= ((int)SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
+ selector_history_buf_rle_histogram.inc(SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1);
+ else
+ selector_history_buf_rle_histogram.inc(run_sym);
+
+ delta_selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
+ }
+ else
+ {
+ for (int k = 0; k < selector_history_buf_rle_count; k++)
+ {
+ uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0;
+
+ selector_syms[slice_index].push_back(sym_index);
+
+ delta_selector_histogram.inc(sym_index);
+ }
+ }
+
+ selector_history_buf_rle_count = 0;
+ }
+
+ if (selector_history_buf_index >= 0)
+ {
+ if (selector_history_buf_index == 0)
+ selector_history_buf_rle_count++;
+ else
+ {
+ uint32_t delta_indices_sym = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + selector_history_buf_index;
+
+ selector_syms[slice_index].push_back(delta_indices_sym);
+
+ delta_selector_histogram.inc(delta_indices_sym);
+ }
+ }
+ else
+ {
+ uint32_t delta_indices_sym = delta_idx + r.get_total_selector_clusters();
+
+ selector_syms[slice_index].push_back(delta_indices_sym);
+
+ delta_selector_histogram.inc(delta_indices_sym);
+ }
+
+ m.m_selector_delta_indices.push_back(delta_idx);
+ m.m_selector_history_buf_indices.push_back(selector_history_buf_index);
+
+ actual_selector_histogram[idx]++;
+ selector_histogram[r.get_total_selector_clusters() + delta_idx]++;
+
+ if (selector_history_buf_index < 0)
+ selector_history_buf.add(idx);
+ else if (selector_history_buf.size())
+ selector_history_buf.use(selector_history_buf_index);
+ }
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ const uint32_t block_x = macroblock_x * 2 + (i & 1);
+ const uint32_t block_y = macroblock_y * 2 + (i / 2);
+ if ((block_x >= gi.get_blocks_x()) || (block_y >= gi.get_blocks_y()))
+ continue;
+
+ etc_block &output_block = *(etc_block *)gi.get_block_ptr(block_x, block_y);
+
+ output_block.set_diff_bit(((m.m_diff_bits << i) & 8) != 0);
+ output_block.set_flip_bit(((m.m_flip_bits << i) & 8) != 0);
+
+ const basist::endpoint_index_template &t = basist::g_endpoint_index_templates[m.m_template_index];
+
+ uint32_t e0 = m_endpoint_remap_table_new_to_old[m.m_endpoint_indices[t.m_local_indices[i * 2 + 0]]];
+ uint32_t e1 = m_endpoint_remap_table_new_to_old[m.m_endpoint_indices[t.m_local_indices[i * 2 + 1]]];
+
+ if (output_block.get_diff_bit())
+ {
+ BASISU_BACKEND_VERIFY(m_endpoint_palette[e0].m_color5_valid);
+ BASISU_BACKEND_VERIFY(m_endpoint_palette[e1].m_color5_valid);
+
+ if (!output_block.set_block_color5_check(m_endpoint_palette[e0].m_color5, m_endpoint_palette[e1].m_color5))
+ {
+ BASISU_BACKEND_VERIFY(0);
+ }
+
+ output_block.set_inten_table(0, m_endpoint_palette[e0].m_inten5);
+ output_block.set_inten_table(1, m_endpoint_palette[e1].m_inten5);
+ }
+ else
+ {
+ BASISU_BACKEND_VERIFY(false);
+ }
+
+ uint32_t selector_idx = m.m_selector_indices[i];
+ const basist::etc1_selector_palette_entry &selectors = m_selector_palette[selector_idx];
+ for (uint32_t sy = 0; sy < 4; sy++)
+ for (uint32_t sx = 0; sx < 4; sx++)
+ output_block.set_selector(sx, sy, selectors(sx, sy));
+ }
+
+ } // macroblock_x
+
+ } // macroblock_y
+
+ if (selector_history_buf_rle_count)
+ {
+ if (selector_history_buf_rle_count >= (int)SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH)
+ {
+ selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
+ selector_syms[slice_index].push_back(selector_history_buf_rle_count);
+
+ int run_sym = selector_history_buf_rle_count - SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
+ if (run_sym >= ((int)SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
+ selector_history_buf_rle_histogram.inc(SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1);
+ else
+ selector_history_buf_rle_histogram.inc(run_sym);
+
+ delta_selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX);
+ }
+ else
+ {
+ for (int i = 0; i < selector_history_buf_rle_count; i++)
+ {
+ uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0;
+
+ selector_syms[slice_index].push_back(sym_index);
+
+ delta_selector_histogram.inc(sym_index);
+ }
+ }
+
+ selector_history_buf_rle_count = 0;
+ }
+
+ m_output.m_slice_image_crcs[slice_index] = basist::crc16(gi.get_ptr(), gi.get_size_in_bytes(), 0);
+
+ if (m_params.m_debug_images)
+ {
+ image gi_unpacked;
+ gi.unpack(gi_unpacked);
+
+ char buf[256];
+#ifdef _WIN32
+ sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index);
+#else
+ snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index);
+#endif
+ save_png(buf, gi_unpacked);
+ }
+
+ } // slice_index
+
+ debug_printf("Total selector indices remapped: %u %3.2f%%, Used history buf: %u %3.2f%%\n",
+ total_selector_indices_remapped, total_selector_indices_remapped * 100.0f / (get_total_macroblocks() * 4),
+ total_used_selector_history_buf, total_used_selector_history_buf * 100.0f / (get_total_macroblocks() * 4));
+
+ if (m_params.m_debug_images)
+ {
+ //draw_histogram_chart("delta_endpoint_hist.png", "Delta Endpoint Histogram", endpoint_histogram);
+ //draw_histogram_chart("delta_selector_hist.png", "Delta Selector Histogram", selector_histogram);
+ //draw_histogram_chart("selector_hist.png", "Selector Histogram", actual_selector_histogram);
+ }
+
+ double delta_endpoint_entropy = delta_endpoint_histogram.get_entropy() / delta_endpoint_histogram.get_total();
+ double delta_selector_entropy = delta_selector_histogram.get_entropy() / delta_selector_histogram.get_total();
+ double template_entropy = template_histogram.get_entropy() / template_histogram.get_total();
+
+ debug_printf("Entropy: AvgEndpoints/macroblock: %3.3f DeltaEndpoint: %3.3f DeltaSelector: %3.3f Template: %3.3f\n",
+ static_cast(delta_endpoint_histogram.get_total()) / get_total_macroblocks(),
+ delta_endpoint_entropy, delta_selector_entropy, template_entropy);
+
+ huffman_encoding_table template_model;
+ if (!template_model.init(template_histogram, 16))
+ {
+ error_printf("template_model.init() failed!");
+ return false;
+ }
+
+ huffman_encoding_table delta_endpoint_model;
+ if (!delta_endpoint_model.init(delta_endpoint_histogram, 16))
+ {
+ error_printf("delta_endpoint_model.init() failed!");
+ return false;
+ }
+
+ BASISU_ASSUME(basisu_frontend::cMaxEndpointClusterBits <= 15);
+ uint32_t max_delta_selector_code_size = ceil_log2i(r.get_total_selector_clusters() * 2) + 2;
+
+ max_delta_selector_code_size = clamp(max_delta_selector_code_size, 10, 15);
+
+ if (m_params.m_debug_images)
+ {
+ uint_vec delta_selector_plot_histogram(delta_selector_histogram.size());
+ for (uint32_t i = 0; i < delta_selector_histogram.size(); i++)
+ delta_selector_plot_histogram[i] = delta_selector_histogram[i];
+ //draw_histogram_chart("delta_selector_symbol_hist.png", "Delta Selector Symbol Histogram", delta_selector_plot_histogram);
+ }
+
+ huffman_encoding_table delta_selector_model;
+ if (!delta_selector_model.init(delta_selector_histogram, max_delta_selector_code_size))
+ {
+ error_printf("delta_selector_model.init() failed!");
+ return false;
+ }
+
+ if (!selector_history_buf_rle_histogram.get_total())
+ selector_history_buf_rle_histogram.inc(0);
+
+ huffman_encoding_table selector_history_buf_rle_model;
+ if (!selector_history_buf_rle_model.init(selector_history_buf_rle_histogram, 15))
+ {
+ error_printf("selector_history_buf_rle_model.init() failed!");
+ return false;
+ }
+
+ bitwise_coder coder;
+ coder.init(1024 * 1024 * 4);
+
+ uint32_t template_model_bits = coder.emit_huffman_table(template_model);
+ uint32_t delta_endpoint_model_bits = coder.emit_huffman_table(delta_endpoint_model);
+ uint32_t delta_selector_model_bits = coder.emit_huffman_table(delta_selector_model);
+ uint32_t selector_history_buf_run_sym_bits = coder.emit_huffman_table(selector_history_buf_rle_model);
+
+ coder.put_bits(MAX_SELECTOR_HISTORY_BUF_SIZE, 13);
+
+ const uint32_t SELECTOR_HISTORY_BUF_RUN_RICE_BITS = 3;
+ coder.put_bits(SELECTOR_HISTORY_BUF_RUN_RICE_BITS, 4);
+
+ debug_printf("Model sizes: Template: %u DeltaEndpoint: %u (%3.3f bpp) DeltaSelector: %u (%3.3f bpp) SelectorHistBufRLE: %u (%3.3f bpp)\n",
+ (template_model_bits + 7) / 8,
+ (delta_endpoint_model_bits + 7) / 8, delta_endpoint_model_bits / float(get_total_input_texels()),
+ (delta_selector_model_bits + 7) / 8, delta_selector_model_bits / float(get_total_input_texels()),
+ (selector_history_buf_run_sym_bits + 7) / 8, selector_history_buf_run_sym_bits / float(get_total_input_texels()));
+
+ coder.flush();
+
+ m_output.m_slice_image_tables = coder.get_bytes();
+
+ uint32_t total_template_bits = 0, total_delta_endpoint_bits = 0, total_delta_selector_bits = 0;
+
+ uint32_t total_image_bytes = 0;
+
+ m_output.m_slice_image_data.resize(m_slices.size());
+
+ for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++)
+ {
+ const uint32_t width = m_slices[slice_index].m_width;
+ const uint32_t height = m_slices[slice_index].m_height;
+ const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x;
+ const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y;
+ const uint32_t num_macroblocks_x = m_slices[slice_index].m_num_macroblocks_x;
+ const uint32_t num_macroblocks_y = m_slices[slice_index].m_num_macroblocks_y;
+
+ coder.init(1024 * 1024 * 4);
+
+ uint32_t cur_selector_sym_ofs = 0;
+ uint32_t selector_rle_count = 0;
+
+ for (uint32_t macroblock_y = 0; macroblock_y < num_macroblocks_y; macroblock_y++)
+ {
+ const int x_start = (macroblock_y & 1) ? (num_macroblocks_x - 1) : 0;
+ const int x_end = (macroblock_y & 1) ? -1 : num_macroblocks_x;
+ const int x_dir = (macroblock_y & 1) ? -1 : 1;
+
+ for (int macroblock_x = x_start; macroblock_x != x_end; macroblock_x += x_dir)
+ {
+ const etc1_macroblock &m = m_slice_macroblocks[slice_index](macroblock_x, macroblock_y);
+
+ total_template_bits += coder.put_code(m.m_template_index, template_model);
+
+ for (uint32_t i = 0; i < m.m_endpoint_delta_indices.size(); i++)
+ total_delta_endpoint_bits += coder.put_code(m.m_endpoint_delta_indices[i] + r.get_total_endpoint_clusters(), delta_endpoint_model);
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (!selector_rle_count)
+ {
+ uint32_t selector_sym_index = selector_syms[slice_index][cur_selector_sym_ofs++];
+
+ if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)
+ selector_rle_count = selector_syms[slice_index][cur_selector_sym_ofs++];
+
+ total_delta_selector_bits += coder.put_code(selector_sym_index, delta_selector_model);
+
+ if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX)
+ {
+ int run_sym = selector_rle_count - SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH;
+ if (run_sym >= ((int)SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1))
+ {
+ total_delta_selector_bits += coder.put_code(SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1, selector_history_buf_rle_model);
+ total_delta_selector_bits += coder.put_rice(selector_rle_count - SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH, SELECTOR_HISTORY_BUF_RUN_RICE_BITS);
+ }
+ else
+ total_delta_selector_bits += coder.put_code(run_sym, selector_history_buf_rle_model);
+ }
+ }
+
+ if (selector_rle_count)
+ selector_rle_count--;
+ }
+
+ } // macroblock_x
+
+ } // macroblock_y
+
+ BASISU_BACKEND_VERIFY(cur_selector_sym_ofs == selector_syms[slice_index].size());
+
+ coder.flush();
+
+ m_output.m_slice_image_data[slice_index] = coder.get_bytes();
+
+ total_image_bytes += (uint32_t)coder.get_bytes().size();
+
+ debug_printf("Slice %u compressed size: %u bytes, %3.3f bits per slice texel\n", slice_index, m_output.m_slice_image_data[slice_index].size(), m_output.m_slice_image_data[slice_index].size() * 8.0f / (m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height));
+
+ } // slice_index
+
+ const double total_texels = static_cast(get_total_input_texels());
+ const double total_macroblocks = static_cast(get_total_macroblocks());
+
+ debug_printf("Total template bits: %u bytes: %u bits/texel: %3.3f bits/macroblock: %3.3f\n", total_template_bits, total_template_bits / 8, total_template_bits / total_texels, total_template_bits / total_macroblocks);
+ debug_printf("Total delta endpoint bits: %u bytes: %u bits/texel: %3.3f bits/macroblock: %3.3f\n", total_delta_endpoint_bits, total_delta_endpoint_bits / 8, total_delta_endpoint_bits / total_texels, total_delta_endpoint_bits / total_macroblocks);
+ debug_printf("Total delta selector bits: %u bytes: %u bits/texel: %3.3f bits/macroblock: %3.3f\n", total_delta_selector_bits, total_delta_selector_bits / 8, total_delta_selector_bits / total_texels, total_delta_selector_bits / total_macroblocks);
+
+ debug_printf("Total table bytes: %u, Total image bytes: %u, %3.3f bits/texel\n", m_output.m_slice_image_tables.size(), total_image_bytes, total_image_bytes * 8.0f / total_texels);
+
+ return true;
+ }
+
+ bool basisu_backend::encode_endpoint_palette()
+ {
+ // Writes the endpoint palette to m_output.m_endpoint_palette: two Huffman tables, then for each endpoint
+ // (in NEW order) its R/G/B/intensity deltas from the previous one. Returns false if a model fails to init.
+ const basisu_frontend &r = *m_pFront_end;
+ // Histogram sizes (63 and 15 symbols) match the +31 / +7 biases applied to the deltas below.
+ histogram color5_delta_hist(32 * 2 - 1);
+ histogram inten5_delta_hist(8 * 2 - 1);
+
+ color_rgba prev_color5(0, 0, 0, 0);
+ int prev_inten5 = 0;
+
+ // Maps NEW to OLD endpoints (built by inverting m_endpoint_remap_table_old_to_new)
+ uint_vec endpoint_remap_table_inv(r.get_total_endpoint_clusters());
+ for (uint32_t old_endpoint_index = 0; old_endpoint_index < m_endpoint_remap_table_old_to_new.size(); old_endpoint_index++)
+ endpoint_remap_table_inv[m_endpoint_remap_table_old_to_new[old_endpoint_index]] = old_endpoint_index;
+
+ // Pass 1: accumulate the symbol statistics needed to build the two Huffman models.
+ for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++)
+ {
+ const uint32_t old_endpoint_index = endpoint_remap_table_inv[new_endpoint_index];
+ int delta_r5 = m_endpoint_palette[old_endpoint_index].m_color5[0] - prev_color5[0];
+ int delta_g5 = m_endpoint_palette[old_endpoint_index].m_color5[1] - prev_color5[1];
+ int delta_b5 = m_endpoint_palette[old_endpoint_index].m_color5[2] - prev_color5[2];
+ int delta_inten5 = m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten5;
+
+ prev_color5[0] = m_endpoint_palette[old_endpoint_index].m_color5[0];
+ prev_color5[1] = m_endpoint_palette[old_endpoint_index].m_color5[1];
+ prev_color5[2] = m_endpoint_palette[old_endpoint_index].m_color5[2];
+ prev_inten5 = m_endpoint_palette[old_endpoint_index].m_inten5;
+
+ color5_delta_hist.inc(31 + delta_r5);
+ color5_delta_hist.inc(31 + delta_g5);
+ color5_delta_hist.inc(31 + delta_b5);
+ inten5_delta_hist.inc(7 + delta_inten5);
+ }
+
+ huffman_encoding_table color5_delta_model;
+ if (!color5_delta_model.init(color5_delta_hist, 16))
+ {
+ error_printf("color5_delta_model.init() failed!");
+ return false;
+ }
+
+ huffman_encoding_table inten5_delta_model;
+ if (!inten5_delta_model.init(inten5_delta_hist, 16))
+ {
+ error_printf("inten5_delta_model.init() failed!");
+ return false;
+ }
+
+ bitwise_coder coder;
+ coder.init(1024 * 1024);
+
+ coder.emit_huffman_table(color5_delta_model);
+ coder.emit_huffman_table(inten5_delta_model);
+
+ // Pass 2: restart the predictor and emit the same delta sequence using the models built above.
+ prev_color5.set(0, 0, 0, 0);
+ prev_inten5 = 0;
+
+ for (uint32_t q = 0; q < r.get_total_endpoint_clusters(); q++)
+ {
+ const uint32_t i = endpoint_remap_table_inv[q];
+ int delta_r5 = m_endpoint_palette[i].m_color5[0] - prev_color5[0];
+ int delta_g5 = m_endpoint_palette[i].m_color5[1] - prev_color5[1];
+ int delta_b5 = m_endpoint_palette[i].m_color5[2] - prev_color5[2];
+ int delta_inten5 = m_endpoint_palette[i].m_inten5 - prev_inten5;
+
+ prev_color5[0] = m_endpoint_palette[i].m_color5[0];
+ prev_color5[1] = m_endpoint_palette[i].m_color5[1];
+ prev_color5[2] = m_endpoint_palette[i].m_color5[2];
+ prev_inten5 = m_endpoint_palette[i].m_inten5;
+
+ coder.put_code(31 + delta_r5, color5_delta_model);
+ coder.put_code(31 + delta_g5, color5_delta_model);
+ coder.put_code(31 + delta_b5, color5_delta_model);
+ coder.put_code(7 + delta_inten5, inten5_delta_model);
+ } // q
+
+ coder.flush();
+ m_output.m_endpoint_palette = coder.get_bytes();
+ // size() returns size_t, which does not match %u; report the byte count as a uint32_t instead.
+ const uint32_t palette_bytes = (uint32_t)m_output.m_endpoint_palette.size();
+ debug_printf("Endpoint palette size: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n",
+ palette_bytes, palette_bytes * 8.0f / r.get_total_endpoint_clusters(), palette_bytes * 8.0f / get_total_input_texels());
+
+ return true;
+ }
+
+ bool basisu_backend::encode_selector_palette()
+ {
+ const basisu_frontend &r = *m_pFront_end;
+ // Emits the selector palette to m_output.m_selector_palette in one of three layouts picked from m_params; false on Huffman init failure.
+ if ((m_params.m_use_global_sel_codebook) && (!m_params.m_use_hybrid_sel_codebooks))
+ {
+ histogram global_mod_indices(1 << m_params.m_global_sel_codebook_mod_bits);
+ // Layout 1 (global codebook only): each selector is a fixed-width palette index plus a Huffman-coded modifier index.
+ for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
+ global_mod_indices.inc(m_global_selector_palette_desc[q].m_mod_index);
+
+ huffman_encoding_table global_mod_model;
+
+ if (!global_mod_model.init(global_mod_indices, 16))
+ {
+ error_printf("global_mod_model.init() failed!");
+ return false;
+ }
+
+ bitwise_coder coder;
+ coder.init(1024 * 1024);
+
+ coder.put_bits(1, 1); // use global codebook
+
+ coder.put_bits(m_params.m_global_sel_codebook_pal_bits, 4); // pal bits
+ coder.put_bits(m_params.m_global_sel_codebook_mod_bits, 4); // mod bits
+
+ uint32_t mod_model_bits = 0;
+ if (m_params.m_global_sel_codebook_mod_bits)
+ mod_model_bits = coder.emit_huffman_table(global_mod_model);
+
+ uint32_t total_pal_bits = 0;
+ uint32_t total_mod_bits = 0;
+ for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
+ {
+ const uint32_t i = m_selector_remap_table_new_to_old[q];
+
+ if (m_params.m_global_sel_codebook_pal_bits)
+ {
+ coder.put_bits(m_global_selector_palette_desc[i].m_pal_index, m_params.m_global_sel_codebook_pal_bits);
+ total_pal_bits += m_params.m_global_sel_codebook_pal_bits;
+ }
+
+ if (m_params.m_global_sel_codebook_mod_bits)
+ total_mod_bits += coder.put_code(m_global_selector_palette_desc[i].m_mod_index, global_mod_model);
+ }
+
+ coder.flush();
+
+ m_output.m_selector_palette = coder.get_bytes();
+
+ debug_printf("Modifier model bits: %u Avg per entry: %3.3f\n", mod_model_bits, mod_model_bits / float(r.get_total_selector_clusters()));
+ debug_printf("Palette bits: %u Avg per entry: %3.3f, Modifier bits: %u Avg per entry: %3.3f\n", total_pal_bits, total_pal_bits / float(r.get_total_selector_clusters()), total_mod_bits, total_mod_bits / float(r.get_total_selector_clusters()));
+ }
+ else if (m_params.m_use_hybrid_sel_codebooks)
+ {
+ huff2D used_global_cb_bitflag_huff2D(1, 8);
+ // Layout 2 (hybrid): a per-selector flag picks either a global codebook reference or 4 raw selector bytes.
+ histogram global_mod_indices(1 << m_params.m_global_sel_codebook_mod_bits);
+
+ for (uint32_t s = 0; s < r.get_total_selector_clusters(); s++)
+ {
+ const uint32_t q = m_selector_remap_table_new_to_old[s];
+
+ const bool used_global_cb_flag = r.get_selector_cluster_uses_global_cb_vec()[q];
+
+ used_global_cb_bitflag_huff2D.emit(used_global_cb_flag);
+
+ global_mod_indices.inc(m_global_selector_palette_desc[q].m_mod_index);
+ }
+
+ huffman_encoding_table global_mod_indices_model;
+ if (!global_mod_indices_model.init(global_mod_indices, 16))
+ {
+ error_printf("global_mod_indices_model.init() failed!");
+ return false;
+ }
+
+ bitwise_coder coder;
+ coder.init(1024 * 1024);
+
+ coder.put_bits(0, 1); // use global codebook
+ coder.put_bits(1, 1); // uses hybrid codebooks
+
+ coder.put_bits(m_params.m_global_sel_codebook_pal_bits, 4); // pal bits
+ coder.put_bits(m_params.m_global_sel_codebook_mod_bits, 4); // mod bits
+
+ used_global_cb_bitflag_huff2D.start_encoding(16);
+ coder.emit_huffman_table(used_global_cb_bitflag_huff2D.get_encoding_table());
+
+ if (m_params.m_global_sel_codebook_mod_bits)
+ coder.emit_huffman_table(global_mod_indices_model);
+
+ uint32_t total_global_cb_entries = 0;
+ uint32_t total_pal_bits = 0;
+ uint32_t total_mod_bits = 0;
+ uint32_t total_selectors = 0;
+ uint32_t total_selector_bits = 0;
+ uint32_t total_flag_bits = 0;
+
+ for (uint32_t s = 0; s < r.get_total_selector_clusters(); s++)
+ {
+ const uint32_t q = m_selector_remap_table_new_to_old[s];
+
+ total_flag_bits += used_global_cb_bitflag_huff2D.emit_next_sym(coder);
+
+ const bool used_global_cb_flag = r.get_selector_cluster_uses_global_cb_vec()[q];
+
+ if (used_global_cb_flag)
+ {
+ total_global_cb_entries++;
+
+ total_pal_bits += coder.put_bits(r.get_selector_cluster_global_selector_entry_ids()[q].m_palette_index, m_params.m_global_sel_codebook_pal_bits);
+ total_mod_bits += coder.put_code(r.get_selector_cluster_global_selector_entry_ids()[q].m_modifier.get_index(), global_mod_indices_model);
+ }
+ else
+ {
+ total_selectors++;
+ total_selector_bits += 32;
+
+ for (uint32_t j = 0; j < 4; j++)
+ coder.put_bits(m_selector_palette[q].get_byte(j), 8);
+ }
+ }
+
+ coder.flush();
+
+ m_output.m_selector_palette = coder.get_bytes();
+
+ debug_printf("Total global CB entries: %u %3.2f%%\n", total_global_cb_entries, total_global_cb_entries * 100.0f / r.get_total_selector_clusters());
+ debug_printf("Total selector entries: %u %3.2f%%\n", total_selectors, total_selectors * 100.0f / r.get_total_selector_clusters());
+ debug_printf("Total pal bits: %u, mod bits: %u, selector bits: %u, flag bits: %u\n", total_pal_bits, total_mod_bits, total_selector_bits, total_flag_bits);
+ }
+ else
+ {
+ histogram delta_selector_pal_histogram(256);
+ // Layout 3 (local): entry 0 is stored raw; each later entry is XORed byte-wise with its predecessor and Huffman coded.
+ for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
+ {
+ if (!q)
+ continue;
+
+ const basist::etc1_selector_palette_entry &cur = m_selector_palette[m_selector_remap_table_new_to_old[q]];
+ const basist::etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]);
+
+ for (uint32_t j = 0; j < 4; j++)
+ delta_selector_pal_histogram.inc(cur.get_byte(j) ^ predictor.get_byte(j));
+ }
+
+ if (!delta_selector_pal_histogram.get_total())
+ delta_selector_pal_histogram.inc(0);
+
+ huffman_encoding_table delta_selector_pal_model;
+ if (!delta_selector_pal_model.init(delta_selector_pal_histogram, 16))
+ {
+ error_printf("delta_selector_pal_model.init() failed!");
+ return false;
+ }
+
+ bitwise_coder coder;
+ coder.init(1024 * 1024);
+
+ coder.put_bits(0, 1); // use global codebook
+ coder.put_bits(0, 1); // uses hybrid codebooks
+
+ coder.put_bits(0, 1); // raw bytes
+
+ coder.emit_huffman_table(delta_selector_pal_model);
+
+ for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
+ {
+ if (!q)
+ {
+ for (uint32_t j = 0; j < 4; j++)
+ coder.put_bits(m_selector_palette[m_selector_remap_table_new_to_old[q]].get_byte(j), 8);
+ continue;
+ }
+
+ const basist::etc1_selector_palette_entry &cur = m_selector_palette[m_selector_remap_table_new_to_old[q]];
+ const basist::etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]);
+
+ for (uint32_t j = 0; j < 4; j++)
+ coder.put_code(cur.get_byte(j) ^ predictor.get_byte(j), delta_selector_pal_model);
+ }
+
+ coder.flush();
+
+ m_output.m_selector_palette = coder.get_bytes();
+ // Fall back to plain raw bytes when the Huffman-coded form is not smaller than 4 bytes per selector.
+ if (m_output.m_selector_palette.size() >= r.get_total_selector_clusters() * 4)
+ {
+ coder.init(1024 * 1024);
+
+ coder.put_bits(0, 1); // use global codebook
+ coder.put_bits(0, 1); // uses hybrid codebooks
+
+ coder.put_bits(1, 1); // raw bytes
+
+ for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++)
+ {
+ const uint32_t i = m_selector_remap_table_new_to_old[q];
+
+ for (uint32_t j = 0; j < 4; j++)
+ coder.put_bits(m_selector_palette[i].get_byte(j), 8);
+ }
+
+ coder.flush();
+
+ m_output.m_selector_palette = coder.get_bytes();
+ }
+
+ } // if (m_params.m_use_global_sel_codebook)
+ const uint32_t palette_bytes = (uint32_t)m_output.m_selector_palette.size(); // size_t does not match the %u conversion below
+ debug_printf("Selector palette bytes: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n", palette_bytes, palette_bytes * 8.0f / r.get_total_selector_clusters(), palette_bytes * 8.0f / get_total_input_texels());
+
+ return true;
+ }
+
+ uint32_t basisu_backend::encode()
+ {
+ // Runs the whole backend: builds the palettes and macroblocks, then serializes the image slices and
+ // both palettes into m_output. Returns the total compressed size in bytes, or 0 if any stage fails.
+ const basisu_frontend &r = *m_pFront_end;
+
+ m_output.m_slice_desc = m_slices;
+ m_output.m_etc1s = m_params.m_etc1s;
+ m_output.m_num_endpoints = r.get_total_endpoint_clusters();
+ m_output.m_num_selectors = r.get_total_selector_clusters();
+
+ create_endpoint_palette();
+ create_selector_palette();
+ create_macroblocks();
+
+ if (!encode_image())
+ return 0;
+
+ if (!encode_endpoint_palette())
+ return 0;
+
+ if (!encode_selector_palette())
+ return 0;
+
+ // The helper sums the table, palette and per-slice byte counts, exactly what was recomputed inline here.
+ const uint32_t total_compressed_bytes = m_output.get_output_size_estimate();
+
+ debug_printf("Wrote %u bytes, %3.3f bits/texel\n", total_compressed_bytes, total_compressed_bytes * 8.0f / get_total_input_texels());
+
+ return total_compressed_bytes;
+ }
+
+} // namespace basisu
diff --git a/basisu_backend.h b/basisu_backend.h
new file mode 100644
index 0000000..f854e15
--- /dev/null
+++ b/basisu_backend.h
@@ -0,0 +1,331 @@
+// basisu_backend.h
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "transcoder/basisu.h"
+#include "basisu_enc.h"
+#include "transcoder/basisu_transcoder_internal.h"
+#include "transcoder/basisu_global_selector_palette.h"
+#include "basisu_frontend.h"
+
+namespace basisu
+{
+ struct etc1_macroblock
+ {
+ // Per-block flags for the macroblock's four blocks; encode_image() reads block i from bit (3 - i).
+ uint8_t m_diff_bits;
+ uint8_t m_flip_bits;
+
+ int m_template_index; // index into basist::g_endpoint_index_templates
+
+ int_vec m_endpoint_indices;
+ int_vec m_selector_indices;
+
+ // Values recorded alongside the indices for entropy coding.
+ int_vec m_endpoint_delta_indices;
+ int_vec m_selector_delta_indices;
+ int_vec m_selector_history_buf_indices;
+
+ etc1_macroblock() : m_diff_bits(0), m_flip_bits(0), m_template_index(0) { } // same state clear() leaves
+
+ // Empties all index lists and zeroes the flags and template index.
+ void clear()
+ {
+ m_selector_history_buf_indices.clear();
+ m_selector_delta_indices.clear();
+ m_endpoint_delta_indices.clear();
+ m_selector_indices.clear();
+ m_endpoint_indices.clear();
+ m_template_index = 0;
+ m_flip_bits = 0;
+ m_diff_bits = 0;
+ }
+ };
+
+ typedef std::vector<etc1_macroblock> etc1_macroblock_vec; // flat list of macroblocks
+ typedef vector2D<etc1_macroblock> etc1_macroblock_vec2D; // macroblocks addressed as (macroblock_x, macroblock_y)
+
+ struct etc1_endpoint_palette_entry
+ {
+ etc1_endpoint_palette_entry()
+ {
+ clear();
+ }
+ // One endpoint palette entry as consumed by encode_image() and encode_endpoint_palette().
+ color_rgba m_color5; // base color handed to set_block_color5_check(); components 0-2 are delta-coded
+ uint32_t m_inten5; // value handed to set_inten_table(); delta-coded with a +7 bias
+ bool m_color5_valid; // checked with BASISU_BACKEND_VERIFY before m_color5 is used for a diff block
+
+ void clear()
+ {
+ clear_obj(*this); // NOTE(review): presumably zero-fills the whole object - confirm against clear_obj
+ }
+ };
+
+ typedef std::vector<etc1_endpoint_palette_entry> etc1_endpoint_palette_entry_vec; // endpoint palette container
+
+ struct basisu_backend_params
+ {
+ bool m_etc1s;
+ bool m_debug;
+ bool m_debug_images;
+ float m_delta_selector_rdo_quality_thresh; // 1.25f
+
+ // Selector codebook selection and the bit widths written into the selector palette header.
+ bool m_use_global_sel_codebook;
+ uint32_t m_global_sel_codebook_pal_bits;
+ uint32_t m_global_sel_codebook_mod_bits;
+ bool m_use_hybrid_sel_codebooks;
+
+ basisu_backend_params() { clear(); } // a new instance always starts from the defaults
+
+ // Restores the defaults: all flags off, threshold 0, codebook bit widths at their library constants.
+ void clear()
+ {
+ m_use_hybrid_sel_codebooks = false;
+ m_global_sel_codebook_mod_bits = basist::etc1_global_palette_entry_modifier::cTotalBits;
+ m_global_sel_codebook_pal_bits = ETC1_GLOBAL_SELECTOR_CODEBOOK_MAX_PAL_BITS;
+ m_use_global_sel_codebook = false;
+
+ m_delta_selector_rdo_quality_thresh = 0.0f;//2.5f;
+ m_debug_images = false;
+ m_debug = false;
+ m_etc1s = false;
+ }
+ };
+
+ struct basisu_backend_slice_desc
+ {
+ uint32_t m_first_block_index; // base added by get_block_index() to this slice's row-major block offset
+ // Dimensions whose product get_total_input_texels() counts (input texels, without block padding).
+ uint32_t m_orig_width;
+ uint32_t m_orig_height;
+ // NOTE(review): presumably the dimensions after padding up to whole blocks - confirm with the caller.
+ uint32_t m_width;
+ uint32_t m_height;
+ // Block grid size; get_block_index() asserts coordinates against it.
+ uint32_t m_num_blocks_x;
+ uint32_t m_num_blocks_y;
+ // Macroblock grid size; encode_image() maps each macroblock to a 2x2 group of blocks.
+ uint32_t m_num_macroblocks_x;
+ uint32_t m_num_macroblocks_y;
+
+ uint32_t m_source_file_index; // create_header() derives the image count as the largest index + 1
+ uint32_t m_mip_index; // NOTE(review): presumably the mip level of this slice - confirm
+ bool m_alpha; // when set on any slice, create_header() raises cBASISHeaderFlagHasAlphaSlices
+ };
+
+ typedef std::vector<basisu_backend_slice_desc> basisu_backend_slice_desc_vec; // one descriptor per slice
+
+ struct basisu_backend_output
+ {
+ // Everything basisu_backend::encode() produces: codec flag, palette sizes and the serialized byte streams.
+ bool m_etc1s;
+ uint32_t m_num_endpoints;
+ uint32_t m_num_selectors;
+
+ uint8_vec m_endpoint_palette; // bytes written by encode_endpoint_palette()
+ uint8_vec m_selector_palette; // bytes written by encode_selector_palette()
+
+ basisu_backend_slice_desc_vec m_slice_desc;
+
+ uint8_vec m_slice_image_tables; // Huffman tables and coder parameters emitted once by encode_image()
+ std::vector<uint8_vec> m_slice_image_data; // one compressed byte stream per slice
+ uint16_vec m_slice_image_crcs; // basist::crc16 of each slice's output block image
+
+ basisu_backend_output()
+ {
+ clear();
+ }
+ // Resets the flag and counts and empties every buffer.
+ void clear()
+ {
+ m_etc1s = false;
+
+ m_num_endpoints = 0;
+ m_num_selectors = 0;
+
+ m_endpoint_palette.clear();
+ m_selector_palette.clear();
+ m_slice_desc.clear();
+ m_slice_image_tables.clear();
+ m_slice_image_data.clear();
+ m_slice_image_crcs.clear();
+ }
+ // Returns the summed byte size of the image tables, both palettes and every slice's data.
+ uint32_t get_output_size_estimate() const
+ {
+ uint32_t total_compressed_bytes = (uint32_t)(m_slice_image_tables.size() + m_endpoint_palette.size() + m_selector_palette.size());
+ for (uint32_t i = 0; i < m_slice_image_data.size(); i++)
+ total_compressed_bytes += (uint32_t)m_slice_image_data[i].size();
+
+ return total_compressed_bytes;
+ }
+ };
+
+ class basisu_backend
+ {
+ BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_backend);
+ // Backend stage: turns the frontend's endpoint/selector clusters into the serialized data held in m_output.
+ public:
+
+ basisu_backend();
+
+ void clear();
+
+ void init(basisu_frontend *pFront_end, basisu_backend_params &params, const basisu_backend_slice_desc_vec &slice_desc, const basist::etc1_global_selector_codebook *pGlobal_sel_codebook);
+ // Runs the full encode; returns the total compressed size in bytes, or 0 if a stage fails.
+ uint32_t encode();
+
+ const basisu_backend_output &get_output() const { return m_output; }
+
+ private:
+ const basisu_frontend *m_pFront_end;
+ basisu_backend_params m_params;
+ basisu_backend_slice_desc_vec m_slices;
+ basisu_backend_output m_output;
+ const basist::etc1_global_selector_codebook *m_pGlobal_sel_codebook;
+
+ etc1_endpoint_palette_entry_vec m_endpoint_palette;
+ basist::etc1_selector_palette_entry_vec m_selector_palette;
+
+ struct etc1_global_selector_cb_entry_desc
+ {
+ uint32_t m_pal_index;
+ uint32_t m_mod_index;
+ bool m_was_used;
+ };
+
+ typedef std::vector<etc1_global_selector_cb_entry_desc> etc1_global_selector_cb_entry_desc_vec;
+
+ etc1_global_selector_cb_entry_desc_vec m_global_selector_palette_desc;
+ // One 2D macroblock grid per slice, indexed as m_slice_macroblocks[slice](macroblock_x, macroblock_y).
+ std::vector<etc1_macroblock_vec2D> m_slice_macroblocks;
+
+ // Index remap tables; each name's suffix gives its direction (OLD to NEW, or NEW to OLD).
+ uint_vec m_endpoint_remap_table_old_to_new;
+ uint_vec m_endpoint_remap_table_new_to_old;
+
+ uint_vec m_selector_remap_table_old_to_new;
+
+ // Maps NEW to OLD selector indices
+ uint_vec m_selector_remap_table_new_to_old;
+
+ uint32_t get_total_slices() const
+ {
+ return (uint32_t)m_slices.size();
+ }
+
+ uint32_t get_total_slice_blocks() const
+ {
+ return m_pFront_end->get_total_output_blocks();
+ }
+ // Returns the global index of block (block_x, block_y) of the given slice (row-major within the slice).
+ uint32_t get_block_index(uint32_t slice_index, uint32_t block_x, uint32_t block_y) const
+ {
+ const basisu_backend_slice_desc &slice = m_slices[slice_index];
+
+ assert((block_x < slice.m_num_blocks_x) && (block_y < slice.m_num_blocks_y));
+
+ return slice.m_first_block_index + block_y * slice.m_num_blocks_x + block_x;
+ }
+
+ uint32_t get_num_macroblocks_x(uint32_t slice_index) const
+ {
+ return m_slices[slice_index].m_num_macroblocks_x;
+ }
+
+ uint32_t get_num_macroblocks_y(uint32_t slice_index) const
+ {
+ return m_slices[slice_index].m_num_macroblocks_y;
+ }
+
+ uint32_t get_total_macroblocks(uint32_t slice_index) const
+ {
+ return m_slices[slice_index].m_num_macroblocks_x * m_slices[slice_index].m_num_macroblocks_y;
+ }
+
+ uint32_t get_total_macroblocks() const
+ {
+ uint32_t total_macroblocks = 0;
+ for (uint32_t i = 0; i < m_slices.size(); i++)
+ total_macroblocks += get_total_macroblocks(i);
+ return total_macroblocks;
+ }
+
+ // Returns the total number of input texels, not counting padding up to blocks/macroblocks.
+ uint32_t get_total_input_texels(uint32_t slice_index) const
+ {
+ return m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height;
+ }
+
+ uint32_t get_total_input_texels() const
+ {
+ uint32_t total_texels = 0;
+ for (uint32_t i = 0; i < m_slices.size(); i++)
+ total_texels += get_total_input_texels(i);
+ return total_texels;
+ }
+ // Returns the index of the slice containing block_index (optionally its x/y within that slice), or -1 if none.
+ int find_slice(uint32_t block_index, uint32_t *pBlock_x, uint32_t *pBlock_y) const
+ {
+ for (uint32_t i = 0; i < m_slices.size(); i++)
+ {
+ if ((block_index >= m_slices[i].m_first_block_index) && (block_index < (m_slices[i].m_first_block_index + m_slices[i].m_num_blocks_x * m_slices[i].m_num_blocks_y)))
+ {
+ const uint32_t ofs = block_index - m_slices[i].m_first_block_index;
+ const uint32_t x = ofs % m_slices[i].m_num_blocks_x;
+ const uint32_t y = ofs / m_slices[i].m_num_blocks_x;
+
+ if (pBlock_x) *pBlock_x = x;
+ if (pBlock_y) *pBlock_y = y;
+
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ void create_endpoint_palette();
+
+ void create_selector_palette();
+
+ // endpoint palette
+ // 5:5:5 and predicted 4:4:4 colors, 1 or 2 3-bit intensity table indices
+ // selector palette
+ // 4x4 2-bit selectors
+
+ // per-macroblock:
+ // 4 diff bits
+ // 4 flip bits
+ // Endpoint template index, 1-8 endpoint indices
+ // Alternately, if no template applies, we can send 4 ETC1S bits followed by 4-8 endpoint indices
+ // 4 selector indices
+
+ float selector_zeng_similarity_func(uint32_t index_a, uint32_t index_b, void *pContext);
+
+ void create_macroblocks();
+
+ void optimize_selector_palette_order(const uint_vec &all_selector_indices);
+
+ bool encode_image();
+
+ bool encode_endpoint_palette();
+
+ bool encode_selector_palette();
+ };
+
+} // namespace basisu
+
diff --git a/basisu_basis_file.cpp b/basisu_basis_file.cpp
new file mode 100644
index 0000000..b2799d0
--- /dev/null
+++ b/basisu_basis_file.cpp
@@ -0,0 +1,199 @@
+// basisu_basis_file.cpp
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "basisu_basis_file.h"
+#include "transcoder/basisu_transcoder.h"
+
+// The output file version. Keep in sync with BASISD_SUPPORTED_BASIS_VERSION.
+#define BASIS_FILE_VERSION (0x11)
+
+namespace basisu
+{
+ void basisu_file::create_header(const basisu_backend_output &encoder_output, uint32_t userdata0, uint32_t userdata1, bool y_flipped)
+ { // Fills m_header from encoder_output, the caller's userdata/flip flag, and the section offsets and total size that init() stores before calling this.
+ m_header.m_header_size = sizeof(basist::basis_file_header);
+ // The data size is everything in the file after the fixed-size header.
+ m_header.m_data_size = m_total_file_size - sizeof(basist::basis_file_header);
+
+ m_header.m_total_slices = (uint32_t)encoder_output.m_slice_desc.size();
+ // Image count = highest source file index referenced by any slice, plus one.
+ m_header.m_total_images = 0;
+ for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++)
+ m_header.m_total_images = maximum(m_header.m_total_images, encoder_output.m_slice_desc[i].m_source_file_index + 1);
+
+ m_header.m_format = basist::cETC1;
+ m_header.m_flags = 0;
+ // Flags are OR-ed in one at a time: ETC1S mode, Y flip, and whether any slice holds alpha data.
+ if (encoder_output.m_etc1s)
+ m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagETC1S;
+
+ if (y_flipped)
+ m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagYFlipped;
+
+ for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++)
+ {
+ if (encoder_output.m_slice_desc[i].m_alpha)
+ {
+ m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagHasAlphaSlices;
+ break;
+ }
+ }
+
+ m_header.m_userdata0 = userdata0;
+ m_header.m_userdata1 = userdata1;
+ // For each section record the element count (where there is one), its file offset and its byte size.
+ m_header.m_total_endpoints = encoder_output.m_num_endpoints;
+ m_header.m_endpoint_cb_file_ofs = m_endpoint_cb_file_ofs;
+ m_header.m_endpoint_cb_file_size = (uint32_t)encoder_output.m_endpoint_palette.size();
+
+ m_header.m_total_selectors = encoder_output.m_num_selectors;
+ m_header.m_selector_cb_file_ofs = m_selector_cb_file_ofs;
+ m_header.m_selector_cb_file_size = (uint32_t)encoder_output.m_selector_palette.size();
+
+ m_header.m_tables_file_ofs = m_tables_file_ofs;
+ m_header.m_tables_file_size = (uint32_t)encoder_output.m_slice_image_tables.size();
+
+ m_header.m_slice_desc_file_ofs = m_slice_descs_file_ofs;
+ }
+
+ bool basisu_file::create_image_descs(const basisu_backend_output &encoder_output)
+ { // Builds one file slice descriptor per backend slice and assigns each slice's byte range; returns false if a size or offset exceeds UINT32_MAX.
+ const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc;
+
+ m_images_descs.resize(slice_descs.size());
+ // Slice data is placed back to back starting at m_first_image_file_ofs; the running offset is 64-bit so overflow can be detected.
+ uint64_t cur_slice_file_ofs = m_first_image_file_ofs;
+ for (uint32_t i = 0; i < slice_descs.size(); i++)
+ {
+ clear_obj(m_images_descs[i]);
+
+ m_images_descs[i].m_image_index = slice_descs[i].m_source_file_index;
+ m_images_descs[i].m_level_index = slice_descs[i].m_mip_index;
+
+ if (slice_descs[i].m_alpha)
+ m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsIsAlphaData;
+ // Copy the original and per-block dimensions, plus the CRC16 supplied in encoder_output for this slice.
+ m_images_descs[i].m_orig_width = slice_descs[i].m_orig_width;
+ m_images_descs[i].m_orig_height = slice_descs[i].m_orig_height;
+ m_images_descs[i].m_num_blocks_x = slice_descs[i].m_num_blocks_x;
+ m_images_descs[i].m_num_blocks_y = slice_descs[i].m_num_blocks_y;
+ m_images_descs[i].m_slice_data_crc16 = encoder_output.m_slice_image_crcs[i];
+
+ if (encoder_output.m_slice_image_data[i].size() > UINT32_MAX)
+ {
+ error_printf("basisu_file::create_image_descs: Basis file too large\n");
+ return false;
+ }
+
+ const uint32_t image_size = (uint32_t)encoder_output.m_slice_image_data[i].size();
+ // The cast below cannot truncate: the offset was checked against UINT32_MAX at the end of the previous iteration.
+ m_images_descs[i].m_file_ofs = (uint32_t)cur_slice_file_ofs;
+ m_images_descs[i].m_file_size = image_size;
+
+ cur_slice_file_ofs += image_size;
+ if (cur_slice_file_ofs > UINT32_MAX)
+ {
+ error_printf("basisu_file::create_image_descs: Basis file too large\n");
+ return false;
+ }
+ }
+ // After the last slice the running offset must equal the total file size stored in m_total_file_size.
+ assert(cur_slice_file_ofs == m_total_file_size);
+ return true;
+ }
+
+ void basisu_file::create_comp_data(const basisu_backend_output &encoder_output)
+ { // Serializes the file into m_comp_data in section order: header, slice descs, endpoint codebook, selector codebook, tables, then every slice's data.
+ const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc;
+ // m_comp_data holds bytes, so each section is appended through a const uint8_t * view of its storage.
+ append_vector(m_comp_data, reinterpret_cast<const uint8_t *>(&m_header), sizeof(m_header));
+ // Each assert below checks that the bytes written so far end exactly at the next section's recorded offset.
+ assert(m_comp_data.size() == m_slice_descs_file_ofs);
+ append_vector(m_comp_data, reinterpret_cast<const uint8_t *>(&m_images_descs[0]), m_images_descs.size() * sizeof(m_images_descs[0]));
+
+ assert(m_comp_data.size() == m_endpoint_cb_file_ofs);
+ append_vector(m_comp_data, reinterpret_cast<const uint8_t *>(&encoder_output.m_endpoint_palette[0]), encoder_output.m_endpoint_palette.size());
+
+ assert(m_comp_data.size() == m_selector_cb_file_ofs);
+ append_vector(m_comp_data, reinterpret_cast<const uint8_t *>(&encoder_output.m_selector_palette[0]), encoder_output.m_selector_palette.size());
+
+ assert(m_comp_data.size() == m_tables_file_ofs);
+ append_vector(m_comp_data, reinterpret_cast<const uint8_t *>(&encoder_output.m_slice_image_tables[0]), encoder_output.m_slice_image_tables.size());
+
+ assert(m_comp_data.size() == m_first_image_file_ofs);
+ for (uint32_t i = 0; i < slice_descs.size(); i++)
+ append_vector(m_comp_data, &encoder_output.m_slice_image_data[i][0], encoder_output.m_slice_image_data[i].size());
+
+ assert(m_comp_data.size() == m_total_file_size);
+ }
+
+ void basisu_file::fixup_crcs()
+ { // Patches the header copy at the start of m_comp_data: data size and data CRC16, then the header CRC16, then signature and version.
+ basist::basis_file_header *pHeader = reinterpret_cast<basist::basis_file_header *>(&m_comp_data[0]);
+ // The data CRC covers every byte that follows the fixed-size header.
+ pHeader->m_data_size = m_total_file_size - sizeof(basist::basis_file_header);
+ pHeader->m_data_crc16 = basist::crc16(&m_comp_data[0] + sizeof(basist::basis_file_header), m_total_file_size - sizeof(basist::basis_file_header), 0);
+ // The header CRC covers the bytes from m_data_size to the end of the header, so it is computed after the data fields above.
+ pHeader->m_header_crc16 = basist::crc16(&pHeader->m_data_size, sizeof(basist::basis_file_header) - BASISU_OFFSETOF(basist::basis_file_header, m_data_size), 0);
+ // NOTE(review): m_sig/m_ver are written after the header CRC; presumably they sit before m_data_size and are not covered - confirm against the header layout.
+ pHeader->m_sig = basist::basis_file_header::cBASISSigValue;
+ pHeader->m_ver = BASIS_FILE_VERSION;
+ }
+
+ bool basisu_file::init(const basisu_backend_output &encoder_output, uint32_t userdata0, uint32_t userdata1, bool y_flipped)
+ { // Builds the whole file image in m_comp_data from encoder_output; returns false if the file would be too large or a slice descriptor cannot be created.
+ clear();
+
+ const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc;
+
+ // The Basis file uses 32-bit fields for lots of stuff, so make sure it's not too large.
+ uint64_t check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() +
+ (uint64_t)encoder_output.m_endpoint_palette.size() + (uint64_t)encoder_output.m_selector_palette.size() + (uint64_t)encoder_output.m_slice_image_tables.size();
+ if (check_size >= 0xFFFF0000ULL)
+ {
+ error_printf("basisu_file::init: File is too large!\n");
+ return false;
+ }
+ // Section layout: header, slice descs, endpoint codebook, selector codebook, tables, slice data. Each offset is the previous offset plus the previous section's size.
+ m_header_file_ofs = 0;
+ m_slice_descs_file_ofs = sizeof(basist::basis_file_header);
+ m_endpoint_cb_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size();
+ m_selector_cb_file_ofs = m_endpoint_cb_file_ofs + (uint32_t)encoder_output.m_endpoint_palette.size();
+ m_tables_file_ofs = m_selector_cb_file_ofs + (uint32_t)encoder_output.m_selector_palette.size();
+ m_first_image_file_ofs = m_tables_file_ofs + (uint32_t)encoder_output.m_slice_image_tables.size();
+ // Add up all slice data in 64 bits and apply the same size limit to the complete file.
+ uint64_t total_file_size = m_first_image_file_ofs;
+ for (uint32_t i = 0; i < encoder_output.m_slice_image_data.size(); i++)
+ total_file_size += encoder_output.m_slice_image_data[i].size();
+ if (total_file_size >= 0xFFFF0000ULL)
+ {
+ error_printf("basisu_file::init: File is too large!\n");
+ return false;
+ }
+
+ m_total_file_size = (uint32_t)total_file_size;
+ // The helpers below read the offsets and total size stored above, so they must run after this point and in this order.
+ create_header(encoder_output, userdata0, userdata1, y_flipped);
+
+ if (!create_image_descs(encoder_output))
+ return false;
+
+ create_comp_data(encoder_output);
+ // CRCs are patched last, directly in the serialized bytes.
+ fixup_crcs();
+
+ return true;
+ }
+
+} // namespace basisu
diff --git a/basisu_basis_file.h b/basisu_basis_file.h
new file mode 100644
index 0000000..ffbb119
--- /dev/null
+++ b/basisu_basis_file.h
@@ -0,0 +1,70 @@
+// basisu_basis_file.h
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "transcoder/basisu_file_headers.h"
+#include "basisu_backend.h"
+
+namespace basisu
+{
+ class basisu_file
+ { // Assembles the bytes of a Basis file from backend output: call init(), then read the result with get_compressed_data().
+ BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_file);
+
+ public:
+ basisu_file()
+ {
+ clear(); // give the header and every offset a defined (zero) value even if init() is never called
+ }
+ // Resets the object to its empty state: no data, cleared header, no slice descs, all offsets and the total size zero.
+ void clear()
+ {
+ m_comp_data.clear();
+ clear_obj(m_header);
+ m_images_descs.clear();
+
+ m_header_file_ofs = 0;
+ m_slice_descs_file_ofs = 0;
+ m_endpoint_cb_file_ofs = 0;
+ m_selector_cb_file_ofs = 0;
+ m_tables_file_ofs = 0;
+ m_first_image_file_ofs = 0;
+ m_total_file_size = 0;
+ }
+ // Builds the complete file image from encoder_output; returns false on failure (e.g. the file would not fit its 32-bit fields).
+ bool init(const basisu_backend_output& encoder_output, uint32_t userdata0 = 0, uint32_t userdata1 = 0, bool y_flipped = false);
+ // The file bytes assembled by init().
+ const uint8_vec &get_compressed_data() const { return m_comp_data; }
+
+ private:
+ basist::basis_file_header m_header;
+ std::vector<basist::basis_slice_desc> m_images_descs; // one descriptor per slice, written right after the header
+
+ uint8_vec m_comp_data;
+ // Byte offset of each section from the start of the file, plus the total file size; all computed by init().
+ uint32_t m_header_file_ofs;
+ uint32_t m_slice_descs_file_ofs;
+ uint32_t m_endpoint_cb_file_ofs;
+ uint32_t m_selector_cb_file_ofs;
+ uint32_t m_tables_file_ofs;
+ uint32_t m_first_image_file_ofs;
+ uint32_t m_total_file_size;
+ // Helpers called by init(), in this order.
+ void create_header(const basisu_backend_output& encoder_output, uint32_t userdata0, uint32_t userdata1, bool y_flipped);
+ bool create_image_descs(const basisu_backend_output& encoder_output);
+ void create_comp_data(const basisu_backend_output& encoder_output);
+ void fixup_crcs();
+ };
+
+} // namespace basisu
diff --git a/basisu_comp.cpp b/basisu_comp.cpp
new file mode 100644
index 0000000..72d5725
--- /dev/null
+++ b/basisu_comp.cpp
@@ -0,0 +1,1059 @@
+// basisu_comp.cpp
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "basisu_comp.h"
+#include "basisu_enc.h"
+#include
+
+#define BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN 0 // nonzero: generate_mipmaps() uses stb_image_resize; 0: it uses image_resample()
+#define DEBUG_RESIZE_TEXTURE_TO_64x64 (0) // nonzero: read_source_images() resizes every source image to 64x64
+#define DEBUG_EXTRACT_SINGLE_BLOCK (0) // nonzero: read_source_images() replaces each source image with its 4x4 block at block (0,0)
+
+namespace basisu
+{
+ basis_compressor::basis_compressor() : // Starts every counter at zero and every flag at false, then logs the call.
+ m_total_blocks(0), // running total of 4x4 blocks over all slices; accumulated by read_source_images()
+ m_auto_global_sel_pal(false), // decided later in process_frontend()
+ m_basis_file_size(0),
+ m_basis_bits_per_texel(0),
+ m_any_source_image_has_alpha(false) // set by read_source_images()
+ {
+ debug_printf("basis_compressor::basis_compressor\n");
+ }
+
+ bool basis_compressor::init(const basis_compressor_params &params)
+ {
+ debug_printf("basis_compressor::init\n");
+ // Copies params into m_params, dumps them to the debug log when m_debug is set, and returns false if images are to be read from disk but no filenames were given.
+ m_params = params;
+
+ if (m_params.m_debug)
+ {
+ debug_printf("basis_compressor::init:\n");
+ // Each helper prints "<name>: <value> <was_changed>"; the cast type matches the printf conversion used for the value.
+#define PRINT_BOOL_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast<uint32_t>(m_params.v), m_params.v.was_changed());
+#define PRINT_INT_VALUE(v) debug_printf("%s: %i %u\n", BASISU_STRINGIZE2(v), static_cast<int>(m_params.v), m_params.v.was_changed());
+#define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast<uint32_t>(m_params.v), m_params.v.was_changed());
+#define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast<float>(m_params.v), m_params.v.was_changed());
+
+ debug_printf("Has global selector codebook: %i\n", m_params.m_pSel_codebook != nullptr);
+
+ debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %u\n",
+ (uint32_t)m_params.m_source_images.size(), (uint32_t)m_params.m_source_filenames.size(), (uint32_t)m_params.m_source_alpha_filenames.size());
+
+ PRINT_BOOL_VALUE(m_y_flip);
+ PRINT_BOOL_VALUE(m_debug);
+ PRINT_BOOL_VALUE(m_debug_images);
+ PRINT_BOOL_VALUE(m_global_sel_pal);
+ PRINT_BOOL_VALUE(m_no_auto_global_sel_pal);
+ PRINT_BOOL_VALUE(m_no_endpoint_refinement);
+ PRINT_BOOL_VALUE(m_no_hybrid_sel_cb);
+ PRINT_BOOL_VALUE(m_perceptual);
+ PRINT_BOOL_VALUE(m_no_selector_rdo);
+ PRINT_BOOL_VALUE(m_read_source_images);
+ PRINT_BOOL_VALUE(m_write_output_basis_files);
+ PRINT_BOOL_VALUE(m_faster);
+ PRINT_BOOL_VALUE(m_compute_stats);
+ PRINT_BOOL_VALUE(m_check_for_alpha);
+ PRINT_BOOL_VALUE(m_force_alpha);
+ PRINT_BOOL_VALUE(m_seperate_rg_to_color_alpha);
+
+ PRINT_FLOAT_VALUE(m_hybrid_sel_cb_quality_thresh);
+
+ PRINT_INT_VALUE(m_global_pal_bits);
+ PRINT_INT_VALUE(m_global_mod_bits);
+
+ PRINT_FLOAT_VALUE(m_selector_rdo_thresh);
+
+ PRINT_BOOL_VALUE(m_mip_gen);
+ PRINT_BOOL_VALUE(m_mip_renormalize);
+ PRINT_BOOL_VALUE(m_mip_wrapping);
+ PRINT_BOOL_VALUE(m_mip_srgb);
+ PRINT_BOOL_VALUE(m_mip_premultiplied); // used as an on/off flag by generate_mipmaps(), so print it like the other flags
+ PRINT_FLOAT_VALUE(m_mip_scale);
+ PRINT_INT_VALUE(m_mip_smallest_dimension);
+ debug_printf("m_mip_filter: %s\n", m_params.m_mip_filter.c_str());
+
+ debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_max_endpoint_clusters);
+ debug_printf("m_max_selector_clusters: %u\n", m_params.m_max_selector_clusters);
+ debug_printf("m_quality_level: %i\n", m_params.m_quality_level);
+
+#undef PRINT_BOOL_VALUE
+#undef PRINT_INT_VALUE
+#undef PRINT_UINT_VALUE
+#undef PRINT_FLOAT_VALUE
+ }
+ // Reading images from disk requires at least one source filename.
+ if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size()))
+ {
+ assert(0);
+ return false;
+ }
+
+ return true;
+ }
+
+ basis_compressor::error_code basis_compressor::process()
+ {
+ debug_printf("basis_compressor::process\n");
+
+ // Runs the compression stages strictly in order and reports the first one that fails.
+ // The else-if chain short-circuits, so no later stage executes once a stage has failed.
+ // If every stage succeeds the result stays cECSuccess.
+ error_code result = cECSuccess;
+
+ if (!read_source_images())
+ result = cECFailedReadingSourceImages;
+ else if (!process_frontend())
+ result = cECFailedFrontEnd;
+ else if (!extract_frontend_texture_data())
+ result = cECFailedFontendExtract;
+ else if (!process_backend())
+ result = cECFailedBackend;
+ else if (!create_basis_file_and_transcode())
+ result = cECFailedCreateBasisFile;
+ else if (!write_output_files_and_compute_stats())
+ result = cECFailedWritingOutput;
+
+ return result;
+ }
+
+ bool basis_compressor::generate_mipmaps(const image &img, std::vector<image> &mips, bool has_alpha)
+ {
+ debug_printf("basis_compressor::generate_mipmaps\n");
+ // Appends mip levels 1..total_levels-1 of img to mips (level 0 is not added here); each level is resampled directly from img. Returns false if a resample fails.
+ uint32_t total_levels = 1;
+ uint32_t w = img.get_width(), h = img.get_height();
+ while (maximum(w, h) > maximum((uint32_t)m_params.m_mip_smallest_dimension, 1U)) // floor of 1: w and h never drop below 1, so a limit of 0 would loop forever
+ {
+ w = maximum(w >> 1U, 1U);
+ h = maximum(h >> 1U, 1U);
+ total_levels++;
+ }
+
+#if BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN
+ // Requires stb_image_resize
+ stbir_filter filter = STBIR_FILTER_DEFAULT;
+ if (m_params.m_mip_filter == "box")
+ filter = STBIR_FILTER_BOX;
+ else if (m_params.m_mip_filter == "triangle")
+ filter = STBIR_FILTER_TRIANGLE;
+ else if (m_params.m_mip_filter == "cubic")
+ filter = STBIR_FILTER_CUBICBSPLINE;
+ else if (m_params.m_mip_filter == "catmull")
+ filter = STBIR_FILTER_CATMULLROM;
+ else if (m_params.m_mip_filter == "mitchell")
+ filter = STBIR_FILTER_MITCHELL;
+
+ for (uint32_t level = 1; level < total_levels; level++)
+ {
+ const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level);
+ const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level);
+
+ image &level_img = *enlarge_vector(mips, 1);
+ level_img.resize(level_width, level_height);
+
+ int result = stbir_resize_uint8_generic(
+ (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba),
+ (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba),
+ has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0,
+ m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR,
+ nullptr);
+
+ if (result == 0)
+ {
+ error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n");
+ return false;
+ }
+
+ if (m_params.m_mip_renormalize)
+ level_img.renormalize_normal_map();
+ }
+#else
+ for (uint32_t level = 1; level < total_levels; level++)
+ {
+ const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level);
+ const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level);
+ // Grow mips by one element and size the new element for this level.
+ image &level_img = *enlarge_vector(mips, 1);
+ level_img.resize(level_width, level_height);
+ // has_alpha selects 4 vs 3 for the last argument (presumably the number of channels to resample - confirm against image_resample()).
+ bool status = image_resample(img, level_img, m_params.m_mip_srgb, m_params.m_mip_filter.c_str(), m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3);
+ if (!status)
+ {
+ error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n");
+ return false;
+ }
+
+ if (m_params.m_mip_renormalize)
+ level_img.renormalize_normal_map();
+ }
+#endif
+
+ return true;
+ }
+
+ bool basis_compressor::read_source_images()
+ {
+ debug_printf("basis_compressor::read_source_images\n");
+ // Loads (or copies) every source image, expands each into its slices (mip levels, split into color/alpha pairs when any image has alpha), fills m_stats/m_source_images/m_slice_descs, then sanity-checks the slice list. Returns false on any failure.
+ const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : (uint32_t)m_params.m_source_images.size();
+ if (!total_source_files)
+ return false;
+
+ m_stats.resize(0);
+ m_slice_descs.resize(0);
+ m_source_images.resize(0);
+
+ m_total_blocks = 0;
+ uint32_t total_macroblocks = 0;
+
+ m_any_source_image_has_alpha = false;
+
+ std::vector<image> source_images;
+ std::vector<std::string> source_filenames;
+
+ // First load all source images, and determine if any have an alpha channel.
+ for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)
+ {
+ const char *pSource_filename = "";
+
+ image file_image;
+
+ if (m_params.m_read_source_images)
+ {
+ pSource_filename = m_params.m_source_filenames[source_file_index].c_str();
+
+ // Load the source image
+ if (!load_png(pSource_filename, file_image))
+ {
+ error_printf("Failed reading source image: %s\n", pSource_filename);
+ return false;
+ }
+
+ printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height());
+
+ // Optionally load another image and put a grayscale version of it into the alpha channel.
+ if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size()))
+ {
+ const char *pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str();
+
+ image alpha_data;
+
+ if (!load_png(pSource_alpha_image, alpha_data))
+ {
+ error_printf("Failed reading source image: %s\n", pSource_alpha_image);
+ return false;
+ }
+
+ printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height());
+ // Make the alpha image the same size as the color image before copying its luma into the alpha channel.
+ alpha_data.crop(file_image.get_width(), file_image.get_height());
+
+ for (uint32_t y = 0; y < file_image.get_height(); y++)
+ for (uint32_t x = 0; x < file_image.get_width(); x++)
+ file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma();
+ }
+ }
+ else
+ {
+ file_image = m_params.m_source_images[source_file_index];
+ }
+
+ if (m_params.m_seperate_rg_to_color_alpha)
+ {
+ // Used for XY normal maps in RG - puts X in color, Y in alpha
+ for (uint32_t y = 0; y < file_image.get_height(); y++)
+ for (uint32_t x = 0; x < file_image.get_width(); x++)
+ {
+ const color_rgba &c = file_image(x, y);
+ file_image(x, y).set_noclamp_rgba(c.r, c.r, c.r, c.g);
+ }
+ }
+ // Alpha is forced on by m_force_alpha or the RG split; otherwise it is either discarded (alpha check disabled) or detected from the pixels.
+ bool has_alpha = false;
+ if ((m_params.m_force_alpha) || (m_params.m_seperate_rg_to_color_alpha))
+ has_alpha = true;
+ else if (!m_params.m_check_for_alpha)
+ file_image.set_alpha(255);
+ else if (file_image.has_alpha())
+ has_alpha = true;
+
+ if (has_alpha)
+ m_any_source_image_has_alpha = true;
+
+ debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha);
+
+ if (m_params.m_y_flip)
+ file_image.flip_y();
+
+#if DEBUG_EXTRACT_SINGLE_BLOCK
+ image block_image(4, 4);
+ const uint32_t block_x = 0;
+ const uint32_t block_y = 0;
+ block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0);
+ file_image = block_image;
+#endif
+
+#if DEBUG_RESIZE_TEXTURE_TO_64x64
+ file_image.resize(64, 64);
+#endif
+
+ if ((!file_image.get_width()) || (!file_image.get_height()))
+ {
+ error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n");
+ return false;
+ }
+
+ if ((file_image.get_width() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (file_image.get_height() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION))
+ {
+ error_printf("basis_compressor::read_source_images: Source image is too large!\n");
+ return false;
+ }
+
+ source_images.push_back(file_image);
+ source_filenames.push_back(pSource_filename);
+ }
+
+ debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha);
+ // Second pass: turn every loaded image into its list of slices and record a descriptor for each one.
+ for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++)
+ {
+ image &file_image = source_images[source_file_index];
+ const std::string &source_filename = source_filenames[source_file_index];
+
+ std::vector<image> slices;
+
+ slices.reserve(32);
+ slices.push_back(file_image);
+
+ if (m_params.m_mip_gen)
+ {
+ if (!generate_mipmaps(file_image, slices, m_any_source_image_has_alpha))
+ return false;
+ }
+
+ uint_vec mip_indices(slices.size());
+ for (uint32_t i = 0; i < slices.size(); i++)
+ mip_indices[i] = i;
+
+ if (m_any_source_image_has_alpha)
+ {
+ // If any source has alpha, every level becomes two slices: even slice indices hold RGB, odd slice indices hold alpha replicated into RGB.
+ std::vector<image> alpha_slices;
+ uint_vec new_mip_indices;
+
+ alpha_slices.reserve(slices.size() * 2);
+
+ for (uint32_t i = 0; i < slices.size(); i++)
+ {
+ image lvl_rgb(slices[i]);
+ image lvl_a(lvl_rgb);
+
+ for (uint32_t y = 0; y < lvl_a.get_height(); y++)
+ {
+ for (uint32_t x = 0; x < lvl_a.get_width(); x++)
+ {
+ uint8_t a = lvl_a(x, y).a;
+ lvl_a(x, y).set_noclamp_rgba(a, a, a, 255);
+ }
+ }
+
+ lvl_rgb.set_alpha(255);
+
+ alpha_slices.push_back(lvl_rgb);
+ new_mip_indices.push_back(i);
+
+ alpha_slices.push_back(lvl_a);
+ new_mip_indices.push_back(i);
+ }
+
+ slices.swap(alpha_slices);
+ mip_indices.swap(new_mip_indices);
+ }
+
+ assert(slices.size() == mip_indices.size());
+
+ for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++)
+ {
+ const bool is_alpha_slice = m_any_source_image_has_alpha && ((slice_index & 1) != 0);
+
+ image &source_image = slices[slice_index];
+ const uint32_t orig_width = source_image.get_width();
+ const uint32_t orig_height = source_image.get_height();
+
+ // Enlarge the source image to 4x4 block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks.
+ source_image.crop_dup_borders(source_image.get_block_width(4) * 4, source_image.get_block_height(4) * 4);
+
+ if (m_params.m_debug_images)
+ {
+ save_png(string_format("basis_debug_source_image_%u_%u.png", source_file_index, slice_index).c_str(), source_image);
+ }
+ // m_stats, m_source_images and m_slice_descs grow in lockstep, so one index addresses the new entry in all three.
+ enlarge_vector(m_stats, 1);
+ enlarge_vector(m_source_images, 1);
+ enlarge_vector(m_slice_descs, 1);
+
+ const uint32_t dest_image_index = (uint32_t)m_stats.size() - 1;
+
+ m_stats[dest_image_index].m_filename = source_filename.c_str();
+ m_stats[dest_image_index].m_width = orig_width;
+ m_stats[dest_image_index].m_height = orig_height;
+
+ m_source_images[dest_image_index] = source_image;
+ // Log the 32-bit slice index: passing the size_t expression size() - 1 to %u does not match the conversion.
+ debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", dest_image_index, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, source_image.get_width(), source_image.get_height());
+
+ basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index];
+
+ slice_desc.m_first_block_index = m_total_blocks;
+
+ slice_desc.m_orig_width = orig_width;
+ slice_desc.m_orig_height = orig_height;
+
+ slice_desc.m_width = source_image.get_width();
+ slice_desc.m_height = source_image.get_height();
+
+ slice_desc.m_num_blocks_x = source_image.get_block_width(4);
+ slice_desc.m_num_blocks_y = source_image.get_block_height(4);
+ // A macroblock spans 2x2 blocks here, rounded up.
+ slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1;
+ slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1;
+
+ slice_desc.m_source_file_index = source_file_index;
+
+ slice_desc.m_mip_index = mip_indices[slice_index];
+
+ slice_desc.m_alpha = is_alpha_slice;
+
+ m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;
+ total_macroblocks += slice_desc.m_num_macroblocks_x * slice_desc.m_num_macroblocks_y;
+
+ } // slice_index
+
+ } // source_file_index
+
+ debug_printf("Total blocks: %u, Total macroblocks: %u\n", m_total_blocks, total_macroblocks);
+
+ // Make sure we don't have too many slices
+ if (m_slice_descs.size() > BASISU_MAX_SLICES)
+ {
+ error_printf("Too many slices!\n");
+ return false;
+ }
+
+ // Basic sanity check on the slices
+ for (uint32_t i = 1; i < m_slice_descs.size(); i++)
+ {
+ const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1];
+ const basisu_backend_slice_desc &slice_desc = m_slice_descs[i];
+
+ // Make sure images are in order
+ int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index;
+ if (image_delta > 1)
+ return false;
+
+ // Make sure mipmap levels are in order
+ if (!image_delta)
+ {
+ int level_delta = (int)slice_desc.m_mip_index - (int)prev_slice_desc.m_mip_index;
+ if (level_delta > 1)
+ return false;
+ }
+ }
+
+ printf("Total basis file slices: %u\n", (uint32_t)m_slice_descs.size());
+
+ for (uint32_t i = 0; i < m_slice_descs.size(); i++)
+ {
+ const basisu_backend_slice_desc &slice_desc = m_slice_descs[i];
+
+ printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u\n",
+ i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index);
+
+ if (m_any_source_image_has_alpha)
+ {
+ // Alpha slices must be at odd slice indices
+ if (slice_desc.m_alpha)
+ {
+ if ((i & 1) == 0)
+ return false;
+
+ const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1];
+
+ // Make sure previous slice has this image's color data
+ if (prev_slice_desc.m_source_file_index != slice_desc.m_source_file_index)
+ return false;
+ if (prev_slice_desc.m_alpha)
+ return false;
+ if (prev_slice_desc.m_mip_index != slice_desc.m_mip_index)
+ return false;
+ if (prev_slice_desc.m_num_blocks_x != slice_desc.m_num_blocks_x)
+ return false;
+ if (prev_slice_desc.m_num_blocks_y != slice_desc.m_num_blocks_y)
+ return false;
+ }
+ else if (i & 1)
+ return false;
+ }
+ else if (slice_desc.m_alpha)
+ {
+ return false;
+ }
+ // The padded dimensions can never be smaller than the original ones.
+ if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height))
+ return false;
+ }
+
+ return true;
+ }
+
+ bool basis_compressor::process_frontend()
+ {
+ debug_printf("basis_compressor::process_frontend\n");
+
+ m_source_blocks.resize(m_total_blocks);
+
+ for (uint32_t slice_index = 0; slice_index < m_source_images.size(); slice_index++)
+ {
+ const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index];
+
+ const uint32_t num_blocks_x = slice_desc.m_num_blocks_x;
+ const uint32_t num_blocks_y = slice_desc.m_num_blocks_y;
+
+ const image &source_image = m_source_images[slice_index];
+
+ for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++)
+ for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++)
+ source_image.extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4);
+ }
+
+#if 0
+ // TODO
+ basis_etc1_pack_params pack_params;
+ pack_params.m_quality = cETCQualityMedium;
+ pack_params.m_perceptual = m_params.m_perceptual;
+ pack_params.m_use_color4 = false;
+
+ pack_etc1_block_context pack_context;
+
+ std::unordered_set endpoint_hash;
+ std::unordered_set selector_hash;
+
+ for (uint32_t i = 0; i < m_source_blocks.size(); i++)
+ {
+ etc_block blk;
+ pack_etc1_block(blk, m_source_blocks[i].get_ptr(), pack_params, pack_context);
+
+ const color_rgba c0(blk.get_block_color(0, false));
+ endpoint_hash.insert((c0.r | (c0.g << 5) | (c0.b << 10)) | (blk.get_inten_table(0) << 16));
+
+ const color_rgba c1(blk.get_block_color(1, false));
+ endpoint_hash.insert((c1.r | (c1.g << 5) | (c1.b << 10)) | (blk.get_inten_table(1) << 16));
+
+ selector_hash.insert(blk.get_raw_selector_bits());
+ }
+
+ const uint32_t total_unique_endpoints = (uint32_t)endpoint_hash.size();
+ const uint32_t total_unique_selectors = (uint32_t)selector_hash.size();
+
+ if (m_params.m_debug)
+ {
+ debug_printf("Unique endpoints: %u, unique selectors: %u\n", total_unique_endpoints, total_unique_selectors);
+ }
+#endif
+
+ const double total_texels = m_total_blocks * 16.0f;
+
+ int endpoint_clusters = m_params.m_max_endpoint_clusters;
+ int selector_clusters = m_params.m_max_selector_clusters;
+
+ if (endpoint_clusters > basisu_frontend::cMaxEndpointClusters)
+ {
+ error_printf("Too many endpoint clusters! (%u but max is %u)\n", endpoint_clusters, basisu_frontend::cMaxEndpointClusters);
+ return false;
+ }
+ if (selector_clusters > basisu_frontend::cMaxSelectorClusters)
+ {
+ error_printf("Too many selector clusters! (%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters);
+ return false;
+ }
+
+ if (m_params.m_quality_level != -1)
+ {
+ const float quality = saturate(m_params.m_quality_level / 255.0f);
+
+ float color_endpoint_quality = quality;
+ float color_selector_quality = quality;
+
+ const float bits_per_endpoint_cluster = 22.0f;
+ const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f
+ int max_endpoints = static_cast((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster);
+ max_endpoints = clamp(max_endpoints, 256, 3072); //basisu_frontend::cMaxEndpointClusters);
+ max_endpoints = minimum(max_endpoints, m_total_blocks);
+
+ const float mid = 128.0f / 255.0f;
+
+ const float endpoint_split_point = 0.5f;
+ if (color_endpoint_quality <= mid)
+ color_endpoint_quality = lerp(0.0f, endpoint_split_point, powf(color_endpoint_quality / mid, .65f));
+ else
+ color_endpoint_quality = lerp(endpoint_split_point, 1.0f, powf((color_endpoint_quality - mid) / (1.0f - mid), 1.5f));
+
+ if (max_endpoints < 64)
+ max_endpoints = 64;
+ endpoint_clusters = clamp((uint32_t)(.5f + lerp(32, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters);
+
+ float bits_per_selector_cluster = m_params.m_global_sel_pal ? 21.0f : 31.0f;
+
+ const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f
+ int max_selectors = static_cast((max_desired_selector_cluster_bits_per_texel * total_texels) / bits_per_selector_cluster);
+ max_selectors = clamp(max_selectors, 256, basisu_frontend::cMaxSelectorClusters);
+ max_selectors = minimum(max_selectors, m_total_blocks);
+
+ color_selector_quality = powf(color_selector_quality, 1.65f);
+
+ if (max_selectors < 96)
+ max_selectors = 96;
+ selector_clusters = clamp((uint32_t)(.5f + lerp(96, static_cast(max_selectors), color_selector_quality)), 8, basisu_frontend::cMaxSelectorClusters);
+
+ debug_printf("Max endpoints: %u (out of %u), max selectors: %u (out of %u)\n", endpoint_clusters, max_endpoints, selector_clusters, max_selectors);
+
+ if (m_params.m_quality_level >= 223)
+ {
+ if (!m_params.m_selector_rdo_thresh.was_changed())
+ m_params.m_selector_rdo_thresh *= .25f;
+ }
+ else if (m_params.m_quality_level >= 192)
+ {
+ if (!m_params.m_selector_rdo_thresh.was_changed())
+ m_params.m_selector_rdo_thresh *= .5f;
+ }
+ else if (m_params.m_quality_level >= 160)
+ {
+ if (!m_params.m_selector_rdo_thresh.was_changed())
+ m_params.m_selector_rdo_thresh *= .75f;
+ }
+ else if (m_params.m_quality_level >= 129)
+ {
+ float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f);
+
+ if (!m_params.m_selector_rdo_thresh.was_changed())
+ m_params.m_selector_rdo_thresh *= lerp(1.0f, .75f, l);
+ }
+ }
+
+ m_auto_global_sel_pal = false;
+ if (!m_params.m_global_sel_pal && !m_params.m_no_auto_global_sel_pal)
+ {
+ const float bits_per_selector_cluster = 31.0f;
+ double selector_codebook_bpp_est = (bits_per_selector_cluster * selector_clusters) / total_texels;
+ debug_printf("selector_codebook_bpp_est: %f\n", selector_codebook_bpp_est);
+ const float force_global_sel_pal_bpp_threshold = .15f;
+ if ((total_texels <= 128.0f*128.0f) && (selector_codebook_bpp_est > force_global_sel_pal_bpp_threshold))
+ {
+ m_auto_global_sel_pal = true;
+ debug_printf("Auto global selector palette enabled\n");
+ }
+ }
+
+ basisu_frontend::params p;
+ p.m_num_source_blocks = m_total_blocks;
+ p.m_pSource_blocks = &m_source_blocks[0];
+ p.m_max_endpoint_clusters = endpoint_clusters;
+ p.m_max_selector_clusters = selector_clusters;
+ p.m_perceptual = m_params.m_perceptual;
+ p.m_endpoint_refinement = !m_params.m_no_endpoint_refinement;
+ p.m_debug_stats = m_params.m_debug;
+ p.m_debug_images = m_params.m_debug_images;
+ p.m_faster = m_params.m_faster;
+
+ if ((m_params.m_global_sel_pal) || (m_auto_global_sel_pal))
+ {
+ p.m_pGlobal_sel_codebook = m_params.m_pSel_codebook;
+ p.m_num_global_sel_codebook_pal_bits = m_params.m_global_pal_bits;
+ p.m_num_global_sel_codebook_mod_bits = m_params.m_global_mod_bits;
+ p.m_use_hybrid_selector_codebooks = !m_params.m_no_hybrid_sel_cb;
+ p.m_hybrid_codebook_quality_thresh = m_params.m_hybrid_sel_cb_quality_thresh;
+ }
+
+ if (!m_frontend.init(p))
+ {
+ error_printf("basisu_frontend::init() failed!\n");
+ return false;
+ }
+
+ m_frontend.compress();
+
+ if (m_params.m_debug_images)
+ {
+ for (uint32_t i = 0; i < m_slice_descs.size(); i++)
+ {
+ char filename[1024];
+#ifdef _WIN32
+ sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i);
+#else
+ snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i);
+#endif
+ m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true);
+
+#ifdef _WIN32
+ sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i);
+#else
+ snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i);
+#endif
+ m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false);
+ }
+ }
+
+ return true;
+ }
+
+ bool basis_compressor::extract_frontend_texture_data() // Copies the frontend's per-slice block data into GPU images and unpacks the ETC1S result.
+ {
+ debug_printf("basis_compressor::extract_frontend_texture_data\n");
+ const size_t total_slices = m_slice_descs.size(); // one entry per slice in each output array
+ m_frontend_output_textures.resize(total_slices);
+ m_best_etc1s_images.resize(total_slices);
+ m_best_etc1s_images_unpacked.resize(total_slices);
+
+ for (uint32_t slice_index = 0; slice_index < total_slices; slice_index++)
+ {
+ const basisu_backend_slice_desc &desc = m_slice_descs[slice_index];
+ // Slice size in blocks; the images below are created at 4 texels per block in each dimension.
+ const uint32_t blocks_wide = desc.m_num_blocks_x;
+ const uint32_t blocks_high = desc.m_num_blocks_y;
+
+ const uint32_t texel_width = blocks_wide * 4;
+ const uint32_t texel_height = blocks_high * 4;
+
+ m_frontend_output_textures[slice_index].init(cETC1, texel_width, texel_height);
+ // The slice's blocks are addressed row-major, starting at m_first_block_index.
+ for (uint32_t by = 0; by < blocks_high; by++)
+ for (uint32_t bx = 0; bx < blocks_wide; bx++)
+ memcpy(m_frontend_output_textures[slice_index].get_block_ptr(bx, by, 0), &m_frontend.get_output_block(desc.m_first_block_index + bx + by * blocks_wide), sizeof(etc_block));
+
+#if 0
+ if (m_params.m_debug_images)
+ {
+ char filename[1024];
+ sprintf_s(filename, sizeof(filename), "rdo_etc_frontend_%u_", slice_index);
+ write_etc1_vis_images(m_frontend_output_textures[slice_index], filename);
+ }
+#endif
+
+ m_best_etc1s_images[slice_index].init(cETC1, texel_width, texel_height);
+ for (uint32_t by = 0; by < blocks_high; by++)
+ for (uint32_t bx = 0; bx < blocks_wide; bx++)
+ memcpy(m_best_etc1s_images[slice_index].get_block_ptr(bx, by, 0), &m_frontend.get_etc1s_block(desc.m_first_block_index + bx + by * blocks_wide), sizeof(etc_block));
+
+ m_best_etc1s_images[slice_index].unpack(m_best_etc1s_images_unpacked[slice_index]);
+ }
+
+ return true;
+ }
+
+ bool basis_compressor::process_backend() // Configures the backend from the compressor/frontend settings and runs the encode.
+ {
+ debug_printf("basis_compressor::process_backend\n");
+
+ basisu_backend_params bp;
+ bp.m_debug = m_params.m_debug;
+ bp.m_debug_images = m_params.m_debug_images;
+ bp.m_etc1s = true;
+ if (!m_params.m_no_selector_rdo)
+ bp.m_delta_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh;
+ const auto &frontend_params = m_frontend.get_params(); // the backend mirrors the frontend's selector codebook setup
+ bp.m_use_global_sel_codebook = (frontend_params.m_pGlobal_sel_codebook != NULL);
+ bp.m_global_sel_codebook_pal_bits = frontend_params.m_num_global_sel_codebook_pal_bits;
+ bp.m_global_sel_codebook_mod_bits = frontend_params.m_num_global_sel_codebook_mod_bits;
+ bp.m_use_hybrid_sel_codebooks = frontend_params.m_use_hybrid_selector_codebooks;
+
+ m_backend.init(&m_frontend, bp, m_slice_descs, m_params.m_pSel_codebook);
+ const uint32_t packed_size = m_backend.encode();
+ // A zero byte count from encode() is treated as failure.
+ if (packed_size != 0)
+ {
+ debug_printf("Total packed bytes (estimated): %u\n", packed_size);
+ return true;
+ }
+
+ error_printf("basis_compressor::encode() failed!\n");
+
+ return false;
+ }
+
+ bool basis_compressor::create_basis_file_and_transcode() // Builds the .basis file from the backend output, then transcodes every slice to ETC1 and BC1 to validate it. Returns false on any failure.
+ {
+ debug_printf("basis_compressor::create_basis_file_and_transcode\n");
+
+ const basisu_backend_output &encoded_output = m_backend.get_output();
+
+ if (!m_basis_file.init(encoded_output, 0, 0, m_params.m_y_flip))
+ {
+ error_printf("basis_compressor::create_basis_file_and_transcode: basisu_file::init() failed!\n");
+ return false;
+ }
+
+ const uint8_vec &comp_data = m_basis_file.get_compressed_data();
+
+ m_output_basis_file = comp_data;
+
+ // Verify the compressed data by transcoding it to ETC1/BC1 and validating the CRC's.
+ basist::basisu_transcoder decoder(m_params.m_pSel_codebook);
+ if (!decoder.validate_file_checksums(&comp_data[0], (uint32_t)comp_data.size(), true))
+ {
+ error_printf("decoder.validate_file_checksums() failed!\n");
+ return false;
+ }
+
+ m_decoded_output_textures.resize(m_slice_descs.size());
+ m_decoded_output_textures_unpacked.resize(m_slice_descs.size());
+
+ m_decoded_output_textures_bc1.resize(m_slice_descs.size());
+ m_decoded_output_textures_unpacked_bc1.resize(m_slice_descs.size());
+
+ interval_timer tm;
+ tm.start();
+
+ if (!decoder.start_decoding(&comp_data[0], (uint32_t)comp_data.size()))
+ {
+ error_printf("decoder.start_decoding() failed!\n");
+ return false;
+ }
+
+ debug_printf("basisu_comppressor::start_decoding() took %3.3fms\n", tm.get_elapsed_ms());
+
+ uint32_t total_orig_pixels = 0;
+ uint32_t total_texels = 0;
+
+ double total_time_etc1 = 0;
+ // Pass 1: transcode each slice to ETC1 and check its CRC against the value recorded by the backend.
+ for (uint32_t i = 0; i < m_slice_descs.size(); i++)
+ {
+ gpu_image decoded_texture;
+ decoded_texture.init(cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height);
+
+ tm.start();
+
+ if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i,
+ reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::cETC1, 8)) // NOTE(review): the cast's template argument appears to have been lost in this copy - restore from upstream
+ {
+ error_printf("Transcoding failed to ETC1 on slice %u!\n", i);
+ return false;
+ }
+
+ total_time_etc1 += tm.get_elapsed_secs();
+
+ uint32_t image_crc16 = basist::crc16(decoded_texture.get_ptr(), decoded_texture.get_size_in_bytes(), 0);
+ if (image_crc16 != m_backend.get_output().m_slice_image_crcs[i])
+ {
+ error_printf("Decoded image data CRC check failed on slice %u!\n", i);
+ return false; // was EXIT_FAILURE, which converts to true in a bool function and reported success
+ }
+ debug_printf("Decoded image data CRC check succeeded on slice %u\n", i);
+
+ m_decoded_output_textures[i] = decoded_texture;
+
+ total_orig_pixels += m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height;
+ total_texels += m_slice_descs[i].m_width * m_slice_descs[i].m_height;
+ }
+
+ tm.start();
+
+ basist::basisu_transcoder_init();
+
+ debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms());
+
+ double total_time_bc1 = 0;
+ // Pass 2: transcode each slice to BC1 (no CRC check is done for this format).
+ for (uint32_t i = 0; i < m_slice_descs.size(); i++)
+ {
+ gpu_image decoded_texture;
+ decoded_texture.init(cBC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height);
+
+ tm.start();
+
+ if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i,
+ reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::cBC1, 8)) // NOTE(review): same stripped cast argument as the ETC1 pass above
+ {
+ error_printf("Transcoding failed to BC1 on slice %u!\n", i);
+ return false;
+ }
+
+ total_time_bc1 += tm.get_elapsed_secs();
+
+ m_decoded_output_textures_bc1[i] = decoded_texture;
+ }
+
+ for (uint32_t i = 0; i < m_slice_descs.size(); i++)
+ {
+ m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]);
+ m_decoded_output_textures_bc1[i].unpack(m_decoded_output_textures_unpacked_bc1[i]);
+ }
+
+ debug_printf("Transcoded to ETC1 in %3.3fms, %f texels/sec\n", total_time_etc1 * 1000.0f, total_orig_pixels / total_time_etc1);
+
+ debug_printf("Transcoded to BC1 in %3.3fms, %f texels/sec\n", total_time_bc1 * 1000.0f, total_orig_pixels / total_time_bc1);
+
+ debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", (uint32_t)comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); // cast: %u expects unsigned int, not size_t
+
+ m_output_blocks.resize(0);
+ // Concatenate the decoded ETC1 blocks of all slices into m_output_blocks.
+ uint32_t total_orig_texels = 0;
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ {
+ const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index];
+
+ total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height;
+
+ const uint32_t total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y;
+
+ assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks);
+
+ memcpy(enlarge_vector(m_output_blocks, total_blocks), m_decoded_output_textures[slice_index].get_ptr(), sizeof(etc_block) * total_blocks);
+ }
+
+ m_basis_file_size = (uint32_t)comp_data.size();
+ m_basis_bits_per_texel = (comp_data.size() * 8.0f) / total_orig_texels;
+
+ return true;
+ }
+
+ bool basis_compressor::write_output_files_and_compute_stats() // Optionally writes the .basis file, then fills m_stats and dumps debug images per slice. Returns false only if the file write fails.
+ {
+ debug_printf("basis_compressor::write_output_files_and_compute_stats\n");
+
+ if (m_params.m_write_output_basis_files)
+ {
+ const uint8_vec &comp_data = m_basis_file.get_compressed_data();
+
+ std::string basis_filename(m_params.m_out_filename);
+ string_remove_extension(basis_filename);
+ basis_filename += ".basis";
+
+ if (!write_vec_to_file(basis_filename.c_str(), comp_data))
+ {
+ error_printf("Failed writing output data to file \"%s\"\n", basis_filename.c_str());
+ return false;
+ }
+
+ printf("Wrote output .basis file \"%s\"\n", basis_filename.c_str());
+ }
+
+ m_stats.resize(m_slice_descs.size());
+
+ uint32_t total_orig_texels = 0;
+
+ for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++)
+ {
+ const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index];
+
+ total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height;
+
+ if (m_params.m_compute_stats)
+ {
+ printf("Slice: %u\n", slice_index);
+
+ image_stats &s = m_stats[slice_index];
+
+ image_metrics em;
+
+ // best possible ETC1S stats
+ em.calc(m_source_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0);
+ em.print("Unquantized ETC1S Luma: ");
+
+ s.m_best_luma_psnr = static_cast<float>(em.m_psnr);
+ s.m_best_luma_ssim = static_cast<float>(em.m_ssim);
+
+ em.calc(m_source_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3);
+ em.print("Unquantized ETC1S RGB Avg: ");
+
+ s.m_best_rgb_avg_psnr = static_cast<float>(em.m_psnr);
+
+ // .basis ETC1S stats
+ em.calc(m_source_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0);
+ em.print(".basis ETC1S Luma: ");
+
+ s.m_basis_etc1_luma_psnr = static_cast<float>(em.m_psnr);
+ s.m_basis_etc1_luma_ssim = static_cast<float>(em.m_ssim);
+
+ em.calc(m_source_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3);
+ em.print(".basis ETC1S RGB Avg: ");
+ s.m_basis_etc1_rgb_avg_psnr = static_cast<float>(em.m_psnr); // this stat was printed but never stored
+ //debug_printf(".basis ETC1 Luma SSIM per bit/texel*1000: %3.3f\n", 1000.0f * s.m_basis_etc1_luma_ssim / ((m_backend.get_output().get_output_size_estimate() * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height)));
+
+ // .basis BC1 stats
+ em.calc(m_source_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 0);
+ em.print(".basis BC1 Luma: ");
+
+ s.m_basis_bc1_luma_psnr = static_cast<float>(em.m_psnr);
+ s.m_basis_bc1_luma_ssim = static_cast<float>(em.m_ssim);
+
+ em.calc(m_source_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 3);
+ em.print(".basis BC1 RGB Avg: ");
+
+ s.m_basis_bc1_rgb_avg_psnr = static_cast<float>(em.m_psnr);
+ }
+
+ if (m_frontend.get_params().m_debug_images)
+ {
+ std::string out_basename;
+ if (m_params.m_out_filename.size())
+ string_get_filename(m_params.m_out_filename.c_str(), out_basename);
+ else if (m_params.m_source_filenames.size())
+ string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename);
+
+ string_remove_extension(out_basename);
+ out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index);
+
+ // Write "best" ETC1S debug images
+ {
+ gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]);
+ best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_compressed_texture_file(out_basename + "_best_etc1s.ktx", best_etc1s_gpu_image);
+
+ image best_etc1s_unpacked;
+ best_etc1s_gpu_image.unpack(best_etc1s_unpacked);
+ save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked);
+ }
+
+ // Write decoded ETC1S debug images
+ {
+ gpu_image decoded_etc1s(m_decoded_output_textures[slice_index]);
+ decoded_etc1s.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_compressed_texture_file(out_basename + "_decoded_etc1s.ktx", decoded_etc1s);
+
+ image temp(m_decoded_output_textures_unpacked[slice_index]);
+ temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ save_png(out_basename + "_decoded_etc1s.png", temp);
+ }
+
+ // Write decoded BC1 debug images
+ {
+ gpu_image decoded_bc1(m_decoded_output_textures_bc1[slice_index]);
+ decoded_bc1.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ write_compressed_texture_file(out_basename + "_decoded_bc1.ktx", decoded_bc1);
+
+ image temp(m_decoded_output_textures_unpacked_bc1[slice_index]);
+ temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height);
+ save_png(out_basename + "_decoded_bc1.png", temp);
+ }
+ }
+ }
+
+ return true;
+ }
+
+} // namespace basisu
diff --git a/basisu_comp.h b/basisu_comp.h
new file mode 100644
index 0000000..d847fd2
--- /dev/null
+++ b/basisu_comp.h
@@ -0,0 +1,396 @@
+// basisu_comp.h
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "basisu_frontend.h"
+#include "basisu_backend.h"
+#include "basisu_basis_file.h"
+#include "transcoder/basisu_global_selector_palette.h"
+#include "transcoder/basisu_transcoder.h"
+
+#define BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION (16384) // same value as BSISU_MAX_IMAGE_DIMENSION below
+ // Defaults used by basis_compressor_params.
+const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; // initial m_selector_rdo_thresh
+const int BASISU_DEFAULT_QUALITY = 128; // presumably the default for m_quality_level chosen by callers - not referenced in this header
+const float BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH = 2.0f; // initial m_hybrid_sel_cb_quality_thresh
+ // Limits.
+const uint32_t BSISU_MAX_IMAGE_DIMENSION = 16384; // NOTE(review): name looks like a typo of "BASISU_"; left unchanged because external code may reference it
+const uint32_t BASISU_QUALITY_MIN = 1;
+const uint32_t BASISU_QUALITY_MAX = 255;
+const uint32_t BASISU_MAX_ENDPOINT_CLUSTERS = 8192;
+const uint32_t BASISU_MAX_SELECTOR_CLUSTERS = 7936;
+
+const uint32_t BASISU_MAX_SLICES = 0xFFFFFF;
+
+namespace basisu
+{
+ struct image_stats // Per-slice quality/size statistics; the compressor keeps one entry per slice in m_stats.
+ {
+ image_stats()
+ {
+ clear();
+ }
+
+ void clear() // Resets every field to zero/empty.
+ {
+ m_filename.clear();
+ m_width = m_height = 0;
+
+ m_basis_etc1_rgb_avg_psnr = 0.0f;
+ m_basis_etc1_luma_psnr = 0.0f;
+ m_basis_etc1_luma_ssim = 0.0f;
+ m_basis_bits_per_texel = 0.0f;
+ m_basis_orig_size = 0;
+ m_basis_compressed_size = 0;
+ m_basis_bc1_rgb_avg_psnr = m_basis_bc1_luma_psnr = m_basis_bc1_luma_ssim = 0.0f; // these three were previously left uninitialized
+
+ m_best_rgb_avg_psnr = 0.0f;
+ m_best_luma_psnr = 0.0f;
+ m_best_luma_ssim = 0.0f;
+ m_best_bits_per_texel = 0.0f;
+ m_best_orig_size = 0;
+ m_best_compressed_size = 0;
+ }
+
+ std::string m_filename;
+ uint32_t m_width;
+ uint32_t m_height;
+
+ // .basis compressed
+ float m_basis_etc1_rgb_avg_psnr;
+ float m_basis_etc1_luma_psnr;
+ float m_basis_etc1_luma_ssim;
+ float m_basis_bits_per_texel;
+ uint64_t m_basis_orig_size;
+ uint64_t m_basis_compressed_size;
+ // .basis transcoded to BC1
+ float m_basis_bc1_rgb_avg_psnr;
+ float m_basis_bc1_luma_psnr;
+ float m_basis_bc1_luma_ssim;
+
+ // Normal (highest quality) compressed (ETC1S, not full ETC1)
+ float m_best_rgb_avg_psnr;
+ float m_best_luma_psnr;
+ float m_best_luma_ssim;
+ float m_best_bits_per_texel;
+ uint64_t m_best_orig_size;
+ uint64_t m_best_compressed_size;
+ };
+
+ template<bool def>
+ struct bool_param // A bool setting with compile-time default `def` that remembers whether it was explicitly assigned.
+ {
+ bool_param() :
+ m_value(def),
+ m_changed(false)
+ {
+ }
+
+ void clear() // Restores the default and forgets any explicit assignment.
+ {
+ m_value = def;
+ m_changed = false;
+ }
+
+ operator bool() const
+ {
+ return m_value;
+ }
+
+ bool operator= (bool v) // Stores v and marks the setting as changed, even if v equals the default.
+ {
+ m_value = v;
+ m_changed = true;
+ return m_value;
+ }
+
+ bool was_changed() const { return m_changed; }
+ void set_changed(bool flag) { m_changed = flag; }
+
+ bool m_value;
+ bool m_changed;
+ };
+
+ template<typename T>
+ struct param // A ranged setting of type T: holds a value, its default and [min, max], and remembers whether it was explicitly modified.
+ {
+ param(T def, T min_v, T max_v) :
+ m_value(def),
+ m_def(def),
+ m_min(min_v),
+ m_max(max_v),
+ m_changed(false)
+ {
+ }
+
+ void clear() // Restores the default and forgets any explicit modification.
+ {
+ m_value = m_def;
+ m_changed = false;
+ }
+
+ operator T() const
+ {
+ return m_value;
+ }
+
+ T operator= (T v) // Stores v clamped to [m_min, m_max] and marks the setting as changed.
+ {
+ m_value = clamp(v, m_min, m_max);
+ m_changed = true;
+ return m_value;
+ }
+
+ T operator *= (T v) // Scales the value; the result is clamped to [m_min, m_max] like operator= (it previously bypassed the range).
+ {
+ m_value = clamp(static_cast<T>(m_value * v), m_min, m_max);
+ m_changed = true;
+ return m_value;
+ }
+
+ bool was_changed() const { return m_changed; }
+ void set_changed(bool flag) { m_changed = flag; }
+
+ T m_value;
+ T m_def;
+ T m_min;
+ T m_max;
+ bool m_changed;
+ };
+
+ struct basis_compressor_params // All user-settable options consumed by basis_compressor::init().
+ {
+ basis_compressor_params() :
+ m_hybrid_sel_cb_quality_thresh(BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH, 0.0f, 1e+10f),
+ m_global_pal_bits(8, 0, ETC1_GLOBAL_SELECTOR_CODEBOOK_MAX_PAL_BITS),
+ m_global_mod_bits(8, 0, basist::etc1_global_palette_entry_modifier::cTotalBits),
+ m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f),
+ m_pSel_codebook(NULL),
+ m_max_endpoint_clusters(512),
+ m_max_selector_clusters(512),
+ m_quality_level(-1),
+ m_mip_scale(1.0f, .000125f, 4.0f),
+ m_mip_smallest_dimension(1, 1, 16384)
+ {
+ clear(); // note: clear() resets both cluster counts to 0, overriding the 512 set in the initializer list
+ }
+
+ void clear() // Resets every option to its default and clears all "was changed" flags.
+ {
+ m_pSel_codebook = NULL;
+
+ m_source_filenames.clear();
+ m_source_alpha_filenames.clear();
+
+ m_source_images.clear();
+
+ m_out_filename.clear();
+
+ m_y_flip.clear();
+ m_debug.clear();
+ m_debug_images.clear();
+ m_global_sel_pal.clear();
+ m_no_auto_global_sel_pal.clear();
+ m_no_endpoint_refinement.clear();
+ m_no_hybrid_sel_cb.clear();
+ m_perceptual.clear();
+ m_no_selector_rdo.clear();
+ m_selector_rdo_thresh.clear();
+ m_read_source_images.clear();
+ m_write_output_basis_files.clear();
+ m_faster.clear();
+ m_compute_stats.clear();
+ m_check_for_alpha.clear();
+ m_force_alpha.clear();
+ m_seperate_rg_to_color_alpha.clear();
+ m_hybrid_sel_cb_quality_thresh.clear();
+ m_global_pal_bits.clear();
+ m_global_mod_bits.clear();
+
+ m_mip_gen.clear();
+ m_mip_scale.clear();
+ m_mip_filter = "kaiser";
+ // m_mip_scale is already back at its 1.0f default; re-assigning it here would set its "changed" flag.
+ m_mip_srgb.clear();
+ m_mip_premultiplied.clear();
+ m_mip_renormalize.clear();
+ m_mip_wrapping.clear();
+ m_mip_smallest_dimension.clear();
+
+ m_max_endpoint_clusters = 0;
+ m_max_selector_clusters = 0;
+ m_quality_level = -1;
+ }
+
+ // Pointer to the global selector codebook, or nullptr to not use a global selector codebook
+ const basist::etc1_global_selector_codebook *m_pSel_codebook;
+
+ // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read.
+ // Otherwise, the compressor processes the images in m_source_images.
+ std::vector m_source_filenames; // NOTE(review): template arguments (std::vector<...>, bool_param<...>, param<...>) appear to have been stripped throughout this struct in this copy - restore from upstream
+ std::vector m_source_alpha_filenames;
+
+ std::vector m_source_images;
+ // TODO: Allow caller to supply their own mipmaps
+
+ // Filename of the output basis file
+ std::string m_out_filename;
+
+ // The params are done this way so we can detect when the user has explicitly changed them.
+
+ // Flip images across Y axis
+ bool_param m_y_flip;
+
+ // Output debug information during compression
+ bool_param m_debug;
+
+ // m_debug_images is pretty slow
+ bool_param m_debug_images;
+
+ bool_param m_global_sel_pal;
+ bool_param m_no_auto_global_sel_pal;
+
+ // Frontend/backend codec parameters
+ bool_param m_no_endpoint_refinement;
+ bool_param m_no_hybrid_sel_cb;
+
+ // Use perceptual sRGB colorspace metrics (for normal maps, etc.)
+ bool_param m_perceptual;
+
+ // Disable selector RDO, for faster compression but larger files
+ bool_param m_no_selector_rdo;
+ param m_selector_rdo_thresh;
+
+ // Read source images from m_source_filenames/m_source_alpha_filenames
+ bool_param m_read_source_images;
+
+ // Write the output basis file to disk using m_out_filename
+ bool_param m_write_output_basis_files;
+
+ // If true, the compressor disables some optional but slower refinement stages
+ bool_param m_faster;
+
+ // Compute and display image metrics
+ bool_param m_compute_stats;
+
+ // Check to see if any input image has an alpha channel, if so then the output basis file will have alpha channels
+ bool_param m_check_for_alpha;
+
+ // Always put alpha slices in the output basis file, even when the input doesn't have alpha
+ bool_param m_force_alpha;
+
+ // Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels
+ bool_param m_seperate_rg_to_color_alpha;
+
+ // Global/hybrid selector codebook parameters
+ param m_hybrid_sel_cb_quality_thresh;
+ param m_global_pal_bits;
+ param m_global_mod_bits;
+
+ // mipmap generation parameters
+ bool_param m_mip_gen;
+ param m_mip_scale;
+ std::string m_mip_filter;
+ bool_param m_mip_srgb;
+ bool_param m_mip_premultiplied; // not currently supported
+ bool_param m_mip_renormalize;
+ bool_param m_mip_wrapping;
+ param m_mip_smallest_dimension;
+
+ // Codebook size (quality) control.
+ // If m_quality_level != -1, it controls the quality level. It ranges from [0,255].
+ // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly.
+ uint32_t m_max_endpoint_clusters;
+ uint32_t m_max_selector_clusters;
+ int m_quality_level;
+ };
+
+ class basis_compressor // Drives the whole pipeline: source images -> frontend -> backend -> .basis file -> validation/stats.
+ {
+ BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basis_compressor);
+
+ public:
+ basis_compressor();
+
+ bool init(const basis_compressor_params &params);
+
+ enum error_code
+ {
+ cECSuccess = 0,
+ cECFailedReadingSourceImages,
+ cECFailedFrontEnd,
+ cECFailedFontendExtract,
+ cECFailedBackend,
+ cECFailedCreateBasisFile,
+ cECFailedWritingOutput
+ };
+
+ error_code process();
+
+ const uint8_vec &get_output_basis_file() const { return m_output_basis_file; }
+ const etc_block_vec &get_output_blocks() const { return m_output_blocks; }
+
+ const std::vector<image_stats> &get_stats() const { return m_stats; }
+
+ uint32_t get_basis_file_size() const { return m_basis_file_size; }
+ double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; }
+
+ private:
+ basis_compressor_params m_params;
+
+ std::vector<image> m_source_images;
+
+ std::vector<image_stats> m_stats; // one entry per slice
+
+ uint32_t m_basis_file_size;
+ double m_basis_bits_per_texel;
+
+ basisu_backend_slice_desc_vec m_slice_descs;
+
+ uint32_t m_total_blocks;
+ bool m_auto_global_sel_pal;
+
+ basisu_frontend m_frontend;
+ pixel_block_vec m_source_blocks;
+
+ std::vector<gpu_image> m_frontend_output_textures;
+
+ std::vector<gpu_image> m_best_etc1s_images;
+ std::vector<image> m_best_etc1s_images_unpacked;
+
+ basisu_backend m_backend;
+
+ basisu_file m_basis_file;
+
+ std::vector<gpu_image> m_decoded_output_textures;
+ std::vector<image> m_decoded_output_textures_unpacked;
+ std::vector<gpu_image> m_decoded_output_textures_bc1;
+ std::vector<image> m_decoded_output_textures_unpacked_bc1;
+
+ uint8_vec m_output_basis_file;
+ etc_block_vec m_output_blocks;
+
+ bool m_any_source_image_has_alpha;
+ // Pipeline stages; each returns false on failure.
+ bool read_source_images();
+ bool process_frontend();
+ bool extract_frontend_texture_data();
+ bool process_backend();
+ bool create_basis_file_and_transcode();
+ bool write_output_files_and_compute_stats();
+ bool generate_mipmaps(const image &img, std::vector<image> &mips, bool has_alpha);
+ };
+
+} // namespace basisu
+
diff --git a/basisu_enc.cpp b/basisu_enc.cpp
new file mode 100644
index 0000000..3e585ef
--- /dev/null
+++ b/basisu_enc.cpp
@@ -0,0 +1,1128 @@
+// basisu_enc.cpp
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "basisu_enc.h"
+#include "lodepng.h"
+#include "basisu_resampler.h"
+#include "basisu_resampler_filters.h"
+#include "basisu_etc.h"
+#include "transcoder/basisu_transcoder.h"
+
+#if defined(_WIN32)
+// For QueryPerformanceCounter/QueryPerformanceFrequency
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+namespace basisu
+{
+ uint64_t interval_timer::g_init_ticks, interval_timer::g_freq; // counter value captured by init(), and the counter frequency reported by query_counter_frequency()
+ double interval_timer::g_timer_freq; // reciprocal of g_freq (seconds per tick); zero until init() has run
+
+ // Encoder library initialization (just call once at startup)
+ void basisu_encoder_init() // Runs the ETC1 block packer's and the transcoder's init routines, in that order.
+ {
+ pack_etc1_block_init();
+ basist::basisu_transcoder_init(); // also called again by basis_compressor::create_basis_file_and_transcode()
+ }
+
+ void error_printf(const char *pFmt, ...) // printf-style formatting written to stderr.
+ {
+ va_list arg_list; // the variadic arguments that follow pFmt
+ va_start(arg_list, pFmt);
+ vfprintf(stderr, pFmt, arg_list);
+ va_end(arg_list);
+ }
+
+#if defined(_WIN32)
+ inline void query_counter(timer_ticks* pTicks) // Reads the current high-resolution counter value into *pTicks.
+ {
+ QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(pTicks));
+ }
+ inline void query_counter_frequency(timer_ticks* pTicks) // Stores the counter's ticks-per-second in *pTicks.
+ {
+ QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks));
+ }
+#elif defined(__APPLE__)
+#include <sys/time.h>
+ inline void query_counter(timer_ticks* pTicks) // Current wall-clock time from gettimeofday(), in microseconds.
+ {
+ struct timeval cur_time;
+ gettimeofday(&cur_time, NULL);
+ *pTicks = static_cast<timer_ticks>(cur_time.tv_sec) * 1000000ULL + static_cast<timer_ticks>(cur_time.tv_usec);
+ }
+ inline void query_counter_frequency(timer_ticks* pTicks) // Ticks are microseconds, so the frequency is fixed at 1 MHz.
+ {
+ *pTicks = 1000000;
+ }
+#elif defined(__GNUC__)
+#include <sys/time.h>
+ inline void query_counter(timer_ticks* pTicks) // Same gettimeofday()-based implementation as the __APPLE__ branch.
+ {
+ struct timeval cur_time;
+ gettimeofday(&cur_time, NULL);
+ *pTicks = static_cast<timer_ticks>(cur_time.tv_sec) * 1000000ULL + static_cast<timer_ticks>(cur_time.tv_usec);
+ }
+ inline void query_counter_frequency(timer_ticks* pTicks)
+ {
+ *pTicks = 1000000;
+ }
+#else
+#error TODO
+#endif
+
+ interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false)
+ {
+ if (g_timer_freq == 0) // shared timer state not set up yet
+ init();
+ }
+
+ void interval_timer::start() // Begins (or restarts) timing from the current counter value.
+ {
+ m_stopped = false;
+ m_started = true;
+ query_counter(&m_start_time);
+ }
+
+ void interval_timer::stop() // Records the stop time; start() must have been called first (asserted).
+ {
+ assert(m_started);
+ query_counter(&m_stop_time);
+ m_stopped = true; // get_elapsed_secs() now uses m_stop_time instead of sampling the counter
+ }
+
+ double interval_timer::get_elapsed_secs() const // Seconds since start(), measured to stop() if it was called, otherwise to now.
+ {
+ assert(m_started);
+ if (!m_started)
+ return 0;
+ // A stopped timer reports its frozen end time; a running one samples the counter.
+ timer_ticks end_ticks = m_stopped ? m_stop_time : 0;
+ if (!m_stopped)
+ query_counter(&end_ticks);
+
+ const timer_ticks elapsed_ticks = end_ticks - m_start_time;
+ return elapsed_ticks * g_timer_freq;
+ }
+
+ void interval_timer::init() // One-time setup of the shared timer state; does nothing once g_timer_freq is non-zero.
+ {
+ if (!g_timer_freq)
+ {
+ query_counter_frequency(&g_freq);
+ g_timer_freq = 1.0 / static_cast<double>(g_freq); // was 1.0f / g_freq: the reciprocal was computed in single precision
+ query_counter(&g_init_ticks);
+ }
+ }
+
+ timer_ticks interval_timer::get_ticks() // Ticks elapsed since the shared timer state was initialized.
+ {
+ if (g_timer_freq == 0)
+ init();
+ timer_ticks now;
+ query_counter(&now);
+ return now - g_init_ticks;
+ }
+
+ double interval_timer::ticks_to_secs(timer_ticks ticks) // Converts a tick count to seconds using the cached seconds-per-tick factor.
+ {
+ if (g_timer_freq == 0)
+ init();
+ return g_timer_freq * ticks;
+ }
+
+ bool load_png(const char* pFilename, image& img) // Loads and decodes a PNG file into img via lodepng. Returns false on any read/decode failure.
+ {
+ std::vector<unsigned char> buffer;
+ unsigned err = lodepng::load_file(buffer, std::string(pFilename));
+ if (err || buffer.empty()) // an empty file would make &buffer[0] below invalid
+ return false;
+
+ std::vector<unsigned char> out;
+ unsigned w = 0, h = 0;
+
+ err = lodepng::decode(out, w, h, &buffer[0], buffer.size());
+ if ((err != 0) || (!w) || (!h))
+ return false;
+ // The memcpy below copies out.size() bytes, which must equal w*h*4; compare in 64 bits so w*h*4 cannot wrap.
+ if (out.size() != (static_cast<uint64_t>(w) * h * 4))
+ return false;
+
+ img.resize(w, h);
+
+ memcpy(img.get_ptr(), &out[0], out.size());
+
+ return true;
+ }
+
+ bool save_png(const char* pFilename, const image & img, uint32_t image_save_flags, uint32_t grayscale_comp) // Encodes img as 8-bit grayscale, RGB or RGBA PNG (chosen by flags/alpha) and writes it. Returns false on failure.
+ {
+ if (!img.get_total_pixels())
+ return false;
+
+ std::vector<unsigned char> out;
+ unsigned err = 0;
+
+ if (image_save_flags & cImageSaveGrayscale)
+ {
+ uint8_vec g_pixels(img.get_width() * img.get_height());
+ uint8_t *pDst = &g_pixels[0];
+ // Extract the single requested component from every pixel.
+ for (uint32_t y = 0; y < img.get_height(); y++)
+ for (uint32_t x = 0; x < img.get_width(); x++)
+ *pDst++ = img(x, y)[grayscale_comp];
+
+ err = lodepng::encode(out, (const uint8_t*)& g_pixels[0], img.get_width(), img.get_height(), LCT_GREY, 8);
+ }
+ else
+ {
+ bool has_alpha = img.has_alpha();
+ if ((!has_alpha) || ((image_save_flags & cImageSaveIgnoreAlpha) != 0))
+ {
+ uint8_vec rgb_pixels(img.get_width() * 3 * img.get_height());
+ uint8_t *pDst = &rgb_pixels[0];
+ // Repack the pixels as tightly packed RGB, dropping alpha.
+ for (uint32_t y = 0; y < img.get_height(); y++)
+ {
+ for (uint32_t x = 0; x < img.get_width(); x++)
+ {
+ const color_rgba& c = img(x, y);
+ pDst[0] = c.r;
+ pDst[1] = c.g;
+ pDst[2] = c.b;
+ pDst += 3;
+ }
+ }
+
+ err = lodepng::encode(out, (const uint8_t*)& rgb_pixels[0], img.get_width(), img.get_height(), LCT_RGB, 8);
+ }
+ else
+ {
+ err = lodepng::encode(out, (const uint8_t*)img.get_ptr(), img.get_width(), img.get_height(), LCT_RGBA, 8);
+ }
+ }
+ if (err) // encoding failed; previously this status was overwritten by save_file()'s result
+ return false;
+
+ err = lodepng::save_file(out, std::string(pFilename));
+
+ return err == 0;
+ }
+
+ bool read_file_to_vec(const char* pFilename, uint8_vec& data) // Reads the whole file into data. Returns false if it cannot be opened, sized or fully read.
+ {
+ FILE* fp = nullptr;
+#ifdef _WIN32
+ fopen_s(&fp, pFilename, "rb");
+#else
+ fp = fopen(pFilename, "rb");
+#endif
+ if (!fp)
+ return false;
+ // Determine the file length by seeking to the end.
+ fseek(fp, 0, SEEK_END);
+#ifdef _WIN32
+ const int64_t file_len = _ftelli64(fp);
+#else
+ const int64_t file_len = ftello(fp);
+#endif
+ if (file_len < 0)
+ {
+ fclose(fp);
+ return false;
+ }
+ fseek(fp, 0, SEEK_SET);
+
+ const bool too_big_for_32bit = (sizeof(size_t) == sizeof(uint32_t)) && (file_len > 0x70000000);
+ if (too_big_for_32bit)
+ {
+ // File might be too big to load safely in one alloc
+ fclose(fp);
+ return false;
+ }
+
+ const size_t num_bytes = (size_t)file_len;
+ data.resize(num_bytes);
+
+ if (num_bytes != 0)
+ {
+ const size_t bytes_read = fread(&data[0], 1, num_bytes, fp);
+ if (bytes_read != num_bytes)
+ {
+ fclose(fp);
+ return false;
+ }
+ }
+
+ fclose(fp);
+ return true;
+ }
+
+ bool write_data_to_file(const char* pFilename, const void* pData, size_t len) // Creates/truncates the file and writes len bytes. Returns false on open, write or close failure.
+ {
+ FILE* fp = nullptr;
+#ifdef _WIN32
+ fopen_s(&fp, pFilename, "wb");
+#else
+ fp = fopen(pFilename, "wb");
+#endif
+ if (!fp)
+ return false;
+
+ const bool wrote_all = (len == 0) || (fwrite(pData, 1, len, fp) == len);
+ if (!wrote_all)
+ {
+ fclose(fp);
+ return false;
+ }
+
+ // The close result is part of the outcome.
+ const int close_result = fclose(fp);
+ return close_result != EOF;
+ }
+
+ // Converts a linear value l (asserted to lie in [0,1]) to its sRGB-encoded value.
+ // Values below .0031308 use the linear segment (l * 12.92); all others use the
+ // 1/2.4 power segment. The result is passed through saturate() before returning.
+ float linear_to_srgb(float l)
+ {
+     assert(l >= 0.0f && l <= 1.0f);
+
+     const bool on_linear_segment = (l < .0031308f);
+     if (on_linear_segment)
+         return saturate(l * 12.92f);
+
+     return saturate(1.055f * powf(l, 1.0f/2.4f) - .055f);
+ }
+
+ // Converts an sRGB-encoded value s (asserted to lie in [0,1]) to a linear value.
+ // Values below .04045 use the linear segment (s / 12.92); all others use the
+ // 2.4 power segment. The result is passed through saturate() before returning.
+ float srgb_to_linear(float s)
+ {
+     assert(s >= 0.0f && s <= 1.0f);
+
+     const bool on_linear_segment = (s < .04045f);
+     if (on_linear_segment)
+         return saturate(s * (1.0f/12.92f));
+
+     return saturate(powf((s + .055f) * (1.0f/1.055f), 2.4f));
+ }
+
+ // Resamples components [first_comp, first_comp + num_comps) of src into dst.
+ //
+ // dst must already have its target dimensions: the output size is read from dst.
+ //   srgb         - when true, components other than index 3 are converted to linear
+ //                  values before filtering and back to sRGB afterwards; component 3 is
+ //                  always filtered as a plain linear value.
+ //   pFilter      - filter name handed to Resampler.
+ //   filter_scale - filter scale handed to Resampler for both axes.
+ //   wrapping     - selects Resampler::BOUNDARY_WRAP instead of BOUNDARY_CLAMP.
+ //
+ // Returns false if either image has a zero dimension, if a dimension exceeds
+ // BASISU_RESAMPLER_MAX_DIMENSION, if the component range is invalid, or if a
+ // resampler rejects a source line. When src and dst have identical dimensions the
+ // whole of src is copied into dst (all components) and true is returned.
+ bool image_resample(const image &src, image &dst, bool srgb,
+     const char *pFilter, float filter_scale,
+     bool wrapping,
+     uint32_t first_comp, uint32_t num_comps)
+ {
+     assert((first_comp + num_comps) <= 4);
+
+     const int cMaxComps = 4;
+
+     const uint32_t src_w = src.get_width(), src_h = src.get_height();
+     const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height();
+
+     if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION)
+     {
+         printf("Image is too large!\n");
+         return false;
+     }
+
+     if (!src_w || !src_h || !dst_w || !dst_h)
+         return false;
+
+     if ((num_comps < 1) || (num_comps > cMaxComps))
+         return false;
+
+     // Enforce the component range at runtime as well: with asserts compiled out an
+     // out-of-range first_comp would index past the four components of a pixel.
+     // num_comps is already known to be in [1, cMaxComps], so the subtraction cannot wrap.
+     if (first_comp > ((uint32_t)cMaxComps - num_comps))
+         return false;
+
+     if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION))
+     {
+         printf("Image is too large!\n");
+         return false;
+     }
+
+     if ((src_w == dst_w) && (src_h == dst_h))
+     {
+         dst = src;
+         return true;
+     }
+
+     // 8-bit sRGB -> linear float lookup (only filled and used when srgb is set).
+     float srgb_to_linear_table[256];
+     if (srgb)
+     {
+         for (int i = 0; i < 256; ++i)
+             srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f/255.0f));
+     }
+
+     // Quantized linear -> 8-bit sRGB lookup (only filled and used when srgb is set).
+     const int LINEAR_TO_SRGB_TABLE_SIZE = 8192;
+     uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE];
+
+     if (srgb)
+     {
+         for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
+             linear_to_srgb_table[i] = (uint8_t)clamp((int)(255.0f * linear_to_srgb((float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1))) + .5f), 0, 255);
+     }
+
+     // One resampler and one float scanline buffer per processed component.
+     std::vector<float> samples[cMaxComps];
+     Resampler *resamplers[cMaxComps];
+
+     resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h,
+         wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f,
+         pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0);
+     samples[0].resize(src_w);
+
+     // The remaining resamplers are handed the contributor lists of the first one.
+     for (uint32_t i = 1; i < num_comps; ++i)
+     {
+         resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h,
+             wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f,
+             pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0);
+         samples[i].resize(src_w);
+     }
+
+     uint32_t dst_y = 0;
+
+     for (uint32_t src_y = 0; src_y < src_h; ++src_y)
+     {
+         const color_rgba *pSrc = &src(0, src_y);
+
+         // Put source lines into resampler(s)
+         for (uint32_t x = 0; x < src_w; ++x)
+         {
+             for (uint32_t c = 0; c < num_comps; ++c)
+             {
+                 const uint32_t comp_index = first_comp + c;
+                 const uint32_t v = (*pSrc)[comp_index];
+
+                 if (!srgb || (comp_index == 3))
+                     samples[c][x] = v * (1.0f / 255.0f);
+                 else
+                     samples[c][x] = srgb_to_linear_table[v];
+             }
+
+             pSrc++;
+         }
+
+         for (uint32_t c = 0; c < num_comps; ++c)
+         {
+             if (!resamplers[c]->put_line(&samples[c][0]))
+             {
+                 // Release every resampler before reporting the failure.
+                 for (uint32_t i = 0; i < num_comps; i++)
+                     delete resamplers[i];
+                 return false;
+             }
+         }
+
+         // Now retrieve any output lines
+         for (;;)
+         {
+             uint32_t c;
+             for (c = 0; c < num_comps; ++c)
+             {
+                 const uint32_t comp_index = first_comp + c;
+
+                 const float *pOutput_samples = resamplers[c]->get_line();
+                 if (!pOutput_samples)
+                     break;
+
+                 const bool linear_flag = !srgb || (comp_index == 3);
+
+                 color_rgba *pDst = &dst(0, dst_y);
+
+                 for (uint32_t x = 0; x < dst_w; x++)
+                 {
+                     // TODO: Add dithering
+                     if (linear_flag)
+                     {
+                         int j = (int)(255.0f * pOutput_samples[x] + .5f);
+                         (*pDst)[comp_index] = (uint8_t)clamp(j, 0, 255);
+                     }
+                     else
+                     {
+                         int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - 1) * pOutput_samples[x] + .5f);
+                         (*pDst)[comp_index] = linear_to_srgb_table[clamp(j, 0, LINEAR_TO_SRGB_TABLE_SIZE - 1)];
+                     }
+
+                     pDst++;
+                 }
+             }
+             // A resampler had no line ready: feed the next source line.
+             if (c < num_comps)
+                 break;
+
+             ++dst_y;
+         }
+     }
+
+     for (uint32_t i = 0; i < num_comps; ++i)
+         delete resamplers[i];
+
+     return true;
+ }
+
+ // Replaces the frequencies stored in A[0..num_syms-1].m_key, in place, with the depth
+ // values produced by the algorithm from the paper cited below. The visible caller
+ // passes an array that has been sorted by canonical_huffman_radix_sort_syms() and
+ // afterwards uses each m_key as an index into a per-code-size histogram.
+ // With zero symbols nothing is done; a single symbol is given the value 1.
+ // NOTE(review): the static_cast expressions in this function appear to have lost their
+ // template arguments in this copy of the text - confirm against the original source.
+ void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms)
+ {
+ // See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen
+ if (!num_syms)
+ return;
+
+ if (1 == num_syms)
+ {
+ A[0].m_key = 1;
+ return;
+ }
+
+ // First pass: A[next] receives the sum of two items, each taken either from the
+ // already-built sums (cursor r) or from the not-yet-consumed inputs (cursor s).
+ // A consumed sum has its m_key overwritten with the index 'next' it was merged into.
+ A[0].m_key += A[1].m_key;
+
+ int s = 2, r = 0, next;
+ for (next = 1; next < (num_syms - 1); ++next)
+ {
+ if ((s >= num_syms) || (A[r].m_key < A[s].m_key))
+ {
+ A[next].m_key = A[r].m_key;
+ A[r].m_key = static_cast(next);
+ ++r;
+ }
+ else
+ {
+ A[next].m_key = A[s].m_key;
+ ++s;
+ }
+
+ if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key))
+ {
+ A[next].m_key = static_cast(A[next].m_key + A[r].m_key);
+ A[r].m_key = static_cast(next);
+ ++r;
+ }
+ else
+ {
+ A[next].m_key = static_cast(A[next].m_key + A[s].m_key);
+ ++s;
+ }
+ }
+ // Second pass: entry num_syms - 2 gets depth 0; every earlier entry becomes one more
+ // than the depth of the entry its stored index refers to.
+ A[num_syms - 2].m_key = 0;
+
+ for (next = num_syms - 3; next >= 0; --next)
+ {
+ A[next].m_key = 1 + A[A[next].m_key].m_key;
+ }
+
+ // Third pass: walk depth by depth. num_used counts entries at index <= r whose value
+ // equals the current depth; the remaining num_avail - num_used slots at this depth are
+ // written, from the end of the array backwards, as final values.
+ int num_avail = 1, num_used = 0, depth = 0;
+ r = num_syms - 2;
+ next = num_syms - 1;
+ while (num_avail > 0)
+ {
+ for ( ; (r >= 0) && ((int)A[r].m_key == depth); ++num_used, --r )
+ ;
+
+ for ( ; num_avail > num_used; --next, --num_avail)
+ A[next].m_key = static_cast(depth);
+
+ // Each entry counted in num_used contributes two slots at the next depth.
+ num_avail = 2 * num_used;
+ num_used = 0;
+ ++depth;
+ }
+ }
+
+ // Adjusts the per-length code counts in pNum_codes so that no code is longer than
+ // max_code_size. pNum_codes[len] is the number of codes of length len and must be
+ // indexable up to cHuffmanMaxSupportedInternalCodeSize. Nothing is done when
+ // code_list_len is 1 or less.
+ void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
+ {
+     if (code_list_len <= 1)
+         return;
+
+     // Fold the counts of every over-long length into the longest permitted length.
+     for (int len = max_code_size + 1; len <= cHuffmanMaxSupportedInternalCodeSize; len++)
+         pNum_codes[max_code_size] += pNum_codes[len];
+
+     // Sum of count << (max_code_size - len) over all lengths 1..max_code_size.
+     uint32_t weighted_total = 0;
+     for (int len = max_code_size; len > 0; len--)
+         weighted_total += (((uint32_t)pNum_codes[len]) << (max_code_size - len));
+
+     // Keep going until the weighted sum equals exactly 2^max_code_size. Each round drops
+     // one code of the maximum length and turns one code of the longest shorter length
+     // that is in use into two codes one bit longer.
+     while (weighted_total != (1UL << max_code_size))
+     {
+         pNum_codes[max_code_size]--;
+
+         for (int len = max_code_size - 1; len > 0; len--)
+         {
+             if (!pNum_codes[len])
+                 continue;
+
+             pNum_codes[len]--;
+             pNum_codes[len + 1] += 2;
+             break;
+         }
+
+         weighted_total--;
+     }
+ }
+
+ // Sorts num_syms entries into ascending order of the low 16 bits of m_key using a
+ // byte-wise radix sort (low byte first, then the next byte). pSyms0 holds the input;
+ // pSyms1 is scratch space of the same size. The two buffers are ping-ponged between
+ // passes, so the returned pointer (either pSyms0 or pSyms1) is the buffer that holds
+ // the sorted result. Entries with equal keys keep their relative order.
+ sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1)
+ {
+     // hist[0..255] counts low bytes, hist[256..511] counts the second byte.
+     uint32_t hist[256 * 2];
+     clear_obj(hist);
+
+     for (uint32_t n = 0; n < num_syms; n++)
+     {
+         const uint32_t key = pSyms0[n].m_key;
+         hist[key & 0xFF]++;
+         hist[256 + ((key >> 8) & 0xFF)]++;
+     }
+
+     // When every key has a zero second byte that pass would move nothing, so skip it.
+     uint32_t num_passes = 2;
+     while ((num_passes > 1) && (num_syms == hist[(num_passes - 1) * 256]))
+         num_passes--;
+
+     sym_freq *pFrom = pSyms0;
+     sym_freq *pTo = pSyms1;
+
+     for (uint32_t pass = 0; pass < num_passes; pass++)
+     {
+         const uint32_t shift = pass * 8;
+         const uint32_t *pPass_hist = &hist[pass << 8];
+
+         // Turn the bucket counts into the starting output offset of each bucket.
+         uint32_t bucket_ofs[256];
+         uint32_t running_total = 0;
+         for (uint32_t b = 0; b < 256; b++)
+         {
+             bucket_ofs[b] = running_total;
+             running_total += pPass_hist[b];
+         }
+
+         // Scatter the entries, in input order, to the next free slot of their bucket.
+         for (uint32_t n = 0; n < num_syms; n++)
+         {
+             const uint32_t bucket = (pFrom[n].m_key >> shift) & 0xFF;
+             pTo[bucket_ofs[bucket]++] = pFrom[n];
+         }
+
+         // The freshly written buffer becomes the input of the next pass.
+         sym_freq *pTemp = pFrom;
+         pFrom = pTo;
+         pTo = pTemp;
+     }
+
+     return pFrom;
+ }
+
+ // Builds the per-symbol code sizes (m_code_sizes) and codes (m_codes) from the
+ // 16-bit frequencies pFreq[0..num_syms-1], limiting code sizes to max_code_size.
+ // Symbols with a zero frequency get code size 0 and no code.
+ // Returns false if max_code_size exceeds cHuffmanMaxSupportedCodeSize, if num_syms is
+ // zero or exceeds cHuffmanMaxSyms, if every frequency is zero, or if a computed code
+ // size exceeds cHuffmanMaxSupportedInternalCodeSize.
+ // NOTE(review): the std::vector declaration and the static_cast expressions below appear
+ // to have lost their template arguments in this copy of the text - confirm against the
+ // original source.
+ bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size)
+ {
+ if (max_code_size > cHuffmanMaxSupportedCodeSize)
+ return false;
+ if ((!num_syms) || (num_syms > cHuffmanMaxSyms))
+ return false;
+
+ // Only symbols that actually occur take part in code construction.
+ uint32_t total_used_syms = 0;
+ for (uint32_t i = 0; i < num_syms; i++)
+ if (pFreq[i])
+ total_used_syms++;
+
+ if (!total_used_syms)
+ return false;
+
+ // sym_freq0 holds (frequency, symbol index) pairs; sym_freq1 is scratch for the sort.
+ std::vector sym_freq0(total_used_syms), sym_freq1(total_used_syms);
+ for (uint32_t i = 0, j = 0; i < num_syms; i++)
+ {
+ if (pFreq[i])
+ {
+ sym_freq0[j].m_key = pFreq[i];
+ sym_freq0[j++].m_sym_index = static_cast(i);
+ }
+ }
+
+ // The sort returns whichever of the two buffers ended up holding the sorted pairs.
+ sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0[0], &sym_freq1[0]);
+
+ // After this call each m_key is used below as a code size rather than a frequency.
+ canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms);
+
+ // num_codes[n] = number of symbols whose code size is n.
+ int num_codes[cHuffmanMaxSupportedInternalCodeSize + 1];
+ clear_obj(num_codes);
+
+ for (uint32_t i = 0; i < total_used_syms; i++)
+ {
+ if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize)
+ return false;
+
+ num_codes[pSym_freq[i].m_key]++;
+ }
+
+ canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size);
+
+ // resize(0) followed by resize(num_syms) discards any previous contents.
+ m_code_sizes.resize(0);
+ m_code_sizes.resize(num_syms);
+
+ m_codes.resize(0);
+ m_codes.resize(num_syms);
+
+ // Hand out code sizes starting with the shortest, walking the sorted array from its
+ // end towards its start.
+ for (uint32_t i = 1, j = total_used_syms; i <= max_code_size; i++)
+ for (uint32_t l = num_codes[i]; l > 0; l--)
+ m_code_sizes[pSym_freq[--j].m_sym_index] = static_cast(i);
+
+ // next_code[n] is the first code value to assign to a symbol of code size n.
+ uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + 1];
+
+ next_code[1] = 0;
+ for (uint32_t j = 0, i = 2; i <= max_code_size; i++)
+ next_code[i] = j = ((j + num_codes[i - 1]) << 1);
+
+ for (uint32_t i = 0; i < num_syms; i++)
+ {
+ uint32_t rev_code = 0, code, code_size;
+ if ((code_size = m_code_sizes[i]) == 0)
+ continue;
+ if (code_size > cHuffmanMaxSupportedInternalCodeSize)
+ return false;
+ code = next_code[code_size]++;
+ // Store the code with its code_size bits in reversed order.
+ for (uint32_t l = code_size; l > 0; l--, code >>= 1)
+ rev_code = (rev_code << 1) | (code & 1);
+ m_codes[i] = static_cast(rev_code);
+ }
+
+ return true;
+ }
+
+ // Builds the table from 32-bit symbol frequencies by reducing them to 16 bits and
+ // delegating to the uint16_t overload of init().
+ // If the largest frequency is below UINT16_MAX the values are used unchanged.
+ // Otherwise every non-zero frequency is rescaled so that the largest becomes 65534,
+ // rounding to nearest and never letting a non-zero frequency drop to zero.
+ // Returns false if num_syms is zero or exceeds cHuffmanMaxSyms, or if the delegated
+ // init() fails.
+ bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size)
+ {
+     if ((!num_syms) || (num_syms > cHuffmanMaxSyms))
+         return false;
+
+     // Constructed with num_syms elements; entries not assigned below keep their initial value.
+     uint16_vec sym_freq(num_syms);
+
+     uint32_t max_freq = 0;
+     for (uint32_t i = 0; i < num_syms; i++)
+         max_freq = maximum(max_freq, pSym_freq[i]);
+
+     if (max_freq < UINT16_MAX)
+     {
+         for (uint32_t i = 0; i < num_syms; i++)
+             sym_freq[i] = static_cast<uint16_t>(pSym_freq[i]);
+     }
+     else
+     {
+         for (uint32_t i = 0; i < num_syms; i++)
+         {
+             if (!pSym_freq[i])
+                 continue;
+
+             // The product must be formed in 64 bits: frequency * 65534 no longer fits in
+             // 32 bits once the frequency exceeds 65537, which silently corrupted the
+             // scaled value. Because pSym_freq[i] <= max_freq the quotient is at most 65534.
+             const uint64_t scaled = (static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq;
+
+             sym_freq[i] = static_cast<uint16_t>((scaled < 1U) ? 1U : scaled);
+         }
+     }
+
+     return init(num_syms, &sym_freq[0], max_code_size);
+ }
+
+ // Flushes a pending run of run_size repeats of the code length len into syms and
+ // resets run_size to 0. Runs shorter than cHuffmanSmallRepeatSizeMin are written as
+ // individual copies of len; longer runs become one symbol holding the small or big
+ // repeat code in its low bits and (run_size - minimum) shifted left by 6 above it.
+ // NOTE(review): the static_cast expressions appear to have lost their template
+ // arguments in this copy of the text - confirm against the original source.
+ void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len)
+ {
+ if (run_size)
+ {
+ if (run_size < cHuffmanSmallRepeatSizeMin)
+ {
+ // The post-decrement wraps run_size past zero; it is reset at the end of the function.
+ while (run_size--)
+ syms.push_back(static_cast(len));
+ }
+ else if (run_size <= cHuffmanSmallRepeatSizeMax)
+ {
+ syms.push_back(static_cast(cHuffmanSmallRepeatCode | ((run_size - cHuffmanSmallRepeatSizeMin) << 6)));
+ }
+ else
+ {
+ assert((run_size >= cHuffmanBigRepeatSizeMin) && (run_size <= cHuffmanBigRepeatSizeMax));
+ syms.push_back(static_cast(cHuffmanBigRepeatCode | ((run_size - cHuffmanBigRepeatSizeMin) << 6)));
+ }
+ }
+
+ run_size = 0;
+ }
+
+ // Flushes a pending run of run_size zero code lengths into syms and resets run_size
+ // to 0. Runs shorter than cHuffmanSmallZeroRunSizeMin are written as individual 0
+ // symbols; longer runs become one symbol holding the small or big zero-run code in its
+ // low bits and (run_size - minimum) shifted left by 6 above it.
+ // NOTE(review): the static_cast expressions appear to have lost their template
+ // arguments in this copy of the text - confirm against the original source.
+ void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size)
+ {
+ if (run_size)
+ {
+ if (run_size < cHuffmanSmallZeroRunSizeMin)
+ {
+ // The post-decrement wraps run_size past zero; it is reset at the end of the function.
+ while (run_size--)
+ syms.push_back(0);
+ }
+ else if (run_size <= cHuffmanSmallZeroRunSizeMax)
+ {
+ syms.push_back(static_cast(cHuffmanSmallZeroRunCode | ((run_size - cHuffmanSmallZeroRunSizeMin) << 6)));
+ }
+ else
+ {
+ assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax));
+ syms.push_back(static_cast(cHuffmanBigZeroRunCode | ((run_size - cHuffmanBigZeroRunSizeMin) << 6)));
+ }
+ }
+
+ run_size = 0;
+ }
+
+ // Writes the code sizes of tab to the bit stream: first the number of used codes, then
+ // the sizes of a small "codelength" Huffman table, then the run-length compressed code
+ // sizes coded with that table.
+ // Returns the number of bits added to the stream (m_total_bits - start_bits), or 0 on
+ // the two early-exit paths.
+ // NOTE(review): when total_used is 0, or when building the codelength table fails, the
+ // function returns 0 although put_bits() has already written the used-code count -
+ // confirm that callers do not rely on the return value in those cases.
+ uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab)
+ {
+ const uint64_t start_bits = m_total_bits;
+
+ const uint8_vec &code_sizes = tab.get_code_sizes();
+
+ uint32_t total_used = tab.get_total_used_codes();
+ put_bits(total_used, cHuffmanMaxSymsLog2);
+
+ if (!total_used)
+ return 0;
+
+ // syms collects the run-length coded sequence: low 6 bits are the codelength symbol,
+ // the bits above carry the extra run-length value.
+ uint16_vec syms;
+ syms.reserve(total_used + 16);
+
+ uint32_t prev_code_len = UINT_MAX, zero_run_size = 0, nonzero_run_size = 0;
+
+ // One extra iteration (i == total_used) feeds the sentinel 0xFF so that any pending
+ // run is flushed; the sentinel itself is never pushed.
+ for (uint32_t i = 0; i <= total_used; ++i)
+ {
+ const uint32_t code_len = (i == total_used) ? 0xFF : code_sizes[i];
+ assert((code_len == 0xFF) || (code_len <= 16));
+
+ if (code_len)
+ {
+ end_zero_run(syms, zero_run_size);
+
+ if (code_len != prev_code_len)
+ {
+ end_nonzero_run(syms, nonzero_run_size, prev_code_len);
+ if (code_len != 0xFF)
+ syms.push_back(static_cast(code_len));
+ }
+ // Same length as before: extend the repeat run, flushing when it reaches the maximum.
+ else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax)
+ end_nonzero_run(syms, nonzero_run_size, prev_code_len);
+ }
+ else
+ {
+ end_nonzero_run(syms, nonzero_run_size, prev_code_len);
+
+ if (++zero_run_size == cHuffmanBigZeroRunSizeMax)
+ end_zero_run(syms, zero_run_size);
+ }
+
+ prev_code_len = code_len;
+ }
+
+ // Build a Huffman table (code sizes limited to 7) over the codelength symbols.
+ histogram h(cHuffmanTotalCodelengthCodes);
+ for (uint32_t i = 0; i < syms.size(); i++)
+ h.inc(syms[i] & 63);
+
+ huffman_encoding_table ct;
+ if (!ct.init(h, 7))
+ return 0;
+
+ assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes);
+
+ // Find the last entry, in g_huffman_sorted_codelength_codes order, that has a non-zero
+ // code size; only the sizes up to and including it are transmitted.
+ uint32_t total_codelength_codes;
+ for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > 0; total_codelength_codes--)
+ if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - 1]])
+ break;
+
+ assert(total_codelength_codes);
+
+ put_bits(total_codelength_codes, 5);
+ for (uint32_t i = 0; i < total_codelength_codes; i++)
+ put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], 3);
+
+ // Emit each symbol's code followed by the extra bits required by run symbols.
+ for (uint32_t i = 0; i < syms.size(); ++i)
+ {
+ const uint32_t l = syms[i] & 63, e = syms[i] >> 6;
+
+ put_code(l, ct);
+
+ if (l == cHuffmanSmallZeroRunCode)
+ put_bits(e, cHuffmanSmallZeroRunExtraBits);
+ else if (l == cHuffmanBigZeroRunCode)
+ put_bits(e, cHuffmanBigZeroRunExtraBits);
+ else if (l == cHuffmanSmallRepeatCode)
+ put_bits(e, cHuffmanSmallRepeatExtraBits);
+ else if (l == cHuffmanBigRepeatCode)
+ put_bits(e, cHuffmanBigRepeatExtraBits);
+ }
+
+ return (uint32_t)(m_total_bits - start_bits);
+ }
+
+ // Self-test for the Huffman encoder/decoder pair. Encodes a table plus a symbol
+ // sequence with bitwise_coder, reads both back with basist::bitwise_decoder and checks
+ // that every decoded symbol matches. Runs one fixed case followed by 500000 randomized
+ // cases seeded with rand_seed, printing the iteration number for each.
+ // Returns true if every round trip matched; on a mismatch it asserts, prints a
+ // message and returns false.
+ // NOTE(review): several template argument lists (static_cast, maximum, clamp) appear to
+ // have been stripped from this copy of the text - confirm against the original source.
+ bool huffman_test(int rand_seed)
+ {
+ histogram h(19);
+
+ // Feed in a fibonacci sequence to force large codesizes
+ h[0] += 1; h[1] += 1; h[2] += 2; h[3] += 3;
+ h[4] += 5; h[5] += 8; h[6] += 13; h[7] += 21;
+ h[8] += 34; h[9] += 55; h[10] += 89; h[11] += 144;
+ h[12] += 233; h[13] += 377; h[14] += 610; h[15] += 987;
+ h[16] += 1597; h[17] += 2584; h[18] += 4181;
+
+ // NOTE(review): the result of this init() call is not checked.
+ huffman_encoding_table etab;
+ etab.init(h, 16);
+
+ // Fixed case: emit the table, then each of the 19 symbols once, and decode them back.
+ {
+ bitwise_coder c;
+ c.init(1024);
+
+ c.emit_huffman_table(etab);
+ for (int i = 0; i < 19; i++)
+ c.put_code(i, etab);
+
+ c.flush();
+
+ basist::bitwise_decoder d;
+ d.init(&c.get_bytes()[0], static_cast(c.get_bytes().size()));
+
+ basist::huffman_decoding_table dtab;
+ bool success = d.read_huffman_table(dtab);
+ if (!success)
+ {
+ assert(0);
+ printf("Failure 5\n");
+ return false;
+ }
+
+ for (uint32_t i = 0; i < 19; i++)
+ {
+ uint32_t s = d.decode_huffman(dtab);
+ if (s != i)
+ {
+ assert(0);
+ printf("Failure 5\n");
+ return false;
+ }
+ }
+ }
+
+ basisu::rand r;
+ r.seed(rand_seed);
+
+ // Randomized cases: each iteration picks an alphabet size and a symbol count, then
+ // draws every symbol either from irand() or from a clamped gaussian().
+ for (int iter = 0; iter < 500000; iter++)
+ {
+ // NOTE(review): iter is an int but is printed with %u.
+ printf("%u\n", iter);
+
+ uint32_t max_sym = r.irand(0, 8193);
+ uint32_t num_codes = r.irand(1, 10000);
+ uint_vec syms(num_codes);
+
+ for (uint32_t i = 0; i < num_codes; i++)
+ {
+ if (r.bit())
+ syms[i] = r.irand(0, max_sym);
+ else
+ {
+ int s = (int)(r.gaussian((float)max_sym / 2, (float)maximum(1, max_sym / 2)) + .5f);
+ s = basisu::clamp(s, 0, max_sym);
+
+ syms[i] = s;
+ }
+
+ }
+
+ // Histogram of the generated symbols drives the encoding table.
+ histogram h1(max_sym + 1);
+ for (uint32_t i = 0; i < num_codes; i++)
+ h1[syms[i]]++;
+
+ huffman_encoding_table etab2;
+ if (!etab2.init(h1, 16))
+ {
+ assert(0);
+ printf("Failed 0\n");
+ return false;
+ }
+
+ bitwise_coder c;
+ c.init(1024);
+
+ c.emit_huffman_table(etab2);
+
+ for (uint32_t i = 0; i < num_codes; i++)
+ c.put_code(syms[i], etab2);
+
+ c.flush();
+
+ // Decode the stream that was just produced and compare symbol by symbol.
+ basist::bitwise_decoder d;
+ d.init(&c.get_bytes()[0], (uint32_t)c.get_bytes().size());
+
+ basist::huffman_decoding_table dtab;
+ bool success = d.read_huffman_table(dtab);
+ if (!success)
+ {
+ assert(0);
+ printf("Failed 2\n");
+ return false;
+ }
+
+ for (uint32_t i = 0; i < num_codes; i++)
+ {
+ uint32_t s = d.decode_huffman(dtab);
+ if (s != syms[i])
+ {
+ assert(0);
+ printf("Failed 4\n");
+ return false;
+ }
+ }
+
+ }
+ return true;
+ }
+
+ // Computes a reordering of the num_syms palette entries from the index sequence
+ // pIndices[0..num_indices-1] and stores it in m_remap_table (old entry -> new position).
+ // Starting from the pair chosen by find_initial(), the remaining entries are added one
+ // at a time to the front or back of m_entries_picked.
+ //   pDist_func / pCtx - optional distance callback and its context, forwarded to
+ //                       find_next_entry() and pick_side().
+ //   dist_func_weight  - weight of the callback, asserted to lie in [0,1].
+ // With num_indices <= 1 the function returns right after sizing the tables.
+ void palette_index_reorderer::init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)
+ {
+     assert((num_syms > 0) && (num_indices > 0));
+     assert((dist_func_weight >= 0.0f) && (dist_func_weight <= 1.0f));
+
+     clear();
+
+     m_remap_table.resize(num_syms);
+     m_entries_picked.reserve(num_syms);
+     m_total_count_to_picked.resize(num_syms);
+
+     if (num_indices <= 1)
+         return;
+
+     prepare_hist(num_syms, num_indices, pIndices);
+     find_initial(num_syms);
+
+     while (m_entries_to_do.size())
+     {
+         // Select which pending entry joins the picked list next.
+         uint32_t todo_slot;
+         double todo_score;
+         find_next_entry(todo_slot, todo_score, pDist_func, pCtx, dist_func_weight);
+
+         const uint32_t moved_entry = m_entries_to_do[todo_slot];
+
+         // Decide which end of the picked list the entry is attached to.
+         const float side = pick_side(num_syms, moved_entry, pDist_func, pCtx, dist_func_weight);
+         const bool attach_at_back = (side <= 0);
+
+         if (attach_at_back)
+             m_entries_picked.push_back(moved_entry);
+         else
+             m_entries_picked.insert(m_entries_picked.begin(), moved_entry);
+
+         // The entry is no longer pending.
+         m_entries_to_do.erase(m_entries_to_do.begin() + todo_slot);
+
+         // Every entry still pending gains the histogram count it shares with the entry
+         // that was just picked.
+         for (uint32_t k = 0; k < m_entries_to_do.size(); k++)
+         {
+             const uint32_t pending_entry = m_entries_to_do[k];
+             m_total_count_to_picked[pending_entry] += get_hist(pending_entry, moved_entry, num_syms);
+         }
+     }
+
+     // The position of an entry in the picked list is its new index.
+     for (uint32_t new_index = 0; new_index < num_syms; new_index++)
+         m_remap_table[m_entries_picked[new_index]] = new_index;
+ }
+
+ // Rebuilds m_hist as a zeroed num_syms x num_syms table and records, for every index
+ // in pIndices, its following and preceding neighbour through inc_hist(). A missing
+ // neighbour (before the first or after the last index) is passed as -1.
+ void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices)
+ {
+     // Shrinking to zero first makes the second resize re-create every element.
+     m_hist.resize(0);
+     m_hist.resize(num_syms * num_syms);
+
+     for (uint32_t pos = 0; pos < num_indices; pos++)
+     {
+         const uint32_t cur = pIndices[pos];
+
+         const bool has_next = (pos < (num_indices - 1));
+         const bool has_prev = (pos > 0);
+
+         // Kept as uint32_t so that -1 is converted exactly as in a mixed conditional expression.
+         const uint32_t next = has_next ? pIndices[pos + 1] : -1;
+         const uint32_t prev = has_prev ? pIndices[pos - 1] : -1;
+
+         inc_hist(cur, next, num_syms);
+         inc_hist(cur, prev, num_syms);
+     }
+ }
+
+ // Seeds the reordering: finds the largest cell of the num_syms x num_syms histogram,
+ // places its row entry (a) and column entry (b) in m_entries_picked, queues every
+ // other entry in m_entries_to_do, and initializes m_total_count_to_picked for the
+ // queued entries with their histogram counts against the picked entries.
+ void palette_index_reorderer::find_initial(uint32_t num_syms)
+ {
+     // The first cell with the largest count wins; an all-zero histogram leaves max_index at 0.
+     uint32_t max_count = 0, max_index = 0;
+     for (uint32_t i = 0; i < num_syms * num_syms; i++)
+         if (m_hist[i] > max_count)
+             max_count = m_hist[i], max_index = i;
+
+     const uint32_t a = max_index / num_syms, b = max_index % num_syms;
+
+     // When the row and column coincide (for example with an all-zero histogram, where
+     // a == b == 0) the entry must be picked only once. Picking it twice would leave
+     // num_syms + 1 picked entries, and a remap table built from the first num_syms of
+     // them would map one entry twice and another not at all.
+     m_entries_picked.push_back(a);
+     if (b != a)
+         m_entries_picked.push_back(b);
+
+     for (uint32_t i = 0; i < num_syms; i++)
+         if ((i != b) && (i != a))
+             m_entries_to_do.push_back(i);
+
+     for (uint32_t i = 0; i < m_entries_to_do.size(); i++)
+         for (uint32_t j = 0; j < m_entries_picked.size(); j++)
+             m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], m_entries_picked[j], num_syms);
+ }
+
+ // Chooses the pending entry with the highest score.
+ //   best_entry - receives the position inside m_entries_to_do of the chosen entry
+ //                (0 if no entry scores above 0).
+ //   best_count - receives the chosen entry's score.
+ // The score is the entry's m_total_count_to_picked value. When pDist_func is given,
+ // the score becomes (score + 1) scaled by a factor interpolated between
+ // 1 - dist_func_weight and 1 + dist_func_weight using the larger of the callback's
+ // results against the first and the last picked entry.
+ void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)
+ {
+     best_entry = 0;
+     best_count = 0;
+
+     for (uint32_t slot = 0; slot < m_entries_to_do.size(); slot++)
+     {
+         const uint32_t candidate = m_entries_to_do[slot];
+         double score = m_total_count_to_picked[candidate];
+
+         if (pDist_func)
+         {
+             float w = maximum((*pDist_func)(candidate, m_entries_picked.front(), pCtx), (*pDist_func)(candidate, m_entries_picked.back(), pCtx));
+             assert((w >= 0.0f) && (w <= 1.0f));
+             score = (score + 1.0f) * lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, w);
+         }
+
+         // Written as the negation of "<=" so that ties keep the earlier candidate.
+         if (!(score <= best_count))
+         {
+             best_entry = slot;
+             best_count = score;
+         }
+     }
+ }
+
+ // Scores on which end of m_entries_picked the entry entry_to_move should be placed.
+ // Each picked entry at position j contributes its histogram count with entry_to_move
+ // times r, where r = size - 1 - 2 * j: positive towards the front of the list and
+ // negative towards the back. Without pDist_func the sum of those products is returned.
+ // With pDist_func the result is w_left * l_count - w_right * r_count, where the weights
+ // are interpolated between 1 - dist_func_weight and 1 + dist_func_weight from the
+ // callback's results against the first and the last picked entry.
+ // The visible caller appends the entry at the back when the result is <= 0 and inserts
+ // it at the front otherwise.
+ // NOTE(review): the static_cast below appears to have lost its template argument in
+ // this copy of the text - confirm against the original source.
+ float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight)
+ {
+ float which_side = 0;
+
+ // l_count accumulates the contributions with r >= 0, r_count the magnitude of those with r < 0.
+ int l_count = 0, r_count = 0;
+ for (uint32_t j = 0; j < m_entries_picked.size(); j++)
+ {
+ // j is unsigned, so r is computed in unsigned arithmetic and converted to int on initialization.
+ const int count = get_hist(entry_to_move, m_entries_picked[j], num_syms), r = ((int)m_entries_picked.size() + 1 - 2 * (j + 1));
+ which_side += static_cast(r * count);
+ if (r >= 0)
+ l_count += r * count;
+ else
+ r_count += -r * count;
+ }
+
+ if (pDist_func)
+ {
+ // This replaces the plain sum computed in the loop above.
+ float w_left = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx));
+ float w_right = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx));
+ which_side = w_left * l_count - w_right * r_count;
+ }
+ return which_side;
+ }
+
+ // Computes error statistics between images a and b over the region both cover
+ // (minimum of the widths and of the heights) and stores them in m_max, m_mean,
+ // m_mean_squared, m_rms and m_psnr.
+ //   first_chan / total_chans - range of channels to compare; when total_chans is 0 the
+ //                              get_709_luma() values of the two pixels are compared instead.
+ //   avg_comp_error           - when true, the sums are additionally divided by the
+ //                              channel count (clamped to 1..4).
+ // NOTE(review): if either image has a zero width or height, total_values is 0 and the
+ // divisions below are by zero - confirm callers never pass empty images.
+ // NOTE(review): the clamp calls appear to have lost their template arguments in this
+ // copy of the text - confirm against the original source.
+ void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error)
+ {
+ assert((first_chan < 4U) && (first_chan + total_chans <= 4U));
+
+ const uint32_t width = std::min(a.get_width(), b.get_width());
+ const uint32_t height = std::min(a.get_height(), b.get_height());
+
+ // hist[d] counts how many compared values differ by exactly d.
+ double hist[256];
+ clear_obj(hist);
+
+ for (uint32_t y = 0; y < height; y++)
+ {
+ for (uint32_t x = 0; x < width; x++)
+ {
+ const color_rgba &ca = a(x, y), &cb = b(x, y);
+
+ if (total_chans)
+ {
+ for (uint32_t c = 0; c < total_chans; c++)
+ hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++;
+ }
+ else
+ {
+ hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++;
+ }
+ }
+ }
+
+ // sum accumulates difference * count, sum2 accumulates difference^2 * count.
+ m_max = 0;
+ double sum = 0.0f, sum2 = 0.0f;
+ for (uint32_t i = 0; i < 256; i++)
+ {
+ if (hist[i])
+ {
+ m_max = std::max(m_max, (float)i);
+ double v = i * hist[i];
+ sum += v;
+ sum2 += i * v;
+ }
+ }
+
+ double total_values = (double)width * (double)height;
+ if (avg_comp_error)
+ total_values *= (double)clamp(total_chans, 1, 4);
+
+ m_mean = (float)clamp(sum / total_values, 0.0f, 255.0);
+ m_mean_squared = (float)clamp(sum2 / total_values, 0.0f, 255.0 * 255.0);
+ m_rms = (float)sqrt(m_mean_squared);
+ // A zero RMS (no difference at all) is reported as a PSNR of 1e+10.
+ m_psnr = m_rms ? (float)clamp(log10(255.0 / m_rms) * 20.0, 0.0f, 300.0f) : 1e+10f;
+ }
+
+} // namespace basisu
diff --git a/basisu_enc.h b/basisu_enc.h
new file mode 100644
index 0000000..4e558ea
--- /dev/null
+++ b/basisu_enc.h
@@ -0,0 +1,2244 @@
+// basisu_enc.h
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "transcoder/basisu.h"
+#include "basisu_enc.h"
+#include "transcoder/basisu_transcoder_internal.h"
+
+#ifndef _WIN32
+#include
+#endif
+
+namespace basisu
+{
+ // Encoder library initialization. Takes no arguments and returns nothing; the
+ // definition is not part of this header.
+ void basisu_encoder_init();
+
+ // Error output helper taking a format string followed by a variable argument list
+ // (presumably printf-style, going by the name - confirm against the definition).
+ void error_printf(const char *pFmt, ...);
+
+ // Linear algebra
+
+ // Fixed-size vector of N components of type T with element-wise arithmetic,
+ // dot product, length/distance helpers and clamping.
+ // N is the component count (the typedefs below instantiate it as vec<4, double> etc.).
+ template <uint32_t N, typename T>
+ class vec
+ {
+ protected:
+     T m_v[N];
+
+ public:
+     enum { num_elements = N };
+
+     // The default constructor leaves the components uninitialized.
+     inline vec() { }
+     inline vec(eZero) { set_zero(); }
+
+     explicit inline vec(T val) { set(val); }
+     inline vec(T v0, T v1) { set(v0, v1); }
+     inline vec(T v0, T v1, T v2) { set(v0, v1, v2); }
+     inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); }
+     inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; }
+     // Converting constructor from a vector of a different size and/or element type.
+     template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); }
+
+     inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; }
+     inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; }
+
+     inline T getX() const { return m_v[0]; }
+     inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; }
+     inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; }
+     inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; }
+
+     inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; }
+     // Lexicographic comparison, component 0 first.
+     inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }
+
+     inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
+
+     // Copies the leading min(OtherN, N) components of other (converted to T) and zeroes
+     // any remaining components of this vector.
+     template <uint32_t OtherN, typename OtherT>
+     inline vec &set(const vec<OtherN, OtherT> &other)
+     {
+         uint32_t i;
+         // Self-assignment guard; compared as untyped addresses because the types may differ.
+         if (static_cast<const void *>(&other) == static_cast<const void *>(this))
+             return *this;
+         const uint32_t m = minimum<uint32_t>(OtherN, N);
+         for (i = 0; i < m; i++)
+             m_v[i] = static_cast<T>(other[i]);
+         for (; i < N; i++)
+             m_v[i] = 0;
+         return *this;
+     }
+
+     inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; }
+     inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; }
+     // Zeroes components [s, e).
+     inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; }
+
+     // The multi-value setters below assign as many of the given values as fit into N
+     // components and zero every component after the last given value.
+     inline vec &set(T v0, T v1)
+     {
+         m_v[0] = v0;
+         if (N >= 2)
+         {
+             m_v[1] = v1;
+             clear_elements(2, N);
+         }
+         return *this;
+     }
+
+     inline vec &set(T v0, T v1, T v2)
+     {
+         m_v[0] = v0;
+         if (N >= 2)
+         {
+             m_v[1] = v1;
+             if (N >= 3)
+             {
+                 m_v[2] = v2;
+                 clear_elements(3, N);
+             }
+         }
+         return *this;
+     }
+
+     inline vec &set(T v0, T v1, T v2, T v3)
+     {
+         m_v[0] = v0;
+         if (N >= 2)
+         {
+             m_v[1] = v1;
+             if (N >= 3)
+             {
+                 m_v[2] = v2;
+
+                 if (N >= 4)
+                 {
+                     m_v[3] = v3;
+                     // Clearing must start at index 4; starting at 5 left m_v[4] unset for N > 4.
+                     clear_elements(4, N);
+                 }
+             }
+         }
+         return *this;
+     }
+
+     inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; }
+     template <uint32_t OtherN, typename OtherT> inline vec &operator=(const vec<OtherN, OtherT> &rhs) { set(rhs); return *this; }
+
+     inline const T *get_ptr() const { return reinterpret_cast<const T *>(&m_v[0]); }
+     inline T *get_ptr() { return reinterpret_cast<T *>(&m_v[0]); }
+
+     inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; }
+     inline vec operator+ () const { return *this; }
+     inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; }
+     inline vec &operator-= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; }
+     inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; }
+     inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; }
+     inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; }
+     inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; }
+
+     friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; }
+     friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; }
+     friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; }
+     friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; }
+     friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; }
+     friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; }
+
+     static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; }
+
+     inline T dot(const vec &rhs) const { return dot_product(*this, rhs); }
+
+     // norm() is the squared length (dot product with itself); length() is its square root.
+     inline T norm() const { return dot_product(*this, *this); }
+     inline T length() const { return sqrt(norm()); }
+
+     inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; }
+
+     // Euclidean distance: the square root of squared_distance(), consistent with length().
+     inline T distance(const vec &other) const { return static_cast<T>(sqrt(squared_distance(other))); }
+
+     // Scales the vector to unit length; a zero-length vector is left unchanged.
+     inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }
+
+     // Clamps every component to [l, h].
+     inline vec &clamp(T l, T h)
+     {
+         for (uint32_t i = 0; i < N; i++)
+             m_v[i] = basisu::clamp(m_v[i], l, h);
+         return *this;
+     }
+ };
+
+ // Aliases for vectors of 4, 3, 2 and 1 double components.
+ typedef vec<4, double> vec4D;
+ typedef vec<3, double> vec3D;
+ typedef vec<2, double> vec2D;
+ typedef vec<1, double> vec1D;
+
+ // Aliases for vectors of 4, 3, 2 and 1 float components.
+ typedef vec<4, float> vec4F;
+ typedef vec<3, float> vec3F;
+ typedef vec<2, float> vec2F;
+ typedef vec<1, float> vec1F;
+
+ // Small matrix stored as an array of Rows row vectors (m_r), with element access by
+ // (row, column) or by row.
+ // NOTE(review): the template parameter list and the template arguments of the two vec
+ // typedefs appear to have been stripped from this copy of the text - confirm against
+ // the original source.
+ template
+ class matrix
+ {
+ public:
+ typedef vec col_vec;
+ typedef vec row_vec;
+
+ typedef T scalar_type;
+
+ enum { rows = Rows, cols = Cols };
+
+ protected:
+ row_vec m_r[Rows];
+
+ public:
+ // The default constructor does not initialize the elements itself.
+ inline matrix() {}
+ inline matrix(eZero) { set_zero(); }
+ inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; }
+ inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; }
+
+ // Element access; indices are checked with assert only.
+ inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
+ inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; }
+
+ // Row access; the index is checked with assert only.
+ inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; }
+ inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; }
+
+ // Sets every element to zero.
+ inline matrix &set_zero()
+ {
+ for (uint32_t i = 0; i < Rows; i++)
+ m_r[i].set_zero();
+ return *this;
+ }
+
+ // Zeroes the matrix and writes 1 on the diagonal; rows at or beyond Cols have no
+ // diagonal element and stay all zero.
+ inline matrix &set_identity()
+ {
+ for (uint32_t i = 0; i < Rows; i++)
+ {
+ m_r[i].set_zero();
+ if (i < Cols)
+ m_r[i][i] = 1.0f;
+ }
+ return *this;
+ }
+ };
+
+ // Estimates an axis from the N x N matrix cmatrix by repeatedly multiplying a trial
+ // vector by the matrix (at most 8 power iterations) and returns it normalized to unit
+ // length.
+ // NOTE(review): the template parameter list, the matrix template arguments and the
+ // static_cast/maximum template arguments appear to have been stripped from this copy of
+ // the text - confirm against the original source.
+ template
+ inline VectorType compute_pca_from_covar(matrix &cmatrix)
+ {
+ // Starting vector: 1 for a single component, otherwise values spread from .75 to 1.25.
+ VectorType axis;
+ if (N == 1)
+ axis.set(1.0f);
+ else
+ {
+ for (uint32_t i = 0; i < N; i++)
+ axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum(N - 1, 1)));
+ }
+
+ VectorType prev_axis(axis);
+
+ // Power iterations
+ for (uint32_t power_iter = 0; power_iter < 8; power_iter++)
+ {
+ VectorType trial_axis;
+ double max_sum = 0;
+
+ // trial_axis = cmatrix * axis, tracking the largest absolute component.
+ for (uint32_t i = 0; i < N; i++)
+ {
+ double sum = 0;
+ for (uint32_t j = 0; j < N; j++)
+ sum += cmatrix[i][j] * axis[j];
+
+ trial_axis[i] = static_cast(sum);
+
+ max_sum = maximum(fabs(sum), max_sum);
+ }
+
+ // Rescale so the largest component has magnitude 1; skipped for an all-zero result.
+ if (max_sum != 0.0f)
+ trial_axis *= static_cast(1.0f / max_sum);
+
+ // NOTE(review): from the second iteration on, prev_axis holds the axis from two
+ // iterations earlier rather than the immediately preceding one - confirm intended.
+ VectorType delta_axis(prev_axis - trial_axis);
+
+ prev_axis = axis;
+ axis = trial_axis;
+
+ // Stop early once the squared length of the change is small enough.
+ if (delta_axis.norm() < .0024f)
+ break;
+ }
+
+ return axis.normalize_in_place();
+ }
+
+ // Fills pIndices[0..num_indices-1] with the indices 0..num_indices-1 ordered so that
+ // pKeys[pIndices[0]], pKeys[pIndices[1]], ... is in ascending order. The keys
+ // themselves are not moved. T must be comparable with operator<. std::sort is not a
+ // stable sort, so the relative order of indices with equal keys is unspecified.
+ template <typename T> inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys)
+ {
+     // Start from the identity permutation.
+     for (uint32_t i = 0; i < num_indices; i++)
+         pIndices[i] = i;
+
+     // Order the indices by the key each one refers to.
+     std::sort(
+         pIndices,
+         pIndices + num_indices,
+         [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; }
+     );
+ }
+
+ // Color classes: color_rgba_i16 (four int16_t components), followed by color_rgba (simple 32-bit color class)
+
+ // Color with four int16_t components, accessible either as the array m_comps or as
+ // the named members r, g, b and a.
+ class color_rgba_i16
+ {
+ public:
+     union
+     {
+         int16_t m_comps[4];
+
+         struct
+         {
+             int16_t r;
+             int16_t g;
+             int16_t b;
+             int16_t a;
+         };
+     };
+
+     // Leaves the components uninitialized; only verifies the object layout at compile time.
+     inline color_rgba_i16()
+     {
+         static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4");
+     }
+
+     // Constructs from four integers, each clamped to the int16_t range.
+     inline color_rgba_i16(int sr, int sg, int sb, int sa)
+     {
+         set(sr, sg, sb, sa);
+     }
+
+     // Stores the four values after clamping each to [INT16_MIN, INT16_MAX].
+     inline color_rgba_i16 &set(int sr, int sg, int sb, int sa)
+     {
+         const int requested[4] = { sr, sg, sb, sa };
+
+         for (int comp = 0; comp < 4; comp++)
+             m_comps[comp] = (int16_t)clamp(requested[comp], INT16_MIN, INT16_MAX);
+
+         return *this;
+     }
+ };
+
+ // 32-bit RGBA color with four unsigned 8-bit components. The anonymous union lets the components
+ // be addressed either by index (m_comps[0..3]) or by name (r, g, b, a).
+ // The set() family clamps its integer inputs to [0, 255]; the set_noclamp_*() family truncates
+ // them with a plain uint8_t cast instead.
+ class color_rgba
+ {
+ public:
+     union
+     {
+         uint8_t m_comps[4];
+
+         struct
+         {
+             uint8_t r;
+             uint8_t g;
+             uint8_t b;
+             uint8_t a;
+         };
+     };
+
+     // Leaves the components uninitialized; only verifies at compile time that the object is 4 bytes.
+     inline color_rgba()
+     {
+         static_assert(sizeof(*this) == 4, "sizeof(*this) != 4");
+     }
+
+     // Opaque gray: r = g = b = clamped y, a = 255.
+     inline color_rgba(int y)
+     {
+         set(y);
+     }
+
+     // Gray with alpha: r = g = b = clamped y, a = clamped na.
+     inline color_rgba(int y, int na)
+     {
+         set(y, na);
+     }
+
+     // Full RGBA, each component clamped to [0, 255].
+     inline color_rgba(int sr, int sg, int sb, int sa)
+     {
+         set(sr, sg, sb, sa);
+     }
+
+     // Tag-dispatched constructor: the unnamed eNoClamp argument selects the non-clamping path,
+     // so each component is simply truncated to uint8_t.
+     inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa)
+     {
+         set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa);
+     }
+
+     // Sets r = g = b = (uint8_t)y and a = 255, without clamping.
+     inline color_rgba& set_noclamp_y(int y)
+     {
+         m_comps[0] = (uint8_t)y;
+         m_comps[1] = (uint8_t)y;
+         m_comps[2] = (uint8_t)y;
+         m_comps[3] = (uint8_t)255;
+         return *this;
+     }
+
+     // Sets all four components by truncating each argument to uint8_t, without clamping.
+     inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa)
+     {
+         m_comps[0] = (uint8_t)sr;
+         m_comps[1] = (uint8_t)sg;
+         m_comps[2] = (uint8_t)sb;
+         m_comps[3] = (uint8_t)sa;
+         return *this;
+     }
+
+     // Sets an opaque gray: r = g = b = y clamped to [0, 255], a = 255.
+     inline color_rgba &set(int y)
+     {
+         m_comps[0] = static_cast<uint8_t>(clamp(y, 0, 255));
+         m_comps[1] = m_comps[0];
+         m_comps[2] = m_comps[0];
+         m_comps[3] = 255;
+         return *this;
+     }
+
+     // Sets a gray with alpha: r = g = b = clamped y, a = clamped na.
+     inline color_rgba &set(int y, int na)
+     {
+         m_comps[0] = static_cast<uint8_t>(clamp(y, 0, 255));
+         m_comps[1] = m_comps[0];
+         m_comps[2] = m_comps[0];
+         m_comps[3] = static_cast<uint8_t>(clamp(na, 0, 255));
+         return *this;
+     }
+
+     // Sets all four components, each clamped to [0, 255].
+     inline color_rgba &set(int sr, int sg, int sb, int sa)
+     {
+         m_comps[0] = static_cast<uint8_t>(clamp(sr, 0, 255));
+         m_comps[1] = static_cast<uint8_t>(clamp(sg, 0, 255));
+         m_comps[2] = static_cast<uint8_t>(clamp(sb, 0, 255));
+         m_comps[3] = static_cast<uint8_t>(clamp(sa, 0, 255));
+         return *this;
+     }
+
+     // Sets r, g, b (clamped) and leaves alpha untouched.
+     inline color_rgba &set_rgb(int sr, int sg, int sb)
+     {
+         m_comps[0] = static_cast<uint8_t>(clamp(sr, 0, 255));
+         m_comps[1] = static_cast<uint8_t>(clamp(sg, 0, 255));
+         m_comps[2] = static_cast<uint8_t>(clamp(sb, 0, 255));
+         return *this;
+     }
+
+     // Copies r, g, b from other and leaves alpha untouched.
+     inline color_rgba &set_rgb(const color_rgba &other)
+     {
+         r = other.r;
+         g = other.g;
+         b = other.b;
+         return *this;
+     }
+
+     // Component access by index (0 = r, 1 = g, 2 = b, 3 = a); the index is only checked by assert.
+     inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; }
+     inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; }
+
+     // Zeroes all four components (including alpha).
+     inline void clear()
+     {
+         m_comps[0] = 0;
+         m_comps[1] = 0;
+         m_comps[2] = 0;
+         m_comps[3] = 0;
+     }
+
+     // Component-wise equality over all four channels.
+     inline bool operator== (const color_rgba &rhs) const
+     {
+         if (m_comps[0] != rhs.m_comps[0]) return false;
+         if (m_comps[1] != rhs.m_comps[1]) return false;
+         if (m_comps[2] != rhs.m_comps[2]) return false;
+         if (m_comps[3] != rhs.m_comps[3]) return false;
+         return true;
+     }
+
+     inline bool operator!= (const color_rgba &rhs) const
+     {
+         return !(*this == rhs);
+     }
+
+     // Lexicographic ordering over (r, g, b, a).
+     inline bool operator<(const color_rgba &rhs) const
+     {
+         for (int i = 0; i < 4; i++)
+         {
+             if (m_comps[i] < rhs.m_comps[i])
+                 return true;
+             else if (m_comps[i] != rhs.m_comps[i])
+                 return false;
+         }
+         return false;
+     }
+
+     // Fixed-point weighted sum of r, g, b. The three weights add up to 65536, 32768 is added
+     // for rounding, and the result is shifted back down by 16 bits.
+     inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; }
+ };
+
+ // Convenience alias for a dynamic array of colors.
+ typedef std::vector<color_rgba> color_rgba_vec;
+
+ // Opaque black and opaque white (alpha = 255).
+ const color_rgba g_black_color(0, 0, 0, 255);
+ const color_rgba g_white_color(255, 255, 255, 255);
+
+ // Squared Euclidean distance between the RGB triples (r0, g0, b0) and (r1, g1, b1).
+ inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1)
+ {
+     const int delta_r = r0 - r1;
+     const int delta_g = g0 - g1;
+     const int delta_b = b0 - b1;
+     return (delta_r * delta_r) + (delta_g * delta_g) + (delta_b * delta_b);
+ }
+
+ // Squared Euclidean distance between the RGBA quadruples (r0, g0, b0, a0) and (r1, g1, b1, a1).
+ inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1)
+ {
+     const int delta_r = r0 - r1;
+     const int delta_g = g0 - g1;
+     const int delta_b = b0 - b1;
+     const int delta_a = a0 - a1;
+     return (delta_r * delta_r) + (delta_g * delta_g) + (delta_b * delta_b) + (delta_a * delta_a);
+ }
+
+ // Squared Euclidean distance between two colors. The alpha channel takes part in the
+ // distance only when alpha is true; otherwise just r, g and b are compared.
+ inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha)
+ {
+     return alpha
+         ? color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a)
+         : color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b);
+ }
+
+ // TODO: Allow user to control channel weightings.
+ // Distance between two colors as an unsigned integer.
+ // - perceptual == false: forwards to the squared-difference color_distance(e1, e2, alpha).
+ // - perceptual == true: derives a luma value for each color from fixed R/G/B weights, forms two
+ //   chroma-like terms (r - luma and b - luma), and returns a weighted sum of the squared luma
+ //   and chroma deltas, with the luma delta weighted most heavily. If alpha is true,
+ //   128 * (alpha delta)^2 is added on top.
+ inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha)
+ {
+     if (perceptual)
+     {
+         const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f;
+         const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f;
+
+         const float cr1 = e1.r - l1;
+         const float cr2 = e2.r - l2;
+
+         const float cb1 = e1.b - l1;
+         const float cb2 = e2.b - l2;
+
+         const float dl = l1 - l2;
+         const float dcr = cr1 - cr2;
+         const float dcb = cb1 - cb2;
+
+         // The float sum is truncated toward zero when converted to uint32_t.
+         uint32_t d = static_cast<uint32_t>(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb);
+
+         if (alpha)
+         {
+             // Widen to int before subtracting so the difference can be negative.
+             int da = static_cast<int>(e1.a) - static_cast<int>(e2.a);
+             d += static_cast<uint32_t>(128.0f*da*da);
+         }
+
+         return d;
+     }
+     else
+         return color_distance(e1, e2, alpha);
+ }
+
+ // String helpers
+
+ // Returns the index of the last occurrence of c in filename, or -1 if c does not occur.
+ inline int string_find_right(const std::string& filename, char c)
+ {
+     const size_t pos = filename.rfind(c);
+     if (pos == std::string::npos)
+         return -1;
+     return (int)pos;
+ }
+
+ // Returns the extension of filename without the leading dot, or "" if the final path
+ // component has no dot. Examples: "dir/file.txt" -> "txt", "dir.d/file" -> "", "a." -> "".
+ // On _WIN32 both '/' and '\\' are treated as path separators, and the LAST separator of either
+ // kind delimits the final component, so paths that mix the two styles are handled correctly.
+ inline std::string string_get_extension(const std::string &filename)
+ {
+#ifdef _WIN32
+     const size_t sep = filename.find_last_of("/\\");
+#else
+     const size_t sep = filename.find_last_of('/');
+#endif
+
+     const size_t dot = filename.find_last_of('.');
+     if (dot == std::string::npos)
+         return "";
+
+     // A dot located before the last separator belongs to a directory name, not to the file.
+     if ((sep != std::string::npos) && (dot < sep))
+         return "";
+
+     return filename.substr(dot + 1);
+ }
+
+ // Strips the extension (the last '.' and everything after it) from filename in place.
+ // Returns true if something was removed; returns false and leaves filename untouched when the
+ // final path component contains no dot.
+ // On _WIN32 both '/' and '\\' are treated as path separators, and the LAST separator of either
+ // kind delimits the final component, so paths that mix the two styles are handled correctly.
+ inline bool string_remove_extension(std::string &filename)
+ {
+#ifdef _WIN32
+     const size_t sep = filename.find_last_of("/\\");
+#else
+     const size_t sep = filename.find_last_of('/');
+#endif
+
+     const size_t dot = filename.find_last_of('.');
+     if (dot == std::string::npos)
+         return false;
+
+     // A dot located before the last separator belongs to a directory name, not to the file.
+     if ((sep != std::string::npos) && (dot < sep))
+         return false;
+
+     filename.resize(dot);
+
+     return true;
+ }
+
+ // printf-style formatting that returns the result as a std::string.
+ // The text is produced in a fixed 2048-byte stack buffer. On the non-Windows path vsnprintf
+ // is given the buffer size, so longer output is truncated to fit.
+ // NOTE(review): the overflow behavior of vsprintf_s on the _WIN32 path should be confirmed against the CRT docs.
+ inline std::string string_format(const char* pFmt, ...)
+ {
+     char text[2048];
+
+     va_list ap;
+     va_start(ap, pFmt);
+#ifdef _WIN32
+     vsprintf_s(text, sizeof(text), pFmt, ap);
+#else
+     vsnprintf(text, sizeof(text), pFmt, ap);
+#endif
+     va_end(ap);
+
+     std::string formatted(text);
+     return formatted;
+ }
+
+ // Returns a copy of s with every character passed through tolower() (current C locale).
+ inline std::string string_tolower(const std::string& s)
+ {
+     std::string result(s);
+     for (size_t i = 0; i < result.size(); i++)
+     {
+         // tolower() requires a value representable as unsigned char (or EOF); passing a negative
+         // plain char (bytes >= 0x80 where char is signed) is undefined behavior, so go through unsigned char.
+         result[i] = (char)tolower((unsigned char)result[i]);
+     }
+     return result;
+ }
+
+ // Bounded string copy: copies pSrc into the dst_len-byte buffer pDst, truncating if necessary,
+ // and always null-terminates the result when dst_len > 0. Returns pDst.
+ // All three arguments are asserted to be non-null / non-zero; in builds without asserts a
+ // zero dst_len returns immediately without touching the buffer.
+ inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc)
+ {
+     assert(pDst && pSrc && dst_len);
+     if (dst_len == 0)
+         return pDst;
+
+     // Bytes required to hold the whole source including its terminator.
+     const size_t bytes_needed = strlen(pSrc) + 1;
+
+     if (bytes_needed > dst_len)
+     {
+         // Does not fit: keep as many characters as possible and terminate manually.
+         const size_t keep = dst_len - 1;
+         if (keep > 0)
+             memcpy(pDst, pSrc, keep);
+         pDst[keep] = '\0';
+     }
+     else
+     {
+         memcpy(pDst, pSrc, bytes_needed);
+     }
+
+     return pDst;
+ }
+
+ // True if s is non-empty and its last character is c.
+ inline bool string_ends_with(const std::string& s, char c)
+ {
+     if (s.empty())
+         return false;
+     return s.back() == c;
+ }
+
+ // Splits the path p into drive, directory, base filename and extension.
+ // Any of the four output pointers may be null, in which case that part is not returned.
+ // Returns false if the underlying platform routine fails.
+ // - _MSC_VER builds: delegates to _splitpath_s and copies out the requested parts.
+ // - other builds: uses dirname()/basename(). *pDrive is always set to the empty string, *pDir gets
+ //   a trailing '/' appended when it is non-empty and lacks one, *pFilename is the base name with
+ //   its extension removed, and *pExt is the extension including its leading '.' (or empty).
+ inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt)
+ {
+#ifdef _MSC_VER
+     char drive_buf[_MAX_DRIVE] = { 0 };
+     char dir_buf[_MAX_DIR] = { 0 };
+     char fname_buf[_MAX_FNAME] = { 0 };
+     char ext_buf[_MAX_EXT] = { 0 };
+
+     // Only pass buffers for the parts the caller actually asked for.
+     errno_t error = _splitpath_s(p,
+         pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,
+         pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,
+         pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,
+         pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);
+     if (error != 0)
+         return false;
+
+     if (pDrive) *pDrive = drive_buf;
+     if (pDir) *pDir = dir_buf;
+     if (pFilename) *pFilename = fname_buf;
+     if (pExt) *pExt = ext_buf;
+     return true;
+#else
+     // dirname()/basename() are allowed to modify the buffer they are handed, so each call gets
+     // its own writable copy of the path. std::string copies (contiguous and null-terminated
+     // since C++11) are used instead of fixed-size arrays so long paths are not silently truncated.
+     std::string dirtmp(p), nametmp(p);
+
+     if (pDrive)
+         pDrive->resize(0);
+
+     const char *pDirName = dirname(&dirtmp[0]);
+     const char* pBaseName = basename(&nametmp[0]);
+     if ((!pDirName) || (!pBaseName))
+         return false;
+
+     if (pDir)
+     {
+         *pDir = pDirName;
+         if ((pDir->size()) && (pDir->back() != '/'))
+             *pDir += "/";
+     }
+
+     if (pFilename)
+     {
+         *pFilename = pBaseName;
+         string_remove_extension(*pFilename);
+     }
+
+     if (pExt)
+     {
+         *pExt = pBaseName;
+         *pExt = string_get_extension(*pExt);
+         // string_get_extension() drops the dot; put it back so the parts can be re-joined.
+         if (pExt->size())
+             *pExt = "." + *pExt;
+     }
+
+     return true;
+#endif
+ }
+
+ // True if c separates path components: '/' everywhere, plus '\\' when built for _WIN32.
+ inline bool is_path_separator(char c)
+ {
+     if (c == '/')
+         return true;
+#ifdef _WIN32
+     if (c == '\\')
+         return true;
+#endif
+     return false;
+ }
+
+ // True if c is the drive separator ':' when built for _WIN32; always false on other platforms.
+ inline bool is_drive_separator(char c)
+ {
+#ifndef _WIN32
+     (void)c; // unused on this platform
+     return false;
+#else
+     return ':' == c;
+#endif
+ }
+
+ // Joins p and q into dst. BASISU_PATH_SEPERATOR_CHAR is inserted between them only when p is
+ // non-empty, p does not already end with a path separator, and q does not start with one.
+ // The result is built in a temporary and swapped into dst at the end, so p may point into
+ // dst's own buffer (the three-part overload relies on this).
+ inline void string_combine_path(std::string &dst, const char *p, const char *q)
+ {
+     std::string combined(p);
+
+     const bool need_separator = !combined.empty() && !is_path_separator(q[0]) && !is_path_separator(combined.back());
+     if (need_separator)
+         combined.append(1, BASISU_PATH_SEPERATOR_CHAR);
+
+     combined += q;
+     dst.swap(combined);
+ }
+
+ // Joins three path parts into dst: first p with q, then that result with r, using the
+ // two-part string_combine_path() for each step.
+ inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r)
+ {
+     string_combine_path(dst, p, q);
+
+     // dst now holds "p/q"; feed it back in as the left-hand side of the second join.
+     const char *pFirst_two = dst.c_str();
+     string_combine_path(dst, pFirst_two, r);
+ }
+
+ // Joins p, q and r into dst and then appends the extension pExt. A '.' is inserted before the
+ // extension only when pExt is non-empty, pExt does not itself start with '.', and the joined
+ // path does not already end with '.'.
+ inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt)
+ {
+     string_combine_path(dst, p, q, r);
+
+     const bool ext_present = (pExt[0] != '\0');
+     const bool ext_has_dot = (pExt[0] == '.');
+     if (ext_present && !ext_has_dot && !string_ends_with(dst, '.'))
+         dst.push_back('.');
+
+     dst.append(pExt);
+ }
+
+ // Extracts the drive + directory portion of p into path. Returns false, leaving path
+ // unchanged, if string_split_path() fails.
+ inline bool string_get_pathname(const char *p, std::string &path)
+ {
+     std::string drive_part, dir_part;
+
+     const bool ok = string_split_path(p, &drive_part, &dir_part, NULL, NULL);
+     if (ok)
+         string_combine_path(path, drive_part.c_str(), dir_part.c_str());
+
+     return ok;
+ }
+
+ // Extracts the final path component of p (base name plus extension) into filename.
+ // Returns false if string_split_path() fails.
+ inline bool string_get_filename(const char *p, std::string &filename)
+ {
+     std::string ext_part;
+
+     const bool ok = string_split_path(p, nullptr, nullptr, &filename, &ext_part);
+     if (ok)
+         filename += ext_part;
+
+     return ok;
+ }
+
+ // Small pseudo-random number helper wrapping a std::mt19937 engine.
+ // A fresh distribution object is constructed on every call, so no distribution state is
+ // carried between calls; only the engine state advances.
+ class rand
+ {
+     std::mt19937 m_mt;
+
+ public:
+     // Uses std::mt19937's default seed.
+     rand() { }
+
+     // Seeds the engine with s; equal seeds give equal engine state.
+     rand(uint32_t s) { seed(s); }
+     void seed(uint32_t s) { m_mt.seed(s); }
+
+     // between [l,h]
+     int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); }
+
+     // Draws an int over the full int32 range and reinterprets it as unsigned.
+     uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); }
+
+     // Random boolean.
+     bool bit() { return irand(0, 1) == 1; }
+
+     // between [l,h)
+     float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); }
+
+     // Normally distributed value with the given mean and standard deviation.
+     float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); }
+ };
+
+ // Binary max-heap of (index, priority) entries: the entry with the largest priority is on top.
+ // The heap is stored 1-based in m_heap (slot 0 is unused): the parent of slot k is k >> 1 and
+ // its children are k << 1 and (k << 1) + 1.
+ class priority_queue
+ {
+ public:
+     priority_queue() :
+         m_size(0)
+     {
+     }
+
+     // Removes all entries.
+     void clear()
+     {
+         m_heap.clear();
+         m_size = 0;
+     }
+
+     // Resets the queue so it holds exactly one entry (first_index, first_priority), with
+     // storage sized for max_entries entries. add_heap() grows the storage further if needed.
+     void init(uint32_t max_entries, uint32_t first_index, float first_priority)
+     {
+         // Slot 0 is unused and slot 1 receives the first entry, so at least two slots are
+         // required even when max_entries is 0 (otherwise the writes below would be out of bounds).
+         const uint32_t num_entries = (max_entries < 1) ? 1 : max_entries;
+         m_heap.resize(num_entries + 1);
+         m_heap[1].m_index = first_index;
+         m_heap[1].m_priority = first_priority;
+         m_size = 1;
+     }
+
+     // Number of entries currently in the queue.
+     inline uint32_t size() const { return m_size; }
+
+     // Index / priority of the top entry. Only meaningful while size() > 0.
+     inline uint32_t get_top_index() const { return m_heap[1].m_index; }
+     inline float get_top_priority() const { return m_heap[1].m_priority; }
+
+     // Removes the top entry: the last entry is moved to the root and sifted down.
+     inline void delete_top()
+     {
+         assert(m_size > 0);
+         m_heap[1] = m_heap[m_size];
+         m_size--;
+         if (m_size)
+             down_heap(1);
+     }
+
+     // Inserts (index, priority), growing the storage if necessary.
+     inline void add_heap(uint32_t index, float priority)
+     {
+         m_size++;
+
+         uint32_t k = m_size;
+
+         if (m_size >= m_heap.size())
+             m_heap.resize(m_size + 1);
+
+         // Sift up: shift parents down until one with a strictly larger priority (or the root) is reached.
+         for (;;)
+         {
+             uint32_t parent_index = k >> 1;
+             if ((!parent_index) || (m_heap[parent_index].m_priority > priority))
+                 break;
+             m_heap[k] = m_heap[parent_index];
+             k = parent_index;
+         }
+
+         m_heap[k].m_index = index;
+         m_heap[k].m_priority = priority;
+     }
+
+ private:
+     struct entry
+     {
+         uint32_t m_index;
+         float m_priority;
+     };
+
+     std::vector<entry> m_heap;
+     uint32_t m_size;
+
+     // Push down entry at index
+     inline void down_heap(uint32_t heap_index)
+     {
+         uint32_t orig_index = m_heap[heap_index].m_index;
+         const float orig_priority = m_heap[heap_index].m_priority;
+
+         uint32_t child_index;
+         while ((child_index = (heap_index << 1)) <= m_size)
+         {
+             // Pick the larger of the two children (when a right child exists).
+             if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index;
+             if (orig_priority > m_heap[child_index].m_priority)
+                 break;
+             m_heap[heap_index] = m_heap[child_index];
+             heap_index = child_index;
+         }
+
+         m_heap[heap_index].m_index = orig_index;
+         m_heap[heap_index].m_priority = orig_priority;
+     }
+ };
+
+ // Tree structured vector quantization (TSVQ)
+
+ // Tree-structured vector quantizer. Training vectors (each with an integer weight) are
+ // collected with add_training_vec(); generate() then builds a binary tree by repeatedly
+ // splitting the leaf with the largest variance until the requested number of leaves is reached,
+ // and retrieve() returns one codebook entry per leaf.
+ // NOTE(review): the template parameter list after `template`, and the element types of several
+ // std::vector / std::pair / static_cast / matrix uses below, appear to be missing from this text
+ // (TrainingVectorType is used but never declared here) - confirm against the upstream source.
+ template
+ class tree_vector_quant
+ {
+ public:
+ typedef std::pair training_vec_with_weight;
+ typedef std::vector< training_vec_with_weight > array_of_weighted_training_vecs;
+
+ tree_vector_quant()
+ {
+ }
+
+ // Discards all training vectors and all tree nodes.
+ void clear()
+ {
+ clear_vector(m_training_vecs);
+ clear_vector(m_nodes);
+ }
+
+ // Appends one training vector together with its weight.
+ void add_training_vec(const TrainingVectorType &v, uint32_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); }
+
+ // Appends one entry per leaf node to codebook: the leaf's list of training-vector indices
+ // (indices into the array filled by add_training_vec()). Existing codebook content is kept.
+ void retrieve(std::vector< std::vector > &codebook) const
+ {
+ for (uint32_t i = 0; i < m_nodes.size(); i++)
+ {
+ const tsvq_node &n = m_nodes[i];
+ if (!n.is_leaf())
+ continue;
+
+ codebook.resize(codebook.size() + 1);
+ codebook.back() = n.m_training_vecs;
+ }
+ }
+
+ // Appends one entry per leaf node to codebook: the leaf's origin vector (m_origin).
+ // Existing codebook content is kept.
+ void retrieve(std::vector &codebook) const
+ {
+ for (uint32_t i = 0; i < m_nodes.size(); i++)
+ {
+ const tsvq_node &n = m_nodes[i];
+ if (!n.is_leaf())
+ continue;
+
+ codebook.resize(codebook.size() + 1);
+ codebook.back() = n.m_origin;
+ }
+ }
+
+ // Builds the tree with at most max_size leaves. Returns false if no training vectors were added.
+ // Starting from a root holding every training vector, the leaf with the highest variance is
+ // popped from a priority queue and split in two; the loop ends when the queue is empty or
+ // max_size leaves exist.
+ bool generate(uint32_t max_size)
+ {
+ if (!m_training_vecs.size())
+ return false;
+
+ clear_vector(m_nodes);
+ // Capacity is reserved up front; split_node() grows m_nodes while a reference to an existing
+ // node is still held in the loop below - presumably this reserve is what keeps it valid.
+ m_nodes.reserve(max_size * 2 + 1);
+
+ m_nodes.push_back(prepare_root());
+
+ priority_queue var_heap;
+ var_heap.init(max_size, 0, m_nodes[0].m_var);
+
+ // Scratch index lists reused by every split.
+ std::vector l_children, r_children;
+
+ // Now split the worst nodes
+ l_children.reserve(m_training_vecs.size() + 1);
+ r_children.reserve(m_training_vecs.size() + 1);
+
+ uint32_t total_leaf_nodes = 1;
+
+ while ((var_heap.size()) && (total_leaf_nodes < max_size))
+ {
+ const uint32_t node_index = var_heap.get_top_index();
+ const tsvq_node &node = m_nodes[node_index];
+
+ assert(node.m_var == var_heap.get_top_priority());
+ assert(node.is_leaf());
+
+ var_heap.delete_top();
+
+ // A node holding a single training vector cannot be split further.
+ if (node.m_training_vecs.size() > 1)
+ {
+ if (split_node(node_index, var_heap, l_children, r_children))
+ {
+ // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total.
+ total_leaf_nodes += 1;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ private:
+ // One node of the tree. A node is a leaf while m_left_index is negative.
+ class tsvq_node
+ {
+ public:
+ // Note: m_var is not initialized by this constructor.
+ inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1) { }
+
+ // vecs is erased
+ inline void set(const TrainingVectorType &org, uint64_t weight, float var, std::vector &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); }
+
+ inline bool is_leaf() const { return m_left_index < 0; }
+
+ // Variance measure of the node's vectors (see prepare_root()/refine_split() for how it is computed).
+ float m_var;
+ // Sum of the weights of the node's training vectors.
+ uint64_t m_weight;
+ // Weighted mean of the node's training vectors.
+ TrainingVectorType m_origin;
+ // Indices of the child nodes in m_nodes, or -1 when there are none.
+ int32_t m_left_index, m_right_index;
+ // Indices (into the outer m_training_vecs array) of the vectors assigned to this node.
+ std::vector m_training_vecs;
+ };
+
+ typedef std::vector tsvq_node_vec;
+ tsvq_node_vec m_nodes;
+
+ array_of_weighted_training_vecs m_training_vecs;
+
+ // Builds the root node, which owns every training vector. Its variance is computed as
+ // sum(w * v.v) - |sum(w * v)|^2 / sum(w), and its origin as the weighted mean sum(w * v) / sum(w).
+ tsvq_node prepare_root() const
+ {
+ double ttsum = 0.0f;
+
+ // Prepare root node containing all training vectors
+ tsvq_node root;
+ root.m_training_vecs.reserve(m_training_vecs.size());
+
+ for (uint32_t i = 0; i < m_training_vecs.size(); i++)
+ {
+ const TrainingVectorType &v = m_training_vecs[i].first;
+ const uint32_t weight = m_training_vecs[i].second;
+
+ root.m_training_vecs.push_back(i);
+
+ root.m_origin += (v * static_cast(weight));
+ root.m_weight += weight;
+
+ ttsum += v.dot(v) * weight;
+ }
+
+ // m_origin still holds the un-normalized weighted sum at this point.
+ root.m_var = static_cast(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight));
+
+ root.m_origin *= (1.0f / root.m_weight);
+
+ return root;
+ }
+
+ // Splits the leaf m_nodes[node_index] into two new child nodes appended to m_nodes.
+ // Children with positive variance and more than one vector are pushed onto var_heap so they
+ // can be split later. l_children / r_children are scratch lists; their contents are swapped
+ // into the new children. Returns false (tree unchanged) if refine_split() could not separate
+ // the vectors into two non-empty groups.
+ bool split_node(uint32_t node_index, priority_queue &var_heap, std::vector &l_children, std::vector &r_children)
+ {
+ TrainingVectorType l_child_org, r_child_org;
+ uint64_t l_weight = 0, r_weight = 0;
+ float l_var = 0.0f, r_var = 0.0f;
+
+ // Compute initial left/right child origins
+ prep_split(m_nodes[node_index], l_child_org, r_child_org);
+
+ // Use k-means iterations to refine these children vectors
+ if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children))
+ return false;
+
+ // Create children
+ const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1;
+
+ m_nodes[node_index].m_left_index = l_child_index;
+ m_nodes[node_index].m_right_index = r_child_index;
+
+ m_nodes.resize(m_nodes.size() + 2);
+
+ tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index];
+
+ l_child.set(l_child_org, l_weight, l_var, l_children);
+ r_child.set(r_child_org, r_weight, r_var, r_children);
+
+ if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1))
+ var_heap.add_heap(l_child_index, l_var);
+
+ if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1))
+ var_heap.add_heap(r_child_index, r_var);
+
+ return true;
+ }
+
+ // Returns the axis along which to split node: the weighted covariance matrix of the node's
+ // vectors (relative to the node origin) is built and handed to compute_pca_from_covar().
+ TrainingVectorType compute_split_axis(const tsvq_node &node) const
+ {
+ const uint32_t N = TrainingVectorType::num_elements;
+
+ matrix cmatrix(cZero);
+
+ // Compute covariance matrix from weighted input vectors
+ for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
+ {
+ const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin);
+ const TrainingVectorType w(static_cast(m_training_vecs[node.m_training_vecs[i]].second) * v);
+
+ // Only the upper triangle (y >= x) is accumulated; the matrix is symmetric.
+ for (uint32_t x = 0; x < N; x++)
+ for (uint32_t y = x; y < N; y++)
+ cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y];
+ }
+
+ const float renorm_scale = 1.0f / node.m_weight;
+
+ for (uint32_t x = 0; x < N; x++)
+ for (uint32_t y = x; y < N; y++)
+ cmatrix[x][y] *= renorm_scale;
+
+ // Diagonal flip
+ for (uint32_t x = 0; x < (N - 1); x++)
+ for (uint32_t y = x + 1; y < N; y++)
+ cmatrix[y][x] = cmatrix[x][y];
+
+ return compute_pca_from_covar(cmatrix);
+ }
+
+ // Chooses initial origins for the two children of node. With exactly two vectors, each
+ // vector becomes one child. Otherwise the vectors are partitioned by the sign of their
+ // projection onto the split axis and each side's weighted mean is returned; if one side ends
+ // up empty, both children start at the node origin, nudged slightly apart.
+ void prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const
+ {
+ const uint32_t N = TrainingVectorType::num_elements;
+
+ if (2 == node.m_training_vecs.size())
+ {
+ l_child_result = m_training_vecs[node.m_training_vecs[0]].first;
+ r_child_result = m_training_vecs[node.m_training_vecs[1]].first;
+ return;
+ }
+
+ TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f);
+ double l_weight = 0.0f, r_weight = 0.0f;
+
+ // Compute initial left/right children
+ for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
+ {
+ const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second;
+
+ const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
+
+ // Signed position of v along the axis, measured from the node origin.
+ double t = (v - node.m_origin).dot(axis);
+ if (t >= 0.0f)
+ {
+ r_child += v * weight;
+ r_weight += weight;
+ }
+ else
+ {
+ l_child += v * weight;
+ l_weight += weight;
+ }
+ }
+
+ if ((l_weight > 0.0f) && (r_weight > 0.0f))
+ {
+ l_child_result = l_child * static_cast(1.0f / l_weight);
+ r_child_result = r_child * static_cast(1.0f / r_weight);
+ }
+ else
+ {
+ // Empty cell problem
+ l_child_result = node.m_origin;
+ r_child_result = node.m_origin;
+
+ // Nudge the two cells apart and hope k-means can separate them.
+ for (uint32_t i = 0; i < N; i++)
+ {
+ l_child_result[i] -= .000125f;
+ r_child_result[i] += .000125f;
+ }
+ }
+ }
+
+ // Refines the two child origins with up to cMaxIters k-means style iterations over node's
+ // vectors. In/out: l_child / r_child hold the starting origins and receive the refined ones.
+ // Out: l_weight / r_weight (total weights), l_var / r_var (variances), l_children / r_children
+ // (training-vector indices assigned to each side). Returns false as soon as an iteration
+ // leaves one side with zero total weight.
+ bool refine_split(const tsvq_node &node,
+ TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, std::vector &l_children,
+ TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, std::vector &r_children) const
+ {
+ l_children.reserve(node.m_training_vecs.size());
+ r_children.reserve(node.m_training_vecs.size());
+
+ float prev_total_variance = 1e+10f;
+
+ // Refine left/right children locations using k-means iterations
+ const uint32_t cMaxIters = 6;
+ for (uint32_t iter = 0; iter < cMaxIters; iter++)
+ {
+ l_children.resize(0);
+ r_children.resize(0);
+
+ TrainingVectorType new_l_child(cZero), new_r_child(cZero);
+
+ double l_ttsum = 0.0f, r_ttsum = 0.0f;
+
+ l_weight = 0;
+ r_weight = 0;
+
+ // Assign every vector to the nearer child origin; ties go to the right child.
+ for (uint32_t i = 0; i < node.m_training_vecs.size(); i++)
+ {
+ const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first;
+ const uint32_t weight = m_training_vecs[node.m_training_vecs[i]].second;
+
+ double left_dist2 = l_child.squared_distance(v), right_dist2 = r_child.squared_distance(v);
+
+ if (left_dist2 >= right_dist2)
+ {
+ new_r_child += (v * static_cast(weight));
+ r_weight += weight;
+
+ r_ttsum += weight * v.dot(v);
+ r_children.push_back(node.m_training_vecs[i]);
+ }
+ else
+ {
+ new_l_child += (v * static_cast(weight));
+ l_weight += weight;
+
+ l_ttsum += weight * v.dot(v);
+ l_children.push_back(node.m_training_vecs[i]);
+ }
+ }
+
+ if ((!l_weight) || (!r_weight))
+ return false;
+
+ // Same variance formula as prepare_root(): sum(w * v.v) - |sum(w * v)|^2 / sum(w).
+ l_var = static_cast(l_ttsum - (new_l_child.dot(new_l_child) / l_weight));
+ r_var = static_cast(r_ttsum - (new_r_child.dot(new_r_child) / r_weight));
+
+ // Turn the weighted sums into weighted means.
+ new_l_child *= (1.0f / l_weight);
+ new_r_child *= (1.0f / r_weight);
+
+ l_child = new_l_child;
+ r_child = new_r_child;
+
+ float total_var = l_var + r_var;
+ const float cGiveupVariance = .00001f;
+ if (total_var < cGiveupVariance)
+ break;
+
+ // Check to see if the variance has settled
+ const float cVarianceDeltaThresh = .00125f;
+ if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh)
+ break;
+
+ prev_total_variance = total_var;
+ }
+
+ return true;
+ }
+ };
+
+ // Canonical Huffman coding
+
+ // Simple array of 32-bit counters indexed by symbol.
+ class histogram
+ {
+     std::vector<uint32_t> m_hist;
+
+ public:
+     // Creates a histogram with `size` counters, all zero.
+     histogram(uint32_t size = 0) { init(size); }
+
+     // Removes all counters.
+     void clear()
+     {
+         clear_vector(m_hist);
+     }
+
+     // Resizes to `size` counters and resets every one of them to zero
+     // (the array is emptied first so that previously existing counters are also cleared).
+     void init(uint32_t size)
+     {
+         m_hist.resize(0);
+         m_hist.resize(size);
+     }
+
+     // Number of counters.
+     inline uint32_t size() const { return static_cast<uint32_t>(m_hist.size()); }
+
+     // Unchecked access to the counter at index.
+     inline const uint32_t &operator[] (uint32_t index) const
+     {
+         return m_hist[index];
+     }
+
+     inline uint32_t &operator[] (uint32_t index)
+     {
+         return m_hist[index];
+     }
+
+     // Increments the counter at index (unchecked).
+     inline void inc(uint32_t index)
+     {
+         m_hist[index]++;
+     }
+
+     // Sum of all counters, accumulated in 64 bits.
+     uint64_t get_total() const
+     {
+         uint64_t total = 0;
+         for (uint32_t i = 0; i < m_hist.size(); ++i)
+             total += m_hist[i];
+         return total;
+     }
+
+     // Returns sum over all non-zero counters of count * -log2(count / total), i.e. the total
+     // number of bits (not bits per symbol) an ideal entropy coder would need for the counted
+     // symbols. Returns 0 for an empty or all-zero histogram.
+     double get_entropy() const
+     {
+         double total = static_cast<double>(get_total());
+         if (total == 0.0f)
+             return 0.0f;
+
+         const double inv_total = 1.0f / total;
+         // Multiplying a natural log by this converts it to a negated base-2 log.
+         const double neg_inv_log2 = -1.0f / log(2.0f);
+
+         double e = 0.0f;
+         for (uint32_t i = 0; i < m_hist.size(); i++)
+             if (m_hist[i])
+                 e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast<double>(m_hist[i]);
+
+         return e;
+     }
+ };
+
+ // Key / symbol-index pair. Arrays of these are taken by canonical_huffman_radix_sort_syms()
+ // and canonical_huffman_calculate_minimum_redundancy().
+ struct sym_freq
+ {
+     uint16_t m_key;
+     uint16_t m_sym_index;
+ };
+
+ // Canonical Huffman helper routines. Only the declarations are visible here; the definitions
+ // are elsewhere.
+ // NOTE(review): going by the names, these presumably (1) radix-sort the symbols by m_key using
+ // two buffers and return the one holding the result, (2) compute minimum-redundancy code
+ // lengths in place, and (3) limit code lengths to max_code_size - confirm against the definitions.
+ sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1);
+ void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms);
+ void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size);
+
+ class huffman_encoding_table
+ {
+ public:
+ huffman_encoding_table()
+ {
+ }
+
+ void clear()
+ {
+ clear_vector(m_codes);
+ clear_vector(m_code_sizes);
+ }
+
+ bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize)
+ {
+ return init(h.size(), &h[0], max_code_size);
+ }
+
+ bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size);
+ bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size);
+
+ inline const uint16_vec &get_codes() const { return m_codes; }
+ inline const uint8_vec &get_code_sizes() const { return m_code_sizes; }
+
+ uint32_t get_total_used_codes() const
+ {
+ for (int i = static_cast