# diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..b13fa73 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,88 @@
#
# Build configuration for the basisu command-line tool (GCC/Clang style flags).
#
# Options:
#   BUILD_X64  - ON (default) builds 64-bit; OFF adds -m32.
#   STATIC     - ON links the executable fully statically; OFF (default) adds
#                an rpath of "." to the executable.

# Must come before project() so policy defaults are in place when the
# languages are enabled. The "...max" part is ignored by CMake < 3.12 and lets
# newer CMake releases accept this file without lowering their policy floor.
cmake_minimum_required(VERSION 3.0...3.27)
project(basisu LANGUAGES C CXX)

option(BUILD_X64 "build 64-bit" TRUE)
option(STATIC "static linking" FALSE)

message(STATUS "Initial BUILD_X64=${BUILD_X64}")
message(STATUS "Initial CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}")

# Default to Release, but only for single-config generators; multi-config
# generators (Visual Studio, Xcode) ignore CMAKE_BUILD_TYPE.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release)
endif()

message(STATUS "${PROJECT_NAME} build type: ${CMAKE_BUILD_TYPE}")

if(BUILD_X64)
  message(STATUS "Building 64-bit")
else()
  message(STATUS "Building 32-bit")
endif()

# Flags valid for both C and C++ translation units.
set(GCC_COMPILE_FLAGS "-fvisibility=hidden -fPIC -fopenmp -fno-strict-aliasing -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 -Wall -Wextra -Wno-unused-local-typedefs -Wno-unused-value -Wno-unused-parameter -Wno-unused-but-set-variable -Wno-unused-variable")

# Flags that are only meaningful for C++; passing them to the C compiler
# produces "valid for C++ but not for C" warnings.
set(GCC_CXX_ONLY_FLAGS "-std=c++11 -fvisibility-inlines-hidden -Wno-reorder")

if(NOT BUILD_X64)
  set(GCC_COMPILE_FLAGS "${GCC_COMPILE_FLAGS} -m32")
endif()

# GCC_LINK_FLAGS is not set in this file; it expands to nothing unless the
# caller supplies it (e.g. -DGCC_LINK_FLAGS=...).
if(STATIC)
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -static-libgcc -static-libstdc++ -static")
else()
  # Comma form keeps "-rpath" and its argument together regardless of how the
  # driver orders the remaining inputs.
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -Wl,-rpath,.")
endif()

# Base flags apply to every configuration, so they are added exactly once here
# (the per-config variables are concatenated onto these by CMake). Existing
# user-provided flags are preserved rather than overwritten.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${GCC_COMPILE_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COMPILE_FLAGS} ${GCC_CXX_ONLY_FLAGS}")

# Per-configuration additions. The C variables are derived from the C
# variables (not the C++ ones).
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -g -D_DEBUG")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -D_DEBUG")
# Sources compiled into the basisu executable. COMMON_SRC_LIST is not defined
# in this file; it expands to nothing unless the caller provides it.
set(BASISU_SRC_LIST ${COMMON_SRC_LIST}
  basisu_backend.cpp
  basisu_basis_file.cpp
  basisu_comp.cpp
  basisu_enc.cpp
  basisu_etc.cpp
  basisu_frontend.cpp
  basisu_global_selector_palette_helpers.cpp
  basisu_gpu_texture.cpp
  basisu_pvrtc1_4.cpp
  basisu_resampler.cpp
  basisu_resample_filters.cpp
  basisu_ssim.cpp
  basisu_tool.cpp
  lodepng.cpp
  detex/decompress_bc.c
  detex/decompress_bc7.c
  detex/decompress_eac.c
  transcoder/basisu_transcoder.cpp
)

# The executable is written into a directory inside the source tree.
if(APPLE)
  set(BIN_DIRECTORY "bin_osx")
else()
  set(BIN_DIRECTORY "bin")
endif()

add_executable(basisu ${BASISU_SRC_LIST})

# Scope the output location to this target and anchor it at this project's
# root, so the path stays correct when the project is added as a subdirectory.
set_target_properties(basisu PROPERTIES
  RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/${BIN_DIRECTORY}"
)

# Link the math library and the platform thread library. Threads::Threads only
# exists on CMake >= 3.1, so fall back to the legacy variable otherwise.
find_package(Threads REQUIRED)
if(TARGET Threads::Threads)
  target_link_libraries(basisu PRIVATE m Threads::Threads)
else()
  target_link_libraries(basisu PRIVATE m ${CMAKE_THREAD_LIBS_INIT})
endif()

# Strip the Release binary after linking. This is keyed on CMAKE_BUILD_TYPE and
# therefore only takes effect with single-config generators.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  # Prefer the strip tool CMake located for the active toolchain.
  if(CMAKE_STRIP)
    set(BASISU_STRIP_TOOL "${CMAKE_STRIP}")
  else()
    set(BASISU_STRIP_TOOL strip)
  endif()

  if(APPLE)
    set(BASISU_STRIP_ARGS -X -x)
  else()
    set(BASISU_STRIP_ARGS -g -X -x)
  endif()

  # $<TARGET_FILE:...> resolves to wherever the executable was actually
  # written, instead of assuming a hard-coded path.
  add_custom_command(TARGET basisu POST_BUILD
    COMMAND ${BASISU_STRIP_TOOL} ${BASISU_STRIP_ARGS} $<TARGET_FILE:basisu>
    VERBATIM
  )
endif()

# --- end of CMakeLists.txt; the text below is the start of the basisu.sln part of the same change set, kept verbatim ---
diff --git a/basisu.sln b/basisu.sln new file mode 100644 index 0000000..dda6ffb --- /dev/null +++ b/basisu.sln @@ -0,0 +1,31 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.28803.202 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "basisu", "basisu.vcxproj", "{59586A07-8E7E-411D-BC3D-387E039AA423}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {59586A07-8E7E-411D-BC3D-387E039AA423}.Debug|x64.ActiveCfg = Debug|x64 + {59586A07-8E7E-411D-BC3D-387E039AA423}.Debug|x64.Build.0 = Debug|x64 + {59586A07-8E7E-411D-BC3D-387E039AA423}.Debug|x86.ActiveCfg = Debug|Win32 + {59586A07-8E7E-411D-BC3D-387E039AA423}.Debug|x86.Build.0 = 
Debug|Win32 + {59586A07-8E7E-411D-BC3D-387E039AA423}.Release|x64.ActiveCfg = Release|x64 + {59586A07-8E7E-411D-BC3D-387E039AA423}.Release|x64.Build.0 = Release|x64 + {59586A07-8E7E-411D-BC3D-387E039AA423}.Release|x86.ActiveCfg = Release|Win32 + {59586A07-8E7E-411D-BC3D-387E039AA423}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {5E583429-7830-4B3A-9DDE-F01B115CE0D8} + EndGlobalSection +EndGlobal diff --git a/basisu.vcxproj b/basisu.vcxproj new file mode 100644 index 0000000..b3a0b32 --- /dev/null +++ b/basisu.vcxproj @@ -0,0 +1,208 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {59586A07-8E7E-411D-BC3D-387E039AA423} + basisu + 10.0 + + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)\bin + + + $(SolutionDir)\bin + + + $(SolutionDir)\bin + + + $(SolutionDir)\bin + + + + Level4 + Disabled + true + false + true + + + + + Console + + + + + Level4 + Disabled + true + false + true + + + true + + + Console + + + + + Level4 + MaxSpeed + true + true + true + false + true + + + NDEBUG;_MBCS;%(PreprocessorDefinitions) + false + AnySuitable + + + true + true + Console + + + + + Level4 + MaxSpeed + true + true + true + false + true + + + NDEBUG;_MBCS;%(PreprocessorDefinitions) + false + true + AnySuitable + + + true + true + Console + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/basisu.vcxproj.filters b/basisu.vcxproj.filters new file mode 100644 index 0000000..c023f4b --- /dev/null +++ b/basisu.vcxproj.filters @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + 
+ detex + + + detex + + + detex + + + transcoder + + + + + + + + + + + + + + + + + + + detex + + + detex + + + detex + + + detex + + + transcoder + + + transcoder + + + transcoder + + + transcoder + + + + + transcoder + + + transcoder + + + transcoder + + + + + {7a54aaad-1d10-4bdf-b8e9-c14ed2263ed8} + + + {977e9455-f354-422a-b698-08778483328c} + + + \ No newline at end of file diff --git a/basisu_backend.cpp b/basisu_backend.cpp new file mode 100644 index 0000000..36b6f18 --- /dev/null +++ b/basisu_backend.cpp @@ -0,0 +1,1424 @@ +// basisu_backend.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// TODO: This code originally supported full ETC1 and ETC1S, so there's some legacy stuff in here. 
+// +#include "basisu_backend.h" + +#define DISABLE_CODEBOOK_REORDERING (0) +#define BASISU_BACKEND_VERIFY(c) verify(c, __LINE__); + +namespace basisu +{ + const uint32_t TOTAL_MACROBLOCK_DIFF_BITS = 4; + const uint32_t TOTAL_MACROBLOCK_FLIP_BITS = 4; + + // TODO + static void verify(bool condition, int line) + { + if (!condition) + { + fprintf(stderr, "basisu_backend: verify() failed at line %i!\n", line); + abort(); + } + } + + basisu_backend::basisu_backend() + { + clear(); + } + + void basisu_backend::clear() + { + m_pFront_end = NULL; + m_params.clear(); + m_output.clear(); + } + + void basisu_backend::init(basisu_frontend *pFront_end, basisu_backend_params ¶ms, const basisu_backend_slice_desc_vec &slice_descs, const basist::etc1_global_selector_codebook *pGlobal_sel_codebook) + { + m_pFront_end = pFront_end; + m_params = params; + m_slices = slice_descs; + m_pGlobal_sel_codebook = pGlobal_sel_codebook; + + debug_printf("basisu_backend::Init: Slices: %u, ETC1S: %u, DeltaSelectorRDOQualityThresh: %f, UseGlobalSelCodebook: %u, GlobalSelCodebookPalBits: %u, GlobalSelCodebookModBits: %u, Use hybrid selector codebooks: %u\n", + m_slices.size(), + params.m_etc1s, + params.m_delta_selector_rdo_quality_thresh, + params.m_use_global_sel_codebook, + params.m_global_sel_codebook_pal_bits, + params.m_global_sel_codebook_mod_bits, + params.m_use_hybrid_sel_codebooks); + + for (uint32_t i = 0; i < m_slices.size(); i++) + { + debug_printf("Slice: %u, OrigWidth: %u, OrigHeight: %u, Width: %u, Height: %u, NumBlocksX: %u, NumBlocksY: %u, NumMacroBlocksX: %u, NumMacroBlocksY: %u, FirstBlockIndex: %u\n", + i, + m_slices[i].m_orig_width, m_slices[i].m_orig_height, + m_slices[i].m_width, m_slices[i].m_height, + m_slices[i].m_num_blocks_x, m_slices[i].m_num_blocks_y, + m_slices[i].m_num_macroblocks_x, m_slices[i].m_num_macroblocks_y, + m_slices[i].m_first_block_index); + } + } + + void basisu_backend::create_endpoint_palette() + { + const basisu_frontend &r = *m_pFront_end; + + 
m_endpoint_palette.resize(r.get_total_endpoint_clusters()); + for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++) + { + etc1_endpoint_palette_entry &e = m_endpoint_palette[i]; + + e.m_color5_valid = r.get_endpoint_cluster_color_is_used(i, false); + if (e.m_color5_valid) + { + e.m_color5 = r.get_endpoint_cluster_unscaled_color(i, false); + e.m_inten5 = r.get_endpoint_cluster_inten_table(i, false); + } + else + { + BASISU_BACKEND_VERIFY(false); + } + } + } + + void basisu_backend::create_selector_palette() + { + const basisu_frontend &r = *m_pFront_end; + + m_selector_palette.resize(r.get_total_selector_clusters()); + + if (m_params.m_use_global_sel_codebook) + { + m_global_selector_palette_desc.resize(r.get_total_selector_clusters()); + + for (int i = 0; i < static_cast(r.get_total_selector_clusters()); i++) + { + basist::etc1_selector_palette_entry &selector_pal_entry = m_selector_palette[i]; + + etc1_global_selector_cb_entry_desc &pal_entry_desc = m_global_selector_palette_desc[i]; + pal_entry_desc.m_pal_index = r.get_selector_cluster_global_selector_entry_ids()[i].m_palette_index; + pal_entry_desc.m_mod_index = r.get_selector_cluster_global_selector_entry_ids()[i].m_modifier.get_index(); + + pal_entry_desc.m_was_used = true; + if (m_params.m_use_hybrid_sel_codebooks) + pal_entry_desc.m_was_used = r.get_selector_cluster_uses_global_cb_vec()[i]; + + if (pal_entry_desc.m_was_used) + { + const etc_block &selector_bits = r.get_selector_cluster_selector_bits(i); + (void)selector_bits; + + basist::etc1_selector_palette_entry global_pal_entry(m_pGlobal_sel_codebook->get_entry(r.get_selector_cluster_global_selector_entry_ids()[i])); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + selector_pal_entry(x, y) = global_pal_entry(x, y); + + assert(selector_bits.get_selector(x, y) == global_pal_entry(x, y)); + } + } + } + else + { + const etc_block &selector_bits = r.get_selector_cluster_selector_bits(i); + + for (uint32_t y = 0; y < 
4; y++) + for (uint32_t x = 0; x < 4; x++) + selector_pal_entry[y * 4 + x] = static_cast(selector_bits.get_selector(x, y)); + } + } + } + else + { + for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++) + { + basist::etc1_selector_palette_entry &s = m_selector_palette[i]; + + const etc_block &selector_bits = r.get_selector_cluster_selector_bits(i); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + s[y * 4 + x] = static_cast(selector_bits.get_selector(x, y)); + } + } + } + } + } + + // endpoint palette + // 5:5:5 and predicted 4:4:4 colors, 1 or 2 3-bit intensity table indices + // selector palette + // 4x4 2-bit selectors + + // per-macroblock: + // 4 diff bits + // 4 flip bits + // Endpoint template index, 1-8 endpoint indices + // Alternately, if no template applies, we can send 4 ETC1S bits followed by 4-8 endpoint indices + // 4 selector indices + + float basisu_backend::selector_zeng_similarity_func(uint32_t index_a, uint32_t index_b, void *pContext) + { + basisu_backend& backend = *static_cast(pContext); + + const basist::etc1_selector_palette_entry &a = backend.m_selector_palette[index_a]; + const basist::etc1_selector_palette_entry &b = backend.m_selector_palette[index_b]; + + float total = static_cast(a.calc_hamming_dist(b)); + + float weight = 1.0f - clamp(total * (1.0f / 32.0f), 0.0f, 1.0f); + return weight; + } + + void basisu_backend::create_macroblocks() + { + const basisu_frontend &r = *m_pFront_end; + + m_slice_macroblocks.resize(m_slices.size()); + + uint_vec all_endpoint_indices; + uint_vec all_selector_indices; + + uint32_t total_template_exceptions = 0; + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + + const uint32_t width = m_slices[slice_index].m_width; + const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const 
uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + const uint32_t num_macroblocks_x = m_slices[slice_index].m_num_macroblocks_x; + const uint32_t num_macroblocks_y = m_slices[slice_index].m_num_macroblocks_y; + + m_slice_macroblocks[slice_index].resize(num_macroblocks_x, num_macroblocks_y); + + for (uint32_t macroblock_y = 0; macroblock_y < num_macroblocks_y; macroblock_y++) + { + const uint32_t y = macroblock_y * 2; + + const int x_start = (macroblock_y & 1) ? (num_macroblocks_x - 1) : 0; + const int x_end = (macroblock_y & 1) ? -1 : num_macroblocks_x; + const int x_dir = (macroblock_y & 1) ? -1 : 1; + + for (int macroblock_x = x_start; macroblock_x != x_end; macroblock_x += x_dir) + { + const uint32_t x = macroblock_x * 2; + + uint32_t block_indices[4]; + block_indices[0] = first_block_index + x + y * num_blocks_x; + block_indices[1] = first_block_index + minimum(x + 1, num_blocks_x - 1) + y * num_blocks_x; + block_indices[2] = first_block_index + x + minimum(y + 1, num_blocks_y - 1) * num_blocks_x; + block_indices[3] = first_block_index + minimum(x + 1, num_blocks_x - 1) + minimum(y + 1, num_blocks_y - 1) * num_blocks_x; + + etc_block macroblock[4]; + for (uint32_t i = 0; i < 4; i++) + macroblock[i] = r.get_output_block(block_indices[i]); + + uint32_t flip_bits = 0; + uint32_t diff_bits = 0; + for (uint32_t k = 0; k < 4; k++) + { + flip_bits = (flip_bits << 1) | (macroblock[k].get_flip_bit() ? 1 : 0); + diff_bits = (diff_bits << 1) | (macroblock[k].get_diff_bit() ? 
1 : 0); + } + + etc1_macroblock m; + + m.m_diff_bits = static_cast(diff_bits); + m.m_flip_bits = static_cast(flip_bits); + + uint_vec endpoint_indices; + + for (uint32_t i = 0; i < 4; i++) + { + endpoint_indices.push_back(r.get_subblock_endpoint_cluster_index(block_indices[i], 0)); + endpoint_indices.push_back(r.get_subblock_endpoint_cluster_index(block_indices[i], 1)); + + if (macroblock[i].get_diff_bit()) + { + uint32_t e0 = r.get_subblock_endpoint_cluster_index(block_indices[i], 0); + uint32_t e1 = r.get_subblock_endpoint_cluster_index(block_indices[i], 1); + + color_rgba c0(r.get_endpoint_cluster_unscaled_color(e0, false)); + color_rgba c1(r.get_endpoint_cluster_unscaled_color(e1, false)); + + etc_block test_block; + if (!test_block.set_block_color5_check(c0, c1)) + { + BASISU_BACKEND_VERIFY(0); + } + } + + m.m_selector_indices.push_back(r.get_block_selector_cluster_index(block_indices[i])); + } + + int_vec endpoint_palette; + uint8_t endpoint_palette_indices[8]; + uint32_t n = 0; + + for (uint32_t ty = 0; ty < 2; ty++) + { + for (uint32_t tx = 0; tx < 2; tx++) + { + for (uint32_t t = 0; t < 2; t++) + { + int endpoint_index = r.get_subblock_endpoint_cluster_index(block_indices[tx + ty * 2], t); + + uint32_t p; + for (p = 0; p < endpoint_palette.size(); p++) + if (endpoint_palette[p] == endpoint_index) + break; + + if (p >= endpoint_palette.size()) + { + endpoint_palette.push_back(endpoint_index); + } + + endpoint_palette_indices[n++] = static_cast(p); + } + } + } + + uint32_t t; + for (t = 0; t < basist::TOTAL_ENDPOINT_INDEX_TEMPLATES; t++) + { + if (memcmp(endpoint_palette_indices, basist::g_endpoint_index_templates[t].m_local_indices, 8) == 0) + break; + } + + // TODO: There shouldn't be any exceptions in ETC1S + if (t == basist::TOTAL_ENDPOINT_INDEX_TEMPLATES) + { + endpoint_palette.resize(0); + n = 0; + clear_obj(endpoint_palette_indices); + + for (uint32_t i = 0; i < 4; i++) + { + uint32_t endpoint_index0 = 
r.get_subblock_endpoint_cluster_index(block_indices[i], 0); + uint32_t endpoint_index1 = r.get_subblock_endpoint_cluster_index(block_indices[i], 1); + + endpoint_palette_indices[n++] = static_cast(endpoint_palette.size()); + endpoint_palette.push_back(endpoint_index0); + + if (endpoint_index0 != endpoint_index1) + { + endpoint_palette.push_back(endpoint_index1); + } + + endpoint_palette_indices[n++] = static_cast(endpoint_palette.size() - 1); + } + + for (t = 0; t < basist::TOTAL_ENDPOINT_INDEX_TEMPLATES; t++) + { + if (memcmp(endpoint_palette_indices, basist::g_endpoint_index_templates[t].m_local_indices, 8) == 0) + break; + } + + BASISU_BACKEND_VERIFY(t != basist::TOTAL_ENDPOINT_INDEX_TEMPLATES); + + total_template_exceptions++; + } + + m.m_template_index = t; + m.m_endpoint_indices = endpoint_palette; + + for (uint32_t i = 0; i < 4; i++) + { + if (!macroblock[i].get_diff_bit()) + continue; + + uint32_t l0 = basist::g_endpoint_index_templates[t].m_local_indices[i * 2 + 0]; + uint32_t l1 = basist::g_endpoint_index_templates[t].m_local_indices[i * 2 + 1]; + + uint32_t e0 = endpoint_palette[l0]; + uint32_t e1 = endpoint_palette[l1]; + + //uint32_t e0 = r.get_subblock_endpoint_cluster_index(block_indices[i], 0); + //uint32_t e1 = r.get_subblock_endpoint_cluster_index(block_indices[i], 1); + + color_rgba c0(r.get_endpoint_cluster_unscaled_color(e0, false)); + color_rgba c1(r.get_endpoint_cluster_unscaled_color(e1, false)); + + etc_block test_block; + if (!test_block.set_block_color5_check(c0, c1)) + { + BASISU_BACKEND_VERIFY(0); + } + } + + m_slice_macroblocks[slice_index](macroblock_x, macroblock_y) = m; + + for (uint32_t i = 0; i < endpoint_palette.size(); i++) + all_endpoint_indices.push_back(endpoint_palette[i]); + + for (uint32_t i = 0; i < m.m_selector_indices.size(); i++) + all_selector_indices.push_back(m.m_selector_indices[i]); + + } // macroblock_x + + } // macroblock_y + } // slice + + debug_printf("Total template exception: %u out of %u %3.1f%%\n", 
total_template_exceptions, get_total_macroblocks(), total_template_exceptions * 100.0f / get_total_macroblocks()); + +#if DISABLE_CODEBOOK_REORDERING + m_endpoint_remap_table_old_to_new.resize(r.get_total_endpoint_clusters()); + for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++) + m_endpoint_remap_table_old_to_new[i] = i; + + m_selector_remap_table_old_to_new.resize(r.get_total_selector_clusters()); + for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++) + m_selector_remap_table_old_to_new[i] = i; +#else + { + //create_zeng_reorder_table(r.get_total_endpoint_clusters(), all_endpoint_indices.size(), all_endpoint_indices.get_ptr(), m_endpoint_remap_table_old_to_new, NULL, NULL, 0.0f); + + palette_index_reorderer reorderer; + reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0); + m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); + } + + // Maps old to new selector indices + { + //const float selector_similarity_func_weight = 1.0f; + //create_zeng_reorder_table(r.get_total_selector_clusters(), all_selector_indices.size(), all_selector_indices.get_ptr(), m_selector_remap_table_old_to_new, selector_zeng_similarity_func, this, selector_similarity_func_weight); + //create_zeng_reorder_table(r.get_total_selector_clusters(), all_selector_indices.size(), all_selector_indices.get_ptr(), m_selector_remap_table_old_to_new, NULL, NULL, 0.0f); + + palette_index_reorderer reorderer; + reorderer.init((uint32_t)all_selector_indices.size(), &all_selector_indices[0], r.get_total_selector_clusters(), nullptr, nullptr, 0); + m_selector_remap_table_old_to_new = reorderer.get_remap_table(); + } + +#endif + m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters()); + for (uint32_t i = 0; i < m_endpoint_remap_table_old_to_new.size(); i++) + m_endpoint_remap_table_new_to_old[m_endpoint_remap_table_old_to_new[i]] = i; + + // Maps new to old selector indices + 
m_selector_remap_table_new_to_old.resize(r.get_total_selector_clusters()); + for (uint32_t i = 0; i < m_selector_remap_table_old_to_new.size(); i++) + m_selector_remap_table_new_to_old[m_selector_remap_table_old_to_new[i]] = i; + + if (!m_params.m_use_global_sel_codebook) + optimize_selector_palette_order(all_selector_indices); + } + + void basisu_backend::optimize_selector_palette_order(const uint_vec &all_selector_indices) + { + const basisu_frontend &r = *m_pFront_end; + + uint_vec new_selector_hist(r.get_total_selector_clusters()); + for (uint32_t i = 0; i < all_selector_indices.size(); i++) + new_selector_hist[m_selector_remap_table_old_to_new[all_selector_indices[i]]]++; + + uint32_t max_hist_value = 0; + uint32_t max_hist_value_index = 0; + for (uint32_t i = 0; i < new_selector_hist.size(); i++) + { + if (new_selector_hist[i] > max_hist_value) + { + max_hist_value = new_selector_hist[i]; + max_hist_value_index = i; + } + } + + uint_vec optimized_selector_order; + + const uint32_t N = 32; + for (uint32_t i = 0; i < r.get_total_selector_clusters(); i += N) + { + const uint32_t e = minimum(i + N, r.get_total_selector_clusters()); + + if (do_excl_ranges_overlap(i, e, static_cast(max_hist_value_index) - 16, static_cast(max_hist_value_index) + 16)) + { + for (uint32_t j = i; j < e; j++) + optimized_selector_order.push_back(j); + continue; + } + + basist::etc1_selector_palette_entry prev_entry(m_selector_palette[m_selector_remap_table_new_to_old[i]]); + + optimized_selector_order.push_back(i); + + uint_vec remaining_entries; + for (uint32_t j = i + 1; j < e; j++) + remaining_entries.push_back(j); + + for (uint32_t j = i + 1; j < e; j++) + { + uint32_t best_dist = UINT32_MAX; + uint32_t best_entry = 0; + + for (uint32_t k = 0; k < remaining_entries.size(); k++) + { + uint32_t dist = prev_entry.calc_hamming_dist(m_selector_palette[m_selector_remap_table_new_to_old[remaining_entries[k]]]); + if (dist < best_dist) + { + best_dist = dist; + best_entry = k; + } + } + + 
optimized_selector_order.push_back(remaining_entries[best_entry]); + + prev_entry = m_selector_palette[m_selector_remap_table_new_to_old[remaining_entries[best_entry]]]; + + remaining_entries.erase(remaining_entries.begin() + best_entry); + } + } + + uint_vec temp(r.get_total_selector_clusters()); + for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++) + temp[i] = m_selector_remap_table_new_to_old[optimized_selector_order[i]]; + + m_selector_remap_table_new_to_old = temp; + + for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++) + m_selector_remap_table_old_to_new[m_selector_remap_table_new_to_old[i]] = i; + } + + bool basisu_backend::encode_image() + { + const basisu_frontend &r = *m_pFront_end; + + uint_vec endpoint_histogram(r.get_total_endpoint_clusters() * 2); + uint_vec selector_histogram(r.get_total_selector_clusters() * 2); + uint_vec actual_selector_histogram(r.get_total_selector_clusters()); + + // TODO: Choose the size in an intelligent way (try different sizes?) 
+ const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64; + basist::approx_move_to_front selector_history_buf(MAX_SELECTOR_HISTORY_BUF_SIZE); + histogram selector_history_buf_histogram(MAX_SELECTOR_HISTORY_BUF_SIZE); + + uint32_t total_used_selector_history_buf = 0; + + histogram delta_endpoint_histogram(r.get_total_endpoint_clusters() * 2); + histogram delta_selector_histogram(MAX_SELECTOR_HISTORY_BUF_SIZE + r.get_total_selector_clusters() * 2 + 1); + histogram template_histogram(basist::TOTAL_ENDPOINT_INDEX_TEMPLATES); + + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); + + const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = r.get_total_selector_clusters() * 2; + const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + MAX_SELECTOR_HISTORY_BUF_SIZE; + + histogram selector_history_buf_rle_histogram(1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); + + uint32_t total_selector_indices_remapped = 0; + + std::vector selector_syms(m_slices.size()); + + m_output.m_slice_image_crcs.resize(m_slices.size()); + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + const uint32_t width = m_slices[slice_index].m_width; + const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + const uint32_t num_macroblocks_x = m_slices[slice_index].m_num_macroblocks_x; + const uint32_t num_macroblocks_y = m_slices[slice_index].m_num_macroblocks_y; + + selector_history_buf.reset(); + + int prev_endpoint_index = 0; + int prev_selector_index = 0; + int selector_history_buf_rle_count = 0; + + gpu_image gi; + gi.init(cETC1, width, height); + + for 
(uint32_t macroblock_y = 0; macroblock_y < num_macroblocks_y; macroblock_y++) + { + const uint32_t y = macroblock_y * 2; + + const int x_start = (macroblock_y & 1) ? (num_macroblocks_x - 1) : 0; + const int x_end = (macroblock_y & 1) ? -1 : num_macroblocks_x; + const int x_dir = (macroblock_y & 1) ? -1 : 1; + + for (int macroblock_x = x_start; macroblock_x != x_end; macroblock_x += x_dir) + { + const uint32_t x = macroblock_x * 2; + + uint32_t block_indices[4]; + block_indices[0] = first_block_index + x + y * num_blocks_x; + block_indices[1] = first_block_index + minimum(x + 1, num_blocks_x - 1) + y * num_blocks_x; + block_indices[2] = first_block_index + x + minimum(y + 1, num_blocks_y - 1) * num_blocks_x; + block_indices[3] = first_block_index + minimum(x + 1, num_blocks_x - 1) + minimum(y + 1, num_blocks_y - 1) * num_blocks_x; + + etc1_macroblock &m = m_slice_macroblocks[slice_index](macroblock_x, macroblock_y); + + template_histogram.inc(m.m_template_index); + + for (uint32_t i = 0; i < m.m_endpoint_indices.size(); i++) + { + int idx = m_endpoint_remap_table_old_to_new[m.m_endpoint_indices[i]]; + + int delta_idx = idx - prev_endpoint_index; + prev_endpoint_index = idx; + + m.m_endpoint_indices[i] = idx; + m.m_endpoint_delta_indices.push_back(delta_idx); + + delta_endpoint_histogram.inc(delta_idx + r.get_total_endpoint_clusters()); + + endpoint_histogram[r.get_total_endpoint_clusters() + delta_idx]++; + } + + for (uint32_t i = 0; i < m.m_selector_indices.size(); i++) + { + int idx = m_selector_remap_table_old_to_new[m.m_selector_indices[i]]; + + int selector_history_buf_index = -1; + +#if 1 + if (m_params.m_delta_selector_rdo_quality_thresh > 0.0f) + { + const pixel_block &src_pixels = r.get_source_pixel_block(block_indices[i]); + + etc_block etc_blk(r.get_output_block(block_indices[i])); + + color_rgba etc_blk_unpacked[16]; + unpack_etc1(etc_blk, etc_blk_unpacked); + + uint64_t cur_err = 0; + for (uint32_t p = 0; p < 16; p++) + cur_err += 
color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); + + uint64_t best_trial_err = UINT64_MAX; + int best_trial_idx = 0; + uint32_t best_trial_history_buf_idx = 0; + + //int cur_delta_idx = idx - prev_selector_index; + + etc_block best_trial_etc_block; + + const float SELECTOR_REMAP_THRESH = maximum(1.0f, m_params.m_delta_selector_rdo_quality_thresh); //2.5f; + + for (uint32_t j = 0; j < selector_history_buf.size(); j++) + { + int trial_idx = selector_history_buf[j]; + + for (uint32_t sy = 0; sy < 4; sy++) + for (uint32_t sx = 0; sx < 4; sx++) + etc_blk.set_selector(sx, sy, m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](sx, sy)); + + unpack_etc1(etc_blk, etc_blk_unpacked); + + uint64_t trial_err = 0; + for (uint32_t p = 0; p < 16; p++) + trial_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); + + if (trial_err <= cur_err * SELECTOR_REMAP_THRESH) + { + //int trial_delta_idx = trial_idx - prev_selector_index; + + if (trial_err < best_trial_err) + { + best_trial_err = trial_err; + best_trial_idx = trial_idx; + best_trial_etc_block = etc_blk; + best_trial_history_buf_idx = j; + } + } + } + + if (best_trial_err != UINT64_MAX) + { + idx = best_trial_idx; + + //total_selector_indices_remapped++; + + total_used_selector_history_buf++; + + selector_history_buf_index = best_trial_history_buf_idx; + + selector_history_buf_histogram.inc(best_trial_history_buf_idx); + } + } +#endif + +#if 1 + if ((selector_history_buf_index < 0) && (m_params.m_delta_selector_rdo_quality_thresh > 0.0f)) + { + const pixel_block &src_pixels = r.get_source_pixel_block(block_indices[i]); + + etc_block etc_blk(r.get_output_block(block_indices[i])); + + color_rgba etc_blk_unpacked[16]; + unpack_etc1(etc_blk, etc_blk_unpacked); + + uint64_t cur_err = 0; + for (uint32_t p = 0; p < 16; p++) + cur_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], 
etc_blk_unpacked[p], false); + + uint64_t best_trial_err = UINT64_MAX; + int best_trial_idx = 0; + + int cur_delta_idx = idx - prev_selector_index; + + etc_block best_trial_etc_block; + + const float SELECTOR_REMAP_THRESH = maximum(1.0f, m_params.m_delta_selector_rdo_quality_thresh); //2.5f; + + for (int d = -cur_delta_idx + 1; d < cur_delta_idx; d++) + { + int trial_idx = prev_selector_index + d; + if (trial_idx < 0) + continue; + else if (trial_idx >= static_cast(r.get_total_selector_clusters())) + continue; + + if (trial_idx == idx) + continue; + + //etc_blk.set_raw_selector_bits(r.get_selector_cluster_selector_bits(m_selector_remap_table_new_to_old[trial_idx]).get_raw_selector_bits()); + for (uint32_t sy = 0; sy < 4; sy++) + for (uint32_t sx = 0; sx < 4; sx++) + etc_blk.set_selector(sx, sy, m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](sx, sy)); + + unpack_etc1(etc_blk, etc_blk_unpacked); + + uint64_t trial_err = 0; + for (uint32_t p = 0; p < 16; p++) + trial_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); + + if (trial_err < cur_err * SELECTOR_REMAP_THRESH) + { + int trial_delta_idx = trial_idx - prev_selector_index; + + const int N = r.get_total_selector_clusters() / 4; + if (iabs(trial_delta_idx) < (uint32_t)N) + { + float f = iabs(trial_delta_idx) / float(N); + + f = powf(f, 2.0f); + + trial_err = static_cast(trial_err * lerp(.4f, 1.0f, f)); + } + + if (trial_err < best_trial_err) + { + best_trial_err = trial_err; + best_trial_idx = trial_idx; + best_trial_etc_block = etc_blk; + } + } + } + + if (best_trial_err != UINT64_MAX) + { + idx = best_trial_idx; + + total_selector_indices_remapped++; + } + } // if (m_params.m_delta_selector_rdo_quality_thresh >= 1.0f) +#endif + + int delta_idx = idx - prev_selector_index; + prev_selector_index = idx; + + m.m_selector_indices[i] = m_selector_remap_table_new_to_old[idx]; + + if ((selector_history_buf_rle_count) && (selector_history_buf_index 
!= 0)) + { + if (selector_history_buf_rle_count >= (int)SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH) + { + selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX); + selector_syms[slice_index].push_back(selector_history_buf_rle_count); + + int run_sym = selector_history_buf_rle_count - SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + if (run_sym >= ((int)SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1)) + selector_history_buf_rle_histogram.inc(SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1); + else + selector_history_buf_rle_histogram.inc(run_sym); + + delta_selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX); + } + else + { + for (int k = 0; k < selector_history_buf_rle_count; k++) + { + uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0; + + selector_syms[slice_index].push_back(sym_index); + + delta_selector_histogram.inc(sym_index); + } + } + + selector_history_buf_rle_count = 0; + } + + if (selector_history_buf_index >= 0) + { + if (selector_history_buf_index == 0) + selector_history_buf_rle_count++; + else + { + uint32_t delta_indices_sym = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + selector_history_buf_index; + + selector_syms[slice_index].push_back(delta_indices_sym); + + delta_selector_histogram.inc(delta_indices_sym); + } + } + else + { + uint32_t delta_indices_sym = delta_idx + r.get_total_selector_clusters(); + + selector_syms[slice_index].push_back(delta_indices_sym); + + delta_selector_histogram.inc(delta_indices_sym); + } + + m.m_selector_delta_indices.push_back(delta_idx); + m.m_selector_history_buf_indices.push_back(selector_history_buf_index); + + actual_selector_histogram[idx]++; + selector_histogram[r.get_total_selector_clusters() + delta_idx]++; + + if (selector_history_buf_index < 0) + selector_history_buf.add(idx); + else if (selector_history_buf.size()) + selector_history_buf.use(selector_history_buf_index); + } + + for (uint32_t i = 0; i < 4; i++) + { + const uint32_t block_x = macroblock_x * 2 + (i & 1); + const uint32_t 
block_y = macroblock_y * 2 + (i / 2); + if ((block_x >= gi.get_blocks_x()) || (block_y >= gi.get_blocks_y())) + continue; + + etc_block &output_block = *(etc_block *)gi.get_block_ptr(block_x, block_y); + + output_block.set_diff_bit(((m.m_diff_bits << i) & 8) != 0); + output_block.set_flip_bit(((m.m_flip_bits << i) & 8) != 0); + + const basist::endpoint_index_template &t = basist::g_endpoint_index_templates[m.m_template_index]; + + uint32_t e0 = m_endpoint_remap_table_new_to_old[m.m_endpoint_indices[t.m_local_indices[i * 2 + 0]]]; + uint32_t e1 = m_endpoint_remap_table_new_to_old[m.m_endpoint_indices[t.m_local_indices[i * 2 + 1]]]; + + if (output_block.get_diff_bit()) + { + BASISU_BACKEND_VERIFY(m_endpoint_palette[e0].m_color5_valid); + BASISU_BACKEND_VERIFY(m_endpoint_palette[e1].m_color5_valid); + + if (!output_block.set_block_color5_check(m_endpoint_palette[e0].m_color5, m_endpoint_palette[e1].m_color5)) + { + BASISU_BACKEND_VERIFY(0); + } + + output_block.set_inten_table(0, m_endpoint_palette[e0].m_inten5); + output_block.set_inten_table(1, m_endpoint_palette[e1].m_inten5); + } + else + { + BASISU_BACKEND_VERIFY(false); + } + + uint32_t selector_idx = m.m_selector_indices[i]; + const basist::etc1_selector_palette_entry &selectors = m_selector_palette[selector_idx]; + for (uint32_t sy = 0; sy < 4; sy++) + for (uint32_t sx = 0; sx < 4; sx++) + output_block.set_selector(sx, sy, selectors(sx, sy)); + } + + } // macroblock_x + + } // macroblock_y + + if (selector_history_buf_rle_count) + { + if (selector_history_buf_rle_count >= (int)SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH) + { + selector_syms[slice_index].push_back(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX); + selector_syms[slice_index].push_back(selector_history_buf_rle_count); + + int run_sym = selector_history_buf_rle_count - SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + if (run_sym >= ((int)SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1)) + selector_history_buf_rle_histogram.inc(SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1); + else 
+ selector_history_buf_rle_histogram.inc(run_sym); + + delta_selector_histogram.inc(SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX); + } + else + { + for (int i = 0; i < selector_history_buf_rle_count; i++) + { + uint32_t sym_index = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + 0; + + selector_syms[slice_index].push_back(sym_index); + + delta_selector_histogram.inc(sym_index); + } + } + + selector_history_buf_rle_count = 0; + } + + m_output.m_slice_image_crcs[slice_index] = basist::crc16(gi.get_ptr(), gi.get_size_in_bytes(), 0); + + if (m_params.m_debug_images) + { + image gi_unpacked; + gi.unpack(gi_unpacked); + + char buf[256]; +#ifdef _WIN32 + sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); +#else + snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); +#endif + save_png(buf, gi_unpacked); + } + + } // slice_index + + debug_printf("Total selector indices remapped: %u %3.2f%%, Used history buf: %u %3.2f%%\n", + total_selector_indices_remapped, total_selector_indices_remapped * 100.0f / (get_total_macroblocks() * 4), + total_used_selector_history_buf, total_used_selector_history_buf * 100.0f / (get_total_macroblocks() * 4)); + + if (m_params.m_debug_images) + { + //draw_histogram_chart("delta_endpoint_hist.png", "Delta Endpoint Histogram", endpoint_histogram); + //draw_histogram_chart("delta_selector_hist.png", "Delta Selector Histogram", selector_histogram); + //draw_histogram_chart("selector_hist.png", "Selector Histogram", actual_selector_histogram); + } + + double delta_endpoint_entropy = delta_endpoint_histogram.get_entropy() / delta_endpoint_histogram.get_total(); + double delta_selector_entropy = delta_selector_histogram.get_entropy() / delta_selector_histogram.get_total(); + double template_entropy = template_histogram.get_entropy() / template_histogram.get_total(); + + debug_printf("Entropy: AvgEndpoints/macroblock: %3.3f DeltaEndpoint: %3.3f DeltaSelector: %3.3f Template: %3.3f\n", + 
static_cast(delta_endpoint_histogram.get_total()) / get_total_macroblocks(), + delta_endpoint_entropy, delta_selector_entropy, template_entropy); + + huffman_encoding_table template_model; + if (!template_model.init(template_histogram, 16)) + { + error_printf("template_model.init() failed!"); + return false; + } + + huffman_encoding_table delta_endpoint_model; + if (!delta_endpoint_model.init(delta_endpoint_histogram, 16)) + { + error_printf("delta_endpoint_model.init() failed!"); + return false; + } + + BASISU_ASSUME(basisu_frontend::cMaxEndpointClusterBits <= 15); + uint32_t max_delta_selector_code_size = ceil_log2i(r.get_total_selector_clusters() * 2) + 2; + + max_delta_selector_code_size = clamp(max_delta_selector_code_size, 10, 15); + + if (m_params.m_debug_images) + { + uint_vec delta_selector_plot_histogram(delta_selector_histogram.size()); + for (uint32_t i = 0; i < delta_selector_histogram.size(); i++) + delta_selector_plot_histogram[i] = delta_selector_histogram[i]; + //draw_histogram_chart("delta_selector_symbol_hist.png", "Delta Selector Symbol Histogram", delta_selector_plot_histogram); + } + + huffman_encoding_table delta_selector_model; + if (!delta_selector_model.init(delta_selector_histogram, max_delta_selector_code_size)) + { + error_printf("delta_selector_model.init() failed!"); + return false; + } + + if (!selector_history_buf_rle_histogram.get_total()) + selector_history_buf_rle_histogram.inc(0); + + huffman_encoding_table selector_history_buf_rle_model; + if (!selector_history_buf_rle_model.init(selector_history_buf_rle_histogram, 15)) + { + error_printf("selector_history_buf_rle_model.init() failed!"); + return false; + } + + bitwise_coder coder; + coder.init(1024 * 1024 * 4); + + uint32_t template_model_bits = coder.emit_huffman_table(template_model); + uint32_t delta_endpoint_model_bits = coder.emit_huffman_table(delta_endpoint_model); + uint32_t delta_selector_model_bits = coder.emit_huffman_table(delta_selector_model); + uint32_t 
selector_history_buf_run_sym_bits = coder.emit_huffman_table(selector_history_buf_rle_model); + + coder.put_bits(MAX_SELECTOR_HISTORY_BUF_SIZE, 13); + + const uint32_t SELECTOR_HISTORY_BUF_RUN_RICE_BITS = 3; + coder.put_bits(SELECTOR_HISTORY_BUF_RUN_RICE_BITS, 4); + + debug_printf("Model sizes: Template: %u DeltaEndpoint: %u (%3.3f bpp) DeltaSelector: %u (%3.3f bpp) SelectorHistBufRLE: %u (%3.3f bpp)\n", + (template_model_bits + 7) / 8, + (delta_endpoint_model_bits + 7) / 8, delta_endpoint_model_bits / float(get_total_input_texels()), + (delta_selector_model_bits + 7) / 8, delta_selector_model_bits / float(get_total_input_texels()), + (selector_history_buf_run_sym_bits + 7) / 8, selector_history_buf_run_sym_bits / float(get_total_input_texels())); + + coder.flush(); + + m_output.m_slice_image_tables = coder.get_bytes(); + + uint32_t total_template_bits = 0, total_delta_endpoint_bits = 0, total_delta_selector_bits = 0; + + uint32_t total_image_bytes = 0; + + m_output.m_slice_image_data.resize(m_slices.size()); + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t width = m_slices[slice_index].m_width; + const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + const uint32_t num_macroblocks_x = m_slices[slice_index].m_num_macroblocks_x; + const uint32_t num_macroblocks_y = m_slices[slice_index].m_num_macroblocks_y; + + coder.init(1024 * 1024 * 4); + + uint32_t cur_selector_sym_ofs = 0; + uint32_t selector_rle_count = 0; + + for (uint32_t macroblock_y = 0; macroblock_y < num_macroblocks_y; macroblock_y++) + { + const int x_start = (macroblock_y & 1) ? (num_macroblocks_x - 1) : 0; + const int x_end = (macroblock_y & 1) ? -1 : num_macroblocks_x; + const int x_dir = (macroblock_y & 1) ? 
-1 : 1; + + for (int macroblock_x = x_start; macroblock_x != x_end; macroblock_x += x_dir) + { + const etc1_macroblock &m = m_slice_macroblocks[slice_index](macroblock_x, macroblock_y); + + total_template_bits += coder.put_code(m.m_template_index, template_model); + + for (uint32_t i = 0; i < m.m_endpoint_delta_indices.size(); i++) + total_delta_endpoint_bits += coder.put_code(m.m_endpoint_delta_indices[i] + r.get_total_endpoint_clusters(), delta_endpoint_model); + + for (uint32_t i = 0; i < 4; i++) + { + if (!selector_rle_count) + { + uint32_t selector_sym_index = selector_syms[slice_index][cur_selector_sym_ofs++]; + + if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX) + selector_rle_count = selector_syms[slice_index][cur_selector_sym_ofs++]; + + total_delta_selector_bits += coder.put_code(selector_sym_index, delta_selector_model); + + if (selector_sym_index == SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX) + { + int run_sym = selector_rle_count - SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH; + if (run_sym >= ((int)SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1)) + { + total_delta_selector_bits += coder.put_code(SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL - 1, selector_history_buf_rle_model); + total_delta_selector_bits += coder.put_rice(selector_rle_count - SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH, SELECTOR_HISTORY_BUF_RUN_RICE_BITS); + } + else + total_delta_selector_bits += coder.put_code(run_sym, selector_history_buf_rle_model); + } + } + + if (selector_rle_count) + selector_rle_count--; + } + + } // macroblock_x + + } // macroblock_y + + BASISU_BACKEND_VERIFY(cur_selector_sym_ofs == selector_syms[slice_index].size()); + + coder.flush(); + + m_output.m_slice_image_data[slice_index] = coder.get_bytes(); + + total_image_bytes += (uint32_t)coder.get_bytes().size(); + + debug_printf("Slice %u compressed size: %u bytes, %3.3f bits per slice texel\n", slice_index, m_output.m_slice_image_data[slice_index].size(), m_output.m_slice_image_data[slice_index].size() * 8.0f / 
(m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height)); + + } // slice_index + + const double total_texels = static_cast(get_total_input_texels()); + const double total_macroblocks = static_cast(get_total_macroblocks()); + + debug_printf("Total template bits: %u bytes: %u bits/texel: %3.3f bits/macroblock: %3.3f\n", total_template_bits, total_template_bits / 8, total_template_bits / total_texels, total_template_bits / total_macroblocks); + debug_printf("Total delta endpoint bits: %u bytes: %u bits/texel: %3.3f bits/macroblock: %3.3f\n", total_delta_endpoint_bits, total_delta_endpoint_bits / 8, total_delta_endpoint_bits / total_texels, total_delta_endpoint_bits / total_macroblocks); + debug_printf("Total delta selector bits: %u bytes: %u bits/texel: %3.3f bits/macroblock: %3.3f\n", total_delta_selector_bits, total_delta_selector_bits / 8, total_delta_selector_bits / total_texels, total_delta_selector_bits / total_macroblocks); + + debug_printf("Total table bytes: %u, Total image bytes: %u, %3.3f bits/texel\n", m_output.m_slice_image_tables.size(), total_image_bytes, total_image_bytes * 8.0f / total_texels); + + return true; + } + + bool basisu_backend::encode_endpoint_palette() + { + const basisu_frontend &r = *m_pFront_end; + + histogram color5_delta_hist(32 * 2 - 1); + histogram inten5_delta_hist(8 * 2 - 1); + + color_rgba prev_color5(0, 0, 0, 0); + int prev_inten5 = 0; + + // Maps NEW to OLD endpoints + uint_vec endpoint_remap_table_inv(r.get_total_endpoint_clusters()); + for (uint32_t old_endpoint_index = 0; old_endpoint_index < m_endpoint_remap_table_old_to_new.size(); old_endpoint_index++) + endpoint_remap_table_inv[m_endpoint_remap_table_old_to_new[old_endpoint_index]] = old_endpoint_index; + + for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++) + { + const uint32_t old_endpoint_index = endpoint_remap_table_inv[new_endpoint_index]; + + int delta_r5 = 
m_endpoint_palette[old_endpoint_index].m_color5[0] - prev_color5[0]; + int delta_g5 = m_endpoint_palette[old_endpoint_index].m_color5[1] - prev_color5[1]; + int delta_b5 = m_endpoint_palette[old_endpoint_index].m_color5[2] - prev_color5[2]; + int delta_inten5 = m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten5; + + prev_color5[0] = m_endpoint_palette[old_endpoint_index].m_color5[0]; + prev_color5[1] = m_endpoint_palette[old_endpoint_index].m_color5[1]; + prev_color5[2] = m_endpoint_palette[old_endpoint_index].m_color5[2]; + prev_inten5 = m_endpoint_palette[old_endpoint_index].m_inten5; + + color5_delta_hist.inc(31 + delta_r5); + color5_delta_hist.inc(31 + delta_g5); + color5_delta_hist.inc(31 + delta_b5); + inten5_delta_hist.inc(7 + delta_inten5); + } + + huffman_encoding_table color5_delta_model; + if (!color5_delta_model.init(color5_delta_hist, 16)) + { + error_printf("color5_delta_model.init() failed!"); + return false; + } + + huffman_encoding_table inten5_delta_model; + if (!inten5_delta_model.init(inten5_delta_hist, 16)) + { + error_printf("inten5_delta_model.init() failed!"); + return false; + } + + bitwise_coder coder; + + coder.init(1024 * 1024); + + coder.emit_huffman_table(color5_delta_model); + coder.emit_huffman_table(inten5_delta_model); + + prev_color5.set(0, 0, 0, 0); + prev_inten5 = 0; + + for (uint32_t q = 0; q < r.get_total_endpoint_clusters(); q++) + { + const uint32_t i = endpoint_remap_table_inv[q]; + + int delta_r5 = m_endpoint_palette[i].m_color5[0] - prev_color5[0]; + int delta_g5 = m_endpoint_palette[i].m_color5[1] - prev_color5[1]; + int delta_b5 = m_endpoint_palette[i].m_color5[2] - prev_color5[2]; + int delta_inten5 = m_endpoint_palette[i].m_inten5 - prev_inten5; + + prev_color5[0] = m_endpoint_palette[i].m_color5[0]; + prev_color5[1] = m_endpoint_palette[i].m_color5[1]; + prev_color5[2] = m_endpoint_palette[i].m_color5[2]; + prev_inten5 = m_endpoint_palette[i].m_inten5; + + coder.put_code(31 + delta_r5, color5_delta_model); 
+ coder.put_code(31 + delta_g5, color5_delta_model); + coder.put_code(31 + delta_b5, color5_delta_model); + coder.put_code(7 + delta_inten5, inten5_delta_model); + + } // q + + coder.flush(); + + m_output.m_endpoint_palette = coder.get_bytes(); + + debug_printf("Endpoint palette size: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n", + m_output.m_endpoint_palette.size(), m_output.m_endpoint_palette.size() * 8.0f / r.get_total_endpoint_clusters(), m_output.m_endpoint_palette.size() * 8.0f / get_total_input_texels()); + + return true; + } + + bool basisu_backend::encode_selector_palette() + { + const basisu_frontend &r = *m_pFront_end; + + if ((m_params.m_use_global_sel_codebook) && (!m_params.m_use_hybrid_sel_codebooks)) + { + histogram global_mod_indices(1 << m_params.m_global_sel_codebook_mod_bits); + + for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) + global_mod_indices.inc(m_global_selector_palette_desc[q].m_mod_index); + + huffman_encoding_table global_pal_model, global_mod_model; + + if (!global_mod_model.init(global_mod_indices, 16)) + { + error_printf("global_mod_model.init() failed!"); + return false; + } + + bitwise_coder coder; + coder.init(1024 * 1024); + + coder.put_bits(1, 1); // use global codebook + + coder.put_bits(m_params.m_global_sel_codebook_pal_bits, 4); // pal bits + coder.put_bits(m_params.m_global_sel_codebook_mod_bits, 4); // mod bits + + uint32_t mod_model_bits = 0; + if (m_params.m_global_sel_codebook_mod_bits) + mod_model_bits = coder.emit_huffman_table(global_mod_model); + + uint32_t total_pal_bits = 0; + uint32_t total_mod_bits = 0; + for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) + { + const uint32_t i = m_selector_remap_table_new_to_old[q]; + + if (m_params.m_global_sel_codebook_pal_bits) + { + coder.put_bits(m_global_selector_palette_desc[i].m_pal_index, m_params.m_global_sel_codebook_pal_bits); + total_pal_bits += m_params.m_global_sel_codebook_pal_bits; + } + + if 
(m_params.m_global_sel_codebook_mod_bits) + total_mod_bits += coder.put_code(m_global_selector_palette_desc[i].m_mod_index, global_mod_model); + } + + coder.flush(); + + m_output.m_selector_palette = coder.get_bytes(); + + debug_printf("Modifier model bits: %u Avg per entry: %3.3f\n", mod_model_bits, mod_model_bits / float(r.get_total_selector_clusters())); + debug_printf("Palette bits: %u Avg per entry: %3.3f, Modifier bits: %u Avg per entry: %3.3f\n", total_pal_bits, total_pal_bits / float(r.get_total_selector_clusters()), total_mod_bits, total_mod_bits / float(r.get_total_selector_clusters())); + } + else if (m_params.m_use_hybrid_sel_codebooks) + { + huff2D used_global_cb_bitflag_huff2D(1, 8); + + histogram global_mod_indices(1 << m_params.m_global_sel_codebook_mod_bits); + + for (uint32_t s = 0; s < r.get_total_selector_clusters(); s++) + { + const uint32_t q = m_selector_remap_table_new_to_old[s]; + + const bool used_global_cb_flag = r.get_selector_cluster_uses_global_cb_vec()[q]; + + used_global_cb_bitflag_huff2D.emit(used_global_cb_flag); + + global_mod_indices.inc(m_global_selector_palette_desc[q].m_mod_index); + } + + huffman_encoding_table global_mod_indices_model; + if (!global_mod_indices_model.init(global_mod_indices, 16)) + { + error_printf("global_mod_indices_model.init() failed!"); + return false; + } + + bitwise_coder coder; + coder.init(1024 * 1024); + + coder.put_bits(0, 1); // use global codebook + coder.put_bits(1, 1); // uses hybrid codebooks + + coder.put_bits(m_params.m_global_sel_codebook_pal_bits, 4); // pal bits + coder.put_bits(m_params.m_global_sel_codebook_mod_bits, 4); // mod bits + + used_global_cb_bitflag_huff2D.start_encoding(16); + coder.emit_huffman_table(used_global_cb_bitflag_huff2D.get_encoding_table()); + + if (m_params.m_global_sel_codebook_mod_bits) + coder.emit_huffman_table(global_mod_indices_model); + + uint32_t total_global_cb_entries = 0; + uint32_t total_pal_bits = 0; + uint32_t total_mod_bits = 0; + uint32_t 
total_selectors = 0; + uint32_t total_selector_bits = 0; + uint32_t total_flag_bits = 0; + + for (uint32_t s = 0; s < r.get_total_selector_clusters(); s++) + { + const uint32_t q = m_selector_remap_table_new_to_old[s]; + + total_flag_bits += used_global_cb_bitflag_huff2D.emit_next_sym(coder); + + const bool used_global_cb_flag = r.get_selector_cluster_uses_global_cb_vec()[q]; + + if (used_global_cb_flag) + { + total_global_cb_entries++; + + total_pal_bits += coder.put_bits(r.get_selector_cluster_global_selector_entry_ids()[q].m_palette_index, m_params.m_global_sel_codebook_pal_bits); + total_mod_bits += coder.put_code(r.get_selector_cluster_global_selector_entry_ids()[q].m_modifier.get_index(), global_mod_indices_model); + } + else + { + total_selectors++; + total_selector_bits += 32; + + for (uint32_t j = 0; j < 4; j++) + coder.put_bits(m_selector_palette[q].get_byte(j), 8); + } + } + + coder.flush(); + + m_output.m_selector_palette = coder.get_bytes(); + + debug_printf("Total global CB entries: %u %3.2f%%\n", total_global_cb_entries, total_global_cb_entries * 100.0f / r.get_total_selector_clusters()); + debug_printf("Total selector entries: %u %3.2f%%\n", total_selectors, total_selectors * 100.0f / r.get_total_selector_clusters()); + debug_printf("Total pal bits: %u, mod bits: %u, selector bits: %u, flag bits: %u\n", total_pal_bits, total_mod_bits, total_selector_bits, total_flag_bits); + } + else + { + histogram delta_selector_pal_histogram(256); + + for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) + { + if (!q) + continue; + + const basist::etc1_selector_palette_entry &cur = m_selector_palette[m_selector_remap_table_new_to_old[q]]; + const basist::etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]); + + for (uint32_t j = 0; j < 4; j++) + delta_selector_pal_histogram.inc(cur.get_byte(j) ^ predictor.get_byte(j)); + } + + if (!delta_selector_pal_histogram.get_total()) + 
delta_selector_pal_histogram.inc(0); + + huffman_encoding_table delta_selector_pal_model; + if (!delta_selector_pal_model.init(delta_selector_pal_histogram, 16)) + { + error_printf("delta_selector_pal_model.init() failed!"); + return false; + } + + bitwise_coder coder; + coder.init(1024 * 1024); + + coder.put_bits(0, 1); // use global codebook + coder.put_bits(0, 1); // uses hybrid codebooks + + coder.put_bits(0, 1); // raw bytes + + coder.emit_huffman_table(delta_selector_pal_model); + + for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) + { + if (!q) + { + for (uint32_t j = 0; j < 4; j++) + coder.put_bits(m_selector_palette[m_selector_remap_table_new_to_old[q]].get_byte(j), 8); + continue; + } + + const basist::etc1_selector_palette_entry &cur = m_selector_palette[m_selector_remap_table_new_to_old[q]]; + const basist::etc1_selector_palette_entry predictor(m_selector_palette[m_selector_remap_table_new_to_old[q - 1]]); + + for (uint32_t j = 0; j < 4; j++) + coder.put_code(cur.get_byte(j) ^ predictor.get_byte(j), delta_selector_pal_model); + } + + coder.flush(); + + m_output.m_selector_palette = coder.get_bytes(); + + if (m_output.m_selector_palette.size() >= r.get_total_selector_clusters() * 4) + { + coder.init(1024 * 1024); + + coder.put_bits(0, 1); // use global codebook + coder.put_bits(0, 1); // uses hybrid codebooks + + coder.put_bits(1, 1); // raw bytes + + for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) + { + const uint32_t i = m_selector_remap_table_new_to_old[q]; + + for (uint32_t j = 0; j < 4; j++) + coder.put_bits(m_selector_palette[i].get_byte(j), 8); + } + + coder.flush(); + + m_output.m_selector_palette = coder.get_bytes(); + } + + } // if (m_params.m_use_global_sel_codebook) + + debug_printf("Selector palette bytes: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n", m_output.m_selector_palette.size(), m_output.m_selector_palette.size() * 8.0f / r.get_total_selector_clusters(), m_output.m_selector_palette.size() * 8.0f / 
get_total_input_texels()); + + return true; + } + + uint32_t basisu_backend::encode() + { + const basisu_frontend &r = *m_pFront_end; + + m_output.m_slice_desc = m_slices; + m_output.m_etc1s = m_params.m_etc1s; + m_output.m_num_endpoints = r.get_total_endpoint_clusters(); + m_output.m_num_selectors = r.get_total_selector_clusters(); + + create_endpoint_palette(); + create_selector_palette(); + + create_macroblocks(); + + if (!encode_image()) + return 0; + + if (!encode_endpoint_palette()) + return 0; + + if (!encode_selector_palette()) + return 0; + + uint32_t total_compressed_bytes = (uint32_t)(m_output.m_slice_image_tables.size() + m_output.m_endpoint_palette.size() + m_output.m_selector_palette.size()); + for (uint32_t i = 0; i < m_output.m_slice_image_data.size(); i++) + total_compressed_bytes += (uint32_t)m_output.m_slice_image_data[i].size(); + + debug_printf("Wrote %u bytes, %3.3f bits/texel\n", total_compressed_bytes, total_compressed_bytes * 8.0f / get_total_input_texels()); + + return total_compressed_bytes; + } + +} // namespace basisu diff --git a/basisu_backend.h b/basisu_backend.h new file mode 100644 index 0000000..f854e15 --- /dev/null +++ b/basisu_backend.h @@ -0,0 +1,331 @@ +// basisu_backend.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "transcoder/basisu.h" +#include "basisu_enc.h" +#include "transcoder/basisu_transcoder_internal.h" +#include "transcoder/basisu_global_selector_palette.h" +#include "basisu_frontend.h" + +namespace basisu +{ + struct etc1_macroblock + { + etc1_macroblock() + { + clear(); + } + + uint8_t m_diff_bits; + uint8_t m_flip_bits; + + int m_template_index; + + int_vec m_endpoint_indices; + int_vec m_selector_indices; + + int_vec m_endpoint_delta_indices; + int_vec m_selector_delta_indices; + int_vec m_selector_history_buf_indices; + + void clear() + { + m_diff_bits = 0; + m_flip_bits = 0; + m_template_index = 0; + m_endpoint_indices.clear(); + m_selector_indices.clear(); + m_endpoint_delta_indices.clear(); + m_selector_delta_indices.clear(); + m_selector_history_buf_indices.clear(); + } + }; + + typedef std::vector etc1_macroblock_vec; + typedef vector2D etc1_macroblock_vec2D; + + struct etc1_endpoint_palette_entry + { + etc1_endpoint_palette_entry() + { + clear(); + } + + color_rgba m_color5; + uint32_t m_inten5; + bool m_color5_valid; + + void clear() + { + clear_obj(*this); + } + }; + + typedef std::vector etc1_endpoint_palette_entry_vec; + + struct basisu_backend_params + { + bool m_etc1s; + bool m_debug, m_debug_images; + float m_delta_selector_rdo_quality_thresh; // 1.25f + + bool m_use_global_sel_codebook; + uint32_t m_global_sel_codebook_pal_bits; + uint32_t m_global_sel_codebook_mod_bits; + bool m_use_hybrid_sel_codebooks; + + basisu_backend_params() + { + clear(); + } + + void clear() + { + m_etc1s = false; + m_debug = false; + m_debug_images = false; + m_delta_selector_rdo_quality_thresh = 0.0f;//2.5f; + + m_use_global_sel_codebook = false; + m_global_sel_codebook_pal_bits = ETC1_GLOBAL_SELECTOR_CODEBOOK_MAX_PAL_BITS; + m_global_sel_codebook_mod_bits = basist::etc1_global_palette_entry_modifier::cTotalBits; + m_use_hybrid_sel_codebooks = false; + } + }; + + struct basisu_backend_slice_desc + { + uint32_t m_first_block_index; + + uint32_t 
m_orig_width; + uint32_t m_orig_height; + + uint32_t m_width; + uint32_t m_height; + + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + + uint32_t m_num_macroblocks_x; + uint32_t m_num_macroblocks_y; + + uint32_t m_source_file_index; + uint32_t m_mip_index; + bool m_alpha; + }; + + typedef std::vector basisu_backend_slice_desc_vec; + + struct basisu_backend_output + { + bool m_etc1s; + + uint32_t m_num_endpoints; + uint32_t m_num_selectors; + + uint8_vec m_endpoint_palette; + uint8_vec m_selector_palette; + + basisu_backend_slice_desc_vec m_slice_desc; + + uint8_vec m_slice_image_tables; + std::vector m_slice_image_data; + uint16_vec m_slice_image_crcs; + + basisu_backend_output() + { + clear(); + } + + void clear() + { + m_etc1s = false; + + m_num_endpoints = 0; + m_num_selectors = 0; + + m_endpoint_palette.clear(); + m_selector_palette.clear(); + m_slice_desc.clear(); + m_slice_image_tables.clear(); + m_slice_image_data.clear(); + m_slice_image_crcs.clear(); + } + + uint32_t get_output_size_estimate() const + { + uint32_t total_compressed_bytes = (uint32_t)(m_slice_image_tables.size() + m_endpoint_palette.size() + m_selector_palette.size()); + for (uint32_t i = 0; i < m_slice_image_data.size(); i++) + total_compressed_bytes += (uint32_t)m_slice_image_data[i].size(); + + return total_compressed_bytes; + } + }; + + class basisu_backend + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_backend); + + public: + + basisu_backend(); + + void clear(); + + void init(basisu_frontend *pFront_end, basisu_backend_params ¶ms, const basisu_backend_slice_desc_vec &slice_desc, const basist::etc1_global_selector_codebook *pGlobal_sel_codebook); + + uint32_t encode(); + + const basisu_backend_output &get_output() const { return m_output; } + + private: + const basisu_frontend *m_pFront_end; + basisu_backend_params m_params; + basisu_backend_slice_desc_vec m_slices; + basisu_backend_output m_output; + const basist::etc1_global_selector_codebook *m_pGlobal_sel_codebook; + + 
etc1_endpoint_palette_entry_vec m_endpoint_palette; + basist::etc1_selector_palette_entry_vec m_selector_palette; + + struct etc1_global_selector_cb_entry_desc + { + uint32_t m_pal_index; + uint32_t m_mod_index; + bool m_was_used; + }; + + typedef std::vector etc1_global_selector_cb_entry_desc_vec; + + etc1_global_selector_cb_entry_desc_vec m_global_selector_palette_desc; + + std::vector m_slice_macroblocks; + + // Maps OLD to NEW endpoint/selector indices + uint_vec m_endpoint_remap_table_old_to_new; + uint_vec m_endpoint_remap_table_new_to_old; + + uint_vec m_selector_remap_table_old_to_new; + + // Maps NEW to OLD endpoint/selector indices + uint_vec m_selector_remap_table_new_to_old; + + uint32_t get_total_slices() const + { + return (uint32_t)m_slices.size(); + } + + uint32_t get_total_slice_blocks() const + { + return m_pFront_end->get_total_output_blocks(); + } + + uint32_t get_block_index(uint32_t slice_index, uint32_t block_x, uint32_t block_y) const + { + const basisu_backend_slice_desc &slice = m_slices[slice_index]; + + assert((block_x < slice.m_num_blocks_x) && (block_y < slice.m_num_blocks_y)); + + return slice.m_first_block_index + block_y * slice.m_num_blocks_x + block_x; + } + + uint32_t get_num_macroblocks_x(uint32_t slice_index) const + { + return m_slices[slice_index].m_num_macroblocks_x; + } + + uint32_t get_num_macroblocks_y(uint32_t slice_index) const + { + return m_slices[slice_index].m_num_macroblocks_y; + } + + uint32_t get_total_macroblocks(uint32_t slice_index) const + { + return m_slices[slice_index].m_num_macroblocks_x * m_slices[slice_index].m_num_macroblocks_y; + } + + uint32_t get_total_macroblocks() const + { + uint32_t total_macroblocks = 0; + for (uint32_t i = 0; i < m_slices.size(); i++) + total_macroblocks += get_total_macroblocks(i); + return total_macroblocks; + } + + // Returns the total number of input texels, not counting padding up to blocks/macroblocks. 
+ uint32_t get_total_input_texels(uint32_t slice_index) const + { + return m_slices[slice_index].m_orig_width * m_slices[slice_index].m_orig_height; + } + + uint32_t get_total_input_texels() const + { + uint32_t total_texels = 0; + for (uint32_t i = 0; i < m_slices.size(); i++) + total_texels += get_total_input_texels(i); + return total_texels; + } + + int find_slice(uint32_t block_index, uint32_t *pBlock_x, uint32_t *pBlock_y) const + { + for (uint32_t i = 0; i < m_slices.size(); i++) + { + if ((block_index >= m_slices[i].m_first_block_index) && (block_index < (m_slices[i].m_first_block_index + m_slices[i].m_num_blocks_x * m_slices[i].m_num_blocks_y))) + { + const uint32_t ofs = block_index - m_slices[i].m_first_block_index; + const uint32_t x = ofs % m_slices[i].m_num_blocks_x; + const uint32_t y = ofs / m_slices[i].m_num_blocks_x; + + if (pBlock_x) *pBlock_x = x; + if (pBlock_y) *pBlock_y = y; + + return i; + } + } + return -1; + } + + void create_endpoint_palette(); + + void create_selector_palette(); + + // endpoint palette + // 5:5:5 and predicted 4:4:4 colors, 1 or 2 3-bit intensity table indices + // selector palette + // 4x4 2-bit selectors + + // per-macroblock: + // 4 diff bits + // 4 flip bits + // Endpoint template index, 1-8 endpoint indices + // Alternately, if no template applies, we can send 4 ETC1S bits followed by 4-8 endpoint indices + // 4 selector indices + + float selector_zeng_similarity_func(uint32_t index_a, uint32_t index_b, void *pContext); + + void create_macroblocks(); + + void optimize_selector_palette_order(const uint_vec &all_selector_indices); + + bool encode_image(); + + bool encode_endpoint_palette(); + + bool encode_selector_palette(); + }; + +} // namespace basisu + diff --git a/basisu_basis_file.cpp b/basisu_basis_file.cpp new file mode 100644 index 0000000..b2799d0 --- /dev/null +++ b/basisu_basis_file.cpp @@ -0,0 +1,199 @@ +// basisu_basis_file.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_basis_file.h" +#include "transcoder/basisu_transcoder.h" + +// The output file version. Keep in sync with BASISD_SUPPORTED_BASIS_VERSION. +#define BASIS_FILE_VERSION (0x11) + +namespace basisu +{ + void basisu_file::create_header(const basisu_backend_output &encoder_output, uint32_t userdata0, uint32_t userdata1, bool y_flipped) + { + m_header.m_header_size = sizeof(basist::basis_file_header); + + m_header.m_data_size = m_total_file_size - sizeof(basist::basis_file_header); + + m_header.m_total_slices = (uint32_t)encoder_output.m_slice_desc.size(); + + m_header.m_total_images = 0; + for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) + m_header.m_total_images = maximum(m_header.m_total_images, encoder_output.m_slice_desc[i].m_source_file_index + 1); + + m_header.m_format = basist::cETC1; + m_header.m_flags = 0; + + if (encoder_output.m_etc1s) + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagETC1S; + + if (y_flipped) + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagYFlipped; + + for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) + { + if (encoder_output.m_slice_desc[i].m_alpha) + { + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagHasAlphaSlices; + break; + } + } + + m_header.m_userdata0 = userdata0; + m_header.m_userdata1 = userdata1; + + m_header.m_total_endpoints = encoder_output.m_num_endpoints; + 
m_header.m_endpoint_cb_file_ofs = m_endpoint_cb_file_ofs; + m_header.m_endpoint_cb_file_size = (uint32_t)encoder_output.m_endpoint_palette.size(); + + m_header.m_total_selectors = encoder_output.m_num_selectors; + m_header.m_selector_cb_file_ofs = m_selector_cb_file_ofs; + m_header.m_selector_cb_file_size = (uint32_t)encoder_output.m_selector_palette.size(); + + m_header.m_tables_file_ofs = m_tables_file_ofs; + m_header.m_tables_file_size = (uint32_t)encoder_output.m_slice_image_tables.size(); + + m_header.m_slice_desc_file_ofs = m_slice_descs_file_ofs; + } + + bool basisu_file::create_image_descs(const basisu_backend_output &encoder_output) + { + const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc; + + m_images_descs.resize(slice_descs.size()); + + uint64_t cur_slice_file_ofs = m_first_image_file_ofs; + for (uint32_t i = 0; i < slice_descs.size(); i++) + { + clear_obj(m_images_descs[i]); + + m_images_descs[i].m_image_index = slice_descs[i].m_source_file_index; + m_images_descs[i].m_level_index = slice_descs[i].m_mip_index; + + if (slice_descs[i].m_alpha) + m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsIsAlphaData; + + m_images_descs[i].m_orig_width = slice_descs[i].m_orig_width; + m_images_descs[i].m_orig_height = slice_descs[i].m_orig_height; + m_images_descs[i].m_num_blocks_x = slice_descs[i].m_num_blocks_x; + m_images_descs[i].m_num_blocks_y = slice_descs[i].m_num_blocks_y; + m_images_descs[i].m_slice_data_crc16 = encoder_output.m_slice_image_crcs[i]; + + if (encoder_output.m_slice_image_data[i].size() > UINT32_MAX) + { + error_printf("basisu_file::create_image_descs: Basis file too large\n"); + return false; + } + + const uint32_t image_size = (uint32_t)encoder_output.m_slice_image_data[i].size(); + + m_images_descs[i].m_file_ofs = (uint32_t)cur_slice_file_ofs; + m_images_descs[i].m_file_size = image_size; + + cur_slice_file_ofs += image_size; + if (cur_slice_file_ofs > UINT32_MAX) + { + 
error_printf("basisu_file::create_image_descs: Basis file too large\n"); + return false; + } + } + + assert(cur_slice_file_ofs == m_total_file_size); + return true; + } + + void basisu_file::create_comp_data(const basisu_backend_output &encoder_output) + { + const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc; + + append_vector(m_comp_data, reinterpret_cast(&m_header), sizeof(m_header)); + + assert(m_comp_data.size() == m_slice_descs_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&m_images_descs[0]), m_images_descs.size() * sizeof(m_images_descs[0])); + + assert(m_comp_data.size() == m_endpoint_cb_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_endpoint_palette[0]), encoder_output.m_endpoint_palette.size()); + + assert(m_comp_data.size() == m_selector_cb_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_selector_palette[0]), encoder_output.m_selector_palette.size()); + + assert(m_comp_data.size() == m_tables_file_ofs); + append_vector(m_comp_data, reinterpret_cast(&encoder_output.m_slice_image_tables[0]), encoder_output.m_slice_image_tables.size()); + + assert(m_comp_data.size() == m_first_image_file_ofs); + for (uint32_t i = 0; i < slice_descs.size(); i++) + append_vector(m_comp_data, &encoder_output.m_slice_image_data[i][0], encoder_output.m_slice_image_data[i].size()); + + assert(m_comp_data.size() == m_total_file_size); + } + + void basisu_file::fixup_crcs() + { + basist::basis_file_header *pHeader = reinterpret_cast(&m_comp_data[0]); + + pHeader->m_data_size = m_total_file_size - sizeof(basist::basis_file_header); + pHeader->m_data_crc16 = basist::crc16(&m_comp_data[0] + sizeof(basist::basis_file_header), m_total_file_size - sizeof(basist::basis_file_header), 0); + + pHeader->m_header_crc16 = basist::crc16(&pHeader->m_data_size, sizeof(basist::basis_file_header) - BASISU_OFFSETOF(basist::basis_file_header, m_data_size), 0); + + pHeader->m_sig = 
basist::basis_file_header::cBASISSigValue; + pHeader->m_ver = BASIS_FILE_VERSION;// basist::basis_file_header::cBASISFirstVersion; + } + + bool basisu_file::init(const basisu_backend_output &encoder_output, uint32_t userdata0, uint32_t userdata1, bool y_flipped) + { + clear(); + + const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc; + + // The Basis file uses 32-bit fields for lots of stuff, so make sure it's not too large. + uint64_t check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + + (uint64_t)encoder_output.m_endpoint_palette.size() + (uint64_t)encoder_output.m_selector_palette.size() + (uint64_t)encoder_output.m_slice_image_tables.size(); + if (check_size >= 0xFFFF0000ULL) + { + error_printf("basisu_file::init: File is too large!\n"); + return false; + } + + m_header_file_ofs = 0; + m_slice_descs_file_ofs = sizeof(basist::basis_file_header); + m_endpoint_cb_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + m_selector_cb_file_ofs = m_endpoint_cb_file_ofs + (uint32_t)encoder_output.m_endpoint_palette.size(); + m_tables_file_ofs = m_selector_cb_file_ofs + (uint32_t)encoder_output.m_selector_palette.size(); + m_first_image_file_ofs = m_tables_file_ofs + (uint32_t)encoder_output.m_slice_image_tables.size(); + + uint64_t total_file_size = m_first_image_file_ofs; + for (uint32_t i = 0; i < encoder_output.m_slice_image_data.size(); i++) + total_file_size += encoder_output.m_slice_image_data[i].size(); + if (total_file_size >= 0xFFFF0000ULL) + { + error_printf("basisu_file::init: File is too large!\n"); + return false; + } + + m_total_file_size = (uint32_t)total_file_size; + + create_header(encoder_output, userdata0, userdata1, y_flipped); + + if (!create_image_descs(encoder_output)) + return false; + + create_comp_data(encoder_output); + + fixup_crcs(); + + return true; + } + +} // namespace basisu diff --git 
a/basisu_basis_file.h b/basisu_basis_file.h new file mode 100644 index 0000000..ffbb119 --- /dev/null +++ b/basisu_basis_file.h @@ -0,0 +1,70 @@ +// basisu_basis_file.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "transcoder/basisu_file_headers.h" +#include "basisu_backend.h" + +namespace basisu +{ + class basisu_file + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_file); + + public: + basisu_file() + { + } + + void clear() + { + m_comp_data.clear(); + + clear_obj(m_header); + m_images_descs.clear(); + + m_header_file_ofs = 0; + m_slice_descs_file_ofs = 0; + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = 0; + m_first_image_file_ofs = 0; + m_total_file_size = 0; + } + + bool init(const basisu_backend_output& encoder_output, uint32_t userdata0 = 0, uint32_t userdata1 = 0, bool y_flipped = false); + + const uint8_vec &get_compressed_data() const { return m_comp_data; } + + private: + basist::basis_file_header m_header; + std::vector m_images_descs; + + uint8_vec m_comp_data; + + uint32_t m_header_file_ofs; + uint32_t m_slice_descs_file_ofs; + uint32_t m_endpoint_cb_file_ofs; + uint32_t m_selector_cb_file_ofs; + uint32_t m_tables_file_ofs; + uint32_t m_first_image_file_ofs; + uint32_t m_total_file_size; + + void create_header(const basisu_backend_output& encoder_output, uint32_t userdata0, uint32_t userdata1, bool y_flipped); + bool 
create_image_descs(const basisu_backend_output& encoder_output); + void create_comp_data(const basisu_backend_output& encoder_output); + void fixup_crcs(); + }; + +} // namespace basisu diff --git a/basisu_comp.cpp b/basisu_comp.cpp new file mode 100644 index 0000000..72d5725 --- /dev/null +++ b/basisu_comp.cpp @@ -0,0 +1,1059 @@ +// basisu_comp.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_comp.h" +#include "basisu_enc.h" +#include + +#define BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN 0 +#define DEBUG_RESIZE_TEXTURE_TO_64x64 (0) +#define DEBUG_EXTRACT_SINGLE_BLOCK (0) + +namespace basisu +{ + basis_compressor::basis_compressor() : + m_total_blocks(0), + m_auto_global_sel_pal(false), + m_basis_file_size(0), + m_basis_bits_per_texel(0), + m_any_source_image_has_alpha(false) + { + debug_printf("basis_compressor::basis_compressor\n"); + } + + bool basis_compressor::init(const basis_compressor_params ¶ms) + { + debug_printf("basis_compressor::init\n"); + + m_params = params; + + if (m_params.m_debug) + { + debug_printf("basis_compressor::init:\n"); + +#define PRINT_BOOL_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_INT_VALUE(v) debug_printf("%s: %i %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", 
BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); +#define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); + + debug_printf("Has global selector codebook: %i\n", m_params.m_pSel_codebook != nullptr); + + debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %i\n", + (uint32_t)m_params.m_source_images.size(), (uint32_t)m_params.m_source_filenames.size(), (uint32_t)m_params.m_source_alpha_filenames.size()); + + PRINT_BOOL_VALUE(m_y_flip); + PRINT_BOOL_VALUE(m_debug); + PRINT_BOOL_VALUE(m_debug_images); + PRINT_BOOL_VALUE(m_global_sel_pal); + PRINT_BOOL_VALUE(m_no_auto_global_sel_pal); + PRINT_BOOL_VALUE(m_no_endpoint_refinement); + PRINT_BOOL_VALUE(m_no_hybrid_sel_cb); + PRINT_BOOL_VALUE(m_perceptual); + PRINT_BOOL_VALUE(m_no_selector_rdo); + PRINT_BOOL_VALUE(m_read_source_images); + PRINT_BOOL_VALUE(m_write_output_basis_files); + PRINT_BOOL_VALUE(m_faster); + PRINT_BOOL_VALUE(m_compute_stats); + PRINT_BOOL_VALUE(m_check_for_alpha) + PRINT_BOOL_VALUE(m_force_alpha) + PRINT_BOOL_VALUE(m_seperate_rg_to_color_alpha); + + PRINT_FLOAT_VALUE(m_hybrid_sel_cb_quality_thresh); + + PRINT_INT_VALUE(m_global_pal_bits); + PRINT_INT_VALUE(m_global_mod_bits); + + PRINT_FLOAT_VALUE(m_selector_rdo_thresh); + + PRINT_BOOL_VALUE(m_mip_gen); + PRINT_BOOL_VALUE(m_mip_renormalize); + PRINT_BOOL_VALUE(m_mip_wrapping); + PRINT_BOOL_VALUE(m_mip_srgb); + PRINT_FLOAT_VALUE(m_mip_premultiplied); + PRINT_FLOAT_VALUE(m_mip_scale); + PRINT_INT_VALUE(m_mip_smallest_dimension); + debug_printf("m_mip_filter: %s\n", m_params.m_mip_filter.c_str()); + + debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_max_endpoint_clusters); + debug_printf("m_max_selector_clusters: %u\n", m_params.m_max_selector_clusters); + debug_printf("m_quality_level: %i\n", m_params.m_quality_level); + +#undef PRINT_BOOL_VALUE +#undef PRINT_INT_VALUE +#undef PRINT_UINT_VALUE +#undef PRINT_FLOAT_VALUE + 
} + + if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size())) + { + assert(0); + return false; + } + + return true; + } + + basis_compressor::error_code basis_compressor::process() + { + debug_printf("basis_compressor::process\n"); + + if (!read_source_images()) + return cECFailedReadingSourceImages; + + if (!process_frontend()) + return cECFailedFrontEnd; + + if (!extract_frontend_texture_data()) + return cECFailedFontendExtract; + + if (!process_backend()) + return cECFailedBackend; + + if (!create_basis_file_and_transcode()) + return cECFailedCreateBasisFile; + + if (!write_output_files_and_compute_stats()) + return cECFailedWritingOutput; + + return cECSuccess; + } + + bool basis_compressor::generate_mipmaps(const image &img, std::vector &mips, bool has_alpha) + { + debug_printf("basis_compressor::generate_mipmaps\n"); + + uint32_t total_levels = 1; + uint32_t w = img.get_width(), h = img.get_height(); + while (maximum(w, h) > (uint32_t)m_params.m_mip_smallest_dimension) + { + w = maximum(w >> 1U, 1U); + h = maximum(h >> 1U, 1U); + total_levels++; + } + +#if BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN + // Requires stb_image_resize + stbir_filter filter = STBIR_FILTER_DEFAULT; + if (m_params.m_mip_filter == "box") + filter = STBIR_FILTER_BOX; + else if (m_params.m_mip_filter == "triangle") + filter = STBIR_FILTER_TRIANGLE; + else if (m_params.m_mip_filter == "cubic") + filter = STBIR_FILTER_CUBICBSPLINE; + else if (m_params.m_mip_filter == "catmull") + filter = STBIR_FILTER_CATMULLROM; + else if (m_params.m_mip_filter == "mitchell") + filter = STBIR_FILTER_MITCHELL; + + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum(1, img.get_width() >> level); + const uint32_t level_height = maximum(1, img.get_height() >> level); + + image &level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + int result = stbir_resize_uint8_generic( + (const uint8_t *)img.get_ptr(), 
img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), + (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), + has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, + m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, + nullptr); + + if (result == 0) + { + error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); + return false; + } + + if (m_params.m_mip_renormalize) + level_img.renormalize_normal_map(); + } +#else + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum(1, img.get_width() >> level); + const uint32_t level_height = maximum(1, img.get_height() >> level); + + image &level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + bool status = image_resample(img, level_img, m_params.m_mip_srgb, m_params.m_mip_filter.c_str(), m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3); + if (!status) + { + error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n"); + return false; + } + + if (m_params.m_mip_renormalize) + level_img.renormalize_normal_map(); + } +#endif + + return true; + } + + bool basis_compressor::read_source_images() + { + debug_printf("basis_compressor::read_source_images\n"); + + const uint32_t total_source_files = m_params.m_read_source_images ? 
(uint32_t)m_params.m_source_filenames.size() : (uint32_t)m_params.m_source_images.size(); + if (!total_source_files) + return false; + + m_stats.resize(0); + m_slice_descs.resize(0); + m_source_images.resize(0); + + m_total_blocks = 0; + uint32_t total_macroblocks = 0; + + m_any_source_image_has_alpha = false; + + std::vector source_images; + std::vector source_filenames; + + // First load all source images, and determine if any have an alpha channel. + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + const char *pSource_filename = ""; + + image file_image; + + if (m_params.m_read_source_images) + { + pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); + + // Load the source image + if (!load_png(pSource_filename, file_image)) + { + error_printf("Failed reading source image: %s\n", pSource_filename); + return false; + } + + printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height()); + + // Optionally load another image and put a grayscale version of it into the alpha channel. 
+ if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) + { + const char *pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str(); + + image alpha_data; + + if (!load_png(pSource_alpha_image, alpha_data)) + { + error_printf("Failed reading source image: %s\n", pSource_alpha_image); + return false; + } + + printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height()); + + alpha_data.crop(file_image.get_width(), file_image.get_height()); + + for (uint32_t y = 0; y < file_image.get_height(); y++) + for (uint32_t x = 0; x < file_image.get_width(); x++) + file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma(); + } + } + else + { + file_image = m_params.m_source_images[source_file_index]; + } + + if (m_params.m_seperate_rg_to_color_alpha) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < file_image.get_height(); y++) + for (uint32_t x = 0; x < file_image.get_width(); x++) + { + const color_rgba &c = file_image(x, y); + file_image(x, y).set_noclamp_rgba(c.r, c.r, c.r, c.g); + } + } + + bool has_alpha = false; + if ((m_params.m_force_alpha) || (m_params.m_seperate_rg_to_color_alpha)) + has_alpha = true; + else if (!m_params.m_check_for_alpha) + file_image.set_alpha(255); + else if (file_image.has_alpha()) + has_alpha = true; + + if (has_alpha) + m_any_source_image_has_alpha = true; + + debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha); + + if (m_params.m_y_flip) + file_image.flip_y(); + +#if DEBUG_EXTRACT_SINGLE_BLOCK + image block_image(4, 4); + const uint32_t block_x = 0; + const uint32_t block_y = 0; + block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0); + file_image = block_image; +#endif + +#if 
DEBUG_RESIZE_TEXTURE_TO_64x64 + file_image.resize(64, 64); +#endif + + if ((!file_image.get_width()) || (!file_image.get_height())) + { + error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n"); + return false; + } + + if ((file_image.get_width() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (file_image.get_height() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) + { + error_printf("basis_compressor::read_source_images: Source image is too large!\n"); + return false; + } + + source_images.push_back(file_image); + source_filenames.push_back(pSource_filename); + } + + debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha); + + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + image &file_image = source_images[source_file_index]; + const std::string &source_filename = source_filenames[source_file_index]; + + std::vector slices; + + slices.reserve(32); + slices.push_back(file_image); + + if (m_params.m_mip_gen) + { + if (!generate_mipmaps(file_image, slices, m_any_source_image_has_alpha)) + return false; + } + + uint_vec mip_indices(slices.size()); + for (uint32_t i = 0; i < slices.size(); i++) + mip_indices[i] = i; + + if (m_any_source_image_has_alpha) + { + // If source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. 
+ std::vector alpha_slices; + uint_vec new_mip_indices; + + alpha_slices.reserve(slices.size() * 2); + + for (uint32_t i = 0; i < slices.size(); i++) + { + image lvl_rgb(slices[i]); + image lvl_a(lvl_rgb); + + for (uint32_t y = 0; y < lvl_a.get_height(); y++) + { + for (uint32_t x = 0; x < lvl_a.get_width(); x++) + { + uint8_t a = lvl_a(x, y).a; + lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); + } + } + + lvl_rgb.set_alpha(255); + + alpha_slices.push_back(lvl_rgb); + new_mip_indices.push_back(i); + + alpha_slices.push_back(lvl_a); + new_mip_indices.push_back(i); + } + + slices.swap(alpha_slices); + mip_indices.swap(new_mip_indices); + } + + assert(slices.size() == mip_indices.size()); + + for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++) + { + const bool is_alpha_slice = m_any_source_image_has_alpha && ((slice_index & 1) != 0); + + image &source_image = slices[slice_index]; + const uint32_t orig_width = source_image.get_width(); + const uint32_t orig_height = source_image.get_height(); + + // Enlarge the source image to 4x4 block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks. 
+ source_image.crop_dup_borders(source_image.get_block_width(4) * 4, source_image.get_block_height(4) * 4); + + if (m_params.m_debug_images) + { + save_png(string_format("basis_debug_source_image_%u_%u.png", source_file_index, slice_index).c_str(), source_image); + } + + enlarge_vector(m_stats, 1); + enlarge_vector(m_source_images, 1); + enlarge_vector(m_slice_descs, 1); + + const uint32_t dest_image_index = (uint32_t)m_stats.size() - 1; + + m_stats[dest_image_index].m_filename = source_filename.c_str(); + m_stats[dest_image_index].m_width = orig_width; + m_stats[dest_image_index].m_height = orig_height; + + m_source_images[dest_image_index] = source_image; + + debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, source_image.get_width(), source_image.get_height()); + + basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index]; + + slice_desc.m_first_block_index = m_total_blocks; + + slice_desc.m_orig_width = orig_width; + slice_desc.m_orig_height = orig_height; + + slice_desc.m_width = source_image.get_width(); + slice_desc.m_height = source_image.get_height(); + + slice_desc.m_num_blocks_x = source_image.get_block_width(4); + slice_desc.m_num_blocks_y = source_image.get_block_height(4); + + slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1; + slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; + + slice_desc.m_source_file_index = source_file_index; + + slice_desc.m_mip_index = mip_indices[slice_index]; + + slice_desc.m_alpha = is_alpha_slice; + + m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + total_macroblocks += slice_desc.m_num_macroblocks_x * slice_desc.m_num_macroblocks_y; + + } // slice_index + + } // source_file_index + + debug_printf("Total blocks: %u, Total macroblocks: %u\n", m_total_blocks, 
total_macroblocks); + + // Make sure we don't have too many slices + if (m_slice_descs.size() > BASISU_MAX_SLICES) + { + error_printf("Too many slices!\n"); + return false; + } + + // Basic sanity check on the slices + for (uint32_t i = 1; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + // Make sure images are in order + int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; + if (image_delta > 1) + return false; + + // Make sure mipmap levels are in order + if (!image_delta) + { + int level_delta = (int)slice_desc.m_mip_index - (int)prev_slice_desc.m_mip_index; + if (level_delta > 1) + return false; + } + } + + printf("Total basis file slices: %u\n", (uint32_t)m_slice_descs.size()); + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u\n", + i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index); + + if (m_any_source_image_has_alpha) + { + // Alpha slices must be at odd slice indices + if (slice_desc.m_alpha) + { + if ((i & 1) == 0) + return false; + + const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; + + // Make sure previous slice has this image's color data + if (prev_slice_desc.m_source_file_index != slice_desc.m_source_file_index) + return false; + if (prev_slice_desc.m_alpha) + return false; + if (prev_slice_desc.m_mip_index != slice_desc.m_mip_index) + return false; + if (prev_slice_desc.m_num_blocks_x != slice_desc.m_num_blocks_x) + return false; + if (prev_slice_desc.m_num_blocks_y != slice_desc.m_num_blocks_y) + 
return false; + } + else if (i & 1) + return false; + } + else if (slice_desc.m_alpha) + { + return false; + } + + if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height)) + return false; + } + + return true; + } + + bool basis_compressor::process_frontend() + { + debug_printf("basis_compressor::process_frontend\n"); + + m_source_blocks.resize(m_total_blocks); + + for (uint32_t slice_index = 0; slice_index < m_source_images.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; + const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; + + const image &source_image = m_source_images[slice_index]; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + source_image.extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4); + } + +#if 0 + // TODO + basis_etc1_pack_params pack_params; + pack_params.m_quality = cETCQualityMedium; + pack_params.m_perceptual = m_params.m_perceptual; + pack_params.m_use_color4 = false; + + pack_etc1_block_context pack_context; + + std::unordered_set endpoint_hash; + std::unordered_set selector_hash; + + for (uint32_t i = 0; i < m_source_blocks.size(); i++) + { + etc_block blk; + pack_etc1_block(blk, m_source_blocks[i].get_ptr(), pack_params, pack_context); + + const color_rgba c0(blk.get_block_color(0, false)); + endpoint_hash.insert((c0.r | (c0.g << 5) | (c0.b << 10)) | (blk.get_inten_table(0) << 16)); + + const color_rgba c1(blk.get_block_color(1, false)); + endpoint_hash.insert((c1.r | (c1.g << 5) | (c1.b << 10)) | (blk.get_inten_table(1) << 16)); + + selector_hash.insert(blk.get_raw_selector_bits()); + } + + const uint32_t total_unique_endpoints = (uint32_t)endpoint_hash.size(); + const uint32_t total_unique_selectors 
= (uint32_t)selector_hash.size(); + + if (m_params.m_debug) + { + debug_printf("Unique endpoints: %u, unique selectors: %u\n", total_unique_endpoints, total_unique_selectors); + } +#endif + + const double total_texels = m_total_blocks * 16.0f; + + int endpoint_clusters = m_params.m_max_endpoint_clusters; + int selector_clusters = m_params.m_max_selector_clusters; + + if (endpoint_clusters > basisu_frontend::cMaxEndpointClusters) + { + error_printf("Too many endpoint clusters! (%u but max is %u)\n", endpoint_clusters, basisu_frontend::cMaxEndpointClusters); + return false; + } + if (selector_clusters > basisu_frontend::cMaxSelectorClusters) + { + error_printf("Too many selector clusters! (%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); + return false; + } + + if (m_params.m_quality_level != -1) + { + const float quality = saturate(m_params.m_quality_level / 255.0f); + + float color_endpoint_quality = quality; + float color_selector_quality = quality; + + const float bits_per_endpoint_cluster = 22.0f; + const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f + int max_endpoints = static_cast((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); + max_endpoints = clamp(max_endpoints, 256, 3072); //basisu_frontend::cMaxEndpointClusters); + max_endpoints = minimum(max_endpoints, m_total_blocks); + + const float mid = 128.0f / 255.0f; + + const float endpoint_split_point = 0.5f; + if (color_endpoint_quality <= mid) + color_endpoint_quality = lerp(0.0f, endpoint_split_point, powf(color_endpoint_quality / mid, .65f)); + else + color_endpoint_quality = lerp(endpoint_split_point, 1.0f, powf((color_endpoint_quality - mid) / (1.0f - mid), 1.5f)); + + if (max_endpoints < 64) + max_endpoints = 64; + endpoint_clusters = clamp((uint32_t)(.5f + lerp(32, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); + + float bits_per_selector_cluster = 
m_params.m_global_sel_pal ? 21.0f : 31.0f; + + const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f + int max_selectors = static_cast((max_desired_selector_cluster_bits_per_texel * total_texels) / bits_per_selector_cluster); + max_selectors = clamp(max_selectors, 256, basisu_frontend::cMaxSelectorClusters); + max_selectors = minimum(max_selectors, m_total_blocks); + + color_selector_quality = powf(color_selector_quality, 1.65f); + + if (max_selectors < 96) + max_selectors = 96; + selector_clusters = clamp((uint32_t)(.5f + lerp(96, static_cast(max_selectors), color_selector_quality)), 8, basisu_frontend::cMaxSelectorClusters); + + debug_printf("Max endpoints: %u (out of %u), max selectors: %u (out of %u)\n", endpoint_clusters, max_endpoints, selector_clusters, max_selectors); + + if (m_params.m_quality_level >= 223) + { + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .25f; + } + else if (m_params.m_quality_level >= 192) + { + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .5f; + } + else if (m_params.m_quality_level >= 160) + { + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .75f; + } + else if (m_params.m_quality_level >= 129) + { + float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f); + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= lerp(1.0f, .75f, l); + } + } + + m_auto_global_sel_pal = false; + if (!m_params.m_global_sel_pal && !m_params.m_no_auto_global_sel_pal) + { + const float bits_per_selector_cluster = 31.0f; + double selector_codebook_bpp_est = (bits_per_selector_cluster * selector_clusters) / total_texels; + debug_printf("selector_codebook_bpp_est: %f\n", selector_codebook_bpp_est); + const float force_global_sel_pal_bpp_threshold = .15f; + if ((total_texels <= 128.0f*128.0f) && (selector_codebook_bpp_est > force_global_sel_pal_bpp_threshold)) + { + 
m_auto_global_sel_pal = true; + debug_printf("Auto global selector palette enabled\n"); + } + } + + basisu_frontend::params p; + p.m_num_source_blocks = m_total_blocks; + p.m_pSource_blocks = &m_source_blocks[0]; + p.m_max_endpoint_clusters = endpoint_clusters; + p.m_max_selector_clusters = selector_clusters; + p.m_perceptual = m_params.m_perceptual; + p.m_endpoint_refinement = !m_params.m_no_endpoint_refinement; + p.m_debug_stats = m_params.m_debug; + p.m_debug_images = m_params.m_debug_images; + p.m_faster = m_params.m_faster; + + if ((m_params.m_global_sel_pal) || (m_auto_global_sel_pal)) + { + p.m_pGlobal_sel_codebook = m_params.m_pSel_codebook; + p.m_num_global_sel_codebook_pal_bits = m_params.m_global_pal_bits; + p.m_num_global_sel_codebook_mod_bits = m_params.m_global_mod_bits; + p.m_use_hybrid_selector_codebooks = !m_params.m_no_hybrid_sel_cb; + p.m_hybrid_codebook_quality_thresh = m_params.m_hybrid_sel_cb_quality_thresh; + } + + if (!m_frontend.init(p)) + { + error_printf("basisu_frontend::init() failed!\n"); + return false; + } + + m_frontend.compress(); + + if (m_params.m_debug_images) + { + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + char filename[1024]; +#ifdef _WIN32 + sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); +#else + snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); +#endif + m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); + +#ifdef _WIN32 + sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); +#else + snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); +#endif + m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); + } + } + + return true; + } + + bool basis_compressor::extract_frontend_texture_data() + { + 
debug_printf("basis_compressor::extract_frontend_texture_data\n"); + + m_frontend_output_textures.resize(m_slice_descs.size()); + m_best_etc1s_images.resize(m_slice_descs.size()); + m_best_etc1s_images_unpacked.resize(m_slice_descs.size()); + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; + const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + + m_frontend_output_textures[i].init(cETC1, width, height); + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + memcpy(m_frontend_output_textures[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_output_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); + +#if 0 + if (m_params.m_debug_images) + { + char filename[1024]; + sprintf_s(filename, sizeof(filename), "rdo_etc_frontend_%u_", i); + write_etc1_vis_images(m_frontend_output_textures[i], filename); + } +#endif + + m_best_etc1s_images[i].init(cETC1, width, height); + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + memcpy(m_best_etc1s_images[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_etc1s_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); + + m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i]); + } + + return true; + } + + bool basis_compressor::process_backend() + { + debug_printf("basis_compressor::process_backend\n"); + + basisu_backend_params backend_params; + backend_params.m_debug = m_params.m_debug; + backend_params.m_debug_images = m_params.m_debug_images; + backend_params.m_etc1s = true; + if (!m_params.m_no_selector_rdo) + 
backend_params.m_delta_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh; + + backend_params.m_use_global_sel_codebook = (m_frontend.get_params().m_pGlobal_sel_codebook != NULL); + backend_params.m_global_sel_codebook_pal_bits = m_frontend.get_params().m_num_global_sel_codebook_pal_bits; + backend_params.m_global_sel_codebook_mod_bits = m_frontend.get_params().m_num_global_sel_codebook_mod_bits; + backend_params.m_use_hybrid_sel_codebooks = m_frontend.get_params().m_use_hybrid_selector_codebooks; + + m_backend.init(&m_frontend, backend_params, m_slice_descs, m_params.m_pSel_codebook); + uint32_t total_packed_bytes = m_backend.encode(); + + if (!total_packed_bytes) + { + error_printf("basis_compressor::encode() failed!\n"); + return false; + } + + debug_printf("Total packed bytes (estimated): %u\n", total_packed_bytes); + + return true; + } + + bool basis_compressor::create_basis_file_and_transcode() + { + debug_printf("basis_compressor::create_basis_file_and_transcode\n"); + + const basisu_backend_output &encoded_output = m_backend.get_output(); + + if (!m_basis_file.init(encoded_output, 0, 0, m_params.m_y_flip)) + { + error_printf("basis_compressor::write_output_files_and_compute_stats: basisu_backend:init() failed!\n"); + return false; + } + + const uint8_vec &comp_data = m_basis_file.get_compressed_data(); + + m_output_basis_file = comp_data; + + // Verify the compressed data by transcoding it to ETC1/BC1 and validating the CRC's. 
+ basist::basisu_transcoder decoder(m_params.m_pSel_codebook); + if (!decoder.validate_file_checksums(&comp_data[0], (uint32_t)comp_data.size(), true)) + { + error_printf("decoder.validate_file_checksums() failed!\n"); + return false; + } + + m_decoded_output_textures.resize(m_slice_descs.size()); + m_decoded_output_textures_unpacked.resize(m_slice_descs.size()); + + m_decoded_output_textures_bc1.resize(m_slice_descs.size()); + m_decoded_output_textures_unpacked_bc1.resize(m_slice_descs.size()); + + interval_timer tm; + tm.start(); + + if (!decoder.start_decoding(&comp_data[0], (uint32_t)comp_data.size())) + { + error_printf("decoder.start_decoding() failed!\n"); + return false; + } + + debug_printf("basisu_comppressor::start_decoding() took %3.3fms\n", tm.get_elapsed_ms()); + + uint32_t total_orig_pixels = 0; + uint32_t total_texels = 0; + + double total_time_etc1 = 0; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + tm.start(); + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::cETC1, 8)) + { + error_printf("Transcoding failed to ETC1 on slice %u!\n", i); + return false; + } + + total_time_etc1 += tm.get_elapsed_secs(); + + uint32_t image_crc16 = basist::crc16(decoded_texture.get_ptr(), decoded_texture.get_size_in_bytes(), 0); + if (image_crc16 != m_backend.get_output().m_slice_image_crcs[i]) + { + error_printf("Decoded image data CRC check failed on slice %u!\n", i); + // This function returns bool: EXIT_FAILURE is nonzero and would convert to true (success). + return false; + } + debug_printf("Decoded image data CRC check succeeded on slice %i\n", i); + + m_decoded_output_textures[i] = decoded_texture; + + total_orig_pixels += m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height; + total_texels += m_slice_descs[i].m_width * m_slice_descs[i].m_height; + } + + 
tm.start(); + + basist::basisu_transcoder_init(); + + debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms()); + + double total_time_bc1 = 0; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(cBC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + tm.start(); + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::cBC1, 8)) + { + error_printf("Transcoding failed to BC1 on slice %u!\n", i); + return false; + } + + total_time_bc1 += tm.get_elapsed_secs(); + + m_decoded_output_textures_bc1[i] = decoded_texture; + } + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); + m_decoded_output_textures_bc1[i].unpack(m_decoded_output_textures_unpacked_bc1[i]); + } + + debug_printf("Transcoded to ETC1 in %3.3fms, %f texels/sec\n", total_time_etc1 * 1000.0f, total_orig_pixels / total_time_etc1); + + debug_printf("Transcoded to BC1 in %3.3fms, %f texels/sec\n", total_time_bc1 * 1000.0f, total_orig_pixels / total_time_bc1); + + // %u expects an unsigned int, so the size_t is narrowed explicitly (as done for the transcoder calls above). + debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", (uint32_t)comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); + + m_output_blocks.resize(0); + + uint32_t total_orig_texels = 0; + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; + + const uint32_t total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + + assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks); + + memcpy(enlarge_vector(m_output_blocks, total_blocks), m_decoded_output_textures[slice_index].get_ptr(), sizeof(etc_block) * 
total_blocks); + } + + m_basis_file_size = (uint32_t)comp_data.size(); + m_basis_bits_per_texel = (comp_data.size() * 8.0f) / total_orig_texels; + + return true; + } + + bool basis_compressor::write_output_files_and_compute_stats() + { + debug_printf("basis_compressor::write_output_files_and_compute_stats\n"); + + if (m_params.m_write_output_basis_files) + { + const uint8_vec &comp_data = m_basis_file.get_compressed_data(); + + std::string basis_filename(m_params.m_out_filename); + string_remove_extension(basis_filename); + basis_filename += ".basis"; + + if (!write_vec_to_file(basis_filename.c_str(), comp_data)) + { + error_printf("Failed writing output data to file \"%s\"\n", basis_filename.c_str()); + return false; + } + + printf("Wrote output .basis file \"%s\"\n", basis_filename.c_str()); + } + + m_stats.resize(m_slice_descs.size()); + + uint32_t total_orig_texels = 0; + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; + + if (m_params.m_compute_stats) + { + printf("Slice: %u\n", slice_index); + + image_stats &s = m_stats[slice_index]; + + image_metrics em; + + // best possible ETC1S stats + em.calc(m_source_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0); + em.print("Unquantized ETC1S Luma: "); + + s.m_best_luma_psnr = static_cast(em.m_psnr); + s.m_best_luma_ssim = static_cast(em.m_ssim); + + em.calc(m_source_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3); + em.print("Unquantized ETC1S RGB Avg: "); + + s.m_best_rgb_avg_psnr = static_cast(em.m_psnr); + + // .basis ETC1S stats + em.calc(m_source_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0); + em.print(".basis ETC1S Luma: "); + + s.m_basis_etc1_luma_psnr = static_cast(em.m_psnr); + s.m_basis_etc1_luma_ssim = static_cast(em.m_ssim); + + 
em.calc(m_source_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); + em.print(".basis ETC1S RGB Avg: "); + + // Store the RGB average like the "best" and BC1 paths do; otherwise this field stays at its cleared value. + s.m_basis_etc1_rgb_avg_psnr = static_cast<float>(em.m_psnr); + + //debug_printf(".basis ETC1 Luma SSIM per bit/texel*1000: %3.3f\n", 1000.0f * s.m_basis_etc1_luma_ssim / ((m_backend.get_output().get_output_size_estimate() * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + + // .basis BC1 stats + em.calc(m_source_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 0); + em.print(".basis BC1 Luma: "); + + s.m_basis_bc1_luma_psnr = static_cast<float>(em.m_psnr); + s.m_basis_bc1_luma_ssim = static_cast<float>(em.m_ssim); + + em.calc(m_source_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 3); + em.print(".basis BC1 RGB Avg: "); + + s.m_basis_bc1_rgb_avg_psnr = static_cast<float>(em.m_psnr); + } + + if (m_frontend.get_params().m_debug_images) + { + std::string out_basename; + if (m_params.m_out_filename.size()) + string_get_filename(m_params.m_out_filename.c_str(), out_basename); + else if (m_params.m_source_filenames.size()) + string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); + + string_remove_extension(out_basename); + out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); + + // Write "best" ETC1S debug images + { + gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]); + best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file(out_basename + "_best_etc1s.ktx", best_etc1s_gpu_image); + + image best_etc1s_unpacked; + best_etc1s_gpu_image.unpack(best_etc1s_unpacked); + save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked); + } + + // Write decoded ETC1S debug images + { + gpu_image decoded_etc1s(m_decoded_output_textures[slice_index]); + decoded_etc1s.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file(out_basename + 
"_decoded_etc1s.ktx", decoded_etc1s); + + image temp(m_decoded_output_textures_unpacked[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_decoded_etc1s.png", temp); + } + + // Write decoded BC1 debug images + { + gpu_image decoded_bc1(m_decoded_output_textures_bc1[slice_index]); + decoded_bc1.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file(out_basename + "_decoded_bc1.ktx", decoded_bc1); + + image temp(m_decoded_output_textures_unpacked_bc1[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_decoded_bc1.png", temp); + } + } + } + + return true; + } + +} // namespace basisu diff --git a/basisu_comp.h b/basisu_comp.h new file mode 100644 index 0000000..d847fd2 --- /dev/null +++ b/basisu_comp.h @@ -0,0 +1,396 @@ +// basisu_comp.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "basisu_frontend.h" +#include "basisu_backend.h" +#include "basisu_basis_file.h" +#include "transcoder/basisu_global_selector_palette.h" +#include "transcoder/basisu_transcoder.h" + +#define BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION (16384) + +const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; +const int BASISU_DEFAULT_QUALITY = 128; +const float BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH = 2.0f; + +const uint32_t BSISU_MAX_IMAGE_DIMENSION = 16384; +const uint32_t BASISU_QUALITY_MIN = 1; +const uint32_t BASISU_QUALITY_MAX = 255; +const uint32_t BASISU_MAX_ENDPOINT_CLUSTERS = 8192; +const uint32_t BASISU_MAX_SELECTOR_CLUSTERS = 7936; + +const uint32_t BASISU_MAX_SLICES = 0xFFFFFF; + +namespace basisu +{ + // Quality and size statistics for one image; the constructor calls clear(), which zeroes every field. + struct image_stats + { + image_stats() + { + clear(); + } + + void clear() + { + m_filename.clear(); + m_width = 0; + m_height = 0; + + m_basis_etc1_rgb_avg_psnr = 0.0f; + m_basis_etc1_luma_psnr = 0.0f; + m_basis_etc1_luma_ssim = 0.0f; + m_basis_bits_per_texel = 0.0f; + m_basis_orig_size = 0; + m_basis_compressed_size = 0; + + // The BC1 metrics must be reset too, or they are left indeterminate after construction. + m_basis_bc1_rgb_avg_psnr = 0.0f; + m_basis_bc1_luma_psnr = 0.0f; + m_basis_bc1_luma_ssim = 0.0f; + + m_best_rgb_avg_psnr = 0.0f; + m_best_luma_psnr = 0.0f; + m_best_luma_ssim = 0.0f; + m_best_bits_per_texel = 0.0f; + m_best_orig_size = 0; + m_best_compressed_size = 0; + } + + std::string m_filename; + uint32_t m_width; + uint32_t m_height; + + // .basis compressed + float m_basis_etc1_rgb_avg_psnr; + float m_basis_etc1_luma_psnr; + float m_basis_etc1_luma_ssim; + float m_basis_bits_per_texel; + uint64_t m_basis_orig_size; + uint64_t m_basis_compressed_size; + + float m_basis_bc1_rgb_avg_psnr; + float m_basis_bc1_luma_psnr; + float m_basis_bc1_luma_ssim; + + // Normal (highest quality) compressed (ETC1S, not full ETC1) + float m_best_rgb_avg_psnr; + float m_best_luma_psnr; + float m_best_luma_ssim; + float m_best_bits_per_texel; + uint64_t m_best_orig_size; + uint64_t m_best_compressed_size; + }; + + template<bool def> + struct bool_param + { + bool_param() : + m_value(def), + m_changed(false) + { + } + + void clear() + { + 
m_value = def; + m_changed = false; + } + + operator bool() const + { + return m_value; + } + + bool operator= (bool v) + { + m_value = v; + m_changed = true; + return m_value; + } + + bool was_changed() const { return m_changed; } + void set_changed(bool flag) { m_changed = flag; } + + bool m_value; + bool m_changed; + }; + + template + struct param + { + param(T def, T min_v, T max_v) : + m_value(def), + m_def(def), + m_min(min_v), + m_max(max_v), + m_changed(false) + { + } + + void clear() + { + m_value = m_def; + m_changed = false; + } + + operator T() const + { + return m_value; + } + + T operator= (T v) + { + m_value = clamp(v, m_min, m_max); + m_changed = true; + return m_value; + } + + T operator *= (T v) + { + m_value *= v; + m_changed = true; + return m_value; + } + + bool was_changed() const { return m_changed; } + void set_changed(bool flag) { m_changed = flag; } + + T m_value; + T m_def; + T m_min; + T m_max; + bool m_changed; + }; + + struct basis_compressor_params + { + basis_compressor_params() : + m_hybrid_sel_cb_quality_thresh(BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH, 0.0f, 1e+10f), + m_global_pal_bits(8, 0, ETC1_GLOBAL_SELECTOR_CODEBOOK_MAX_PAL_BITS), + m_global_mod_bits(8, 0, basist::etc1_global_palette_entry_modifier::cTotalBits), + m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f), + m_pSel_codebook(NULL), + m_max_endpoint_clusters(512), + m_max_selector_clusters(512), + m_quality_level(-1), + m_mip_scale(1.0f, .000125f, 4.0f), + m_mip_smallest_dimension(1, 1, 16384) + { + clear(); + } + + void clear() + { + m_pSel_codebook = NULL; + + m_source_filenames.clear(); + m_source_alpha_filenames.clear(); + + m_source_images.clear(); + + m_out_filename.clear(); + + m_y_flip.clear(); + m_debug.clear(); + m_debug_images.clear(); + m_global_sel_pal.clear(); + m_no_auto_global_sel_pal.clear(); + m_no_endpoint_refinement.clear(); + m_no_hybrid_sel_cb.clear(); + m_perceptual.clear(); + m_no_selector_rdo.clear(); + 
m_selector_rdo_thresh.clear(); + m_read_source_images.clear(); + m_write_output_basis_files.clear(); + m_faster.clear(); + m_compute_stats.clear(); + m_check_for_alpha.clear(); + m_force_alpha.clear(); + m_seperate_rg_to_color_alpha.clear(); + m_hybrid_sel_cb_quality_thresh.clear(); + m_global_pal_bits.clear(); + m_global_mod_bits.clear(); + + m_mip_gen.clear(); + m_mip_scale.clear(); + m_mip_filter = "kaiser"; + m_mip_scale = 1.0f; + m_mip_srgb.clear(); + m_mip_premultiplied.clear(); + m_mip_renormalize.clear(); + m_mip_wrapping.clear(); + m_mip_smallest_dimension.clear(); + + m_max_endpoint_clusters = 0; + m_max_selector_clusters = 0; + m_quality_level = -1; + } + + // Pointer to the global selector codebook, or nullptr to not use a global selector codebook + const basist::etc1_global_selector_codebook *m_pSel_codebook; + + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. + // Otherwise, the compressor processes the images in m_source_images. + std::vector m_source_filenames; + std::vector m_source_alpha_filenames; + + std::vector m_source_images; + // TODO: Allow caller to supply their own mipmaps + + // Filename of the output basis file + std::string m_out_filename; + + // The params are done this way so we can detect when the user has explictly changed them. + + // Flip images across Y axis + bool_param m_y_flip; + + // Output debug information during compression + bool_param m_debug; + + // m_debug_images is pretty slow + bool_param m_debug_images; + + bool_param m_global_sel_pal; + bool_param m_no_auto_global_sel_pal; + + // Frontend/backend codec parameters + bool_param m_no_endpoint_refinement; + bool_param m_no_hybrid_sel_cb; + + // Use perceptual sRGB colorspace metrics (for normal maps, etc.) 
+ bool_param m_perceptual; + + // Disable selector RDO, for faster compression but larger files + bool_param m_no_selector_rdo; + param m_selector_rdo_thresh; + + // Read source images from m_source_filenames/m_source_alpha_filenames + bool_param m_read_source_images; + + // Write the output basis file to disk using m_out_filename + bool_param m_write_output_basis_files; + + // If true, the compressor disables some optional but slower refinement stages + bool_param m_faster; + + // Compute and display image metrics + bool_param m_compute_stats; + + // Check to see if any input image has an alpha channel, if so then the output basis file will have alpha channels + bool_param m_check_for_alpha; + + // Always put alpha slices in the output basis file, even when the input doesn't have alpha + bool_param m_force_alpha; + + // Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels + bool_param m_seperate_rg_to_color_alpha; + + // Global/hybrid selector codebook parameters + param m_hybrid_sel_cb_quality_thresh; + param m_global_pal_bits; + param m_global_mod_bits; + + // mipmap generation parameters + bool_param m_mip_gen; + param m_mip_scale; + std::string m_mip_filter; + bool_param m_mip_srgb; + bool_param m_mip_premultiplied; // not currently supported + bool_param m_mip_renormalize; + bool_param m_mip_wrapping; + param m_mip_smallest_dimension; + + // Codebook size (quality) control. + // If m_quality_level != -1, it controls the quality level. It ranges from [0,255]. + // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly. 
+ uint32_t m_max_endpoint_clusters; + uint32_t m_max_selector_clusters; + int m_quality_level; + }; + + // Runs the compression stages declared below: configure with init(), then call process(). + class basis_compressor + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basis_compressor); + + public: + basis_compressor(); + + bool init(const basis_compressor_params &params); + + enum error_code + { + cECSuccess = 0, + cECFailedReadingSourceImages, + cECFailedFrontEnd, + cECFailedFontendExtract, + cECFailedBackend, + cECFailedCreateBasisFile, + cECFailedWritingOutput + }; + + error_code process(); + + const uint8_vec &get_output_basis_file() const { return m_output_basis_file; } + const etc_block_vec &get_output_blocks() const { return m_output_blocks; } + + const std::vector<image_stats> &get_stats() const { return m_stats; } + + uint32_t get_basis_file_size() const { return m_basis_file_size; } + double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; } + + private: + basis_compressor_params m_params; + + std::vector<image> m_source_images; + + std::vector<image_stats> m_stats; + + uint32_t m_basis_file_size; + double m_basis_bits_per_texel; + + basisu_backend_slice_desc_vec m_slice_descs; + + uint32_t m_total_blocks; + bool m_auto_global_sel_pal; + + basisu_frontend m_frontend; + pixel_block_vec m_source_blocks; + + std::vector<gpu_image> m_frontend_output_textures; + + std::vector<gpu_image> m_best_etc1s_images; + std::vector<image> m_best_etc1s_images_unpacked; + + basisu_backend m_backend; + + basisu_file m_basis_file; + + std::vector<gpu_image> m_decoded_output_textures; + std::vector<image> m_decoded_output_textures_unpacked; + std::vector<gpu_image> m_decoded_output_textures_bc1; + std::vector<image> m_decoded_output_textures_unpacked_bc1; + + uint8_vec m_output_basis_file; + etc_block_vec m_output_blocks; + + bool m_any_source_image_has_alpha; + + bool read_source_images(); + bool process_frontend(); + bool extract_frontend_texture_data(); + bool process_backend(); + bool create_basis_file_and_transcode(); + bool write_output_files_and_compute_stats(); + bool generate_mipmaps(const image &img, std::vector<image> &mips, bool has_alpha); + }; + +} 
// namespace basisu + diff --git a/basisu_enc.cpp b/basisu_enc.cpp new file mode 100644 index 0000000..3e585ef --- /dev/null +++ b/basisu_enc.cpp @@ -0,0 +1,1128 @@ +// basisu_enc.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_enc.h" +#include "lodepng.h" +#include "basisu_resampler.h" +#include "basisu_resampler_filters.h" +#include "basisu_etc.h" +#include "transcoder/basisu_transcoder.h" + +#if defined(_WIN32) +// For QueryPerformanceCounter/QueryPerformanceFrequency +#define WIN32_LEAN_AND_MEAN +#include +#endif + +namespace basisu +{ + uint64_t interval_timer::g_init_ticks, interval_timer::g_freq; + double interval_timer::g_timer_freq; + + // Encoder library initialization (just call once at startup) + void basisu_encoder_init() + { + pack_etc1_block_init(); + basist::basisu_transcoder_init(); + } + + void error_printf(const char *pFmt, ...) 
+ { + va_list args; + va_start(args, pFmt); + vfprintf(stderr, pFmt, args); + va_end(args); + } + +#if defined(_WIN32) + inline void query_counter(timer_ticks* pTicks) + { + QueryPerformanceCounter(reinterpret_cast(pTicks)); + } + inline void query_counter_frequency(timer_ticks* pTicks) + { + QueryPerformanceFrequency(reinterpret_cast(pTicks)); + } +#elif defined(__APPLE__) +#include + inline void query_counter(timer_ticks* pTicks) + { + struct timeval cur_time; + gettimeofday(&cur_time, NULL); + *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); + } + inline void query_counter_frequency(timer_ticks* pTicks) + { + *pTicks = 1000000; + } +#elif defined(__GNUC__) +#include + inline void query_counter(timer_ticks* pTicks) + { + struct timeval cur_time; + gettimeofday(&cur_time, NULL); + *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); + } + inline void query_counter_frequency(timer_ticks* pTicks) + { + *pTicks = 1000000; + } +#else +#error TODO +#endif + + interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) + { + if (!g_timer_freq) + init(); + } + + void interval_timer::start() + { + query_counter(&m_start_time); + m_started = true; + m_stopped = false; + } + + void interval_timer::stop() + { + assert(m_started); + query_counter(&m_stop_time); + m_stopped = true; + } + + double interval_timer::get_elapsed_secs() const + { + assert(m_started); + if (!m_started) + return 0; + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + query_counter(&stop_time); + + timer_ticks delta = stop_time - m_start_time; + return delta * g_timer_freq; + } + + void interval_timer::init() + { + if (!g_timer_freq) + { + query_counter_frequency(&g_freq); + g_timer_freq = 1.0f / g_freq; + query_counter(&g_init_ticks); + } + } + + timer_ticks interval_timer::get_ticks() + { + if (!g_timer_freq) + init(); + timer_ticks ticks; + query_counter(&ticks); + return ticks - 
g_init_ticks; + } + + double interval_timer::ticks_to_secs(timer_ticks ticks) + { + if (!g_timer_freq) + init(); + return ticks * g_timer_freq; + } + + bool load_png(const char* pFilename, image& img) + { + std::vector buffer; + unsigned err = lodepng::load_file(buffer, std::string(pFilename)); + if (err) + return false; + + std::vector out; + unsigned w = 0, h = 0; + + err = lodepng::decode(out, w, h, &buffer[0], buffer.size()); + if ((err != 0) || (!w) || (!h)) + return false; + + if (out.size() != (w * h * 4)) + return false; + + img.resize(w, h); + + memcpy(img.get_ptr(), &out[0], out.size()); + + return true; + } + + bool save_png(const char* pFilename, const image & img, uint32_t image_save_flags, uint32_t grayscale_comp) + { + if (!img.get_total_pixels()) + return false; + + std::vector out; + unsigned err = 0; + + if (image_save_flags & cImageSaveGrayscale) + { + uint8_vec g_pixels(img.get_width() * img.get_height()); + uint8_t *pDst = &g_pixels[0]; + + for (uint32_t y = 0; y < img.get_height(); y++) + for (uint32_t x = 0; x < img.get_width(); x++) + *pDst++ = img(x, y)[grayscale_comp]; + + err = lodepng::encode(out, (const uint8_t*)& g_pixels[0], img.get_width(), img.get_height(), LCT_GREY, 8); + } + else + { + bool has_alpha = img.has_alpha(); + if ((!has_alpha) || ((image_save_flags & cImageSaveIgnoreAlpha) != 0)) + { + uint8_vec rgb_pixels(img.get_width() * 3 * img.get_height()); + uint8_t *pDst = &rgb_pixels[0]; + + for (uint32_t y = 0; y < img.get_height(); y++) + { + for (uint32_t x = 0; x < img.get_width(); x++) + { + const color_rgba& c = img(x, y); + pDst[0] = c.r; + pDst[1] = c.g; + pDst[2] = c.b; + pDst += 3; + } + } + + err = lodepng::encode(out, (const uint8_t*)& rgb_pixels[0], img.get_width(), img.get_height(), LCT_RGB, 8); + } + else + { + err = lodepng::encode(out, (const uint8_t*)img.get_ptr(), img.get_width(), img.get_height(), LCT_RGBA, 8); + } + } + + err = lodepng::save_file(out, std::string(pFilename)); + if (err) + return false; + + 
return true; + } + + bool read_file_to_vec(const char* pFilename, uint8_vec& data) + { + FILE* pFile = nullptr; +#ifdef _WIN32 + fopen_s(&pFile, pFilename, "rb"); +#else + pFile = fopen(pFilename, "rb"); +#endif + if (!pFile) + return false; + + fseek(pFile, 0, SEEK_END); +#ifdef _WIN32 + int64_t filesize = _ftelli64(pFile); +#else + int64_t filesize = ftello(pFile); +#endif + if (filesize < 0) + { + fclose(pFile); + return false; + } + fseek(pFile, 0, SEEK_SET); + + if (sizeof(size_t) == sizeof(uint32_t)) + { + if (filesize > 0x70000000) + { + // File might be too big to load safely in one alloc + fclose(pFile); + return false; + } + } + + data.resize((size_t)filesize); + + if (filesize) + { + if (fread(&data[0], 1, (size_t)filesize, pFile) != (size_t)filesize) + { + fclose(pFile); + return false; + } + } + + fclose(pFile); + return true; + } + + bool write_data_to_file(const char* pFilename, const void* pData, size_t len) + { + FILE* pFile = nullptr; +#ifdef _WIN32 + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = fopen(pFilename, "wb"); +#endif + if (!pFile) + return false; + + if (len) + { + if (fwrite(pData, 1, len, pFile) != len) + { + fclose(pFile); + return false; + } + } + + return fclose(pFile) != EOF; + } + + float linear_to_srgb(float l) + { + assert(l >= 0.0f && l <= 1.0f); + if (l < .0031308f) + return saturate(l * 12.92f); + else + return saturate(1.055f * powf(l, 1.0f/2.4f) - .055f); + } + + float srgb_to_linear(float s) + { + assert(s >= 0.0f && s <= 1.0f); + if (s < .04045f) + return saturate(s * (1.0f/12.92f)); + else + return saturate(powf((s + .055f) * (1.0f/1.055f), 2.4f)); + } + + bool image_resample(const image &src, image &dst, bool srgb, + const char *pFilter, float filter_scale, + bool wrapping, + uint32_t first_comp, uint32_t num_comps) + { + assert((first_comp + num_comps) <= 4); + + const int cMaxComps = 4; + + const uint32_t src_w = src.get_width(), src_h = src.get_height(); + const uint32_t dst_w = dst.get_width(), dst_h = 
dst.get_height(); + + if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) + { + printf("Image is too large!\n"); + return false; + } + + if (!src_w || !src_h || !dst_w || !dst_h) + return false; + + if ((num_comps < 1) || (num_comps > cMaxComps)) + return false; + + if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) + { + printf("Image is too large!\n"); + return false; + } + + if ((src_w == dst_w) && (src_h == dst_h)) + { + dst = src; + return true; + } + + float srgb_to_linear_table[256]; + if (srgb) + { + for (int i = 0; i < 256; ++i) + srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f/255.0f)); + } + + const int LINEAR_TO_SRGB_TABLE_SIZE = 8192; + uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE]; + + if (srgb) + { + for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i) + linear_to_srgb_table[i] = (uint8_t)clamp((int)(255.0f * linear_to_srgb((float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1))) + .5f), 0, 255); + } + + std::vector samples[cMaxComps]; + Resampler *resamplers[cMaxComps]; + + resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, + wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, + pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); + samples[0].resize(src_w); + + for (uint32_t i = 1; i < num_comps; ++i) + { + resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h, + wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, + pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0); + samples[i].resize(src_w); + } + + uint32_t dst_y = 0; + + for (uint32_t src_y = 0; src_y < src_h; ++src_y) + { + const color_rgba *pSrc = &src(0, src_y); + + // Put source lines into resampler(s) + for (uint32_t x = 0; x < src_w; ++x) + { + for (uint32_t c = 0; c < num_comps; ++c) + { + const uint32_t comp_index = first_comp + c; + const uint32_t v = (*pSrc)[comp_index]; + + if (!srgb || (comp_index == 3)) + samples[c][x] = v * (1.0f / 255.0f); + else + samples[c][x] = srgb_to_linear_table[v]; + } + + pSrc++; + } + + for (uint32_t c = 0; c < num_comps; ++c) + { + if (!resamplers[c]->put_line(&samples[c][0])) + { + for (uint32_t i = 0; i < num_comps; i++) + delete resamplers[i]; + return false; + } + } + + // Now retrieve any output lines + for (;;) + { + uint32_t c; + for (c = 0; c < num_comps; ++c) + { + const uint32_t comp_index = first_comp + c; + + const float *pOutput_samples = resamplers[c]->get_line(); + if (!pOutput_samples) + break; + + const bool linear_flag = !srgb || (comp_index == 3); + + color_rgba *pDst = &dst(0, dst_y); + + for (uint32_t x = 0; x < dst_w; x++) + { + // TODO: Add dithering + if (linear_flag) + { + int j = (int)(255.0f * pOutput_samples[x] + .5f); + (*pDst)[comp_index] = (uint8_t)clamp(j, 0, 255); + } + else + { + int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - 1) * pOutput_samples[x] + .5f); + (*pDst)[comp_index] = linear_to_srgb_table[clamp(j, 0, LINEAR_TO_SRGB_TABLE_SIZE - 1)]; + } + + pDst++; + } + } + if (c < num_comps) + break; + + ++dst_y; + } + } + + for (uint32_t i = 0; i < num_comps; ++i) + delete resamplers[i]; + + return true; + } + + void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms) + { + // See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen + if (!num_syms) + return; + + if (1 == 
num_syms) + { + A[0].m_key = 1; + return; + } + + A[0].m_key += A[1].m_key; + + int s = 2, r = 0, next; + for (next = 1; next < (num_syms - 1); ++next) + { + if ((s >= num_syms) || (A[r].m_key < A[s].m_key)) + { + A[next].m_key = A[r].m_key; + A[r].m_key = static_cast(next); + ++r; + } + else + { + A[next].m_key = A[s].m_key; + ++s; + } + + if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key)) + { + A[next].m_key = static_cast(A[next].m_key + A[r].m_key); + A[r].m_key = static_cast(next); + ++r; + } + else + { + A[next].m_key = static_cast(A[next].m_key + A[s].m_key); + ++s; + } + } + A[num_syms - 2].m_key = 0; + + for (next = num_syms - 3; next >= 0; --next) + { + A[next].m_key = 1 + A[A[next].m_key].m_key; + } + + int num_avail = 1, num_used = 0, depth = 0; + r = num_syms - 2; + next = num_syms - 1; + while (num_avail > 0) + { + for ( ; (r >= 0) && ((int)A[r].m_key == depth); ++num_used, --r ) + ; + + for ( ; num_avail > num_used; --next, --num_avail) + A[next].m_key = static_cast(depth); + + num_avail = 2 * num_used; + num_used = 0; + ++depth; + } + } + + void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) + { + int i; + uint32_t total = 0; + if (code_list_len <= 1) + return; + + for (i = max_code_size + 1; i <= cHuffmanMaxSupportedInternalCodeSize; i++) + pNum_codes[max_code_size] += pNum_codes[i]; + + for (i = max_code_size; i > 0; i--) + total += (((uint32_t)pNum_codes[i]) << (max_code_size - i)); + + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) + { + if (pNum_codes[i]) + { + pNum_codes[i]--; + pNum_codes[i + 1] += 2; + break; + } + } + + total--; + } + } + + sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1) + { + uint32_t total_passes = 2, pass_shift, pass, i, hist[256 * 2]; + sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; + + clear_obj(hist); + + for (i = 0; i < num_syms; i++) 
+ { + uint32_t freq = pSyms0[i].m_key; + hist[freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) + total_passes--; + + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const uint32_t *pHist = &hist[pass << 8]; + uint32_t offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + } + + for (i = 0; i < num_syms; i++) + pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + + sym_freq *t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } + + return pCur_syms; + } + + bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size) + { + if (max_code_size > cHuffmanMaxSupportedCodeSize) + return false; + if ((!num_syms) || (num_syms > cHuffmanMaxSyms)) + return false; + + uint32_t total_used_syms = 0; + for (uint32_t i = 0; i < num_syms; i++) + if (pFreq[i]) + total_used_syms++; + + if (!total_used_syms) + return false; + + std::vector sym_freq0(total_used_syms), sym_freq1(total_used_syms); + for (uint32_t i = 0, j = 0; i < num_syms; i++) + { + if (pFreq[i]) + { + sym_freq0[j].m_key = pFreq[i]; + sym_freq0[j++].m_sym_index = static_cast(i); + } + } + + sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0[0], &sym_freq1[0]); + + canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms); + + int num_codes[cHuffmanMaxSupportedInternalCodeSize + 1]; + clear_obj(num_codes); + + for (uint32_t i = 0; i < total_used_syms; i++) + { + if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize) + return false; + + num_codes[pSym_freq[i].m_key]++; + } + + canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size); + + m_code_sizes.resize(0); + m_code_sizes.resize(num_syms); + + m_codes.resize(0); + m_codes.resize(num_syms); + + for (uint32_t i = 1, j = total_used_syms; i 
<= max_code_size; i++) + for (uint32_t l = num_codes[i]; l > 0; l--) + m_code_sizes[pSym_freq[--j].m_sym_index] = static_cast(i); + + uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + 1]; + + next_code[1] = 0; + for (uint32_t j = 0, i = 2; i <= max_code_size; i++) + next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (uint32_t i = 0; i < num_syms; i++) + { + uint32_t rev_code = 0, code, code_size; + if ((code_size = m_code_sizes[i]) == 0) + continue; + if (code_size > cHuffmanMaxSupportedInternalCodeSize) + return false; + code = next_code[code_size]++; + for (uint32_t l = code_size; l > 0; l--, code >>= 1) + rev_code = (rev_code << 1) | (code & 1); + m_codes[i] = static_cast(rev_code); + } + + return true; + } + + /* Initializes the table from 32-bit symbol frequencies by narrowing them to 16 bits and delegating to the uint16_t overload. Returns false when num_syms is zero or exceeds cHuffmanMaxSyms; otherwise returns the delegate's result. max_code_size is passed through unchanged. */ + bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size) + { + if ((!num_syms) || (num_syms > cHuffmanMaxSyms)) + return false; + + uint16_vec sym_freq(num_syms); + + uint32_t max_freq = 0; + for (uint32_t i = 0; i < num_syms; i++) + max_freq = maximum(max_freq, pSym_freq[i]); + + if (max_freq < UINT16_MAX) + { + /* Every frequency already fits in 16 bits; copy unchanged. */ + for (uint32_t i = 0; i < num_syms; i++) + sym_freq[i] = static_cast<uint16_t>(pSym_freq[i]); + } + else + { + /* Rescale so max_freq maps to 65534. The product is formed in 64 bits because pSym_freq[i] * 65534U wraps a uint32_t once a frequency exceeds 65537. The quotient is at most 65534 and is floored at 1 so a nonzero frequency never scales to zero. */ + for (uint32_t i = 0; i < num_syms; i++) + if (pSym_freq[i]) + sym_freq[i] = static_cast<uint16_t>(maximum<uint32_t>((uint32_t)(((uint64_t)pSym_freq[i] * 65534U + (max_freq >> 1)) / max_freq), 1)); + } + + return init(num_syms, &sym_freq[0], max_code_size); + } + + void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len) + { + if (run_size) + { + if (run_size < cHuffmanSmallRepeatSizeMin) + { + while (run_size--) + syms.push_back(static_cast(len)); + } + else if (run_size <= cHuffmanSmallRepeatSizeMax) + { + syms.push_back(static_cast(cHuffmanSmallRepeatCode | ((run_size - cHuffmanSmallRepeatSizeMin) << 6))); + } + else + { + assert((run_size >= cHuffmanBigRepeatSizeMin) && (run_size <= cHuffmanBigRepeatSizeMax)); + syms.push_back(static_cast(cHuffmanBigRepeatCode | ((run_size -
cHuffmanBigRepeatSizeMin) << 6))); + } + } + + run_size = 0; + } + + void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size) + { + if (run_size) + { + if (run_size < cHuffmanSmallZeroRunSizeMin) + { + while (run_size--) + syms.push_back(0); + } + else if (run_size <= cHuffmanSmallZeroRunSizeMax) + { + syms.push_back(static_cast(cHuffmanSmallZeroRunCode | ((run_size - cHuffmanSmallZeroRunSizeMin) << 6))); + } + else + { + assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax)); + syms.push_back(static_cast(cHuffmanBigZeroRunCode | ((run_size - cHuffmanBigZeroRunSizeMin) << 6))); + } + } + + run_size = 0; + } + + uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab) + { + const uint64_t start_bits = m_total_bits; + + const uint8_vec &code_sizes = tab.get_code_sizes(); + + uint32_t total_used = tab.get_total_used_codes(); + put_bits(total_used, cHuffmanMaxSymsLog2); + + if (!total_used) + return 0; + + uint16_vec syms; + syms.reserve(total_used + 16); + + uint32_t prev_code_len = UINT_MAX, zero_run_size = 0, nonzero_run_size = 0; + + for (uint32_t i = 0; i <= total_used; ++i) + { + const uint32_t code_len = (i == total_used) ? 
0xFF : code_sizes[i]; + assert((code_len == 0xFF) || (code_len <= 16)); + + if (code_len) + { + end_zero_run(syms, zero_run_size); + + if (code_len != prev_code_len) + { + end_nonzero_run(syms, nonzero_run_size, prev_code_len); + if (code_len != 0xFF) + syms.push_back(static_cast(code_len)); + } + else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax) + end_nonzero_run(syms, nonzero_run_size, prev_code_len); + } + else + { + end_nonzero_run(syms, nonzero_run_size, prev_code_len); + + if (++zero_run_size == cHuffmanBigZeroRunSizeMax) + end_zero_run(syms, zero_run_size); + } + + prev_code_len = code_len; + } + + histogram h(cHuffmanTotalCodelengthCodes); + for (uint32_t i = 0; i < syms.size(); i++) + h.inc(syms[i] & 63); + + huffman_encoding_table ct; + if (!ct.init(h, 7)) + return 0; + + assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes); + + uint32_t total_codelength_codes; + for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > 0; total_codelength_codes--) + if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - 1]]) + break; + + assert(total_codelength_codes); + + put_bits(total_codelength_codes, 5); + for (uint32_t i = 0; i < total_codelength_codes; i++) + put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], 3); + + for (uint32_t i = 0; i < syms.size(); ++i) + { + const uint32_t l = syms[i] & 63, e = syms[i] >> 6; + + put_code(l, ct); + + if (l == cHuffmanSmallZeroRunCode) + put_bits(e, cHuffmanSmallZeroRunExtraBits); + else if (l == cHuffmanBigZeroRunCode) + put_bits(e, cHuffmanBigZeroRunExtraBits); + else if (l == cHuffmanSmallRepeatCode) + put_bits(e, cHuffmanSmallRepeatExtraBits); + else if (l == cHuffmanBigRepeatCode) + put_bits(e, cHuffmanBigRepeatExtraBits); + } + + return (uint32_t)(m_total_bits - start_bits); + } + + bool huffman_test(int rand_seed) + { + histogram h(19); + + // Feed in a fibonacci sequence to force large codesizes + h[0] += 1; h[1] 
+= 1; h[2] += 2; h[3] += 3; + h[4] += 5; h[5] += 8; h[6] += 13; h[7] += 21; + h[8] += 34; h[9] += 55; h[10] += 89; h[11] += 144; + h[12] += 233; h[13] += 377; h[14] += 610; h[15] += 987; + h[16] += 1597; h[17] += 2584; h[18] += 4181; + + huffman_encoding_table etab; + etab.init(h, 16); + + { + bitwise_coder c; + c.init(1024); + + c.emit_huffman_table(etab); + for (int i = 0; i < 19; i++) + c.put_code(i, etab); + + c.flush(); + + basist::bitwise_decoder d; + d.init(&c.get_bytes()[0], static_cast(c.get_bytes().size())); + + basist::huffman_decoding_table dtab; + bool success = d.read_huffman_table(dtab); + if (!success) + { + assert(0); + printf("Failure 5\n"); + return false; + } + + for (uint32_t i = 0; i < 19; i++) + { + uint32_t s = d.decode_huffman(dtab); + if (s != i) + { + assert(0); + printf("Failure 5\n"); + return false; + } + } + } + + basisu::rand r; + r.seed(rand_seed); + + for (int iter = 0; iter < 500000; iter++) + { + printf("%u\n", iter); + + uint32_t max_sym = r.irand(0, 8193); + uint32_t num_codes = r.irand(1, 10000); + uint_vec syms(num_codes); + + for (uint32_t i = 0; i < num_codes; i++) + { + if (r.bit()) + syms[i] = r.irand(0, max_sym); + else + { + int s = (int)(r.gaussian((float)max_sym / 2, (float)maximum(1, max_sym / 2)) + .5f); + s = basisu::clamp(s, 0, max_sym); + + syms[i] = s; + } + + } + + histogram h1(max_sym + 1); + for (uint32_t i = 0; i < num_codes; i++) + h1[syms[i]]++; + + huffman_encoding_table etab2; + if (!etab2.init(h1, 16)) + { + assert(0); + printf("Failed 0\n"); + return false; + } + + bitwise_coder c; + c.init(1024); + + c.emit_huffman_table(etab2); + + for (uint32_t i = 0; i < num_codes; i++) + c.put_code(syms[i], etab2); + + c.flush(); + + basist::bitwise_decoder d; + d.init(&c.get_bytes()[0], (uint32_t)c.get_bytes().size()); + + basist::huffman_decoding_table dtab; + bool success = d.read_huffman_table(dtab); + if (!success) + { + assert(0); + printf("Failed 2\n"); + return false; + } + + for (uint32_t i = 0; i < 
num_codes; i++) + { + uint32_t s = d.decode_huffman(dtab); + if (s != syms[i]) + { + assert(0); + printf("Failed 4\n"); + return false; + } + } + + } + return true; + } + + void palette_index_reorderer::init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) + { + assert((num_syms > 0) && (num_indices > 0)); + assert((dist_func_weight >= 0.0f) && (dist_func_weight <= 1.0f)); + + clear(); + + m_remap_table.resize(num_syms); + m_entries_picked.reserve(num_syms); + m_total_count_to_picked.resize(num_syms); + + if (num_indices <= 1) + return; + + prepare_hist(num_syms, num_indices, pIndices); + find_initial(num_syms); + + while (m_entries_to_do.size()) + { + // Find the best entry to move into the picked list. + uint32_t best_entry; + double best_count; + find_next_entry(best_entry, best_count, pDist_func, pCtx, dist_func_weight); + + // We now have chosen an entry to place in the picked list, now determine which side it goes on. 
+ const uint32_t entry_to_move = m_entries_to_do[best_entry]; + + float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight); + + // Put entry_to_move either on the "left" or "right" side of the picked entries + if (side <= 0) + m_entries_picked.push_back(entry_to_move); + else + m_entries_picked.insert(m_entries_picked.begin(), entry_to_move); + + // Erase best_entry from the todo list + m_entries_to_do.erase(m_entries_to_do.begin() + best_entry); + + // We've just moved best_entry to the picked list, so now we need to update m_total_count_to_picked[] to factor the additional count to best_entry + for (uint32_t i = 0; i < m_entries_to_do.size(); i++) + m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], entry_to_move, num_syms); + } + + for (uint32_t i = 0; i < num_syms; i++) + m_remap_table[m_entries_picked[i]] = i; + } + + void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices) + { + m_hist.resize(0); + m_hist.resize(num_syms * num_syms); + + for (uint32_t i = 0; i < num_indices; i++) + { + const uint32_t idx = pIndices[i]; + inc_hist(idx, (i < (num_indices - 1)) ? pIndices[i + 1] : -1, num_syms); + inc_hist(idx, (i > 0) ? 
pIndices[i - 1] : -1, num_syms); + } + } + + void palette_index_reorderer::find_initial(uint32_t num_syms) + { + uint32_t max_count = 0, max_index = 0; + for (uint32_t i = 0; i < num_syms * num_syms; i++) + if (m_hist[i] > max_count) + max_count = m_hist[i], max_index = i; + + uint32_t a = max_index / num_syms, b = max_index % num_syms; + + m_entries_picked.push_back(a); + m_entries_picked.push_back(b); + + for (uint32_t i = 0; i < num_syms; i++) + if ((i != b) && (i != a)) + m_entries_to_do.push_back(i); + + for (uint32_t i = 0; i < m_entries_to_do.size(); i++) + for (uint32_t j = 0; j < m_entries_picked.size(); j++) + m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], m_entries_picked[j], num_syms); + } + + void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) + { + best_entry = 0; + best_count = 0; + + for (uint32_t i = 0; i < m_entries_to_do.size(); i++) + { + const uint32_t u = m_entries_to_do[i]; + double total_count = m_total_count_to_picked[u]; + + if (pDist_func) + { + float w = maximum((*pDist_func)(u, m_entries_picked.front(), pCtx), (*pDist_func)(u, m_entries_picked.back(), pCtx)); + assert((w >= 0.0f) && (w <= 1.0f)); + total_count = (total_count + 1.0f) * lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, w); + } + + if (total_count <= best_count) + continue; + + best_entry = i; + best_count = total_count; + } + } + + float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) + { + float which_side = 0; + + int l_count = 0, r_count = 0; + for (uint32_t j = 0; j < m_entries_picked.size(); j++) + { + const int count = get_hist(entry_to_move, m_entries_picked[j], num_syms), r = ((int)m_entries_picked.size() + 1 - 2 * (j + 1)); + which_side += static_cast(r * count); + if (r >= 0) + l_count += r * count; + else + r_count += -r * count; + } + 
+ if (pDist_func) + { + float w_left = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx)); + float w_right = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx)); + which_side = w_left * l_count - w_right * r_count; + } + return which_side; + } + + void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error) + { + assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); + + const uint32_t width = std::min(a.get_width(), b.get_width()); + const uint32_t height = std::min(a.get_height(), b.get_height()); + + double hist[256]; + clear_obj(hist); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_rgba &ca = a(x, y), &cb = b(x, y); + + if (total_chans) + { + for (uint32_t c = 0; c < total_chans; c++) + hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++; + } + else + { + hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++; + } + } + } + + m_max = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint32_t i = 0; i < 256; i++) + { + if (hist[i]) + { + m_max = std::max(m_max, (float)i); + double v = i * hist[i]; + sum += v; + sum2 += i * v; + } + } + + double total_values = (double)width * (double)height; + if (avg_comp_error) + total_values *= (double)clamp(total_chans, 1, 4); + + m_mean = (float)clamp(sum / total_values, 0.0f, 255.0); + m_mean_squared = (float)clamp(sum2 / total_values, 0.0f, 255.0 * 255.0); + m_rms = (float)sqrt(m_mean_squared); + m_psnr = m_rms ? (float)clamp(log10(255.0 / m_rms) * 20.0, 0.0f, 300.0f) : 1e+10f; + } + +} // namespace basisu diff --git a/basisu_enc.h b/basisu_enc.h new file mode 100644 index 0000000..4e558ea --- /dev/null +++ b/basisu_enc.h @@ -0,0 +1,2244 @@ +// basisu_enc.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "transcoder/basisu.h" +#include "basisu_enc.h" +#include "transcoder/basisu_transcoder_internal.h" + +#ifndef _WIN32 +#include +#endif + +namespace basisu +{ + // Encoder library initialization + void basisu_encoder_init(); + + void error_printf(const char *pFmt, ...); + + // Linear algebra + + template + class vec + { + protected: + T m_v[N]; + + public: + enum { num_elements = N }; + + inline vec() { } + inline vec(eZero) { set_zero(); } + + explicit inline vec(T val) { set(val); } + inline vec(T v0, T v1) { set(v0, v1); } + inline vec(T v0, T v1, T v2) { set(v0, v1, v2); } + inline vec(T v0, T v1, T v2, T v3) { set(v0, v1, v2, v3); } + inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; } + template inline vec(const vec &other) { set(other); } + + inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; } + inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; } + + inline T getX() const { return m_v[0]; } + inline T getY() const { static_assert(N >= 2, "N too small"); return m_v[1]; } + inline T getZ() const { static_assert(N >= 3, "N too small"); return m_v[2]; } + inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; } + + inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false; return true; } + inline bool operator<(const vec &rhs) const 
{ for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; } + + inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; } + + template + inline vec &set(const vec &other) + { + uint32_t i; + if (static_cast(&other) == static_cast(this)) + return *this; + const uint32_t m = minimum(OtherN, N); + for (i = 0; i < m; i++) + m_v[i] = static_cast(other[i]); + for (; i < N; i++) + m_v[i] = 0; + return *this; + } + + inline vec &set_component(uint32_t index, T val) { assert(index < N); m_v[index] = val; return *this; } + inline vec &set(T val) { for (uint32_t i = 0; i < N; i++) m_v[i] = val; return *this; } + inline void clear_elements(uint32_t s, uint32_t e) { assert(e <= N); for (uint32_t i = s; i < e; i++) m_v[i] = 0; } + + inline vec &set(T v0, T v1) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + clear_elements(2, N); + } + return *this; + } + + inline vec &set(T v0, T v1, T v2) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + if (N >= 3) + { + m_v[2] = v2; + clear_elements(3, N); + } + } + return *this; + } + + /* Assigns v0..v3 to the first four components (only as many as N provides) and zeroes every component after them. Returns *this. */ + inline vec &set(T v0, T v1, T v2, T v3) + { + m_v[0] = v0; + if (N >= 2) + { + m_v[1] = v1; + if (N >= 3) + { + m_v[2] = v2; + + if (N >= 4) + { + m_v[3] = v3; + /* Slots 0..3 are assigned above, so zeroing must start at index 4; starting at 5 left m_v[4] unwritten when N > 4. */ + clear_elements(4, N); + } + } + } + return *this; + } + + inline vec &operator=(const vec &rhs) { if (this != &rhs) for (uint32_t i = 0; i < N; i++) m_v[i] = rhs.m_v[i]; return *this; } + template inline vec &operator=(const vec &rhs) { set(rhs); return *this; } + + inline const T *get_ptr() const { return reinterpret_cast(&m_v[0]); } + inline T *get_ptr() { return reinterpret_cast(&m_v[0]); } + + inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } + inline vec operator+ () const { return *this; } + inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } + inline vec &operator-=
(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] -= other.m_v[i]; return *this; } + inline vec &operator/= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] /= other.m_v[i]; return *this; } + inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } + inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; } + inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } + + friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } + friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } + friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } + friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } + friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } + friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } + + static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } + + inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } + + inline T norm() const { return dot_product(*this, *this); } + inline T length() const { return sqrt(norm()); } + + inline T squared_distance(const vec &other) const { T d2 = 0; for (uint32_t i = 0; i < N; i++) { T d = m_v[i] - other.m_v[i]; d2 += d * d; } return d2; } + + inline T distance(const vec &other) const { 
/* Euclidean distance is the square root of squared_distance(), mirroring length() = sqrt(norm()); previously this returned the squared value. */ return sqrt(squared_distance(other)); } + + inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; } + + inline vec &clamp(T l, T h) + { + for (uint32_t i = 0; i < N; i++) + m_v[i] = basisu::clamp(m_v[i], l, h); + return *this; + } + }; + + typedef vec<4, double> vec4D; + typedef vec<3, double> vec3D; + typedef vec<2, double> vec2D; + typedef vec<1, double> vec1D; + + typedef vec<4, float> vec4F; + typedef vec<3, float> vec3F; + typedef vec<2, float> vec2F; + typedef vec<1, float> vec1F; + + template + class matrix + { + public: + typedef vec col_vec; + typedef vec row_vec; + + typedef T scalar_type; + + enum { rows = Rows, cols = Cols }; + + protected: + row_vec m_r[Rows]; + + public: + inline matrix() {} + inline matrix(eZero) { set_zero(); } + inline matrix(const matrix &other) { for (uint32_t i = 0; i < Rows; i++) m_r[i] = other.m_r[i]; } + inline matrix &operator=(const matrix &rhs) { if (this != &rhs) for (uint32_t i = 0; i < Rows; i++) m_r[i] = rhs.m_r[i]; return *this; } + + inline T operator()(uint32_t r, uint32_t c) const { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } + inline T &operator()(uint32_t r, uint32_t c) { assert((r < Rows) && (c < Cols)); return m_r[r][c]; } + + inline const row_vec &operator[](uint32_t r) const { assert(r < Rows); return m_r[r]; } + inline row_vec &operator[](uint32_t r) { assert(r < Rows); return m_r[r]; } + + inline matrix &set_zero() + { + for (uint32_t i = 0; i < Rows; i++) + m_r[i].set_zero(); + return *this; + } + + inline matrix &set_identity() + { + for (uint32_t i = 0; i < Rows; i++) + { + m_r[i].set_zero(); + if (i < Cols) + m_r[i][i] = 1.0f; + } + return *this; + } + }; + + template + inline VectorType compute_pca_from_covar(matrix &cmatrix) + { + VectorType axis; + if (N == 1) + axis.set(1.0f); + else + { + for (uint32_t i = 0; i < N; i++) + axis[i] = lerp(.75f, 1.25f, i * (1.0f / maximum(N - 1, 1))); + } + + VectorType prev_axis(axis); + + // Power iterations
+ for (uint32_t power_iter = 0; power_iter < 8; power_iter++) + { + VectorType trial_axis; + double max_sum = 0; + + for (uint32_t i = 0; i < N; i++) + { + double sum = 0; + for (uint32_t j = 0; j < N; j++) + sum += cmatrix[i][j] * axis[j]; + + trial_axis[i] = static_cast(sum); + + max_sum = maximum(fabs(sum), max_sum); + } + + if (max_sum != 0.0f) + trial_axis *= static_cast(1.0f / max_sum); + + VectorType delta_axis(prev_axis - trial_axis); + + prev_axis = axis; + axis = trial_axis; + + if (delta_axis.norm() < .0024f) + break; + } + + return axis.normalize_in_place(); + } + + template inline void indirect_sort(uint32_t num_indices, uint32_t* pIndices, const T* pKeys) + { + for (uint32_t i = 0; i < num_indices; i++) + pIndices[i] = i; + + std::sort( + pIndices, + pIndices + num_indices, + [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; } + ); + } + + // Simple 32-bit color class + + class color_rgba_i16 + { + public: + union + { + int16_t m_comps[4]; + + struct + { + int16_t r; + int16_t g; + int16_t b; + int16_t a; + }; + }; + + inline color_rgba_i16() + { + static_assert(sizeof(*this) == sizeof(int16_t)*4, "sizeof(*this) == sizeof(int16_t)*4"); + } + + inline color_rgba_i16(int sr, int sg, int sb, int sa) + { + set(sr, sg, sb, sa); + } + + inline color_rgba_i16 &set(int sr, int sg, int sb, int sa) + { + m_comps[0] = (int16_t)clamp(sr, INT16_MIN, INT16_MAX); + m_comps[1] = (int16_t)clamp(sg, INT16_MIN, INT16_MAX); + m_comps[2] = (int16_t)clamp(sb, INT16_MIN, INT16_MAX); + m_comps[3] = (int16_t)clamp(sa, INT16_MIN, INT16_MAX); + return *this; + } + }; + + class color_rgba + { + public: + union + { + uint8_t m_comps[4]; + + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; + + inline color_rgba() + { + static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + } + + inline color_rgba(int y) + { + set(y); + } + + inline color_rgba(int y, int na) + { + set(y, na); + } + + inline color_rgba(int sr, int sg, int sb, int sa) + { + 
set(sr, sg, sb, sa); + } + + inline color_rgba(eNoClamp, int sr, int sg, int sb, int sa) + { + set_noclamp_rgba((uint8_t)sr, (uint8_t)sg, (uint8_t)sb, (uint8_t)sa); + } + + inline color_rgba& set_noclamp_y(int y) + { + m_comps[0] = (uint8_t)y; + m_comps[1] = (uint8_t)y; + m_comps[2] = (uint8_t)y; + m_comps[3] = (uint8_t)255; + return *this; + } + + inline color_rgba &set_noclamp_rgba(int sr, int sg, int sb, int sa) + { + m_comps[0] = (uint8_t)sr; + m_comps[1] = (uint8_t)sg; + m_comps[2] = (uint8_t)sb; + m_comps[3] = (uint8_t)sa; + return *this; + } + + inline color_rgba &set(int y) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = 255; + return *this; + } + + inline color_rgba &set(int y, int na) + { + m_comps[0] = static_cast(clamp(y, 0, 255)); + m_comps[1] = m_comps[0]; + m_comps[2] = m_comps[0]; + m_comps[3] = static_cast(clamp(na, 0, 255)); + return *this; + } + + inline color_rgba &set(int sr, int sg, int sb, int sa) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + m_comps[3] = static_cast(clamp(sa, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(int sr, int sg, int sb) + { + m_comps[0] = static_cast(clamp(sr, 0, 255)); + m_comps[1] = static_cast(clamp(sg, 0, 255)); + m_comps[2] = static_cast(clamp(sb, 0, 255)); + return *this; + } + + inline color_rgba &set_rgb(const color_rgba &other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } + inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } + + inline void clear() + { + m_comps[0] = 0; + m_comps[1] = 0; + m_comps[2] = 0; + m_comps[3] = 0; + } + + inline bool operator== (const color_rgba &rhs) const + { + if (m_comps[0] != rhs.m_comps[0]) return false; + if (m_comps[1] != 
rhs.m_comps[1]) return false; + if (m_comps[2] != rhs.m_comps[2]) return false; + if (m_comps[3] != rhs.m_comps[3]) return false; + return true; + } + + inline bool operator!= (const color_rgba &rhs) const + { + return !(*this == rhs); + } + + inline bool operator<(const color_rgba &rhs) const + { + for (int i = 0; i < 4; i++) + { + if (m_comps[i] < rhs.m_comps[i]) + return true; + else if (m_comps[i] != rhs.m_comps[i]) + return false; + } + return false; + } + + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + }; + + typedef std::vector color_rgba_vec; + + const color_rgba g_black_color(0, 0, 0, 255); + const color_rgba g_white_color(255, 255, 255, 255); + + inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1) + { + int dr = r0 - r1, dg = g0 - g1, db = b0 - b1; + return dr * dr + dg * dg + db * db; + } + + inline int color_distance(int r0, int g0, int b0, int a0, int r1, int g1, int b1, int a1) + { + int dr = r0 - r1, dg = g0 - g1, db = b0 - b1, da = a0 - a1; + return dr * dr + dg * dg + db * db + da * da; + } + + inline int color_distance(const color_rgba &c0, const color_rgba &c1, bool alpha) + { + if (alpha) + return color_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); + else + return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); + } + + // TODO: Allow user to control channel weightings. 
+ inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha) + { + if (perceptual) + { + const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; + const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; + + const float cr1 = e1.r - l1; + const float cr2 = e2.r - l2; + + const float cb1 = e1.b - l1; + const float cb2 = e2.b - l2; + + const float dl = l1 - l2; + const float dcr = cr1 - cr2; + const float dcb = cb1 - cb2; + + uint32_t d = static_cast(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb); + + if (alpha) + { + int da = static_cast(e1.a) - static_cast(e2.a); + d += static_cast(128.0f*da*da); + } + + return d; + } + else + return color_distance(e1, e2, alpha); + } + + // String helpers + + inline int string_find_right(const std::string& filename, char c) + { + size_t result = filename.find_last_of(c); + return (result == std::string::npos) ? -1 : (int)result; + } + + inline std::string string_get_extension(const std::string &filename) + { + int sep = -1; +#ifdef _WIN32 + sep = string_find_right(filename, '\\'); +#endif + if (sep < 0) + sep = string_find_right(filename, '/'); + + int dot = string_find_right(filename, '.'); + if (dot <= sep) + return ""; + + std::string result(filename); + result.erase(0, dot + 1); + + return result; + } + + inline bool string_remove_extension(std::string &filename) + { + int sep = -1; +#ifdef _WIN32 + sep = string_find_right(filename, '\\'); +#endif + if (sep < 0) + sep = string_find_right(filename, '/'); + + int dot = string_find_right(filename, '.'); + if ((dot < sep) || (dot < 0)) + return false; + + filename.resize(dot); + + return true; + } + + inline std::string string_format(const char* pFmt, ...) 
+ { + char buf[2048]; + + va_list args; + va_start(args, pFmt); +#ifdef _WIN32 + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + vsnprintf(buf, sizeof(buf), pFmt, args); +#endif + va_end(args); + + return std::string(buf); + } + + inline std::string string_tolower(const std::string& s) + { + std::string result(s); + for (size_t i = 0; i < result.size(); i++) + result[i] = (char)tolower((int)result[i]); + return result; + } + + inline char *strcpy_safe(char *pDst, size_t dst_len, const char *pSrc) + { + assert(pDst && pSrc && dst_len); + if (!dst_len) + return pDst; + + const size_t src_len = strlen(pSrc); + const size_t src_len_plus_terminator = src_len + 1; + + if (src_len_plus_terminator <= dst_len) + memcpy(pDst, pSrc, src_len_plus_terminator); + else + { + if (dst_len > 1) + memcpy(pDst, pSrc, dst_len - 1); + pDst[dst_len - 1] = '\0'; + } + + return pDst; + } + + inline bool string_ends_with(const std::string& s, char c) + { + return (s.size() != 0) && (s.back() == c); + } + + inline bool string_split_path(const char *p, std::string *pDrive, std::string *pDir, std::string *pFilename, std::string *pExt) + { +#ifdef _MSC_VER + char drive_buf[_MAX_DRIVE] = { 0 }; + char dir_buf[_MAX_DIR] = { 0 }; + char fname_buf[_MAX_FNAME] = { 0 }; + char ext_buf[_MAX_EXT] = { 0 }; + + errno_t error = _splitpath_s(p, + pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, + pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, + pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, + pExt ? ext_buf : NULL, pExt ? 
_MAX_EXT : 0); + if (error != 0) + return false; + + if (pDrive) *pDrive = drive_buf; + if (pDir) *pDir = dir_buf; + if (pFilename) *pFilename = fname_buf; + if (pExt) *pExt = ext_buf; + return true; +#else + char dirtmp[1024], nametmp[1024]; + strcpy_safe(dirtmp, sizeof(dirtmp), p); + strcpy_safe(nametmp, sizeof(nametmp), p); + + if (pDrive) + pDrive->resize(0); + + const char *pDirName = dirname(dirtmp); + const char* pBaseName = basename(nametmp); + if ((!pDirName) || (!pBaseName)) + return false; + + if (pDir) + { + *pDir = pDirName; + if ((pDir->size()) && (pDir->back() != '/')) + *pDir += "/"; + } + + if (pFilename) + { + *pFilename = pBaseName; + string_remove_extension(*pFilename); + } + + if (pExt) + { + *pExt = pBaseName; + *pExt = string_get_extension(*pExt); + if (pExt->size()) + *pExt = "." + *pExt; + } + + return true; +#endif + } + + inline bool is_path_separator(char c) + { +#ifdef _WIN32 + return (c == '/') || (c == '\\'); +#else + return (c == '/'); +#endif + } + + inline bool is_drive_separator(char c) + { +#ifdef _WIN32 + return (c == ':'); +#else + (void)c; + return false; +#endif + } + + inline void string_combine_path(std::string &dst, const char *p, const char *q) + { + std::string temp(p); + if (temp.size() && !is_path_separator(q[0])) + { + if (!is_path_separator(temp.back())) + temp.append(1, BASISU_PATH_SEPERATOR_CHAR); + } + temp += q; + dst.swap(temp); + } + + inline void string_combine_path(std::string &dst, const char *p, const char *q, const char *r) + { + string_combine_path(dst, p, q); + string_combine_path(dst, dst.c_str(), r); + } + + inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) + { + string_combine_path(dst, p, q, r); + if ((!string_ends_with(dst, '.')) && (pExt[0]) && (pExt[0] != '.')) + dst.append(1, '.'); + dst.append(pExt); + } + + inline bool string_get_pathname(const char *p, std::string &path) + { + std::string temp_drive, temp_path; + if 
(!string_split_path(p, &temp_drive, &temp_path, NULL, NULL)) + return false; + string_combine_path(path, temp_drive.c_str(), temp_path.c_str()); + return true; + } + + inline bool string_get_filename(const char *p, std::string &filename) + { + std::string temp_ext; + if (!string_split_path(p, nullptr, nullptr, &filename, &temp_ext)) + return false; + filename += temp_ext; + return true; + } + + class rand + { + std::mt19937 m_mt; + + public: + rand() { } + + rand(uint32_t s) { seed(s); } + void seed(uint32_t s) { m_mt.seed(s); } + + // between [l,h] + int irand(int l, int h) { std::uniform_int_distribution d(l, h); return d(m_mt); } + + uint32_t urand32() { return static_cast(irand(INT32_MIN, INT32_MAX)); } + + bool bit() { return irand(0, 1) == 1; } + + // between [l,h) + float frand(float l, float h) { std::uniform_real_distribution d(l, h); return d(m_mt); } + + float gaussian(float mean, float stddev) { std::normal_distribution d(mean, stddev); return d(m_mt); } + }; + + class priority_queue + { + public: + priority_queue() : + m_size(0) + { + } + + void clear() + { + m_heap.clear(); + m_size = 0; + } + + void init(uint32_t max_entries, uint32_t first_index, float first_priority) + { + m_heap.resize(max_entries + 1); + m_heap[1].m_index = first_index; + m_heap[1].m_priority = first_priority; + m_size = 1; + } + + inline uint32_t size() const { return m_size; } + + inline uint32_t get_top_index() const { return m_heap[1].m_index; } + inline float get_top_priority() const { return m_heap[1].m_priority; } + + inline void delete_top() + { + assert(m_size > 0); + m_heap[1] = m_heap[m_size]; + m_size--; + if (m_size) + down_heap(1); + } + + inline void add_heap(uint32_t index, float priority) + { + m_size++; + + uint32_t k = m_size; + + if (m_size >= m_heap.size()) + m_heap.resize(m_size + 1); + + for (;;) + { + uint32_t parent_index = k >> 1; + if ((!parent_index) || (m_heap[parent_index].m_priority > priority)) + break; + m_heap[k] = m_heap[parent_index]; + k = 
parent_index; + } + + m_heap[k].m_index = index; + m_heap[k].m_priority = priority; + } + + private: + struct entry + { + uint32_t m_index; + float m_priority; + }; + + std::vector m_heap; + uint32_t m_size; + + // Push down entry at index + inline void down_heap(uint32_t heap_index) + { + uint32_t orig_index = m_heap[heap_index].m_index; + const float orig_priority = m_heap[heap_index].m_priority; + + uint32_t child_index; + while ((child_index = (heap_index << 1)) <= m_size) + { + if ((child_index < m_size) && (m_heap[child_index].m_priority < m_heap[child_index + 1].m_priority)) ++child_index; + if (orig_priority > m_heap[child_index].m_priority) + break; + m_heap[heap_index] = m_heap[child_index]; + heap_index = child_index; + } + + m_heap[heap_index].m_index = orig_index; + m_heap[heap_index].m_priority = orig_priority; + } + }; + + // Tree structured vector quantization (TSVQ) + + template + class tree_vector_quant + { + public: + typedef std::pair training_vec_with_weight; + typedef std::vector< training_vec_with_weight > array_of_weighted_training_vecs; + + tree_vector_quant() + { + } + + void clear() + { + clear_vector(m_training_vecs); + clear_vector(m_nodes); + } + + void add_training_vec(const TrainingVectorType &v, uint32_t weight) { m_training_vecs.push_back(std::make_pair(v, weight)); } + + void retrieve(std::vector< std::vector > &codebook) const + { + for (uint32_t i = 0; i < m_nodes.size(); i++) + { + const tsvq_node &n = m_nodes[i]; + if (!n.is_leaf()) + continue; + + codebook.resize(codebook.size() + 1); + codebook.back() = n.m_training_vecs; + } + } + + void retrieve(std::vector &codebook) const + { + for (uint32_t i = 0; i < m_nodes.size(); i++) + { + const tsvq_node &n = m_nodes[i]; + if (!n.is_leaf()) + continue; + + codebook.resize(codebook.size() + 1); + codebook.back() = n.m_origin; + } + } + + bool generate(uint32_t max_size) + { + if (!m_training_vecs.size()) + return false; + + clear_vector(m_nodes); + m_nodes.reserve(max_size * 2 + 
1); + + m_nodes.push_back(prepare_root()); + + priority_queue var_heap; + var_heap.init(max_size, 0, m_nodes[0].m_var); + + std::vector l_children, r_children; + + // Now split the worst nodes + l_children.reserve(m_training_vecs.size() + 1); + r_children.reserve(m_training_vecs.size() + 1); + + uint32_t total_leaf_nodes = 1; + + while ((var_heap.size()) && (total_leaf_nodes < max_size)) + { + const uint32_t node_index = var_heap.get_top_index(); + const tsvq_node &node = m_nodes[node_index]; + + assert(node.m_var == var_heap.get_top_priority()); + assert(node.is_leaf()); + + var_heap.delete_top(); + + if (node.m_training_vecs.size() > 1) + { + if (split_node(node_index, var_heap, l_children, r_children)) + { + // This removes one leaf node (making an internal node) and replaces it with two new leaves, so +1 total. + total_leaf_nodes += 1; + } + } + } + + return true; + } + + private: + class tsvq_node + { + public: + inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1) { } + + // vecs is erased + inline void set(const TrainingVectorType &org, uint64_t weight, float var, std::vector &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); } + + inline bool is_leaf() const { return m_left_index < 0; } + + float m_var; + uint64_t m_weight; + TrainingVectorType m_origin; + int32_t m_left_index, m_right_index; + std::vector m_training_vecs; + }; + + typedef std::vector tsvq_node_vec; + tsvq_node_vec m_nodes; + + array_of_weighted_training_vecs m_training_vecs; + + tsvq_node prepare_root() const + { + double ttsum = 0.0f; + + // Prepare root node containing all training vectors + tsvq_node root; + root.m_training_vecs.reserve(m_training_vecs.size()); + + for (uint32_t i = 0; i < m_training_vecs.size(); i++) + { + const TrainingVectorType &v = m_training_vecs[i].first; + const uint32_t weight = m_training_vecs[i].second; + + root.m_training_vecs.push_back(i); + + root.m_origin += (v * static_cast(weight)); + 
root.m_weight += weight; + + ttsum += v.dot(v) * weight; + } + + root.m_var = static_cast(ttsum - (root.m_origin.dot(root.m_origin) / root.m_weight)); + + root.m_origin *= (1.0f / root.m_weight); + + return root; + } + + bool split_node(uint32_t node_index, priority_queue &var_heap, std::vector &l_children, std::vector &r_children) + { + TrainingVectorType l_child_org, r_child_org; + uint64_t l_weight = 0, r_weight = 0; + float l_var = 0.0f, r_var = 0.0f; + + // Compute initial left/right child origins + prep_split(m_nodes[node_index], l_child_org, r_child_org); + + // Use k-means iterations to refine these children vectors + if (!refine_split(m_nodes[node_index], l_child_org, l_weight, l_var, l_children, r_child_org, r_weight, r_var, r_children)) + return false; + + // Create children + const uint32_t l_child_index = (uint32_t)m_nodes.size(), r_child_index = (uint32_t)m_nodes.size() + 1; + + m_nodes[node_index].m_left_index = l_child_index; + m_nodes[node_index].m_right_index = r_child_index; + + m_nodes.resize(m_nodes.size() + 2); + + tsvq_node &l_child = m_nodes[l_child_index], &r_child = m_nodes[r_child_index]; + + l_child.set(l_child_org, l_weight, l_var, l_children); + r_child.set(r_child_org, r_weight, r_var, r_children); + + if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1)) + var_heap.add_heap(l_child_index, l_var); + + if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1)) + var_heap.add_heap(r_child_index, r_var); + + return true; + } + + TrainingVectorType compute_split_axis(const tsvq_node &node) const + { + const uint32_t N = TrainingVectorType::num_elements; + + matrix cmatrix(cZero); + + // Compute covariance matrix from weighted input vectors + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const TrainingVectorType v(m_training_vecs[node.m_training_vecs[i]].first - node.m_origin); + const TrainingVectorType w(static_cast(m_training_vecs[node.m_training_vecs[i]].second) * v); + + for (uint32_t x = 0; x 
< N; x++) + for (uint32_t y = x; y < N; y++) + cmatrix[x][y] = cmatrix[x][y] + v[x] * w[y]; + } + + const float renorm_scale = 1.0f / node.m_weight; + + for (uint32_t x = 0; x < N; x++) + for (uint32_t y = x; y < N; y++) + cmatrix[x][y] *= renorm_scale; + + // Diagonal flip + for (uint32_t x = 0; x < (N - 1); x++) + for (uint32_t y = x + 1; y < N; y++) + cmatrix[y][x] = cmatrix[x][y]; + + return compute_pca_from_covar(cmatrix); + } + + void prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const + { + const uint32_t N = TrainingVectorType::num_elements; + + if (2 == node.m_training_vecs.size()) + { + l_child_result = m_training_vecs[node.m_training_vecs[0]].first; + r_child_result = m_training_vecs[node.m_training_vecs[1]].first; + return; + } + + TrainingVectorType axis(compute_split_axis(node)), l_child(0.0f), r_child(0.0f); + double l_weight = 0.0f, r_weight = 0.0f; + + // Compute initial left/right children + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const float weight = (float)m_training_vecs[node.m_training_vecs[i]].second; + + const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first; + + double t = (v - node.m_origin).dot(axis); + if (t >= 0.0f) + { + r_child += v * weight; + r_weight += weight; + } + else + { + l_child += v * weight; + l_weight += weight; + } + } + + if ((l_weight > 0.0f) && (r_weight > 0.0f)) + { + l_child_result = l_child * static_cast(1.0f / l_weight); + r_child_result = r_child * static_cast(1.0f / r_weight); + } + else + { + // Empty cell problem + l_child_result = node.m_origin; + r_child_result = node.m_origin; + + // Nudge the two cells apart and hope k-means can separate them. 
+ for (uint32_t i = 0; i < N; i++) + { + l_child_result[i] -= .000125f; + r_child_result[i] += .000125f; + } + } + } + + bool refine_split(const tsvq_node &node, + TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, std::vector &l_children, + TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, std::vector &r_children) const + { + l_children.reserve(node.m_training_vecs.size()); + r_children.reserve(node.m_training_vecs.size()); + + float prev_total_variance = 1e+10f; + + // Refine left/right children locations using k-means iterations + const uint32_t cMaxIters = 6; + for (uint32_t iter = 0; iter < cMaxIters; iter++) + { + l_children.resize(0); + r_children.resize(0); + + TrainingVectorType new_l_child(cZero), new_r_child(cZero); + + double l_ttsum = 0.0f, r_ttsum = 0.0f; + + l_weight = 0; + r_weight = 0; + + for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) + { + const TrainingVectorType &v = m_training_vecs[node.m_training_vecs[i]].first; + const uint32_t weight = m_training_vecs[node.m_training_vecs[i]].second; + + double left_dist2 = l_child.squared_distance(v), right_dist2 = r_child.squared_distance(v); + + if (left_dist2 >= right_dist2) + { + new_r_child += (v * static_cast(weight)); + r_weight += weight; + + r_ttsum += weight * v.dot(v); + r_children.push_back(node.m_training_vecs[i]); + } + else + { + new_l_child += (v * static_cast(weight)); + l_weight += weight; + + l_ttsum += weight * v.dot(v); + l_children.push_back(node.m_training_vecs[i]); + } + } + + if ((!l_weight) || (!r_weight)) + return false; + + l_var = static_cast(l_ttsum - (new_l_child.dot(new_l_child) / l_weight)); + r_var = static_cast(r_ttsum - (new_r_child.dot(new_r_child) / r_weight)); + + new_l_child *= (1.0f / l_weight); + new_r_child *= (1.0f / r_weight); + + l_child = new_l_child; + r_child = new_r_child; + + float total_var = l_var + r_var; + const float cGiveupVariance = .00001f; + if (total_var < cGiveupVariance) + break; + + // Check to see if the 
variance has settled + const float cVarianceDeltaThresh = .00125f; + if (((prev_total_variance - total_var) / total_var) < cVarianceDeltaThresh) + break; + + prev_total_variance = total_var; + } + + return true; + } + }; + + // Canonical Huffman coding + + class histogram + { + std::vector m_hist; + + public: + histogram(uint32_t size = 0) { init(size); } + + void clear() + { + clear_vector(m_hist); + } + + void init(uint32_t size) + { + m_hist.resize(0); + m_hist.resize(size); + } + + inline uint32_t size() const { return static_cast(m_hist.size()); } + + inline const uint32_t &operator[] (uint32_t index) const + { + return m_hist[index]; + } + + inline uint32_t &operator[] (uint32_t index) + { + return m_hist[index]; + } + + inline void inc(uint32_t index) + { + m_hist[index]++; + } + + uint64_t get_total() const + { + uint64_t total = 0; + for (uint32_t i = 0; i < m_hist.size(); ++i) + total += m_hist[i]; + return total; + } + + double get_entropy() const + { + double total = static_cast(get_total()); + if (total == 0.0f) + return 0.0f; + + const double inv_total = 1.0f / total; + const double neg_inv_log2 = -1.0f / log(2.0f); + + double e = 0.0f; + for (uint32_t i = 0; i < m_hist.size(); i++) + if (m_hist[i]) + e += log(m_hist[i] * inv_total) * neg_inv_log2 * static_cast(m_hist[i]); + + return e; + } + }; + + struct sym_freq + { + uint16_t m_key, m_sym_index; + }; + + sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); + void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms); + void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size); + + class huffman_encoding_table + { + public: + huffman_encoding_table() + { + } + + void clear() + { + clear_vector(m_codes); + clear_vector(m_code_sizes); + } + + bool init(const histogram &h, uint32_t max_code_size = cHuffmanMaxSupportedCodeSize) + { + return init(h.size(), &h[0], max_code_size); + } + + bool 
init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size); + bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size); + + inline const uint16_vec &get_codes() const { return m_codes; } + inline const uint8_vec &get_code_sizes() const { return m_code_sizes; } + + uint32_t get_total_used_codes() const + { + for (int i = static_cast(m_code_sizes.size()) - 1; i >= 0; i--) + if (m_code_sizes[i]) + return i + 1; + return 0; + } + + private: + uint16_vec m_codes; + uint8_vec m_code_sizes; + }; + + class bitwise_coder + { + public: + bitwise_coder() : + m_bit_buffer(0), + m_bit_buffer_size(0), + m_total_bits(0) + { + } + + inline void clear() + { + clear_vector(m_bytes); + m_bit_buffer = 0; + m_bit_buffer_size = 0; + m_total_bits = 0; + } + + inline const uint8_vec &get_bytes() const { return m_bytes; } + + inline uint64_t get_total_bits() const { return m_total_bits; } + inline void clear_total_bits() { m_total_bits = 0; } + + inline void init(uint32_t reserve_size = 1024) + { + m_bytes.reserve(reserve_size); + m_bytes.resize(0); + + m_bit_buffer = 0; + m_bit_buffer_size = 0; + m_total_bits = 0; + } + + inline uint32_t flush() + { + if (m_bit_buffer_size) + { + m_total_bits += 8; + append_byte(static_cast(m_bit_buffer)); + + m_bit_buffer = 0; + m_bit_buffer_size = 0; + + return 8; + } + + return 0; + } + + inline uint32_t put_bits(uint32_t bits, uint32_t num_bits) + { + assert(num_bits <= 32); + assert(bits < (1ULL << num_bits)); + + if (!num_bits) + return 0; + + m_total_bits += num_bits; + + uint64_t v = (static_cast(bits) << m_bit_buffer_size) | m_bit_buffer; + m_bit_buffer_size += num_bits; + + while (m_bit_buffer_size >= 8) + { + append_byte(static_cast(v)); + v >>= 8; + m_bit_buffer_size -= 8; + } + + m_bit_buffer = static_cast(v); + return num_bits; + } + + inline uint32_t put_code(uint32_t sym, const huffman_encoding_table &tab) + { + uint32_t code = tab.get_codes()[sym]; + uint32_t code_size = tab.get_code_sizes()[sym]; + 
assert(code_size >= 1); + put_bits(code, code_size); + return code_size; + } + + inline uint32_t put_rice(uint32_t v, uint32_t m) + { + assert(m); + + const uint64_t start_bits = m_total_bits; + + uint32_t q = v >> m, r = v & ((1 << m) - 1); + + for (; q > 16; q -= 16) + put_bits(0xFFFF, 16); + + put_bits((1 << q) - 1, q); + put_bits(r << 1, m + 1); + + return (uint32_t)(m_total_bits - start_bits); + } + + uint32_t emit_huffman_table(const huffman_encoding_table &tab); + + private: + uint8_vec m_bytes; + uint32_t m_bit_buffer, m_bit_buffer_size; + uint64_t m_total_bits; + + void append_byte(uint8_t c) + { + m_bytes.resize(m_bytes.size() + 1); + m_bytes.back() = c; + } + + static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len); + static void end_zero_run(uint16_vec &syms, uint32_t &run_size); + }; + + class huff2D + { + public: + huff2D() { } + huff2D(uint32_t bits_per_sym, uint32_t total_syms_per_group) { init(bits_per_sym, total_syms_per_group); } + + inline const histogram &get_histogram() const { return m_histogram; } + inline const huffman_encoding_table &get_encoding_table() const { return m_encoding_table; } + + inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group) + { + assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1); + + m_bits_per_sym = bits_per_sym; + m_total_syms_per_group = total_syms_per_group; + m_cur_sym_bits = 0; + m_cur_num_syms = 0; + m_decode_syms_remaining = 0; + m_next_decoder_group_index = 0; + + m_histogram.init(1 << (bits_per_sym * total_syms_per_group)); + } + + inline void clear() + { + m_group_bits.clear(); + + m_cur_sym_bits = 0; + m_cur_num_syms = 0; + m_decode_syms_remaining = 0; + m_next_decoder_group_index = 0; + } + + inline void emit(uint32_t sym) + { + m_cur_sym_bits |= (sym << (m_cur_num_syms * m_bits_per_sym)); + m_cur_num_syms++; + + if (m_cur_num_syms == m_total_syms_per_group) + flush(); + } + + inline void flush() + { + if 
(m_cur_num_syms) + { + m_group_bits.push_back(m_cur_sym_bits); + m_histogram.inc(m_cur_sym_bits); + + m_cur_sym_bits = 0; + m_cur_num_syms = 0; + } + } + + inline bool start_encoding(uint32_t code_size_limit = 16) + { + flush(); + + if (!m_encoding_table.init(m_histogram, code_size_limit)) + return false; + + m_decode_syms_remaining = 0; + m_next_decoder_group_index = 0; + + return true; + } + + inline uint32_t emit_next_sym(bitwise_coder &c) + { + uint32_t bits = 0; + + if (!m_decode_syms_remaining) + { + bits = c.put_code(m_group_bits[m_next_decoder_group_index++], m_encoding_table); + m_decode_syms_remaining = m_total_syms_per_group; + } + + m_decode_syms_remaining--; + return bits; + } + + inline void emit_flush() + { + m_decode_syms_remaining = 0; + } + + private: + uint_vec m_group_bits; + huffman_encoding_table m_encoding_table; + histogram m_histogram; + uint32_t m_bits_per_sym, m_total_syms_per_group, m_cur_sym_bits, m_cur_num_syms, m_next_decoder_group_index, m_decode_syms_remaining; + }; + + bool huffman_test(int rand_seed); + + // VQ index reordering + + class palette_index_reorderer + { + public: + palette_index_reorderer() + { + } + + void clear() + { + clear_vector(m_hist); + clear_vector(m_total_count_to_picked); + clear_vector(m_entries_picked); + clear_vector(m_entries_to_do); + clear_vector(m_remap_table); + } + + // returns [0,1] distance of entry i to entry j + typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx); + + void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); + + // Table remaps old to new symbol indices + inline const uint_vec &get_remap_table() const { return m_remap_table; } + + private: + uint_vec m_hist, m_total_count_to_picked, m_entries_picked, m_entries_to_do, m_remap_table; + + inline uint32_t get_hist(int i, int j, int n) const { return (i > j) ? 
m_hist[j * n + i] : m_hist[i * n + j]; } + inline void inc_hist(int i, int j, int n) { if ((i != j) && (i < j) && (i != -1) && (j != -1)) { assert(((uint32_t)i < (uint32_t)n) && ((uint32_t)j < (uint32_t)n)); m_hist[i * n + j]++; } } + + void prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices); + void find_initial(uint32_t num_syms); + void find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); + float pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); + }; + + // Simple 32-bit 2D image class + + class image + { + public: + image() : + m_width(0), m_height(0), m_pitch(0) + { + } + + image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + image(const image &other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + image &swap(image &other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + image &operator= (const image &rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return *this; + } + + image &clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + clear_vector(m_pixels); + return *this; + } + + image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color) + { + return crop(w, h, p, background); + } + + image &set_all(const color_rgba &c) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped(x + ix, y + iy, c); + return *this; + } + + 
image &crop_dup_borders(uint32_t w, uint32_t h) + { + uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if ((m_width > orig_w) && (orig_w)) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_clipped(x, y, get_clamped(x, y)); + } + + if ((m_height > orig_h) && (orig_h)) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_clipped(x, y, get_clamped(x, y)); + } + return *this; + } + + image &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba &background = g_black_color) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + color_rgba_vec cur_state; + cur_state.swap(m_pixels); + + m_pixels.resize(p * h); + + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + inline const color_rgba &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + inline color_rgba &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const color_rgba &get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline color_rgba &get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline const color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? 
posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline color_rgba &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline image &set_clipped(int x, int y, const color_rgba &c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. + image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_height()) + break; + + set_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const image &extract_block_clamped(color_rgba *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + return *this; + } + + image &set_block_clipped(const color_rgba *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint32_t get_total_pixels() const { return m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } 
+ inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const color_rgba_vec &get_pixels() const { return m_pixels; } + inline color_rgba_vec &get_pixels() { return m_pixels; } + + inline const color_rgba *get_ptr() const { return &m_pixels[0]; } + inline color_rgba *get_ptr() { return &m_pixels[0]; } + + bool has_alpha() const + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + if ((*this)(x, y).a < 255) + return true; + + return false; + } + + image &set_alpha(uint8_t a) + { + for (uint32_t y = 0; y < m_height; ++y) + for (uint32_t x = 0; x < m_width; ++x) + (*this)(x, y).a = a; + return *this; + } + + image &flip_y() + { + for (uint32_t y = 0; y < m_height / 2; ++y) + for (uint32_t x = 0; x < m_width; ++x) + std::swap((*this)(x, y), (*this)(x, m_height - 1 - y)); + return *this; + } + + // TODO: There are many ways to do this, not sure this is the best way. + image &renormalize_normal_map() + { + for (uint32_t y = 0; y < m_height; y++) + { + for (uint32_t x = 0; x < m_width; x++) + { + color_rgba &c = (*this)(x, y); + if ((c.r == 128) && (c.g == 128) && (c.b == 128)) + continue; + + vec3F v(c.r, c.g, c.b); + v = (v * (2.0f / 255.0f)) - vec3F(1.0f); + v.clamp(-1.0f, 1.0f); + + float length = v.length(); + const float cValidThresh = .077f; + if (length < cValidThresh) + { + c.set(128, 128, 128, c.a); + } + else if (fabs(length - 1.0f) > cValidThresh) + { + if (length) + v /= length; + + for (uint32_t i = 0; i < 3; i++) + c[i] = static_cast(clamp(floor((v[i] + 1.0f) * 255.0f * .5f + .5f), 0.0f, 255.0f)); + + if ((c.g == 128) && (c.r == 128)) + { + if (c.b < 128) + c.b = 0; + else + c.b = 255; + } + } + } + } + return *this; + } + + private: + uint32_t m_width, m_height, m_pitch; // all in pixels + color_rgba_vec m_pixels; + }; + + // Float images + + typedef std::vector vec4F_vec; + + class imagef + { + public: + imagef() : + m_width(0), m_height(0), 
m_pitch(0) + { + } + + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + imagef(const imagef &other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + imagef &swap(imagef &other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + imagef &operator= (const imagef &rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return *this; + } + + imagef &clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + clear_vector(m_pixels); + return *this; + } + + imagef &set(const image &src, const vec4F &scale = vec4F(1), const vec4F &bias = vec4F(0)) + { + const uint32_t width = src.get_width(); + const uint32_t height = src.get_height(); + + resize(width, height); + + for (int y = 0; y < (int)height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_rgba &src_pixel = src(x, y); + (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); + } + } + + return *this; + } + + imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) + { + return resize(other.get_width(), other.get_height(), p, background); + } + + imagef &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1)) + { + return crop(w, h, p, background); + } + + imagef &set_all(const vec4F &c) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + imagef &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F &c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped(x + ix, y + iy, c); + return 
*this; + } + + imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + vec4F_vec cur_state; + cur_state.swap(m_pixels); + + m_pixels.resize(p * h); + + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + inline const vec4F &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + inline vec4F &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const vec4F &get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline vec4F &get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline const vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline vec4F &get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline imagef &set_clipped(int x, int y, const vec4F &c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. 
+ imagef &blit(const imagef &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_height()) + break; + + set_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const imagef &extract_block_clamped(vec4F *pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + return *this; + } + + imagef &set_block_clipped(const vec4F *pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint32_t get_total_pixels() const { return m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const vec4F_vec &get_pixels() const { return m_pixels; } + inline vec4F_vec &get_pixels() { return m_pixels; } + + inline const vec4F *get_ptr() const { return &m_pixels[0]; } + inline vec4F *get_ptr() { return &m_pixels[0]; } + + private: + uint32_t m_width, m_height, m_pitch; // all in pixels + vec4F_vec m_pixels; + }; + + // Image metrics + + class image_metrics + { + public: + // TODO: Add ssim + float m_max, m_mean, m_mean_squared, m_rms, m_psnr, m_ssim; + + 
image_metrics() + { + clear(); + } + + void clear() + { + m_max = 0; + m_mean = 0; + m_mean_squared = 0; + m_rms = 0; + m_psnr = 0; + m_ssim = 0; + } + + void print(const char *pPrefix = nullptr) { printf("%sMax: %3.0f Mean: %3.3f RMS: %3.3f PSNR: %2.3f dB\n", pPrefix ? pPrefix : "", m_max, m_mean, m_rms, m_psnr); } + + void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true); + }; + + // Image saving/loading/resampling + + bool load_png(const char* pFilename, image& img); + inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); } + + enum + { + cImageSaveGrayscale = 1, + cImageSaveIgnoreAlpha = 2 + }; + + bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0); + inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } + + bool read_file_to_vec(const char* pFilename, uint8_vec& data); + + bool write_data_to_file(const char* pFilename, const void* pData, size_t len); + + inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? 
write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } + + float linear_to_srgb(float l); + float srgb_to_linear(float s); + + bool image_resample(const image &src, image &dst, bool srgb = false, + const char *pFilter = "lanczos4", float filter_scale = 1.0f, + bool wrapping = false, + uint32_t first_comp = 0, uint32_t num_comps = 4); + + // Timing + + typedef uint64_t timer_ticks; + + class interval_timer + { + public: + interval_timer(); + + void start(); + void stop(); + + double get_elapsed_secs() const; + inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); } + + static void init(); + static inline timer_ticks get_ticks_per_sec() { return g_freq; } + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); + static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } + + private: + static timer_ticks g_init_ticks, g_freq; + static double g_timer_freq; + + timer_ticks m_start_time, m_stop_time; + + bool m_started, m_stopped; + }; + + // 2D array + + template + class vector2D + { + typedef std::vector TVec; + + uint32_t m_width, m_height; + TVec m_values; + + public: + vector2D() : + m_width(0), + m_height(0) + { + } + + vector2D(uint32_t w, uint32_t h) : + m_width(0), + m_height(0) + { + resize(w, h); + } + + vector2D(const vector2D &other) + { + *this = other; + } + + vector2D &operator= (const vector2D &other) + { + if (this != &other) + { + m_width = other.m_width; + m_height = other.m_height; + m_values = other.m_values; + } + return *this; + } + + inline bool operator== (const vector2D &rhs) const + { + return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values); + } + + inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); } + + inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } + inline T 
&operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; } + + inline const T &operator[] (uint32_t i) const { return m_values[i]; } + inline T &operator[] (uint32_t i) { return m_values[i]; } + + inline const T &at_clamped (int x, int y) const { return (*this)(clamp(x, 0, m_width), clamp(y, 0, m_height)); } + inline T &at_clamped (int x, int y) { return (*this)(clamp(x, 0, m_width), clamp(y, 0, m_height)); } + + void clear() + { + m_width = 0; + m_height = 0; + m_values.clear(); + } + + void set_all(const T&val) + { + vector_set_all(m_values, val); + } + + inline const T* get_ptr() const { return &m_values[0]; } + inline T* get_ptr() { return &m_values[0]; } + + vector2D &resize(uint32_t new_width, uint32_t new_height) + { + if ((m_width == new_width) && (m_height == new_height)) + return *this; + + TVec oldVals(new_width * new_height); + oldVals.swap(m_values); + + const uint32_t w = minimum(m_width, new_width); + const uint32_t h = minimum(m_height, new_height); + + if ((w) && (h)) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + m_values[x + y * new_width] = oldVals[x + y * m_width]; + } + + m_width = new_width; + m_height = new_height; + + return *this; + } + }; + +} // namespace basisu + + diff --git a/basisu_etc.cpp b/basisu_etc.cpp new file mode 100644 index 0000000..5d6be63 --- /dev/null +++ b/basisu_etc.cpp @@ -0,0 +1,1965 @@ +// basis_etc.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_etc.h" + +#define BASISU_DEBUG_ETC_ENCODER 0 +#define BASISU_DEBUG_ETC_ENCODER_DEEPER 0 + +namespace basisu +{ + const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; + + static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = + { + { { 0, 0, 0, 8 } },{ { 0, 5, 2, 1 } },{ { 0, 6, 1, 1 } },{ { 0, 7, 0, 1 } },{ { 0, 7, 1, 0 } }, + { { 0, 0, 8, 0 } },{ { 0, 0, 3, 5 } },{ { 0, 1, 7, 0 } },{ { 0, 0, 4, 4 } },{ { 0, 0, 2, 6 } }, + { { 0, 0, 7, 1 } },{ { 0, 0, 1, 7 } },{ { 0, 0, 5, 3 } },{ { 1, 6, 0, 1 } },{ { 0, 0, 6, 2 } }, + { { 0, 2, 6, 0 } },{ { 2, 4, 2, 0 } },{ { 0, 3, 5, 0 } },{ { 3, 3, 1, 1 } },{ { 4, 2, 0, 2 } }, + { { 1, 5, 2, 0 } },{ { 0, 5, 3, 0 } },{ { 0, 6, 2, 0 } },{ { 2, 4, 1, 1 } },{ { 5, 1, 0, 2 } }, + { { 6, 1, 1, 0 } },{ { 3, 3, 0, 2 } },{ { 6, 0, 0, 2 } },{ { 0, 8, 0, 0 } },{ { 6, 1, 0, 1 } }, + { { 0, 1, 6, 1 } },{ { 1, 6, 1, 0 } },{ { 4, 1, 3, 0 } },{ { 0, 2, 5, 1 } },{ { 5, 0, 3, 0 } }, + { { 5, 3, 0, 0 } },{ { 0, 1, 5, 2 } },{ { 0, 3, 4, 1 } },{ { 2, 5, 1, 0 } },{ { 1, 7, 0, 0 } }, + { { 0, 1, 4, 3 } },{ { 6, 0, 2, 0 } },{ { 0, 4, 4, 0 } },{ { 2, 6, 0, 0 } },{ { 0, 2, 4, 2 } }, + { { 0, 5, 1, 2 } },{ { 0, 6, 0, 2 } },{ { 3, 5, 0, 0 } },{ { 0, 4, 3, 1 } },{ { 3, 4, 1, 0 } }, + { { 4, 3, 1, 0 } },{ { 1, 5, 0, 2 } },{ { 0, 3, 3, 2 } },{ { 1, 4, 1, 2 } },{ { 0, 4, 2, 2 } }, + { { 2, 3, 3, 0 } },{ { 4, 4, 0, 0 } },{ { 1, 2, 4, 1 } },{ { 0, 5, 0, 3 } },{ { 0, 1, 3, 4 } }, + { { 1, 5, 1, 1 } },{ { 1, 4, 2, 1 } },{ { 1, 3, 2, 2 } },{ { 5, 2, 1, 0 } },{ { 1, 3, 3, 1 } }, + { { 0, 1, 2, 5 } },{ { 1, 1, 5, 1 } },{ { 0, 3, 2, 3 } },{ { 2, 5, 0, 1 } },{ { 3, 2, 2, 1 } }, + { { 2, 3, 0, 3 } },{ { 1, 4, 3, 0 } },{ { 2, 2, 1, 3 } },{ { 6, 2, 0, 0 } },{ { 1, 0, 6, 1 } }, + { { 3, 3, 2, 0 } },{ { 7, 1, 0, 0 } },{ { 3, 1, 4, 0 } },{ { 0, 2, 3, 3 } },{ { 0, 4, 1, 3 } }, + { { 0, 4, 0, 
4 } },{ { 0, 1, 0, 7 } },{ { 2, 0, 5, 1 } },{ { 2, 0, 4, 2 } },{ { 3, 0, 2, 3 } }, + { { 2, 2, 4, 0 } },{ { 2, 2, 3, 1 } },{ { 4, 0, 3, 1 } },{ { 3, 2, 3, 0 } },{ { 2, 3, 2, 1 } }, + { { 1, 3, 4, 0 } },{ { 7, 0, 1, 0 } },{ { 3, 0, 4, 1 } },{ { 1, 0, 5, 2 } },{ { 8, 0, 0, 0 } }, + { { 3, 0, 1, 4 } },{ { 4, 1, 1, 2 } },{ { 4, 0, 2, 2 } },{ { 1, 2, 5, 0 } },{ { 4, 2, 1, 1 } }, + { { 3, 4, 0, 1 } },{ { 2, 0, 3, 3 } },{ { 5, 0, 1, 2 } },{ { 5, 0, 0, 3 } },{ { 2, 4, 0, 2 } }, + { { 2, 1, 4, 1 } },{ { 4, 0, 1, 3 } },{ { 2, 1, 5, 0 } },{ { 4, 2, 2, 0 } },{ { 4, 0, 4, 0 } }, + { { 1, 0, 4, 3 } },{ { 1, 4, 0, 3 } },{ { 3, 0, 3, 2 } },{ { 4, 3, 0, 1 } },{ { 0, 1, 1, 6 } }, + { { 1, 3, 1, 3 } },{ { 0, 2, 2, 4 } },{ { 2, 0, 2, 4 } },{ { 5, 1, 1, 1 } },{ { 3, 0, 5, 0 } }, + { { 2, 3, 1, 2 } },{ { 3, 0, 0, 5 } },{ { 0, 3, 1, 4 } },{ { 5, 0, 2, 1 } },{ { 2, 1, 3, 2 } }, + { { 2, 0, 6, 0 } },{ { 3, 1, 3, 1 } },{ { 5, 1, 2, 0 } },{ { 1, 0, 3, 4 } },{ { 1, 1, 6, 0 } }, + { { 4, 0, 0, 4 } },{ { 2, 0, 1, 5 } },{ { 0, 3, 0, 5 } },{ { 1, 3, 0, 4 } },{ { 4, 1, 2, 1 } }, + { { 1, 2, 3, 2 } },{ { 3, 1, 0, 4 } },{ { 5, 2, 0, 1 } },{ { 1, 2, 2, 3 } },{ { 3, 2, 1, 2 } }, + { { 2, 2, 2, 2 } },{ { 6, 0, 1, 1 } },{ { 1, 2, 1, 4 } },{ { 1, 1, 4, 2 } },{ { 3, 2, 0, 3 } }, + { { 1, 2, 0, 5 } },{ { 1, 0, 7, 0 } },{ { 3, 1, 2, 2 } },{ { 1, 0, 2, 5 } },{ { 2, 0, 0, 6 } }, + { { 2, 1, 1, 4 } },{ { 2, 2, 0, 4 } },{ { 1, 1, 3, 3 } },{ { 7, 0, 0, 1 } },{ { 1, 0, 0, 7 } }, + { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, + { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } + }; + + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = + { + { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } + }; + + const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 
0 }; + const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + // [flip][subblock][pixel_index] + const etc_coord2 g_etc1_pixel_coords[2][2][8] = + { + { + { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + }, + { + { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + } + }, + { + { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + }, + { + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + }, + } + }; + + // [flip][subblock][pixel_index] + const uint32_t g_etc1_pixel_indices[2][2][8] = + { + { + { + 0 + 4 * 0, 0 + 4 * 1, 0 + 4 * 2, 0 + 4 * 3, + 1 + 4 * 0, 1 + 4 * 1, 1 + 4 * 2, 1 + 4 * 3 + }, + { + 2 + 4 * 0, 2 + 4 * 1, 2 + 4 * 2, 2 + 4 * 3, + 3 + 4 * 0, 3 + 4 * 1, 3 + 4 * 2, 3 + 4 * 3 + } + }, + { + { + 0 + 4 * 0, 1 + 4 * 0, 2 + 4 * 0, 3 + 4 * 0, + 0 + 4 * 1, 1 + 4 * 1, 2 + 4 * 1, 3 + 4 * 1 + }, + { + 0 + 4 * 2, 1 + 4 * 2, 2 + 4 * 2, 3 + 4 * 2, + 0 + 4 * 3, 1 + 4 * 3, 2 + 4 * 3, 3 + 4 * 3 + }, + } + }; + + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. + static uint16_t g_etc1_inverse_lookup[2 * 8 * 4][256]; // [diff/inten_table/selector][desired_color] + + // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. 
+ // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) + static const uint16_t g_color8_to_etc_block_config_0_255[2][33] = + { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, + 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, + 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, + }; + + // Really only [254][11]. + static const uint16_t g_color8_to_etc_block_config_1_to_254[254][12] = + { + { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E, + 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, { + 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306, + 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112, + 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707, + 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605, + 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF + }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214, + 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A, + 
0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, { + 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D, + 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805, + 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, + 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, { + 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521, + 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, + 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, { + 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, + 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917, + 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 
0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725, + 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, { + 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437, + 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, { + 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, + 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, { + 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F, + 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, { + 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434, + 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21, + 0x0539, 0x160B, 0x121F, 
0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E, + 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, { + 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, { + 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, + 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B, + 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, { + 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, + 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536, + 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, + 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, { + 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF + }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 
0xFFFF }, { 0x0820, + 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031, + 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, { + 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, + 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, + 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832, + 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D, + 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, + 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF + }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, { + 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, + 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513, + 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF + }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, { + 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 
0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, { + 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905, + 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, + 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621, + 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919, + 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F, + 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A, + 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, { + 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913, + 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, { + 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20, + 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 
0x1D05, 0xFFFF }, { + 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, { + 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18, + 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929, + 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23, + 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF + }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, + 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E, + 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18, + 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01, + 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16, + 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B, + 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, + 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34, + 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 
0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11, + 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF }, + }; + + uint16_t etc_block::pack_color5(const color_rgba& color, bool scaled, uint32_t bias) + { + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + uint16_t etc_block::pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) + { + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = minimum(r, 31U); + g = minimum(g, 31U); + b = minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 10U)); + } + + color_rgba etc_block::unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color5 & 31U; + uint32_t g = (packed_color5 >> 5U) & 31U; + uint32_t r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); + } + + void etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled) + { + result = unpack_color5(packed_color5, scaled, 255); + } + + void etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) + { + color_rgba c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + bool etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + color_rgba_i16 dc(unpack_delta3(packed_delta3)); + + int b = (packed_color5 & 31U) + dc.b; + int g = ((packed_color5 >> 5U) & 31U) + dc.g; + int r = ((packed_color5 >> 10U) & 31U) + dc.r; + + bool success = true; + if (static_cast(r | g | b) > 31U) + { + success = false; + r = clamp(r, 0, 31); + g = clamp(g, 0, 31); + b = clamp(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + 
result.set_noclamp_rgba(r, g, b, minimum(alpha, 255U)); + return success; + } + + bool etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + color_rgba result; + const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); + r = result.r; + g = result.g; + b = result.b; + return success; + } + + uint16_t etc_block::pack_delta3(const color_rgba_i16& color) + { + return pack_delta3(color.r, color.g, color.b); + } + + uint16_t etc_block::pack_delta3(int r, int g, int b) + { + assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast(b | (g << 3) | (r << 6)); + } + + color_rgba_i16 etc_block::unpack_delta3(uint16_t packed_delta3) + { + int r = (packed_delta3 >> 6) & 7; + int g = (packed_delta3 >> 3) & 7; + int b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + return color_rgba_i16(r, g, b, 255); + } + + void etc_block::unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + uint16_t etc_block::pack_color4(const color_rgba& color, bool scaled, uint32_t bias) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } + + uint16_t etc_block::pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } + + r = minimum(r, 15U); + g = minimum(g, 15U); + b = minimum(b, 15U); + + return static_cast(b | (g << 4U) | (r << 8U)); + } + + color_rgba etc_block::unpack_color4(uint16_t 
packed_color4, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color4 & 15U; + uint32_t g = (packed_color4 >> 4U) & 15U; + uint32_t r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); + } + + void etc_block::unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) + { + color_rgba c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + void etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + unpack_color5(r, g, b, packed_color5, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + } + + bool etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int 
y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + + return success; + } + + void etc_block::get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + unpack_color4(r, g, b, packed_color4, true); + + const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + } + + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) + { + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint32_t table_index0 = block.get_inten_table(0); + const uint32_t table_index1 = block.get_inten_table(1); + + color_rgba subblock_colors0[4]; + color_rgba subblock_colors1[4]; + + if (diff_flag) + { + const uint16_t base_color5 = block.get_base5_color(); + const uint16_t delta_color3 = block.get_delta3_color(); + etc_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + + if (!etc_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + return false; + } + else + { + const uint16_t base_color4_0 = block.get_base4_color(0); + etc_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + + const uint16_t base_color4_1 = block.get_base4_color(1); + etc_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + if (preserve_alpha) + { + if (flip_flag) + { + for (uint32_t y = 0; y < 2; y++) + { + 
pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); + pDst += 4; + } + + for (uint32_t y = 2; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + } + else + { + if (flip_flag) + { + // 0000 + // 0000 + // 1111 + // 1111 + for (uint32_t y = 0; y < 2; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors0[block.get_selector(2, y)]; + pDst[3] = subblock_colors0[block.get_selector(3, y)]; + pDst += 4; + } + + for (uint32_t y = 2; y < 4; y++) + { + pDst[0] = subblock_colors1[block.get_selector(0, y)]; + pDst[1] = subblock_colors1[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + else + { + // 0011 + // 0011 + // 0011 + // 0011 + for (uint32_t y = 0; y < 4; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + } + + return true; + } + + inline int extend_6_to_8(uint32_t n) + { + return (n << 2) | (n >> 4); + } + + inline int 
extend_7_to_8(uint32_t n) + { + return (n << 1) | (n >> 6); + } + + inline int extend_4_to_8(uint32_t n) + { + return (n << 4) | n; + } + + uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const + { + color_rgba unpacked_block[16]; + + unpack_etc1(*this, unpacked_block); + + uint64_t total_error = 0; + + if (subblock_index < 0) + { + for (uint32_t i = 0; i < 16; i++) + total_error += color_distance(perceptual, pBlock_pixels[i], unpacked_block[i], false); + } + else + { + const bool flip_bit = get_flip_bit(); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; + + total_error += color_distance(perceptual, pBlock_pixels[idx], unpacked_block[idx], false); + } + } + + return total_error; + } + + void etc_block::get_subblock_pixels(color_rgba* pPixels, int subblock_index) const + { + if (subblock_index < 0) + unpack_etc1(*this, pPixels); + else + { + color_rgba unpacked_block[16]; + + unpack_etc1(*this, unpacked_block); + + const bool flip_bit = get_flip_bit(); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; + + pPixels[i] = unpacked_block[idx]; + } + } + } + + bool etc1_optimizer::compute() + { + assert(m_pResult->m_pSelectors); + + if ((m_pParams->m_pForce_selectors) || (m_pParams->m_pEval_solution_override)) + { + assert(m_pParams->m_quality >= cETCQualitySlow); + } + + const uint32_t n = m_pParams->m_num_src_pixels; + + if (m_pParams->m_cluster_fit) + { + if (m_pParams->m_quality == cETCQualityFast) + compute_internal_cluster_fit(4); + else if (m_pParams->m_quality == cETCQualityMedium) + compute_internal_cluster_fit(32); + else if (m_pParams->m_quality == cETCQualitySlow) + compute_internal_cluster_fit(64); + else + compute_internal_cluster_fit(BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE); + } + else + compute_internal_neighborhood(m_br, m_bg, m_bb); + + if 
(!m_best_solution.m_valid) + { + m_pResult->m_error = UINT32_MAX; + return false; + } + + const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; + +#ifdef BASISU_BUILD_DEBUG + if (m_pParams->m_pEval_solution_override == nullptr) + { + color_rgba block_colors[4]; + m_best_solution.m_coords.get_block_colors(block_colors); + + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + uint64_t actual_error = 0; + for (uint32_t i = 0; i < n; i++) + { + if ((m_pParams->m_perceptual) && (m_pParams->m_quality >= cETCQualitySlow)) + actual_error += color_distance(true, pSrc_pixels[i], block_colors[pSelectors[i]], false); + else + actual_error += color_distance(pSrc_pixels[i], block_colors[pSelectors[i]], false); + } + assert(actual_error == m_best_solution.m_error); + } +#endif + + m_pResult->m_error = m_best_solution.m_error; + + m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; + m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + + m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; + memcpy(m_pResult->m_pSelectors, pSelectors, n); + m_pResult->m_n = n; + + return true; + } + + void etc1_optimizer::refine_solution(uint32_t max_refinement_trials) + { + // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. + // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: + // The goal is: + // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 + // Rearranging this: + // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 + // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 + // So what this means: + // optimal_block_color = avg_input - avg_inten_delta + // So the optimal block color can be computed by taking the average color of the input pixels and subtracting the current average intensity delta. + // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. + // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. 
+ + const uint32_t n = m_pParams->m_num_src_pixels; + + for (uint32_t refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { + const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; + const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); + for (uint32_t r = 0; r < n; r++) + { + const uint32_t s = *pSelectors++; + const int yd_temp = pInten_table[s]; + // Compute actual delta being applied to each pixel, taking into account clamping. + delta_sum_r += clamp(base_color.r + yd_temp, 0, 255) - base_color.r; + delta_sum_g += clamp(base_color.g + yd_temp, 0, 255) - base_color.g; + delta_sum_b += clamp(base_color.b + yd_temp, 0, 255) - base_color.b; + } + + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + break; + + const float avg_delta_r_f = static_cast(delta_sum_r) / n; + const float avg_delta_g_f = static_cast(delta_sum_g) / n; + const float avg_delta_b_f = static_cast(delta_sum_b) / n; + const int br1 = clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Refinement trial %u, avg_delta %f %f %f\n", refinement_trial, avg_delta_r_f, avg_delta_g_f, avg_delta_b_f); +#endif + + if (!evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution)) + break; + + } // refinement_trial + } + + void etc1_optimizer::compute_internal_neighborhood(int scan_r, int scan_g, int scan_b) + { + if (m_best_solution.m_error == 0) + return; + + const uint32_t n = m_pParams->m_num_src_pixels; + const int scan_delta_size = 
m_pParams->m_scan_delta_size; + + // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. + // Each time a better solution is found try to refine the current solution's block color based on the current selectors and intensity table index. + for (int zdi = 0; zdi < scan_delta_size; zdi++) + { + const int zd = m_pParams->m_pScan_deltas[zdi]; + const int mbb = scan_b + zd; + if (mbb < 0) continue; else if (mbb > m_limit) break; + + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { + const int yd = m_pParams->m_pScan_deltas[ydi]; + const int mbg = scan_g + yd; + if (mbg < 0) continue; else if (mbg > m_limit) break; + + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { + const int xd = m_pParams->m_pScan_deltas[xdi]; + const int mbr = scan_r + xd; + if (mbr < 0) continue; else if (mbr > m_limit) break; + + etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); + + if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + continue; + + if (m_pParams->m_refinement) + { + refine_solution((m_pParams->m_quality == cETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2)); + } + + } // xdi + } // ydi + } // zdi + } + + void etc1_optimizer::compute_internal_cluster_fit(uint32_t total_perms_to_try) + { + if ((!m_best_solution.m_valid) || ((m_br != m_best_solution.m_coords.m_unscaled_color.r) || (m_bg != m_best_solution.m_coords.m_unscaled_color.g) || (m_bb != m_best_solution.m_coords.m_unscaled_color.b))) + { + evaluate_solution(etc1_solution_coordinates(m_br, m_bg, m_bb, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); + } + + if ((m_best_solution.m_error == 0) || (!m_best_solution.m_valid)) + return; + + for (uint32_t i = 0; i < total_perms_to_try; i++) + { + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + + const int *pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); + + const uint8_t *pNum_selectors = g_cluster_fit_order_tab[i].m_v; + + for (uint32_t q = 0; q < 4; q++) + { + const int yd_temp = pInten_table[q]; + + delta_sum_r += pNum_selectors[q] * (clamp(base_color.r + yd_temp, 0, 255) - base_color.r); + delta_sum_g += pNum_selectors[q] * (clamp(base_color.g + yd_temp, 0, 255) - base_color.g); + delta_sum_b += pNum_selectors[q] * (clamp(base_color.b + yd_temp, 0, 255) - base_color.b); + } + + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + continue; + + const float avg_delta_r_f = static_cast(delta_sum_r) / 8; + const float avg_delta_g_f = static_cast(delta_sum_g) / 8; + const float avg_delta_b_f = static_cast(delta_sum_b) / 8; + + const int br1 = clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Second refinement trial %u, avg_delta %f %f %f\n", i, avg_delta_r_f, avg_delta_g_f, 
avg_delta_b_f); +#endif + + evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); + + if (m_best_solution.m_error == 0) + break; + } + } + + void etc1_optimizer::init(const params& params, results& result) + { + m_pParams = &params; + m_pResult = &result; + + const uint32_t n = m_pParams->m_num_src_pixels; + + m_selectors.resize(n); + m_best_selectors.resize(n); + m_temp_selectors.resize(n); + m_trial_solution.m_selectors.resize(n); + m_best_solution.m_selectors.resize(n); + + m_limit = m_pParams->m_use_color4 ? 15 : 31; + + vec3F avg_color(0.0f); + + m_luma.resize(n); + m_sorted_luma_indices.resize(n); + m_sorted_luma.resize(n); + + for (uint32_t i = 0; i < n; i++) + { + const color_rgba& c = m_pParams->m_pSrc_pixels[i]; + const vec3F fc(c.r, c.g, c.b); + + avg_color += fc; + + m_luma[i] = static_cast(c.r + c.g + c.b); + m_sorted_luma_indices[i] = i; + } + avg_color /= static_cast(n); + m_avg_color = avg_color; + + m_br = clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); + m_bg = clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); + m_bb = clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Avg block color: %u %u %u\n", m_br, m_bg, m_bb); +#endif + + if (m_pParams->m_quality <= cETCQualityMedium) + { + indirect_sort(n, &m_sorted_luma_indices[0], &m_luma[0]); + + m_pSorted_luma = &m_sorted_luma[0]; + m_pSorted_luma_indices = &m_sorted_luma_indices[0]; + + for (uint32_t i = 0; i < n; i++) + m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } + + m_best_solution.m_coords.clear(); + m_best_solution.m_valid = false; + m_best_solution.m_error = UINT64_MAX; + + m_solutions_tried.clear(); + } + + bool etc1_optimizer::evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + uint32_t k = 
coords.m_unscaled_color.r | (coords.m_unscaled_color.g << 8) | (coords.m_unscaled_color.b << 16); + if (!m_solutions_tried.insert(k).second) + return false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval solution: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); +#endif + + trial_solution.m_valid = false; + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; + const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; + const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); +#endif + return false; + } + } + + const color_rgba base_color(coords.get_scaled_color()); + + const uint32_t n = m_pParams->m_num_src_pixels; + assert(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = UINT64_MAX; + + const uint8_t *pSelectors_to_use = m_pParams->m_pForce_selectors; + + for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + color_rgba block_colors[4]; + for (uint32_t s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); + } + + uint64_t total_error = 0; + + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + for (uint32_t c = 0; c < n; c++) + { + const color_rgba& src_pixel = *pSrc_pixels++; + + uint32_t best_selector_index = 0; + uint32_t best_error = 0; + + if (pSelectors_to_use) + { + best_selector_index = pSelectors_to_use[c]; + best_error = color_distance(m_pParams->m_perceptual, src_pixel, 
block_colors[best_selector_index], false); + } + else + { + best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[0], false); + + uint32_t trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[1], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 1; + } + + trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[2], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 2; + } + + trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[3], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 3; + } + } + + m_temp_selectors[c] = static_cast(best_selector_index); + + total_error += best_error; + if ((m_pParams->m_pEval_solution_override == nullptr) && (total_error >= trial_solution.m_error)) + break; + } + + if (m_pParams->m_pEval_solution_override) + { + if (!(*m_pParams->m_pEval_solution_override)(total_error, *m_pParams, block_colors, &m_temp_selectors[0], coords)) + return false; + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); +#endif + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, 
potential_solution& trial_solution, potential_solution* pBest_solution) + { + uint32_t k = coords.m_unscaled_color.r | (coords.m_unscaled_color.g << 8) | (coords.m_unscaled_color.b << 16); + if (!m_solutions_tried.insert(k).second) + return false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval solution fast: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); +#endif + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; + const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; + const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + trial_solution.m_valid = false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); +#endif + return false; + } + } + + const color_rgba base_color(coords.get_scaled_color()); + + const uint32_t n = m_pParams->m_num_src_pixels; + assert(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = UINT64_MAX; + + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + uint32_t block_inten[4]; + color_rgba block_colors[4]; + for (uint32_t s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + color_rgba block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); + block_colors[s] = block_color; + block_inten[s] = block_color.r + block_color.g + block_color.b; + } + + // evaluate_solution_fast() enforces/assumes a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. 
+ // The input colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. + // 0 1 2 3 + // 01 12 23 + const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; + + uint64_t total_error = 0; + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(block_colors[0], pSrc_pixels[c], false); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(block_colors[3], pSrc_pixels[c], false); + } + else + { + uint32_t cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done; + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); + total_error += color_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } + done: + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += color_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } + + if (total_error < trial_solution.m_error) 
+ { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + if (!total_error) + break; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); +#endif + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + static uint32_t etc1_decode_value(uint32_t diff, uint32_t inten, uint32_t selector, uint32_t packed_c) + { + const uint32_t limit = diff ? 32 : 16; limit; + assert((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); + int c; + if (diff) + c = (packed_c >> 2) | (packed_c << 3); + else + c = packed_c | (packed_c << 4); + c += g_etc1_inten_tables[inten][selector]; + c = clamp(c, 0, 255); + return c; + } + + void pack_etc1_block_init() + { + for (uint32_t diff = 0; diff < 2; diff++) + { + const uint32_t limit = diff ? 
32 : 16; + + for (uint32_t inten = 0; inten < 8; inten++) + { + for (uint32_t selector = 0; selector < 4; selector++) + { + const uint32_t inverse_table_index = diff + (inten << 1) + (selector << 4); + for (uint32_t color = 0; color < 256; color++) + { + uint32_t best_error = UINT32_MAX, best_packed_c = 0; + for (uint32_t packed_c = 0; packed_c < limit; packed_c++) + { + int v = etc1_decode_value(diff, inten, selector, packed_c); + uint32_t err = iabs(v - static_cast(color)); + if (err < best_error) + { + best_error = err; + best_packed_c = packed_c; + if (!best_error) + break; + } + } + assert(best_error <= 255); + g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8)); + } + } + } + } + } + + // Packs solid color blocks efficiently using a set of small precomputed tables. + // For random 888 inputs, MSE results are better than Ericsson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. + static uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor, basis_etc1_pack_params& pack_params, pack_etc1_block_context& context) + { + assert(g_etc1_inverse_lookup[0][255]); + + context, pack_params; + static uint32_t s_next_comp[4] = { 1, 2, 0, 1 }; + + uint32_t best_error = UINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
+ for (uint32_t i = 0; i < 3; i++) + { + const uint32_t c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = clamp(pColor[i] + delta, 0, 255); + + const uint16_t* pTable; + if (!c_plus_delta) + pTable = g_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_color8_to_etc_block_config_0_255[1]; + else + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint32_t x = *pTable++; + +#ifdef BASISU_BUILD_DEBUG + const uint32_t diff = x & 1; + const uint32_t inten = (x >> 1) & 7; + const uint32_t selector = (x >> 4) & 3; + const uint32_t p0 = (x >> 8) & 255; + assert(etc1_decode_value(diff, inten, selector, p0) == (uint32_t)c_plus_delta); +#endif + + const uint16_t* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16_t p1 = pInverse_table[c1]; + uint16_t p2 = pInverse_table[c2]; + const uint32_t trial_error = square(c_plus_delta - pColor[i]) + square(p1 >> 8) + square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } + found_perfect_match: + + const uint32_t diff = best_x & 1; + const uint32_t inten = (best_x >> 1) & 7; + + block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1)); + + const uint32_t etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; + *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; + *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 
0xFFFF : 0; + + const uint32_t best_packed_c0 = (best_x >> 8) & 255; + if (diff) + { + block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 << 3); + } + else + { + block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); + block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); + } + + return best_error; + } + + static uint32_t pack_etc1_block_solid_color_constrained( + etc1_optimizer::results& results, + uint32_t num_colors, const uint8_t *pColor, + basis_etc1_pack_params& pack_params, + pack_etc1_block_context& context, + bool use_diff, + const color_rgba* pBase_color5_unscaled) + { + assert(g_etc1_inverse_lookup[0][255]); + + context, pack_params; + static uint32_t s_next_comp[4] = { 1, 2, 0, 1 }; + + uint32_t best_error = UINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
+ for (uint32_t i = 0; i < 3; i++) + { + const uint32_t c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = clamp(pColor[i] + delta, 0, 255); + + const uint16_t* pTable; + if (!c_plus_delta) + pTable = g_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_color8_to_etc_block_config_0_255[1]; + else + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint32_t x = *pTable++; + const uint32_t diff = x & 1; + if (static_cast(use_diff) != diff) + { + if (*pTable == 0xFFFF) + break; + continue; + } + + if ((diff) && (pBase_color5_unscaled)) + { + const int p0 = (x >> 8) & 255; + int delta_temp = p0 - static_cast(pBase_color5_unscaled->m_comps[i]); + if ((delta_temp < cETC1ColorDeltaMin) || (delta_temp > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + break; + continue; + } + } + +#ifdef BASISU_BUILD_DEBUG + { + const uint32_t inten = (x >> 1) & 7; + const uint32_t selector = (x >> 4) & 3; + const uint32_t p0 = (x >> 8) & 255; + assert(etc1_decode_value(diff, inten, selector, p0) == (uint32_t)c_plus_delta); + } +#endif + + const uint16_t* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16_t p1 = pInverse_table[c1]; + uint16_t p2 = pInverse_table[c2]; + + if ((diff) && (pBase_color5_unscaled)) + { + int delta1 = (p1 & 0xFF) - static_cast(pBase_color5_unscaled->m_comps[s_next_comp[i]]); + int delta2 = (p2 & 0xFF) - static_cast(pBase_color5_unscaled->m_comps[s_next_comp[i + 1]]); + if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + break; + continue; + } + } + + const uint32_t trial_error = square(c_plus_delta - pColor[i]) + square(p1 >> 8) + square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = 
p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } + found_perfect_match: + + if (best_error == UINT32_MAX) + return best_error; + + best_error *= num_colors; + + results.m_n = num_colors; + results.m_block_color4 = !(best_x & 1); + results.m_block_inten_table = (best_x >> 1) & 7; + memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); + + const uint32_t best_packed_c0 = (best_x >> 8) & 255; + results.m_block_color_unscaled[best_i] = static_cast(best_packed_c0); + results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast(best_packed_c1); + results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast(best_packed_c2); + results.m_error = best_error; + + return best_error; + } + + static bool invoke_optimizer(etc1_optimizer::results &results, pack_etc1_block_context &context, etc1_optimizer::params ¶ms, const basis_etc1_pack_params &pack_params) + { + if (!pack_params.m_cluster_fit) + { + if (params.m_quality >= cETCQualitySlow) + { + static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 }; + params.m_scan_delta_size = BASISU_ARRAY_SIZE(s_scan_delta_0_to_4); + params.m_pScan_deltas = s_scan_delta_0_to_4; + } + else if (params.m_quality == cETCQualityMedium) + { + static const int s_scan_delta_0_to_1[] = { -1, 0, 1 }; + params.m_scan_delta_size = BASISU_ARRAY_SIZE(s_scan_delta_0_to_1); + params.m_pScan_deltas = s_scan_delta_0_to_1; + } + else + { + static const int s_scan_delta_0[] = { 0 }; + params.m_scan_delta_size = BASISU_ARRAY_SIZE(s_scan_delta_0); + params.m_pScan_deltas = s_scan_delta_0; + } + } + + context.m_optimizer.init(params, results); + + bool optimizer_succeeded = context.m_optimizer.compute(); + +#if BASISU_DEBUG_ETC_ENCODER + printf("Optimizer succeeded: %u, First optimization pass error: %I64u\n", optimizer_succeeded, results); +#endif + + if (!pack_params.m_cluster_fit) + { + // Fairly arbitrary/unrefined thresholds 
that control how far away to scan for potentially better solutions. + const uint32_t refinement_error_thresh0 = 3000 * (results.m_n / 8); + const uint32_t refinement_error_thresh1 = 6000 * (results.m_n / 8); + if ((params.m_quality >= cETCQualityMedium) && ((results.m_error > refinement_error_thresh0) || (!optimizer_succeeded))) + { + if (params.m_quality == cETCQualityMedium) + { + static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 }; + params.m_scan_delta_size = BASISU_ARRAY_SIZE(s_scan_delta_2_to_3); + params.m_pScan_deltas = s_scan_delta_2_to_3; + } + else + { + static const int s_scan_delta_5_to_5[] = { -5, 5 }; + static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 }; + + if (results.m_error > refinement_error_thresh1) + { + params.m_scan_delta_size = BASISU_ARRAY_SIZE(s_scan_delta_5_to_8); + params.m_pScan_deltas = s_scan_delta_5_to_8; + } + else + { + params.m_scan_delta_size = BASISU_ARRAY_SIZE(s_scan_delta_5_to_5); + params.m_pScan_deltas = s_scan_delta_5_to_5; + } + } + + if (context.m_optimizer.compute()) + optimizer_succeeded = true; + +#if BASISU_DEBUG_ETC_ENCODER + printf("Second optimization pass error: %I64u\n", results.m_error); +#endif + } + } + + return optimizer_succeeded; + } + + static vec3F color_quad_u8_to_f(const color_rgba& c) + { + return vec3F(static_cast(c.r), static_cast(c.g), static_cast(c.b)); + } + + // Returns distance from color c to the gray line going through color d. 
+ static inline float gray_distance2(const color_rgba& c, const vec3F& d) + { + float gray_dist = static_cast(((c.r - d[0]) + (c.g - d[1]) + (c.b - d[2]))) * (1.0f / 3.0f); + + // Project onto the gray line + float gray_point_r = clamp(d[0] + gray_dist, 0.0f, 255.0f); + float gray_point_g = clamp(d[1] + gray_dist, 0.0f, 255.0f); + float gray_point_b = clamp(d[2] + gray_dist, 0.0f, 255.0f); + + // Compute distance^2 + float dist_to_gray_point_r = static_cast(c.r) - gray_point_r; + float dist_to_gray_point_g = static_cast(c.g) - gray_point_g; + float dist_to_gray_point_b = static_cast(c.b) - gray_point_b; + + return dist_to_gray_point_r * dist_to_gray_point_r + dist_to_gray_point_g * dist_to_gray_point_g + dist_to_gray_point_b * dist_to_gray_point_b; + } + + bool pack_etc1_estimate_flipped(const color_rgba* pSrc_pixels) + { + vec3F sums[2][2]; + +#define GET_XY(x, y) color_quad_u8_to_f(pSrc_pixels[(x) + ((y) * 4)]) + + sums[0][0] = GET_XY(0, 0) + GET_XY(0, 1) + GET_XY(1, 0) + GET_XY(1, 1); + sums[1][0] = GET_XY(2, 0) + GET_XY(2, 1) + GET_XY(3, 0) + GET_XY(3, 1); + sums[0][1] = GET_XY(0, 2) + GET_XY(0, 3) + GET_XY(1, 2) + GET_XY(1, 3); + sums[1][1] = GET_XY(2, 2) + GET_XY(2, 3) + GET_XY(3, 2) + GET_XY(3, 3); + + vec3F upper_avg((sums[0][0] + sums[1][0]) * (1.0f / 8.0f)); + vec3F lower_avg((sums[0][1] + sums[1][1]) * (1.0f / 8.0f)); + vec3F left_avg((sums[0][0] + sums[0][1]) * (1.0f / 8.0f)); + vec3F right_avg((sums[1][0] + sums[1][1]) * (1.0f / 8.0f)); + +#undef GET_XY +#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a) + + float upper_gray_dist = 0.0f, lower_gray_dist = 0.0f, left_gray_dist = 0.0f, right_gray_dist = 0.0f; + for (uint32_t i = 0; i < 4; i++) + { + for (uint32_t j = 0; j < 2; j++) + { + upper_gray_dist += GET_XY(i, j, upper_avg); + lower_gray_dist += GET_XY(i, 2 + j, lower_avg); + left_gray_dist += GET_XY(j, i, left_avg); + right_gray_dist += GET_XY(2 + j, i, right_avg); + } + } + +#undef GET_XY + + float upper_lower_sum = 
upper_gray_dist + lower_gray_dist; + float left_right_sum = left_gray_dist + right_gray_dist; + + return upper_lower_sum < left_right_sum; + } + + uint64_t pack_etc1_block(etc_block& dst_block, const color_rgba* pSrc_pixels, basis_etc1_pack_params& pack_params, pack_etc1_block_context& context, const uint8_t* pForce_selectors) + { +#if BASISU_DEBUG_ETC_ENCODER + printf("** Start of block\n"); +#endif + + color_rgba src_pixel0(pSrc_pixels[0]); + + if (!pForce_selectors) + { + int r; + for (r = 15; r >= 1; --r) + if ((pSrc_pixels[r].r != src_pixel0.r) || (pSrc_pixels[r].g != src_pixel0.g) || (pSrc_pixels[r].b != src_pixel0.b)) + break; + if (!r) + { +#if BASISU_DEBUG_ETC_ENCODER + printf("** Block is a single solid color\n"); +#endif + uint64_t err = 16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params, context); + dst_block.set_flip_bit(true); + return err; + } + } + + uint64_t best_error = UINT64_MAX; + uint32_t best_flip = false, best_use_color4 = false, best_is_subset = false; + + uint8_t best_selectors[2][8]; + etc1_optimizer::results best_results[2]; + for (uint32_t i = 0; i < 2; i++) + { + best_results[i].m_n = 8; + best_results[i].m_pSelectors = best_selectors[i]; + best_results[i].m_block_color_unscaled.set(0, 0, 0, 255); + best_results[i].m_block_inten_table = 0; + best_results[i].m_error = 0; + best_results[i].m_block_color4 = false; + } + + uint8_t best_subset_selectors[16]; + etc1_optimizer::results best_subset_results; + best_subset_results.m_n = 16; + best_subset_results.m_pSelectors = best_subset_selectors; + + if (((pForce_selectors) && (pack_params.m_quality >= cETCQualitySlow)) || (pack_params.m_force_etc1s)) + { + // TODO: This may only be useful in force selector mode. 
+ uint8_t subset_selectors[16]; + + etc1_optimizer::params subset_params(pack_params); + subset_params.m_num_src_pixels = 16; + subset_params.m_pSrc_pixels = pSrc_pixels; + subset_params.m_pForce_selectors = pForce_selectors; + + etc1_optimizer::results subset_results; + subset_results.m_n = 16; + subset_results.m_pSelectors = subset_selectors; + + for (uint32_t use_color4 = 0; use_color4 < (uint32_t)(pack_params.m_use_color4 ? 2 : 1); use_color4++) + { + subset_params.m_use_color4 = (use_color4 != 0); + + if (!invoke_optimizer(subset_results, context, subset_params, subset_params)) + break; + + if (subset_results.m_error < best_error) + { + best_error = subset_results.m_error; + best_flip = true; + best_use_color4 = use_color4; + best_subset_results = subset_results; + best_results[0] = subset_results; + best_results[1] = subset_results; + best_is_subset = true; + } + } + } + + if ((best_error > 0) && (!pack_params.m_force_etc1s)) + { + uint8_t selectors[3][8]; + etc1_optimizer::results results[3]; + + for (uint32_t i = 0; i < 3; i++) + { + results[i].m_n = 8; + results[i].m_pSelectors = selectors[i]; + } + + color_rgba subblock_pixels[8]; + + etc1_optimizer::params params(pack_params); + params.m_num_src_pixels = 8; + params.m_pSrc_pixels = subblock_pixels; + + uint8_t forced_selectors[8]; + params.m_pForce_selectors = pForce_selectors ? forced_selectors : nullptr; + + uint32_t first_flip = 0; + uint32_t last_flip = 2; + if (pack_params.m_quality < cETCQualitySlow) + { + const bool should_flip = pack_etc1_estimate_flipped(pSrc_pixels); + + first_flip = should_flip; + last_flip = first_flip + 1; + } + + for (uint32_t flip = first_flip; flip < last_flip; flip++) + { + for (uint32_t use_color4 = 0; use_color4 < (uint32_t)(pack_params.m_use_color4 ? 
2 : 1); use_color4++) + { + uint64_t trial_error = 0; + + uint32_t subblock; + for (subblock = 0; subblock < 2; subblock++) + { +#if BASISU_DEBUG_ETC_ENCODER + printf("** Flip: %u, Color4: %u, Subblock: %u, Best error so far: %I64u\n", flip, use_color4, subblock, best_error); +#endif + + if (flip) + { + memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_rgba) * 8); + + if (pForce_selectors) + memcpy(forced_selectors, pForce_selectors + subblock * 8, 8); + } + else + { + const color_rgba* pSrc_col = pSrc_pixels + subblock * 2; + subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12]; + subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13]; + + if (pForce_selectors) + { + const uint8_t* pSrc_sel = pForce_selectors + subblock * 2; + forced_selectors[0] = pSrc_sel[0]; forced_selectors[1] = pSrc_sel[4]; forced_selectors[2] = pSrc_sel[8]; forced_selectors[3] = pSrc_sel[12]; + forced_selectors[4] = pSrc_sel[1]; forced_selectors[5] = pSrc_sel[5]; forced_selectors[6] = pSrc_sel[9]; forced_selectors[7] = pSrc_sel[13]; + } + } + + results[2].m_error = UINT64_MAX; + if ((params.m_quality >= cETCQualityMedium) && ((subblock) || (use_color4)) && (pForce_selectors == nullptr)) + { + color_rgba subblock_pixel0(subblock_pixels[0]); + int r; + for (r = 7; r >= 1; --r) + if ((subblock_pixels[r].r != subblock_pixel0.r) || (subblock_pixels[r].g != subblock_pixel0.g) || (subblock_pixels[r].b != subblock_pixel0.b)) + break; + if (!r) + { + pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixel0.r, pack_params, context, !use_color4, (subblock && !use_color4) ? 
&results[0].m_block_color_unscaled : nullptr); + } + } + + params.m_use_color4 = (use_color4 != 0); + params.m_constrain_against_base_color5 = false; + + if ((!use_color4) && (subblock)) + { + params.m_constrain_against_base_color5 = true; + params.m_base_color5 = results[0].m_block_color_unscaled; + } + + if (!invoke_optimizer(results[subblock], context, params, pack_params)) + { +#if BASISU_DEBUG_ETC_ENCODER + printf("Optimizer failed\n"); +#endif + break; + +#if 0 + if (!pack_params.m_cluster_fit) + break; + + // TODO: Test this, is it worth it? + basis_etc1_pack_params temp_pack_params(pack_params); + + temp_pack_params.m_cluster_fit = false; + + if (!invoke_optimizer(results[subblock], context, params, temp_pack_params)) + { +#if BASISU_DEBUG_ETC_ENCODER + printf("Optimizer failed again\n"); +#endif + break; + } + else + { +#if BASISU_DEBUG_ETC_ENCODER + printf("Optimizer succeeded once cluster fit was disabled\n"); +#endif + } +#endif + + } + +#if BASISU_DEBUG_ETC_ENCODER + printf("First optimization pass error: %I64u\n", results[subblock].m_error); +#endif + + if (results[2].m_error < results[subblock].m_error) + { +#if BASISU_DEBUG_ETC_ENCODER + printf("Switching to solid block results for sublock (%I64u vs. %I64u error)\n", results[2].m_error, results[subblock].m_error); +#endif + results[subblock] = results[2]; + } + + trial_error += results[subblock].m_error; + if (trial_error >= best_error) + { +#if BASISU_DEBUG_ETC_ENCODER + printf("Early out, trial error %I64u vs. 
current best error %I64u\n", trial_error, best_error); +#endif + break; + } + } // subblock + + if (subblock < 2) + continue; + + if ((pack_params.m_flip_bias > 0.0f) && (flip) && (pack_params.m_quality >= cETCQualitySlow)) + { + uint64_t nonflip_error = best_error; + uint64_t flip_error = trial_error; + + if (!(flip_error < nonflip_error * pack_params.m_flip_bias)) + continue; + } + + best_error = trial_error; + best_results[0] = results[0]; + best_results[1] = results[1]; + best_flip = flip; + best_use_color4 = use_color4; + best_is_subset = false; + + } // use_color4 + + } // flip + } + +#if BASISU_DEBUG_ETC_ENCODER + printf("Best error: %I64u\n", best_error); +#endif + + int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; + int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; + int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; + if (!best_use_color4) + { + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + assert(0); +#if 0 + printf("%i %i %i, %i %i %i\n", + best_results[0].m_block_color_unscaled.r, + best_results[0].m_block_color_unscaled.g, + best_results[0].m_block_color_unscaled.b, + best_results[1].m_block_color_unscaled.r, + best_results[1].m_block_color_unscaled.g, + best_results[1].m_block_color_unscaled.b); +#endif + // Shouldn't ever happen! 
+ } + } + + if (best_use_color4) + { + dst_block.m_bytes[0] = static_cast(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); + dst_block.m_bytes[1] = static_cast(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4)); + dst_block.m_bytes[2] = static_cast(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4)); + } + else + { + if (dr < 0) dr += 8; + if (dg < 0) dg += 8; + if (db < 0) db += 8; + dst_block.m_bytes[0] = static_cast((best_results[0].m_block_color_unscaled.r << 3) | dr); + dst_block.m_bytes[1] = static_cast((best_results[0].m_block_color_unscaled.g << 3) | dg); + dst_block.m_bytes[2] = static_cast((best_results[0].m_block_color_unscaled.b << 3) | db); + } + + dst_block.m_bytes[3] = static_cast((best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip); + + uint32_t selector0 = 0, selector1 = 0; + if (best_is_subset) + { + const uint8_t* pSelectors0 = best_subset_results.m_pSelectors; + const uint8_t* pSelectors1 = best_subset_results.m_pSelectors + 8; + for (int x = 3; x >= 0; --x) + { + uint32_t b; + b = g_selector_index_to_etc1[pSelectors1[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors1[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + } + } + else if (best_flip) + { + // flipped: + // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + // + // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + const 
uint8_t* pSelectors0 = best_results[0].m_pSelectors; + const uint8_t* pSelectors1 = best_results[1].m_pSelectors; + for (int x = 3; x >= 0; --x) + { + uint32_t b; + b = g_selector_index_to_etc1[pSelectors1[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors1[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + } + } + else + { + // non-flipped: + // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + // + // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + for (int subblock = 1; subblock >= 0; --subblock) + { + const uint8_t* pSelectors = best_results[subblock].m_pSelectors + 4; + for (uint32_t i = 0; i < 2; i++) + { + uint32_t b; + b = g_selector_index_to_etc1[pSelectors[3]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[2]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[1]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[0]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + pSelectors -= 4; + } + } + } + + dst_block.m_bytes[4] = static_cast(selector1 >> 8); + dst_block.m_bytes[5] = static_cast(selector1 & 0xFF); + dst_block.m_bytes[6] = static_cast(selector0 >> 8); + dst_block.m_bytes[7] = static_cast(selector0 & 0xFF); + + return best_error; + } + +} // namespace basisu diff --git a/basisu_etc.h b/basisu_etc.h new file mode 100644 
index 0000000..f097fd8 --- /dev/null +++ b/basisu_etc.h @@ -0,0 +1,984 @@ +// basis_etc.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "transcoder/basisu.h" +#include "basisu_enc.h" +#include + +namespace basisu +{ + enum etc_constants + { + cETC1BytesPerBlock = 8U, + + cETC1SelectorBits = 2U, + cETC1SelectorValues = 1U << cETC1SelectorBits, + cETC1SelectorMask = cETC1SelectorValues - 1U, + + cETC1BlockShift = 2U, + cETC1BlockSize = 1U << cETC1BlockShift, + + cETC1LSBSelectorIndicesBitOffset = 0, + cETC1MSBSelectorIndicesBitOffset = 16, + + cETC1FlipBitOffset = 32, + cETC1DiffBitOffset = 33, + + cETC1IntenModifierNumBits = 3, + cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, + cETC1RightIntenModifierTableBitOffset = 34, + cETC1LeftIntenModifierTableBitOffset = 37, + + // Base+Delta encoding (5 bit bases, 3 bit delta) + cETC1BaseColorCompNumBits = 5, + cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, + + cETC1DeltaColorCompNumBits = 3, + cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, + cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, + + cETC1BaseColor5RBitOffset = 59, + cETC1BaseColor5GBitOffset = 51, + cETC1BaseColor5BBitOffset = 43, + + cETC1DeltaColor3RBitOffset = 56, + cETC1DeltaColor3GBitOffset = 48, + cETC1DeltaColor3BBitOffset = 40, + + // Absolute (non-delta) encoding (two 4-bit per component bases) + cETC1AbsColorCompNumBits 
= 4, + cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, + + cETC1AbsColor4R1BitOffset = 60, + cETC1AbsColor4G1BitOffset = 52, + cETC1AbsColor4B1BitOffset = 44, + + cETC1AbsColor4R2BitOffset = 56, + cETC1AbsColor4G2BitOffset = 48, + cETC1AbsColor4B2BitOffset = 40, + + cETC1ColorDeltaMin = -4, + cETC1ColorDeltaMax = 3, + + // Delta3: + // 0 1 2 3 4 5 6 7 + // 000 001 010 011 100 101 110 111 + // 0 1 2 3 -4 -3 -2 -1 + }; + + extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; + extern const uint8_t g_etc1_to_selector_index[cETC1SelectorValues]; + extern const uint8_t g_selector_index_to_etc1[cETC1SelectorValues]; + + struct etc_coord2 + { + uint8_t m_x, m_y; + }; + extern const etc_coord2 g_etc1_pixel_coords[2][2][8]; // [flipped][subblock][subblock_pixel] + extern const uint32_t g_etc1_pixel_indices[2][2][8]; // [flipped][subblock][subblock_pixel] + + struct etc_block + { + // big endian uint64: + // bit ofs: 56 48 40 32 24 16 8 0 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + union + { + uint64_t m_uint64; + + uint8_t m_bytes[8]; + }; + + inline void clear() + { + assert(sizeof(*this) == 8); + clear_obj(*this); + } + + inline uint64_t get_all_bits() const + { + return read_be64(&m_uint64); + } + + inline uint32_t get_general_bits(uint32_t ofs, uint32_t num) const + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + return (read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); + } + + inline void set_general_bits(uint32_t ofs, uint32_t num, uint32_t bits) + { + assert((ofs + num) <= 64U); + assert(num && (num < 32U)); + + uint64_t x = read_be64(&m_uint64); + uint64_t msk = ((1ULL << static_cast(num)) - 1ULL) << static_cast(ofs); + x &= ~msk; + x |= (static_cast(bits) << static_cast(ofs)); + write_be64(&m_uint64, x); + } + + inline uint32_t get_byte_bits(uint32_t ofs, uint32_t num) const + { + assert((ofs + num) <= 64U); + assert(num && (num <= 8U)); + assert((ofs >> 3) == ((ofs + num - 1) >> 3)); + const uint32_t 
byte_ofs = 7 - (ofs >> 3); + const uint32_t byte_bit_ofs = ofs & 7; + return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); + } + + // Stores the value "bits" into the num-bit field starting at bit offset ofs. + // The field must lie entirely within one of the 8 block bytes, so num is at most 8 (same bound as get_byte_bits()), + // and bits must fit in num bits. + inline void set_byte_bits(uint32_t ofs, uint32_t num, uint32_t bits) + { + assert((ofs + num) <= 64U); + assert(num && (num <= 8U)); + assert((ofs >> 3) == ((ofs + num - 1) >> 3)); + assert(bits < (1U << num)); + const uint32_t byte_ofs = 7 - (ofs >> 3); + const uint32_t byte_bit_ofs = ofs & 7; + const uint32_t mask = (1 << num) - 1; + m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); + m_bytes[byte_ofs] |= (bits << byte_bit_ofs); + } + + // false = left/right subblocks + // true = upper/lower subblocks + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 0; + } + + inline void set_flip_bit(bool flip) + { + m_bytes[3] &= ~1; + m_bytes[3] |= static_cast(flip); + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + + inline void set_diff_bit(bool diff) + { + m_bytes[3] &= ~2; + m_bytes[3] |= (static_cast(diff) << 1); + } + + // Returns intensity modifier table (0-7) used by subblock subblock_id. + // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) + inline uint32_t get_inten_table(uint32_t subblock_id) const + { + assert(subblock_id < 2); + const uint32_t ofs = subblock_id ? 2 : 5; + return (m_bytes[3] >> ofs) & 7; + } + + // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) + inline void set_inten_table(uint32_t subblock_id, uint32_t t) + { + assert(subblock_id < 2); + assert(t < 8); + const uint32_t ofs = subblock_id ? 
2 : 5; + m_bytes[3] &= ~(7 << ofs); + m_bytes[3] |= (t << ofs); + } + + inline void set_both_inten_tables(uint32_t t) + { + set_inten_table(0, t); + set_inten_table(1, t); + } + + inline bool is_etc1s() const + { + if (get_inten_table(0) != get_inten_table(1)) + return false; + + if (get_diff_bit()) + { + if (get_delta3_color() != 0) + return false; + } + else + { + if (get_base4_color(0) != get_base4_color(1)) + return false; + } + + return true; + } + + // Returned encoded selector value ranges from 0-3 (this is NOT a direct index into g_etc1_inten_tables, see get_selector()) + inline uint32_t get_raw_selector(uint32_t x, uint32_t y) const + { + assert((x | y) < 4); + + const uint32_t bit_index = x * 4 + y; + const uint32_t byte_bit_ofs = bit_index & 7; + const uint8_t *p = &m_bytes[7 - (bit_index >> 3)]; + const uint32_t lsb = (p[0] >> byte_bit_ofs) & 1; + const uint32_t msb = (p[-2] >> byte_bit_ofs) & 1; + const uint32_t val = lsb | (msb << 1); + + return val; + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline uint32_t get_selector(uint32_t x, uint32_t y) const + { + return g_etc1_to_selector_index[get_raw_selector(x, y)]; + } + + // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
+ inline void set_selector(uint32_t x, uint32_t y, uint32_t val) + { + assert((x | y | val) < 4); + const uint32_t bit_index = x * 4 + y; + + uint8_t *p = &m_bytes[7 - (bit_index >> 3)]; + + const uint32_t byte_bit_ofs = bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t etc1_val = g_selector_index_to_etc1[val]; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + + inline uint32_t get_raw_selector_bits() const + { + return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24); + } + + inline void set_raw_selector_bits(uint32_t bits) + { + m_bytes[4] = static_cast(bits); + m_bytes[5] = static_cast(bits >> 8); + m_bytes[6] = static_cast(bits >> 16); + m_bytes[7] = static_cast(bits >> 24); + } + + inline void set_raw_selector_bits(uint8_t byte0, uint8_t byte1, uint8_t byte2, uint8_t byte3) + { + m_bytes[4] = byte0; + m_bytes[5] = byte1; + m_bytes[6] = byte2; + m_bytes[7] = byte3; + } + + inline void set_base4_color(uint32_t idx, uint16_t c) + { + if (idx) + { + set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); + } + else + { + set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); + set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); + set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); + } + } + + inline uint16_t get_base4_color(uint32_t idx) const + { + uint32_t r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast(b | (g << 4U) | (r << 
8U)); + } + + inline void set_base5_color(uint16_t c) + { + set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); + set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); + set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); + } + + inline uint16_t get_base5_color() const + { + const uint32_t r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); + const uint32_t g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); + const uint32_t b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); + return static_cast(b | (g << 5U) | (r << 10U)); + } + + void set_delta3_color(uint16_t c) + { + set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); + set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); + set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); + } + + inline uint16_t get_delta3_color() const + { + const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast(b | (g << 3U) | (r << 6U)); + } + + uint64_t determine_selectors(const color_rgba* pSource_pixels, bool perceptual, uint32_t begin_subblock = 0, uint32_t end_subblock = 2) + { + uint64_t total_error = 0; + + for (uint32_t subblock = begin_subblock; subblock < end_subblock; subblock++) + { + color_rgba block_colors[4]; + get_block_colors(block_colors, subblock); + + if (get_flip_bit()) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t best_selector = 0; + uint64_t best_error = UINT64_MAX; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(perceptual, block_colors[s], pSource_pixels[x + (subblock * 2 + y) * 4], false); + if (err < best_error) + { + best_error = err; + best_selector = s; + } + } + + set_selector(x, subblock * 2 + y, best_selector); + + total_error += best_error; + } + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + 
uint32_t best_selector = 0; + uint64_t best_error = UINT64_MAX; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(perceptual, block_colors[s], pSource_pixels[(subblock * 2) + x + y * 4], false); + if (err < best_error) + { + best_error = err; + best_selector = s; + } + } + + set_selector(subblock * 2 + x, y, best_selector); + + total_error += best_error; + } + } + } + } + + return total_error; + } + + color_rgba get_block_color(uint32_t subblock_index, bool scaled) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), scaled); + else + unpack_color5(b, get_base5_color(), scaled); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), scaled); + } + + return b; + } + + uint32_t get_subblock_index(uint32_t x, uint32_t y) const + { + if (get_flip_bit()) + return y >= 2; + else + return x >= 2; + } + + bool get_block_colors(color_rgba* pBlock_colors, uint32_t subblock_index) const + { + color_rgba b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + bool dc = false; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0], dc), clamp255(b.g + pInten_table[0], dc), clamp255(b.b + pInten_table[0], dc), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1], dc), clamp255(b.g + pInten_table[1], dc), clamp255(b.b + pInten_table[1], dc), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2], dc), clamp255(b.g + pInten_table[2], dc), clamp255(b.b + pInten_table[2], dc), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3], dc), clamp255(b.g + pInten_table[3], dc), clamp255(b.b + pInten_table[3], dc), 255); + + return dc; + } + + static void 
get_block_colors5(color_rgba *pBlock_colors, const color_rgba &base_color5, uint32_t inten_table, bool scaled = false) + { + color_rgba b(base_color5); + + if (!scaled) + { + b.r = (b.r << 3) | (b.r >> 2); + b.g = (b.g << 3) | (b.g >> 2); + b.b = (b.b << 3) | (b.b >> 2); + } + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + static void get_block_colors4(color_rgba *pBlock_colors, const color_rgba &base_color4, uint32_t inten_table, bool scaled = false) + { + color_rgba b(base_color4); + + if (!scaled) + { + b.r = (b.r << 4) | b.r; + b.g = (b.g << 4) | b.g; + b.b = (b.b << 4) | b.b; + } + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + + uint64_t evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index = -1) const; + void get_subblock_pixels(color_rgba* pPixels, int subblock_index = -1) const; + + void set_block_color4(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + 
set_diff_bit(false); + + set_base4_color(0, pack_color4(c0_unscaled, false)); + set_base4_color(1, pack_color4(c1_unscaled, false)); + } + + void set_block_color5(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + set_delta3_color(pack_delta3(dr, dg, db)); + } + + bool set_block_color5_check(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + if (((dr < cETC1ColorDeltaMin) || (dr > cETC1ColorDeltaMax)) || + ((dg < cETC1ColorDeltaMin) || (dg > cETC1ColorDeltaMax)) || + ((db < cETC1ColorDeltaMin) || (db > cETC1ColorDeltaMax))) + return false; + + set_delta3_color(pack_delta3(dr, dg, db)); + + return true; + } + + color_rgba get_selector_color(uint32_t x, uint32_t y, uint32_t s) const + { + color_rgba block_colors[4]; + + get_block_colors(block_colors, get_subblock_index(x, y)); + + return block_colors[s]; + } + + // Base color 5 + static uint16_t pack_color5(const color_rgba& color, bool scaled, uint32_t bias = 127U); + static uint16_t pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U); + + static color_rgba unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha = 255U); + static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color, bool scaled); + static void unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled); + + static bool unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha = 255U); + static bool unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t 
packed_delta3, bool scaled, uint32_t alpha = 255U); + + // Delta color 3 + // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static uint16_t pack_delta3(const color_rgba_i16& color); + static uint16_t pack_delta3(int r, int g, int b); + + // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) + static color_rgba_i16 unpack_delta3(uint16_t packed_delta3); + static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3); + + static bool try_pack_color5_delta3(const color_rgba *pColor5_unscaled) + { + int dr = pColor5_unscaled[1].r - pColor5_unscaled[0].r; + int dg = pColor5_unscaled[1].g - pColor5_unscaled[0].g; + int db = pColor5_unscaled[1].b - pColor5_unscaled[0].b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + return false; + + return true; + } + + // Abs color 4 + static uint16_t pack_color4(const color_rgba& color, bool scaled, uint32_t bias = 127U); + static uint16_t pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias = 127U); + + static color_rgba unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha = 255U); + static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled); + + // subblock colors + static void get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx); + static bool get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx); + static void get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx); + + static inline void unscaled_to_scaled_color(color_rgba& dst, const color_rgba& src, bool color4) + { + if (color4) + { + dst.r = src.r | (src.r << 4); + dst.g = src.g | (src.g << 4); + dst.b = src.b | (src.b << 4); + } + else + { + dst.r = (src.r >> 2) | (src.r << 3); + dst.g = (src.g >> 2) | (src.g << 3); + dst.b = (src.b >> 2) | (src.b << 3); + } + dst.a = 
src.a; + } + + private: + static uint8_t clamp255(int x, bool &did_clamp) + { + if (x < 0) + { + did_clamp = true; + return 0; + } + else if (x > 255) + { + did_clamp = true; + return 255; + } + + return static_cast(x); + } + + static uint8_t clamp255(int x) + { + if (x < 0) + return 0; + else if (x > 255) + return 255; + + return static_cast(x); + } + }; + + typedef std::vector etc_block_vec; + + // Returns false if the unpack fails (could be bogus data or ETC2) + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha = false); + + enum basis_etc_quality + { + cETCQualityFast, + cETCQualityMedium, + cETCQualitySlow, + cETCQualityUber, + cETCQualityTotal, + }; + + struct basis_etc1_pack_params + { + basis_etc_quality m_quality; + bool m_perceptual; + bool m_cluster_fit; + bool m_force_etc1s; + bool m_use_color4; + float m_flip_bias; + + inline basis_etc1_pack_params() + { + clear(); + } + + void clear() + { + m_quality = cETCQualitySlow; + m_perceptual = true; + m_cluster_fit = true; + m_force_etc1s = false; + m_use_color4 = true; + m_flip_bias = 0.0f; + } + }; + + struct etc1_solution_coordinates + { + inline etc1_solution_coordinates() : + m_unscaled_color(0, 0, 0, 0), + m_inten_table(0), + m_color4(false) + { + } + + inline etc1_solution_coordinates(uint32_t r, uint32_t g, uint32_t b, uint32_t inten_table, bool color4) : + m_unscaled_color((uint8_t)r, (uint8_t)g, (uint8_t)b, 255), + m_inten_table((uint8_t)inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const color_rgba& c, uint32_t inten_table, bool color4) : + m_unscaled_color(c), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const etc1_solution_coordinates& other) + { + *this = other; + } + + inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs) + { + m_unscaled_color = rhs.m_unscaled_color; + m_inten_table = rhs.m_inten_table; + m_color4 = rhs.m_color4; + return *this; + } + + 
inline void clear() + { + m_unscaled_color.clear(); + m_inten_table = 0; + m_color4 = false; + } + + inline void init(const color_rgba& c, uint32_t inten_table, bool color4) + { + m_unscaled_color = c; + m_inten_table = inten_table; + m_color4 = color4; + } + + inline color_rgba get_scaled_color() const + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + return color_rgba((uint8_t)br, (uint8_t)bg, (uint8_t)bb, 255); + } + + // returns true if anything was clamped + inline void get_block_colors(color_rgba* pBlock_colors) + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + const int* pInten_table = g_etc1_inten_tables[m_inten_table]; + pBlock_colors[0].set((uint8_t)(br + pInten_table[0]), (uint8_t)(bg + pInten_table[0]), (uint8_t)(bb + pInten_table[0]), 255); + pBlock_colors[1].set((uint8_t)(br + pInten_table[1]), (uint8_t)(bg + pInten_table[1]), (uint8_t)(bb + pInten_table[1]), 255); + pBlock_colors[2].set((uint8_t)(br + pInten_table[2]), (uint8_t)(bg + pInten_table[2]), (uint8_t)(bb + pInten_table[2]), 255); + pBlock_colors[3].set((uint8_t)(br + pInten_table[3]), (uint8_t)(bg + pInten_table[3]), (uint8_t)(bb + pInten_table[3]), 255); + } + + color_rgba m_unscaled_color; + uint32_t m_inten_table; + bool m_color4; + }; + + class etc1_optimizer + { + 
BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(etc1_optimizer); + + public: + etc1_optimizer() + { + clear(); + } + + void clear() + { + m_pParams = nullptr; + m_pResult = nullptr; + m_pSorted_luma = nullptr; + m_pSorted_luma_indices = nullptr; + } + + struct params; + + typedef bool(*evaluate_solution_override_func)(uint64_t &error, const params &p, const color_rgba* pBlock_colors, const uint8_t* pSelectors, const etc1_solution_coordinates& coords); + + struct params : basis_etc1_pack_params + { + params() + { + clear(); + } + + params(const basis_etc1_pack_params& base_params) + { + clear_optimizer_params(); + + *static_cast(this) = base_params; + } + + void clear() + { + clear_optimizer_params(); + } + + void clear_optimizer_params() + { + basis_etc1_pack_params::clear(); + + m_num_src_pixels = 0; + m_pSrc_pixels = 0; + + m_use_color4 = false; + static const int s_default_scan_delta[] = { 0 }; + m_pScan_deltas = s_default_scan_delta; + m_scan_delta_size = 1; + + m_base_color5.clear(); + m_constrain_against_base_color5 = false; + + m_refinement = true; + + m_pForce_selectors = nullptr; + + m_pEval_solution_override = nullptr; + m_pEval_solution_override_data = nullptr; + } + + uint32_t m_num_src_pixels; + const color_rgba* m_pSrc_pixels; + + bool m_use_color4; + const int* m_pScan_deltas; + uint32_t m_scan_delta_size; + + color_rgba m_base_color5; + bool m_constrain_against_base_color5; + + bool m_refinement; + + const uint8_t* m_pForce_selectors; + + evaluate_solution_override_func m_pEval_solution_override; + void *m_pEval_solution_override_data; + }; + + struct results + { + uint64_t m_error; + color_rgba m_block_color_unscaled; + uint32_t m_block_inten_table; + uint32_t m_n; + uint8_t* m_pSelectors; + bool m_block_color4; + + inline results& operator= (const results& rhs) + { + m_block_color_unscaled = rhs.m_block_color_unscaled; + m_block_color4 = rhs.m_block_color4; + m_block_inten_table = rhs.m_block_inten_table; + m_error = rhs.m_error; + memcpy(m_pSelectors, 
rhs.m_pSelectors, minimum(rhs.m_n, m_n)); + return *this; + } + }; + + void init(const params& params, results& result); + bool compute(); + + const params* get_params() const { return m_pParams; } + + private: + struct potential_solution + { + potential_solution() : m_coords(), m_error(UINT64_MAX), m_valid(false) + { + } + + etc1_solution_coordinates m_coords; + std::vector m_selectors; + uint64_t m_error; + bool m_valid; + + void clear() + { + m_coords.clear(); + m_selectors.resize(0); + m_error = UINT64_MAX; + m_valid = false; + } + + bool are_selectors_all_equal() const + { + if (!m_selectors.size()) + return false; + const uint32_t s = m_selectors[0]; + for (uint32_t i = 1; i < m_selectors.size(); i++) + if (m_selectors[i] != s) + return false; + return true; + } + }; + + const params* m_pParams; + results* m_pResult; + + int m_limit; + + vec3F m_avg_color; + int m_br, m_bg, m_bb; + std::vector m_luma; + std::vector m_sorted_luma; + std::vector m_sorted_luma_indices; + const uint32_t* m_pSorted_luma_indices; + uint32_t* m_pSorted_luma; + + std::vector m_selectors; + std::vector m_best_selectors; + + potential_solution m_best_solution; + potential_solution m_trial_solution; + std::vector m_temp_selectors; + + std::set m_solutions_tried; + + void get_nearby_inten_tables(uint32_t idx, int &first_inten_table, int &last_inten_table) + { + first_inten_table = maximum(idx - 1, 0); + last_inten_table = minimum(cETC1IntenModifierValues, idx + 1); + } + + bool evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + + inline bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (m_pParams->m_quality >= cETCQualitySlow) + return evaluate_solution_slow(coords, 
trial_solution, pBest_solution); + else + return evaluate_solution_fast(coords, trial_solution, pBest_solution); + } + + void refine_solution(uint32_t max_refinement_trials); + void compute_internal_neighborhood(int scan_r, int scan_g, int scan_b); + void compute_internal_cluster_fit(uint32_t total_perms_to_try); + }; + + struct pack_etc1_block_context + { + etc1_optimizer m_optimizer; + }; + + void pack_etc1_block_init(); + + uint64_t pack_etc1_block(etc_block& block, const color_rgba* pSrc_pixels, basis_etc1_pack_params& pack_params, pack_etc1_block_context& context, const uint8_t *pForce_selectors = nullptr); + + bool pack_etc1_estimate_flipped(const color_rgba* pSrc_pixels); + + uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor); + +} // namespace basisu diff --git a/basisu_frontend.cpp b/basisu_frontend.cpp new file mode 100644 index 0000000..3608fbf --- /dev/null +++ b/basisu_frontend.cpp @@ -0,0 +1,1659 @@ +// basisu_frontend.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// TODO: This code originally supported full ETC1 and ETC1S, so there's some legacy stuff in here. 
+// +#include "transcoder/basisu.h" +#include "basisu_frontend.h" +#include + +#define BASISU_FRONTEND_VERIFY(c) verify(c, __LINE__); + +namespace basisu +{ + const uint32_t BASISU_MAX_ENDPOINT_REFINEMENT_STEPS = 2; + const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 2; + + // TODO - How to handle internal verifies in the basisu lib + static void verify(bool condition, int line) + { + if (!condition) + { + fprintf(stderr, "basisu_frontend: verify check failed at line %i!\n", line); + abort(); + } + } + + bool basisu_frontend::init(const params &p) + { + if (p.m_use_hybrid_selector_codebooks) + { + if (!p.m_pGlobal_sel_codebook) + { + assert(0); + return false; + } + } + + debug_printf("basisu_frontend::init: NumEndpointClusters: %u, NumSelectorClusters: %u, EndpointRefinement: %u, Perceptual: %u, Faster: %u\n", + p.m_max_endpoint_clusters, p.m_max_selector_clusters, p.m_endpoint_refinement, p.m_perceptual, p.m_faster); + + debug_printf("Global sel codebook pal bits: %u, Global sel codebook mod bits: %u, Use hybrid selector codebook: %u, Hybrid codebook quality thresh: %f\n", + p.m_num_global_sel_codebook_pal_bits, + p.m_num_global_sel_codebook_mod_bits, + p.m_use_hybrid_selector_codebooks, + p.m_hybrid_codebook_quality_thresh); + + if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClustersRDO)) + return false; + if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClustersRDO)) + return false; + + m_params = p; + + m_encoded_blocks.resize(m_params.m_num_source_blocks); + memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0])); + + return true; + } + + bool basisu_frontend::compress() + { + debug_printf("basisu_frontend::compress\n"); + + m_total_blocks = m_params.m_num_source_blocks; + m_total_pixels = m_total_blocks * cPixelBlockTotalPixels; + + init_etc1_images(); + + init_endpoint_training_vectors(); + + generate_endpoint_clusters(); + + const uint32_t 
num_refine_endpoint_steps = m_params.m_endpoint_refinement ? (m_params.m_faster ? 1 : BASISU_MAX_ENDPOINT_REFINEMENT_STEPS) : 1; + + for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < num_refine_endpoint_steps; refine_endpoint_step++) + { + BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); + + if (refine_endpoint_step) + { + introduce_new_endpoint_clusters(); + } + + generate_endpoint_codebook(refine_endpoint_step); + + if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) + { + char buf[256]; +#ifdef _WIN32 + sprintf_s(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step); +#else + snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step); +#endif + dump_endpoint_clusterization_visualization(buf); + } + + bool early_out = false; + + if (m_params.m_endpoint_refinement) + { + //dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png"); + + if (!refine_endpoint_clusterization()) + early_out = true; + + if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) + { + char buf[256]; +#ifdef _WIN32 + sprintf_s(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step); +#else + snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step); +#endif + dump_endpoint_clusterization_visualization(buf); + } + } + + eliminate_redundant_or_empty_endpoint_clusters(); + + if (m_params.m_debug_stats) + debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size()); + + if (early_out) + break; + } + + BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); + + generate_block_endpoint_clusters(); + + create_initial_packed_texture(); + + create_selector_clusters(); + + const uint32_t num_refine_selector_steps = m_params.m_pGlobal_sel_codebook ? 1 : (m_params.m_faster ? 
1 : BASISU_MAX_SELECTOR_REFINEMENT_STEPS); + for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++) + { + create_optimized_selector_codebook(refine_selector_steps); + + find_optimal_selector_clusters_for_each_block(); + + if (!m_params.m_faster) + { + if (!refine_block_endpoints_given_selectors()) + break; + } + } + + optimize_selector_codebook(); + + if (m_params.m_debug_stats) + debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_indices.size()); + + finalize(); + + if (m_params.m_validate) + { + if (!validate_output()) + return false; + } + + debug_printf("basisu_frontend::compress: Done\n"); + + return true; + } + + void basisu_frontend::optimize_selector_codebook() + { + debug_printf("optimize_selector_codebook\n"); + + const uint32_t orig_total_selector_clusters = (uint32_t)m_optimized_cluster_selectors.size(); + + bool_vec selector_cluster_was_used(m_optimized_cluster_selectors.size()); + for (uint32_t i = 0; i < m_total_blocks; i++) + selector_cluster_was_used[m_block_selector_cluster_index[i]] = true; + + int_vec old_to_new(m_optimized_cluster_selectors.size()); + int_vec new_to_old; + uint32_t total_new_entries = 0; + + for (int i = 0; i < static_cast(m_optimized_cluster_selectors.size()); i++) + { + if (!selector_cluster_was_used[i]) + { + old_to_new[i] = -1; + continue; + } + + int j; + for (j = 0; j < i; j++) + { + if (m_optimized_cluster_selectors[i].get_raw_selector_bits() == m_optimized_cluster_selectors[j].get_raw_selector_bits()) + break; + } + + if (j < i) + { + old_to_new[i] = old_to_new[j]; + continue; + } + + old_to_new[i] = total_new_entries++; + new_to_old.push_back(i); + } + + std::vector new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0); + basist::etc1_global_selector_codebook_entry_id_vec new_optimized_cluster_selector_global_cb_ids(m_optimized_cluster_selector_global_cb_ids.size() ? 
total_new_entries : 0); + std::vector new_selector_cluster_indices(m_selector_cluster_indices.size() ? total_new_entries : 0); + bool_vec new_selector_cluster_uses_global_cb(m_selector_cluster_uses_global_cb.size() ? total_new_entries : 0); + + for (uint32_t i = 0; i < total_new_entries; i++) + { + if (m_optimized_cluster_selectors.size()) + new_optimized_cluster_selectors[i] = m_optimized_cluster_selectors[new_to_old[i]]; + + if (m_optimized_cluster_selector_global_cb_ids.size()) + new_optimized_cluster_selector_global_cb_ids[i] = m_optimized_cluster_selector_global_cb_ids[new_to_old[i]]; + + if (m_selector_cluster_indices.size()) + new_selector_cluster_indices[i] = m_selector_cluster_indices[new_to_old[i]]; + + if (m_selector_cluster_uses_global_cb.size()) + new_selector_cluster_uses_global_cb[i] = m_selector_cluster_uses_global_cb[new_to_old[i]]; + } + + m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors); + m_optimized_cluster_selector_global_cb_ids.swap(new_optimized_cluster_selector_global_cb_ids); + m_selector_cluster_indices.swap(new_selector_cluster_indices); + m_selector_cluster_uses_global_cb.swap(new_selector_cluster_uses_global_cb); + + for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++) + m_block_selector_cluster_index[i] = old_to_new[m_block_selector_cluster_index[i]]; + + debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries); + } + + void basisu_frontend::init_etc1_images() + { + debug_printf("basisu_frontend::init_etc1_images\n"); + + m_etc1_blocks_etc1s.resize(m_total_blocks); + m_etc1_blocks_etc1s_unpacked.resize(m_total_blocks); + +#pragma omp parallel for + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) + { + const pixel_block &source_blk = get_source_pixel_block(block_index); + + pack_etc1_block_context pack_context; + + basis_etc1_pack_params etc1_pack_params; + etc1_pack_params.m_perceptual = 
m_params.m_perceptual; + etc1_pack_params.m_force_etc1s = true; + etc1_pack_params.m_flip_bias = 0.0f; + + pack_etc1_block(m_etc1_blocks_etc1s[block_index], source_blk.get_ptr(), etc1_pack_params, pack_context, NULL); + assert(m_etc1_blocks_etc1s[block_index].get_flip_bit()); + + unpack_etc1(m_etc1_blocks_etc1s[block_index], m_etc1_blocks_etc1s_unpacked[block_index].get_ptr()); + } + } + + void basisu_frontend::init_endpoint_training_vectors() + { + debug_printf("init_endpoint_training_vectors\n"); + + std::vector training_vecs(m_total_blocks); + +#pragma omp parallel for + for (int block_index = 0; block_index < (int)m_total_blocks; block_index++) + { + const color_rgba *pSource_pixels = get_source_pixel_block(block_index).get_ptr(); + + etc1_optimizer optimizer; + etc1_optimizer::params optimizer_params; + etc1_optimizer::results optimizer_results; + + optimizer_params.m_num_src_pixels = 16; + optimizer_params.m_pSrc_pixels = pSource_pixels; + optimizer_params.m_perceptual = m_params.m_perceptual; + + uint8_t selectors[16]; + optimizer_results.m_pSelectors = selectors; + optimizer_results.m_n = 16; + + optimizer.init(optimizer_params, optimizer_results); + optimizer.compute(); + + color_rgba block_colors[4]; + etc_block::get_block_colors5(block_colors, optimizer_results.m_block_color_unscaled, optimizer_results.m_block_inten_table, false); + + vec6F v; + v[0] = block_colors[0].r * (1.0f / 255.0f); + v[1] = block_colors[0].g * (1.0f / 255.0f); + v[2] = block_colors[0].b * (1.0f / 255.0f); + v[3] = block_colors[3].r * (1.0f / 255.0f); + v[4] = block_colors[3].g * (1.0f / 255.0f); + v[5] = block_colors[3].b * (1.0f / 255.0f); + + training_vecs[block_index] = v; + + } // block_index + + for (int block_index = 0; block_index < (int)m_total_blocks; block_index++) + { + const vec6F &v = training_vecs[block_index]; + + m_endpoint_clusterizer.add_training_vec(v, 1); + m_endpoint_clusterizer.add_training_vec(v, 1); + } + } + + void 
basisu_frontend::generate_endpoint_clusters() + { + debug_printf("Begin endpoint quantization\n"); + + m_endpoint_clusterizer.generate(m_params.m_max_endpoint_clusters); + + m_endpoint_clusterizer.retrieve(m_endpoint_clusters); + + if (m_params.m_debug_stats) + debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size()); + } + + void basisu_frontend::generate_block_endpoint_clusters() + { + m_block_endpoint_clusters_indices.resize(m_total_blocks); + + for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) + { + const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + m_block_endpoint_clusters_indices[block_index][subblock_index] = cluster_index; + + } // cluster_indices_iter + } + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + uint32_t cluster_0 = m_block_endpoint_clusters_indices[block_index][0]; + uint32_t cluster_1 = m_block_endpoint_clusters_indices[block_index][1]; + BASISU_FRONTEND_VERIFY(cluster_0 == cluster_1); + } + } + + void basisu_frontend::compute_endpoint_subblock_error_vec() + { + m_subblock_endpoint_quant_err_vec.resize(0); + +#pragma omp parallel for + for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) + { + const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + assert(cluster_indices.size()); + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + std::vector cluster_pixels(8); + + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = 
cluster_indices[cluster_indices_iter] & 1; + + const bool flipped = true; + + const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr(); + + for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++) + { + cluster_pixels[pixel_index] = pSource_block_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]]; + } + + const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index]; + + assert(etc_params.m_valid); + + color_rgba block_colors[4]; + etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true); + + uint64_t total_err = 0; + + for (uint32_t i = 0; i < 8; i++) + { + const color_rgba &c = cluster_pixels[i]; + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false); + if (err < best_err) + { + best_err = err; + best_index = s; + } + } + + total_err += best_err; + } + + subblock_endpoint_quant_err quant_err; + quant_err.m_total_err = total_err; + quant_err.m_cluster_index = cluster_index; + quant_err.m_cluster_subblock_index = cluster_indices_iter; + quant_err.m_block_index = block_index; + quant_err.m_subblock_index = subblock_index; + +#pragma omp critical + { + m_subblock_endpoint_quant_err_vec.push_back(quant_err); + } + } + } + + vector_sort(m_subblock_endpoint_quant_err_vec); + } + + void basisu_frontend::introduce_new_endpoint_clusters() + { + debug_printf("introduce_new_endpoint_clusters\n"); + + generate_block_endpoint_clusters(); + + int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - (uint32_t)m_endpoint_clusters.size(); + if (num_new_endpoint_clusters <= 0) + return; + + compute_endpoint_subblock_error_vec(); + + const uint32_t num_orig_endpoint_clusters = (uint32_t)m_endpoint_clusters.size(); + + std::unordered_set training_vector_was_relocated; + + uint_vec 
cluster_sizes(num_orig_endpoint_clusters); + for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++) + cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size(); + + std::unordered_set ignore_cluster; + + while (num_new_endpoint_clusters) + { + if (m_subblock_endpoint_quant_err_vec.size() == 0) + break; + + subblock_endpoint_quant_err subblock_to_move(m_subblock_endpoint_quant_err_vec.back()); + const bool is_etc1s_block = true; + + m_subblock_endpoint_quant_err_vec.pop_back(); + + if (unordered_set_contains(ignore_cluster, subblock_to_move.m_cluster_index)) + continue; + + uint32_t training_vector_index = subblock_to_move.m_block_index * 2 + subblock_to_move.m_subblock_index; + + if (is_etc1s_block) + { + if (cluster_sizes[subblock_to_move.m_cluster_index] <= 2) + continue; + } + else + { + if (cluster_sizes[subblock_to_move.m_cluster_index] <= 1) + continue; + } + + if (unordered_set_contains(training_vector_was_relocated, training_vector_index)) + continue; + + if (is_etc1s_block) + { + if (unordered_set_contains(training_vector_was_relocated, training_vector_index ^ 1)) + continue; + } + + enlarge_vector(m_endpoint_clusters, 1)->push_back(training_vector_index); + enlarge_vector(m_endpoint_cluster_etc_params, 1); + + assert(m_endpoint_clusters.size() == m_endpoint_cluster_etc_params.size()); + + training_vector_was_relocated.insert(training_vector_index); + + if (is_etc1s_block) + { + m_endpoint_clusters.back().push_back(training_vector_index ^ 1); + training_vector_was_relocated.insert(training_vector_index ^ 1); + + BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2); + cluster_sizes[subblock_to_move.m_cluster_index] -= 2; + } + else + { + BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 1); + cluster_sizes[subblock_to_move.m_cluster_index]--; + } + + ignore_cluster.insert(subblock_to_move.m_cluster_index); + + num_new_endpoint_clusters--; + } + + for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++) + 
{ + uint_vec &cluster_indices = m_endpoint_clusters[i]; + + uint_vec new_cluster_indices; + for (uint32_t j = 0; j < cluster_indices.size(); j++) + { + uint32_t training_vector_index = cluster_indices[j]; + + if (!unordered_set_contains(training_vector_was_relocated, training_vector_index)) + new_cluster_indices.push_back(training_vector_index); + } + + if (cluster_indices.size() != new_cluster_indices.size()) + { + BASISU_FRONTEND_VERIFY(new_cluster_indices.size() > 0); + cluster_indices.swap(new_cluster_indices); + } + } + + generate_block_endpoint_clusters(); + } + + void basisu_frontend::generate_endpoint_codebook(uint32_t step) + { + debug_printf("generate_endpoint_codebook\n"); + + m_endpoint_cluster_etc_params.resize(m_endpoint_clusters.size()); + +#pragma omp parallel for + for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) + { + //debug_printf("%u of %u\n", cluster_index, clusters.size()); + + const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + BASISU_FRONTEND_VERIFY(cluster_indices.size()); + + const uint32_t total_pixels = (uint32_t)cluster_indices.size() * 8; + + std::vector cluster_pixels(total_pixels); + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + const bool flipped = true; + + const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); + + for (uint32_t pixel_index = 0; pixel_index < 8; pixel_index++) + { + cluster_pixels[cluster_indices_iter * 8 + pixel_index] = pBlock_pixels[g_etc1_pixel_indices[flipped][subblock_index][pixel_index]]; + } + } + + endpoint_cluster_etc_params new_subblock_params; + + { + etc1_optimizer optimizer; + etc1_solution_coordinates solutions[2]; + + etc1_optimizer::params cluster_optimizer_params; + 
cluster_optimizer_params.m_num_src_pixels = total_pixels; + cluster_optimizer_params.m_pSrc_pixels = &cluster_pixels[0]; + + cluster_optimizer_params.m_use_color4 = false; + cluster_optimizer_params.m_perceptual = m_params.m_perceptual; + + etc1_optimizer::results cluster_optimizer_results; + + std::vector cluster_selectors(total_pixels); + cluster_optimizer_results.m_n = total_pixels; + cluster_optimizer_results.m_pSelectors = &cluster_selectors[0]; + + optimizer.init(cluster_optimizer_params, cluster_optimizer_results); + + optimizer.compute(); + + new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled; + new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table; + new_subblock_params.m_color_error[0] = cluster_optimizer_results.m_error; + } + + endpoint_cluster_etc_params &prev_etc_params = m_endpoint_cluster_etc_params[cluster_index]; + + bool use_new_subblock_params = false; + if ((!step) || (!prev_etc_params.m_valid)) + use_new_subblock_params = true; + else + { + assert(prev_etc_params.m_valid); + + uint64_t total_prev_err = 0; + + { + color_rgba block_colors[4]; + + etc_block::get_block_colors5(block_colors, prev_etc_params.m_color_unscaled[0], prev_etc_params.m_inten_table[0], false); + + uint64_t total_err = 0; + + for (uint32_t i = 0; i < total_pixels; i++) + { + const color_rgba &c = cluster_pixels[i]; + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + + for (uint32_t s = 0; s < 4; s++) + { + uint64_t err = color_distance(m_params.m_perceptual, c, block_colors[s], false); + if (err < best_err) + { + best_err = err; + best_index = s; + } + } + + total_err += best_err; + } + + total_prev_err += total_err; + } + + // See if we should update this cluster's endpoints (if the error has actually fallen) + if (total_prev_err > (new_subblock_params.m_color_error[0] + new_subblock_params.m_color_error[1])) + { + use_new_subblock_params = true; + } + } + + if (use_new_subblock_params) + { 
+ new_subblock_params.m_valid = true; + + prev_etc_params = new_subblock_params; + } + + } // cluster_index + } + + bool basisu_frontend::check_etc1s_constraints() const + { + std::vector block_clusters(m_total_blocks); + + for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) + { + const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + block_clusters[block_index][subblock_index] = cluster_index; + + } // cluster_indices_iter + } + + for (uint32_t i = 0; i < m_total_blocks; i++) + { + if (block_clusters[i][0] != block_clusters[i][1]) + return false; + } + + return true; + } + + uint32_t basisu_frontend::refine_endpoint_clusterization() + { + debug_printf("refine_endpoint_clusterization\n"); + + std::vector block_clusters(m_total_blocks); + + for (int cluster_index = 0; cluster_index < static_cast(m_endpoint_clusters.size()); cluster_index++) + { + const std::vector& cluster_indices = m_endpoint_clusters[cluster_index]; + + for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) + { + const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; + const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; + + block_clusters[block_index][subblock_index] = cluster_index; + + } // cluster_indices_iter + } + + //---------------------------------------------------------- + + // Create a new endpoint clusterization + + uint_vec best_cluster_indices(m_total_blocks); + +#pragma omp parallel for + for (int block_index = 0; block_index < (int)m_total_blocks; block_index++) + { + const bool is_flipped = true; + + const uint32_t cluster_index = 
block_clusters[block_index][0]; + BASISU_FRONTEND_VERIFY(cluster_index == block_clusters[block_index][1]); + + const color_rgba *subblock_pixels = get_source_pixel_block(block_index).get_ptr(); + const uint32_t num_subblock_pixels = 16; + + uint64_t best_cluster_err = UINT64_MAX; + uint32_t best_cluster_index = 0; + + for (uint32_t cluster_iter = 0; cluster_iter < m_endpoint_clusters.size(); cluster_iter++) + { + color_rgba cluster_etc_base_color(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0]); + uint32_t cluster_etc_inten = m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0]; + + uint64_t total_err = 0; + + const uint32_t low_selector = 0;//subblock_etc_params_vec[j].m_low_selectors[0]; + const uint32_t high_selector = 3;//subblock_etc_params_vec[j].m_high_selectors[0]; + color_rgba subblock_colors[4]; + // Can't assign it here - may result in too much error when selector quant occurs + if (cluster_etc_inten > m_endpoint_cluster_etc_params[cluster_index].m_inten_table[0]) + { + total_err = UINT64_MAX; + goto skip_cluster; + } + + etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten); + + + for (uint32_t p = 0; p < num_subblock_pixels; p++) + { + uint64_t best_err = UINT64_MAX; + + for (uint32_t r = low_selector; r <= high_selector; r++) + { + uint64_t err = color_distance(m_params.m_perceptual, subblock_pixels[p], subblock_colors[r], false); + best_err = minimum(best_err, err); + if (!best_err) + break; + } + + total_err += best_err; + if (total_err > best_cluster_err) + break; + } // p + + skip_cluster: + if ((total_err < best_cluster_err) || + ((cluster_iter == cluster_index) && (total_err == best_cluster_err))) + { + best_cluster_err = total_err; + best_cluster_index = cluster_iter; + + if (!best_cluster_err) + break; + } + } // j + + best_cluster_indices[block_index] = best_cluster_index; + + } // block_index + + std::vector > optimized_endpoint_clusters(m_endpoint_clusters.size()); + uint32_t 
total_subblocks_reassigned = 0; + + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t training_vector_index = block_index * 2 + 0; + + const uint32_t orig_cluster_index = block_clusters[block_index][0]; + const uint32_t best_cluster_index = best_cluster_indices[block_index]; + + optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index); + optimized_endpoint_clusters[best_cluster_index].push_back(training_vector_index + 1); + + if (best_cluster_index != orig_cluster_index) + { + total_subblocks_reassigned++; + } + } + + debug_printf("total_subblocks_reassigned: %u\n", total_subblocks_reassigned); + + m_endpoint_clusters = optimized_endpoint_clusters; + + return total_subblocks_reassigned; + } + + void basisu_frontend::eliminate_redundant_or_empty_endpoint_clusters() + { + debug_printf("eliminate_redundant_or_empty_endpoint_clusters\n"); + + uint_vec sorted_endpoint_cluster_indices(m_endpoint_clusters.size()); + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) + sorted_endpoint_cluster_indices[i] = i; + + indirect_sort((uint32_t)m_endpoint_clusters.size(), &sorted_endpoint_cluster_indices[0], &m_endpoint_cluster_etc_params[0]); + + std::vector > new_endpoint_clusters(m_endpoint_clusters.size()); + std::vector new_subblock_etc_params(m_endpoint_clusters.size()); + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) + { + uint32_t j = sorted_endpoint_cluster_indices[i]; + new_endpoint_clusters[i] = m_endpoint_clusters[j]; + new_subblock_etc_params[i] = m_endpoint_cluster_etc_params[j]; + } + + new_endpoint_clusters.swap(m_endpoint_clusters); + new_subblock_etc_params.swap(m_endpoint_cluster_etc_params); + + new_endpoint_clusters.resize(0); + new_subblock_etc_params.resize(0); + + for (int i = 0; i < (int)m_endpoint_clusters.size(); ) + { + if (!m_endpoint_clusters[i].size()) + { + i++; + continue; + } + + int j; + for (j = i + 1; j < (int)m_endpoint_clusters.size(); j++) + { + if 
(!(m_endpoint_cluster_etc_params[i] == m_endpoint_cluster_etc_params[j])) + break; + } + + new_endpoint_clusters.push_back(m_endpoint_clusters[i]); + new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]); + + for (int k = i + 1; k < j; k++) + append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]); + + i = j; + } + + if (m_endpoint_clusters.size() != new_endpoint_clusters.size()) + { + if (m_params.m_debug_stats) + debug_printf("Eliminated %u redundant or empty clusters\n", (uint32_t)(m_endpoint_clusters.size() - new_endpoint_clusters.size())); + + m_endpoint_clusters.swap(new_endpoint_clusters); + + m_endpoint_cluster_etc_params.swap(new_subblock_etc_params); + } + } + + void basisu_frontend::create_initial_packed_texture() + { + debug_printf("create_initial_packed_texture\n"); + +#pragma omp parallel for + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) + { + uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0]; + uint32_t cluster1 = m_block_endpoint_clusters_indices[block_index][1]; + BASISU_FRONTEND_VERIFY(cluster0 == cluster1); + + const color_rgba *pSource_pixels = get_source_pixel_block(block_index).get_ptr(); + + etc_block &blk = m_encoded_blocks[block_index]; + + color_rgba unscaled[2] = { m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0], m_endpoint_cluster_etc_params[cluster1].m_color_unscaled[0] }; + uint32_t inten[2] = { m_endpoint_cluster_etc_params[cluster0].m_inten_table[0], m_endpoint_cluster_etc_params[cluster1].m_inten_table[0] }; + + blk.set_block_color5(unscaled[0], unscaled[1]); + blk.set_flip_bit(true); + + blk.set_inten_table(0, inten[0]); + blk.set_inten_table(1, inten[1]); + + blk.determine_selectors(pSource_pixels, m_params.m_perceptual); + + } // block_index + + m_orig_encoded_blocks = m_encoded_blocks; + } + + void basisu_frontend::create_selector_clusters() + { + debug_printf("create_selector_clusters\n"); + + typedef vec<16, float> vec16F; + 
typedef tree_vector_quant vec16F_clusterizer; + + vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks); + +#pragma omp parallel for + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) + { + const etc_block &blk = m_encoded_blocks[block_index]; + + vec16F v; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + v[x + y * 4] = static_cast(blk.get_selector(x, y)); + + const uint32_t subblock_index = (blk.get_inten_table(0) > blk.get_inten_table(1)) ? 0 : 1; + + color_rgba block_colors[4]; + blk.get_block_colors(block_colors, subblock_index); + + const uint32_t dist = color_distance(m_params.m_perceptual, block_colors[0], block_colors[3], false); + + const uint32_t cColorDistToWeight = 300; + const uint32_t cMaxWeight = 4096; + uint32_t weight = clamp(dist / cColorDistToWeight, 1, cMaxWeight); + + training_vecs[block_index].first = v; + training_vecs[block_index].second = weight; + } + + vec16F_clusterizer selector_clusterizer; + for (uint32_t i = 0; i < m_total_blocks; i++) + selector_clusterizer.add_training_vec(training_vecs[i].first, training_vecs[i].second); + + selector_clusterizer.generate(m_params.m_max_selector_clusters); + + selector_clusterizer.retrieve(m_selector_cluster_indices); + } + + void basisu_frontend::create_optimized_selector_codebook(uint32_t iter) + { + debug_printf("create_optimized_selector_codebook\n"); + + const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_indices.size(); + + m_optimized_cluster_selectors.resize(total_selector_clusters); + + if ((m_params.m_pGlobal_sel_codebook) && (!m_params.m_use_hybrid_selector_codebooks)) + { + uint32_t total_clusters_processed = 0; + + m_optimized_cluster_selector_global_cb_ids.resize(total_selector_clusters); + +#pragma omp parallel for + for (int cluster_index = 0; cluster_index < static_cast(total_selector_clusters); cluster_index++) + { + const std::vector &cluster_block_indices = 
m_selector_cluster_indices[cluster_index]; + + if (!cluster_block_indices.size()) + continue; + + etc_block_vec etc_blocks; + pixel_block_vec pixel_blocks; + + for (uint32_t cluster_block_index = 0; cluster_block_index < cluster_block_indices.size(); cluster_block_index++) + { + const uint32_t block_index = cluster_block_indices[cluster_block_index]; + + etc_blocks.push_back(m_encoded_blocks[block_index]); + + pixel_blocks.push_back(get_source_pixel_block(block_index)); + } + + uint32_t palette_index; + basist::etc1_global_palette_entry_modifier palette_modifier; + +#if 0 + m_params.m_pGlobal_sel_codebook->find_best_entry(etc_blocks.size(), pixel_blocks.get_ptr(), etc_blocks.get_ptr(), + palette_index, palette_modifier, + m_params.m_perceptual, 1 << m_params.m_num_global_sel_codebook_pal_bits, 1 << m_params.m_num_global_sel_codebook_mod_bits); +#else + etc1_global_selector_codebook_find_best_entry(*m_params.m_pGlobal_sel_codebook, + (uint32_t)etc_blocks.size(), &pixel_blocks[0], &etc_blocks[0], + palette_index, palette_modifier, + m_params.m_perceptual, 1 << m_params.m_num_global_sel_codebook_pal_bits, 1 << m_params.m_num_global_sel_codebook_mod_bits); +#endif + + m_optimized_cluster_selector_global_cb_ids[cluster_index].set(palette_index, palette_modifier); + + basist::etc1_selector_palette_entry pal_entry(m_params.m_pGlobal_sel_codebook->get_entry(palette_index, palette_modifier)); + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + m_optimized_cluster_selectors[cluster_index].set_selector(x, y, pal_entry(x, y)); + +#pragma omp critical + { + total_clusters_processed++; + if ((total_clusters_processed % 63) == 0) + debug_printf("Global selector palette optimization: %3.1f%% complete\n", total_clusters_processed * 100.0f / total_selector_clusters); + } + + } // cluster_index + } + else + { + const bool uses_hybrid_sel_codebook = ((m_params.m_pGlobal_sel_codebook) && (m_params.m_use_hybrid_selector_codebooks)); + if (uses_hybrid_sel_codebook) 
+ { + m_selector_cluster_uses_global_cb.resize(total_selector_clusters); + m_optimized_cluster_selector_global_cb_ids.resize(total_selector_clusters); + } + + uint32_t total_clusters_processed = 0; + + // For each selector codebook entry, determine which selector minimizes the error across all the blocks that use that quantized selector. + +#pragma omp parallel for + for (int cluster_index = 0; cluster_index < static_cast(total_selector_clusters); cluster_index++) + { + const std::vector &cluster_block_indices = m_selector_cluster_indices[cluster_index]; + + if (!cluster_block_indices.size()) + continue; + + uint64_t overall_best_err = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_s = 0; + + for (uint32_t s = 0; s < 4; s++) + { + uint32_t total_err = 0; + + for (uint32_t cluster_block_index = 0; cluster_block_index < cluster_block_indices.size(); cluster_block_index++) + { + const uint32_t block_index = cluster_block_indices[cluster_block_index]; + + etc_block &blk = m_encoded_blocks[block_index]; + + const color_rgba &orig_color = get_source_pixel_block(block_index)(x, y); + + color_rgba block_colors[4]; + blk.get_block_colors(block_colors, blk.get_subblock_index(x, y)); + + total_err += color_distance(m_params.m_perceptual, block_colors[s], orig_color, false); + if (total_err > best_err) + break; + + } // block_index + + if (total_err < best_err) + { + best_err = total_err; + best_s = s; + if (!best_err) + break; + } + + } // s + + m_optimized_cluster_selectors[cluster_index].set_selector(x, y, best_s); + + overall_best_err += best_err; + + } // x + } // y + + if (uses_hybrid_sel_codebook) + { + etc_block_vec etc_blocks; + pixel_block_vec pixel_blocks; + + for (uint32_t cluster_block_index = 0; cluster_block_index < cluster_block_indices.size(); cluster_block_index++) + { + const uint32_t block_index = cluster_block_indices[cluster_block_index]; + + 
etc_blocks.push_back(m_encoded_blocks[block_index]); + + pixel_blocks.push_back(get_source_pixel_block(block_index)); + } + + uint32_t palette_index; + basist::etc1_global_palette_entry_modifier palette_modifier; + +#if 0 + uint64_t best_global_cb_err = m_params.m_pGlobal_sel_codebook->find_best_entry(etc_blocks.size(), pixel_blocks.get_ptr(), etc_blocks.get_ptr(), + palette_index, palette_modifier, + m_params.m_perceptual, 1 << m_params.m_num_global_sel_codebook_pal_bits, 1 << m_params.m_num_global_sel_codebook_mod_bits); +#else + uint64_t best_global_cb_err = etc1_global_selector_codebook_find_best_entry(*m_params.m_pGlobal_sel_codebook, (uint32_t)etc_blocks.size(), &pixel_blocks[0], &etc_blocks[0], + palette_index, palette_modifier, + m_params.m_perceptual, 1 << m_params.m_num_global_sel_codebook_pal_bits, 1 << m_params.m_num_global_sel_codebook_mod_bits); +#endif + + if (best_global_cb_err <= overall_best_err * m_params.m_hybrid_codebook_quality_thresh) + { + m_selector_cluster_uses_global_cb[cluster_index] = true; + + m_optimized_cluster_selector_global_cb_ids[cluster_index].set(palette_index, palette_modifier); + + basist::etc1_selector_palette_entry pal_entry(m_params.m_pGlobal_sel_codebook->get_entry(palette_index, palette_modifier)); + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + m_optimized_cluster_selectors[cluster_index].set_selector(x, y, pal_entry(x, y)); + } + else + { + m_optimized_cluster_selector_global_cb_ids[cluster_index].set(0, basist::etc1_global_palette_entry_modifier(0)); + + m_selector_cluster_uses_global_cb[cluster_index] = false; + } + } + + if (uses_hybrid_sel_codebook) + { +#pragma omp critical + { + total_clusters_processed++; + if ((total_clusters_processed % 63) == 0) + debug_printf("Global selector palette optimization: %3.1f%% complete\n", total_clusters_processed * 100.0f / total_selector_clusters); + } + } + + } // cluster_index + + } // if (m_params.m_pGlobal_sel_codebook) + + if 
(m_params.m_debug_images) + { + uint32_t max_selector_cluster_size = 0; + + for (uint32_t i = 0; i < m_selector_cluster_indices.size(); i++) + max_selector_cluster_size = maximum(max_selector_cluster_size, (uint32_t)m_selector_cluster_indices[i].size()); + + if ((max_selector_cluster_size * 5) < 32768) + { + const uint32_t x_spacer_len = 16; + image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_indices.size() * 5); + + for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_indices.size(); selector_cluster_index++) + { + const std::vector &cluster_block_indices = m_selector_cluster_indices[selector_cluster_index]; + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + selector_cluster_vis.set_clipped(x_spacer_len + x - 12, selector_cluster_index * 5 + y, color_rgba((m_optimized_cluster_selectors[selector_cluster_index].get_selector(x, y) * 255) / 3)); + + for (uint32_t i = 0; i < cluster_block_indices.size(); i++) + { + uint32_t block_index = cluster_block_indices[i]; + + const etc_block &blk = m_orig_encoded_blocks[block_index]; + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3)); + } + } + + char buf[256]; +#ifdef _WIN32 + sprintf_s(buf, sizeof(buf), "selector_cluster_vis_%u.png", iter); +#else + snprintf(buf, sizeof(buf), "selector_cluster_vis_%u.png", iter); +#endif + save_png(buf, selector_cluster_vis); + } + } + } + + void basisu_frontend::find_optimal_selector_clusters_for_each_block() + { + debug_printf("find_optimal_selector_clusters_for_each_block\n"); + + m_block_selector_cluster_index.resize(m_total_blocks); + + const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_indices.size(); + + std::vector< std::vector > new_cluster_indices; + + // For each block: Determine which quantized 
selectors best encode that block, given its quantized endpoints. + +#pragma omp parallel for + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) + { + const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); + + etc_block& blk = m_encoded_blocks[block_index]; + + color_rgba trial_block_colors[4]; + blk.get_block_colors(trial_block_colors, 0); + + uint64_t best_cluster_err = UINT64_MAX; + uint32_t best_cluster_index = 0; + + // TODO: One faster algorithm is to sort the cluster selectors, then iterate through them in sorted order and only eval color distances of the selectors that changed. + for (uint32_t cluster_index = 0; cluster_index < total_selector_clusters; cluster_index++) + { + const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; + + uint64_t trial_err = 0; + for (int y = 0; y < 4; y++) + { + for (int x = 0; x < 4; x++) + { + const uint32_t sel = cluster_blk.get_selector(x, y); + + trial_err += color_distance(m_params.m_perceptual, trial_block_colors[sel], pBlock_pixels[x + y * 4], false); + if (trial_err > best_cluster_err) + goto early_out; + } + } + + if (trial_err < best_cluster_err) + { + best_cluster_err = trial_err; + best_cluster_index = cluster_index; + if (!best_cluster_err) + break; + } + + early_out: + ; + } + + blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits()); + + m_block_selector_cluster_index[block_index] = best_cluster_index; + +#pragma omp critical + { + vector_ensure_element_is_valid(new_cluster_indices, best_cluster_index); + new_cluster_indices[best_cluster_index].push_back(block_index); + } + } + + m_selector_cluster_indices.swap(new_cluster_indices); + + for (uint32_t i = 0; i < m_selector_cluster_indices.size(); i++) + vector_sort(m_selector_cluster_indices[i]); + } + + // This seems to be a pretty minor optimization (like .01-.02 dB on kodak) + uint32_t 
basisu_frontend::refine_block_endpoints_given_selectors() + { + debug_printf("refine_block_endpoints_given_selectors\n"); + + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) + { + //uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y); + vec2U &endpoint_clusters = m_block_endpoint_clusters_indices[block_index]; + + m_endpoint_cluster_etc_params[endpoint_clusters[0]].m_subblocks.push_back(block_index * 2); + + m_endpoint_cluster_etc_params[endpoint_clusters[1]].m_subblocks.push_back(block_index * 2 + 1); + } + + uint32_t total_subblocks_refined = 0; + uint32_t total_subblocks_examined = 0; + + for (uint32_t endpoint_cluster_index = 0; endpoint_cluster_index < m_endpoint_cluster_etc_params.size(); endpoint_cluster_index++) + { + endpoint_cluster_etc_params &subblock_params = m_endpoint_cluster_etc_params[endpoint_cluster_index]; + + const uint_vec &subblocks = subblock_params.m_subblocks; + //uint32_t total_pixels = subblock.m_subblocks.size() * 8; + + std::vector subblock_colors[2]; // [use_individual_mode] + uint8_vec subblock_selectors[2]; + + uint64_t cur_subblock_err[2] = { 0, 0 }; + + for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++) + { + uint32_t training_vector_index = subblocks[subblock_iter]; + + uint32_t block_index = training_vector_index >> 1; + uint32_t subblock_index = training_vector_index & 1; + const bool is_flipped = true; + + const etc_block &blk = m_encoded_blocks[block_index]; + + const bool use_individual_mode = !blk.get_diff_bit(); + + const color_rgba *pSource_block_pixels = get_source_pixel_block(block_index).get_ptr(); + + color_rgba unpacked_block_pixels[16]; + unpack_etc1(blk, unpacked_block_pixels); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t pixel_index = g_etc1_pixel_indices[is_flipped][subblock_index][i]; + const etc_coord2 &coords = g_etc1_pixel_coords[is_flipped][subblock_index][i]; + + 
subblock_colors[use_individual_mode].push_back(pSource_block_pixels[pixel_index]); + + cur_subblock_err[use_individual_mode] += color_distance(m_params.m_perceptual, pSource_block_pixels[pixel_index], unpacked_block_pixels[pixel_index], false); + + subblock_selectors[use_individual_mode].push_back(static_cast(blk.get_selector(coords.m_x, coords.m_y))); + } + } // subblock_iter + + etc1_optimizer::results cluster_optimizer_results[2]; + bool results_valid[2] = { false, false }; + + clear_obj(cluster_optimizer_results); + + std::vector cluster_selectors[2]; + + for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++) + { + const uint32_t total_pixels = (uint32_t)subblock_colors[use_individual_mode].size(); + + if (!total_pixels) + continue; + + total_subblocks_examined += total_pixels / 8; + + etc1_optimizer optimizer; + etc1_solution_coordinates solutions[2]; + + etc1_optimizer::params cluster_optimizer_params; + cluster_optimizer_params.m_num_src_pixels = total_pixels; + cluster_optimizer_params.m_pSrc_pixels = &subblock_colors[use_individual_mode][0]; + + cluster_optimizer_params.m_use_color4 = use_individual_mode != 0; + cluster_optimizer_params.m_perceptual = m_params.m_perceptual; + + cluster_optimizer_params.m_pForce_selectors = &subblock_selectors[use_individual_mode][0]; + cluster_optimizer_params.m_quality = cETCQualityUber; + + cluster_selectors[use_individual_mode].resize(total_pixels); + + cluster_optimizer_results[use_individual_mode].m_n = total_pixels; + cluster_optimizer_results[use_individual_mode].m_pSelectors = &cluster_selectors[use_individual_mode][0]; + + optimizer.init(cluster_optimizer_params, cluster_optimizer_results[use_individual_mode]); + + if (!optimizer.compute()) + continue; + + if (cluster_optimizer_results[use_individual_mode].m_error < cur_subblock_err[use_individual_mode]) + results_valid[use_individual_mode] = true; + + } // use_individual_mode + + for (uint32_t use_individual_mode = 0; 
use_individual_mode < 2; use_individual_mode++) + { + if (!results_valid[use_individual_mode]) + continue; + + uint32_t num_passes = use_individual_mode ? 1 : 2; + + bool all_passed5 = true; + + for (uint32_t pass = 0; pass < num_passes; pass++) + { + for (uint32_t subblock_iter = 0; subblock_iter < subblocks.size(); subblock_iter++) + { + const uint32_t training_vector_index = subblocks[subblock_iter]; + + const uint32_t block_index = training_vector_index >> 1; + const uint32_t subblock_index = training_vector_index & 1; + const bool is_flipped = true; + + etc_block &blk = m_encoded_blocks[block_index]; + + if (!blk.get_diff_bit() != static_cast(use_individual_mode != 0)) + continue; + + if (use_individual_mode) + { + blk.set_base4_color(subblock_index, etc_block::pack_color4(cluster_optimizer_results[1].m_block_color_unscaled, false)); + blk.set_inten_table(subblock_index, cluster_optimizer_results[1].m_block_inten_table); + + subblock_params.m_color_error[1] = cluster_optimizer_results[1].m_error; + subblock_params.m_inten_table[1] = cluster_optimizer_results[1].m_block_inten_table; + subblock_params.m_color_unscaled[1] = cluster_optimizer_results[1].m_block_color_unscaled; + + total_subblocks_refined++; + } + else + { + const uint16_t base_color5 = blk.get_base5_color(); + const uint16_t delta_color3 = blk.get_delta3_color(); + + uint32_t r[2], g[2], b[2]; + etc_block::unpack_color5(r[0], g[0], b[0], base_color5, false); + bool success = etc_block::unpack_color5(r[1], g[1], b[1], base_color5, delta_color3, false); + assert(success); + BASISU_NOTE_UNUSED(success); + + r[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.r; + g[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.g; + b[subblock_index] = cluster_optimizer_results[0].m_block_color_unscaled.b; + + color_rgba colors[2] = { color_rgba(r[0], g[0], b[0], 255), color_rgba(r[1], g[1], b[1], 255) }; + + if (!etc_block::try_pack_color5_delta3(colors)) + { + all_passed5 
= false; + break; + } + + if ((pass == 1) && (all_passed5)) + { + blk.set_block_color5(colors[0], colors[1]); + blk.set_inten_table(subblock_index, cluster_optimizer_results[0].m_block_inten_table); + + subblock_params.m_color_error[0] = cluster_optimizer_results[0].m_error; + subblock_params.m_inten_table[0] = cluster_optimizer_results[0].m_block_inten_table; + subblock_params.m_color_unscaled[0] = cluster_optimizer_results[0].m_block_color_unscaled; + + total_subblocks_refined++; + } + } + + } // subblock_iter + + } // pass + + } // use_individual_mode + + } // endpoint_cluster_index + + if (m_params.m_debug_stats) + debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined); + + return total_subblocks_refined; + } + + void basisu_frontend::dump_endpoint_clusterization_visualization(const char *pFilename) + { + debug_printf("dump_endpoint_clusterization_visualization\n"); + + uint32_t max_endpoint_cluster_size = 0; + + std::vector cluster_sizes(m_endpoint_clusters.size()); + std::vector sorted_cluster_indices(m_endpoint_clusters.size()); + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) + { + max_endpoint_cluster_size = maximum(max_endpoint_cluster_size, (uint32_t)m_endpoint_clusters[i].size()); + cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size(); + } + + if (!max_endpoint_cluster_size) + return; + + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) + sorted_cluster_indices[i] = i; + + //indexed_heap_sort(endpoint_clusters.size(), cluster_sizes.get_ptr(), sorted_cluster_indices.get_ptr()); + + image endpoint_cluster_vis(12 + minimum(max_endpoint_cluster_size, 2048) * 5, (uint32_t)m_endpoint_clusters.size() * 3); + + for (uint32_t unsorted_cluster_iter = 0; unsorted_cluster_iter < m_endpoint_clusters.size(); unsorted_cluster_iter++) + { + const uint32_t cluster_iter = sorted_cluster_indices[unsorted_cluster_iter]; + + etc_block blk; + blk.clear(); 
+			blk.set_flip_bit(false);
+			blk.set_diff_bit(true);
+			blk.set_both_inten_tables(m_endpoint_cluster_etc_params[cluster_iter].m_inten_table[0]);
+			blk.set_base5_color(etc_block::pack_color5(m_endpoint_cluster_etc_params[cluster_iter].m_color_unscaled[0], false));
+
+			color_rgba blk_colors[4];
+			blk.get_block_colors(blk_colors, 0);
+			for (uint32_t i = 0; i < 4; i++)
+				endpoint_cluster_vis.fill_box(i * 2, 3 * unsorted_cluster_iter, 2, 2, blk_colors[i]);
+
+			for (uint32_t subblock_iter = 0; subblock_iter < m_endpoint_clusters[cluster_iter].size(); subblock_iter++)
+			{
+				uint32_t training_vector_index = m_endpoint_clusters[cluster_iter][subblock_iter];
+
+				const uint32_t block_index = training_vector_index >> 1;
+				const uint32_t subblock_index = training_vector_index & 1;
+
+				const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr();
+
+				const etc_block &blk2 = m_etc1_blocks_etc1s[block_index];
+
+				color_rgba subblock_pixels[8];
+				for (uint32_t i = 0; i < 8; i++)
+					subblock_pixels[i] = pBlock_pixels[g_etc1_pixel_indices[blk2.get_flip_bit()][subblock_index][i]];
+
+				endpoint_cluster_vis.set_block_clipped(subblock_pixels, 12 + 5 * subblock_iter, 3 * unsorted_cluster_iter, 4, 2);
+			}
+		}
+
+		save_png(pFilename, endpoint_cluster_vis);
+	}
+
+	// Marks which color slot of each endpoint cluster is referenced by the encoded blocks.
+	// For every subblock of every block, the slot indexed by !diff_flag is flagged in m_color_used
+	// (slot [0] when the block's diff bit is set, slot [1] otherwise).
+	void basisu_frontend::finalize()
+	{
+		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
+		{
+			const bool diff_flag = get_diff_flag(block_index);
+
+			for (uint32_t subblock_index = 0; subblock_index < 2; subblock_index++)
+			{
+				const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, subblock_index);
+
+				m_endpoint_cluster_etc_params[endpoint_cluster_index].m_color_used[!diff_flag] = true;
+			}
+		}
+	}
+
+	// Consistency check of the frontend's output.
+	// For each block, rebuilds an ETC block from the endpoint/selector cluster tables and verifies that it
+	// matches the stored output block field by field. Returns false on the first mismatch (or if
+	// check_etc1s_constraints() fails), true if every block passes.
+	bool basisu_frontend::validate_output() const
+	{
+		debug_printf("validate_output\n");
+
+		if (!check_etc1s_constraints())
+			return false;
+
+		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
+		{
+			// Every output block is expected to have its flip bit set.
+			if (!get_output_block(block_index).get_flip_bit())
+				return false;
+
+			const bool diff_flag = get_diff_flag(block_index);
+
+			// Reference block reconstructed purely from the cluster tables.
+			etc_block blk;
+			memset(&blk, 0, sizeof(blk));
+			blk.set_flip_bit(true);
+			blk.set_diff_bit(diff_flag);
+
+			const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
+			const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
+
+// CHECK(x): bail out of validate_output() with false as soon as a condition fails.
+#define CHECK(x) do { if (!(x)) return false; } while(0)
+
+			// Both subblocks must reference the same endpoint cluster.
+			CHECK(endpoint_cluster0_index == endpoint_cluster1_index);
+
+			if (diff_flag)
+			{
+				CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false)));
+
+				CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false));
+				CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster1_index, false));
+			}
+			else
+			{
+				blk.set_block_color4(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, true), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, true));
+
+				CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, true));
+				CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster1_index, true));
+			}
+
+			blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, !diff_flag));
+			blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, !diff_flag));
+
+			const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
+
+			// The selector cluster's membership list must contain this block.
+			CHECK(vector_find(get_selector_cluster_block_indices(selector_cluster_index), block_index) != -1);
+
+			blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
+
+			const etc_block &rdo_output_block = get_output_block(block_index);
+
+			// Field-by-field comparison of the stored output block against the reconstruction.
+			CHECK(rdo_output_block.get_flip_bit() == blk.get_flip_bit());
+			CHECK(rdo_output_block.get_diff_bit() == blk.get_diff_bit());
+			CHECK(rdo_output_block.get_inten_table(0) == blk.get_inten_table(0));
+			CHECK(rdo_output_block.get_inten_table(1) == blk.get_inten_table(1));
+			if (diff_flag)
+			{
+				CHECK(rdo_output_block.get_base5_color() == blk.get_base5_color());
+				CHECK(rdo_output_block.get_delta3_color() == blk.get_delta3_color());
+			}
+			else
+			{
+				CHECK(rdo_output_block.get_base4_color(0) == blk.get_base4_color(0));
+				CHECK(rdo_output_block.get_base4_color(1) == blk.get_base4_color(1));
+			}
+			CHECK(rdo_output_block.get_raw_selector_bits() == blk.get_raw_selector_bits());
+
+			if (m_params.m_pGlobal_sel_codebook)
+			{
+				// With hybrid codebooks only some clusters come from the global codebook.
+				bool used_global_cb = true;
+				if (m_params.m_use_hybrid_selector_codebooks)
+					used_global_cb = m_selector_cluster_uses_global_cb[selector_cluster_index];
+
+				if (used_global_cb)
+				{
+					basist::etc1_global_selector_codebook_entry_id pal_id(get_selector_cluster_global_selector_entry_ids()[selector_cluster_index]);
+
+					basist::etc1_selector_palette_entry pal_entry(m_params.m_pGlobal_sel_codebook->get_entry(pal_id));
+
+					// The global codebook entry must reproduce the block's 4x4 selectors exactly.
+					for (uint32_t y = 0; y < 4; y++)
+					{
+						for (uint32_t x = 0; x < 4; x++)
+						{
+							CHECK(pal_entry(x, y) == blk.get_selector(x, y));
+						}
+					}
+				}
+			}
+
+#undef CHECK
+		}
+
+		return true;
+	}
+
+	// Writes a PNG visualising num_blocks_x * num_blocks_y blocks starting at first_block.
+	// When output_blocks is true the stored output blocks are used; otherwise each block is rebuilt
+	// from the endpoint and selector cluster tables. The blocks are packed into a cETC1 gpu_image,
+	// unpacked to an image, and saved to pFilename.
+	void basisu_frontend::dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks)
+	{
+		gpu_image g;
+		g.init(cETC1, num_blocks_x * 4, num_blocks_y * 4);
+
+		for (uint32_t y = 0; y < num_blocks_y; y++)
+		{
+			for (uint32_t x = 0; x < num_blocks_x; x++)
+			{
+				const uint32_t block_index = first_block + x + y * num_blocks_x;
+
+				etc_block &blk = *(etc_block *)g.get_block_ptr(x, y);
+
+				if (output_blocks)
+					blk = get_output_block(block_index);
+				else
+				{
+					const bool diff_flag = get_diff_flag(block_index);
+
+					blk.set_diff_bit(diff_flag);
+					blk.set_flip_bit(true);
+
+					const uint32_t endpoint_cluster0_index = get_subblock_endpoint_cluster_index(block_index, 0);
+					const uint32_t endpoint_cluster1_index = get_subblock_endpoint_cluster_index(block_index, 1);
+
+					if (diff_flag)
+						blk.set_block_color5(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false));
+					else
+						blk.set_block_color4(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, true), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, true));
+
+					blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, !diff_flag));
+					blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, !diff_flag));
+
+					const uint32_t selector_cluster_index = get_block_selector_cluster_index(block_index);
+					blk.set_raw_selector_bits(get_selector_cluster_selector_bits(selector_cluster_index).get_raw_selector_bits());
+				}
+			}
+		}
+
+		image img;
+		g.unpack(img);
+
+		save_png(pFilename, img);
+	}
+
+} // namespace basisu
+
diff --git a/basisu_frontend.h b/basisu_frontend.h
new file mode 100644
index 0000000..b25249f
--- /dev/null
+++ b/basisu_frontend.h
@@ -0,0 +1,299 @@
+// basisu_frontend.h
+// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once +#include "basisu_enc.h" +#include "basisu_etc.h" +#include "basisu_gpu_texture.h" +#include "basisu_global_selector_palette_helpers.h" + +namespace basisu +{ + struct vec2U + { + uint32_t m_comps[2]; + + vec2U() { } + vec2U(uint32_t a, uint32_t b) { set(a, b); } + + void set(uint32_t a, uint32_t b) { m_comps[0] = a; m_comps[1] = b; } + + uint32_t operator[] (uint32_t i) const { assert(i < 2); return m_comps[i]; } + uint32_t &operator[] (uint32_t i) { assert(i < 2); return m_comps[i]; } + }; + + class basisu_frontend + { + BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basisu_frontend); + + public: + + basisu_frontend() : + m_total_blocks(0), + m_total_pixels(0) + { + } + + enum + { + cMaxEndpointClusterBits = 13, + cMaxEndpointClusters = 1 << cMaxEndpointClusterBits, + + cMaxEndpointClusterBitsRDO = 15, + cMaxEndpointClustersRDO = 1 << cMaxEndpointClusterBitsRDO, + + cMaxSelectorClusterBits = 13, + cMaxSelectorClusters = (1 << cMaxSelectorClusterBits) - 256, // because of how the delta selector huff symbols are coded alongside an RLE and selector history buffer syms + + cMaxSelectorClusterBitsRDO = 15, + cMaxSelectorClustersRDO = (1 << cMaxSelectorClusterBitsRDO) // because of how the delta selector huff symbols are coded alongside an RLE and selector history buffer syms + }; + + struct params + { + params() : + m_num_source_blocks(0), + m_pSource_blocks(NULL), + m_max_endpoint_clusters(256), + m_max_selector_clusters(256), + m_endpoint_refinement(true), + m_perceptual(true), + m_debug_stats(false), + m_debug_images(false), + m_dump_endpoint_clusterization(false), + m_faster(false), + m_pGlobal_sel_codebook(NULL), + m_num_global_sel_codebook_pal_bits(0), + m_num_global_sel_codebook_mod_bits(0), + m_use_hybrid_selector_codebooks(false), + m_hybrid_codebook_quality_thresh(0.0f), + m_validate(false) + { + } + + uint32_t m_num_source_blocks; + pixel_block *m_pSource_blocks; + + uint32_t m_max_endpoint_clusters; + uint32_t m_max_selector_clusters; + + bool m_faster; 
+ bool m_endpoint_refinement; + bool m_perceptual; + bool m_debug_stats; + bool m_debug_images; + bool m_dump_endpoint_clusterization; + bool m_validate; + + const basist::etc1_global_selector_codebook *m_pGlobal_sel_codebook; + uint32_t m_num_global_sel_codebook_pal_bits; + uint32_t m_num_global_sel_codebook_mod_bits; + bool m_use_hybrid_selector_codebooks; + float m_hybrid_codebook_quality_thresh; + }; + + bool init(const params &p); + + bool compress(); + + const params &get_params() const { return m_params; } + + const pixel_block &get_source_pixel_block(uint32_t i) const { assert(i < m_params.m_num_source_blocks); return m_params.m_pSource_blocks[i]; } + + // RDO output blocks + uint32_t get_total_output_blocks() const { return static_cast(m_encoded_blocks.size()); } + + const etc_block &get_output_block(uint32_t block_index) const { return m_encoded_blocks[block_index]; } + const etc_block_vec &get_output_blocks() const { return m_encoded_blocks; } + + // "Best" ETC1S blocks + const etc_block &get_etc1s_block(uint32_t block_index) const { return m_etc1_blocks_etc1s[block_index]; } + + // Per-block flags + bool get_diff_flag(uint32_t block_index) const { return m_encoded_blocks[block_index].get_diff_bit(); } + + // Endpoint clusters + uint32_t get_total_endpoint_clusters() const { return static_cast(m_endpoint_clusters.size()); } + uint32_t get_subblock_endpoint_cluster_index(uint32_t block_index, uint32_t subblock_index) const { return m_block_endpoint_clusters_indices[block_index][subblock_index]; } + + const color_rgba&get_endpoint_cluster_unscaled_color(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_unscaled[individual_mode]; } + uint32_t get_endpoint_cluster_inten_table(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_inten_table[individual_mode]; } + + bool get_endpoint_cluster_color_is_used(uint32_t cluster_index, bool 
individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_used[individual_mode]; } + + // Returns subblock indices using each endpoint cluster (block_index*2+subblock_index) + const uint_vec &get_endpoint_cluster_subblock_indices(uint32_t endpoint_cluster_index) const { return m_endpoint_cluster_etc_params[endpoint_cluster_index].m_subblocks; } + + // Selector clusters + uint32_t get_total_selector_clusters() const { return static_cast(m_selector_cluster_indices.size()); } + uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } + const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } + + const basist::etc1_global_selector_codebook_entry_id_vec &get_selector_cluster_global_selector_entry_ids() const { return m_optimized_cluster_selector_global_cb_ids; } + const bool_vec &get_selector_cluster_uses_global_cb_vec() const { return m_selector_cluster_uses_global_cb; } + + // Returns block indices using each selector cluster + const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_indices[selector_cluster_index]; } + + void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); + + private: + params m_params; + uint32_t m_total_blocks; + uint32_t m_total_pixels; + + etc_block_vec m_encoded_blocks; + etc_block_vec m_orig_encoded_blocks; // encoded blocks after endpoint quant, but before selector quant + + etc_block_vec m_etc1_blocks_etc1s; + pixel_block_vec m_etc1_blocks_etc1s_unpacked; + + typedef vec<6, float> vec6F; + + typedef tree_vector_quant vec6F_quantizer; + vec6F_quantizer m_endpoint_clusterizer; + + std::vector m_endpoint_clusters; + + struct endpoint_cluster_etc_params + { + endpoint_cluster_etc_params() + { + clear(); + } + + void clear() + { + 
clear_obj(m_color_unscaled); + clear_obj(m_inten_table); + clear_obj(m_color_error); + m_subblocks.clear(); + + clear_obj(m_color_used); + m_valid = false; + } + + // TODO: basisu doesn't use individual mode. + color_rgba m_color_unscaled[2]; // [use_individual_mode] + uint32_t m_inten_table[2]; + + uint64_t m_color_error[2]; + + uint_vec m_subblocks; + + bool m_color_used[2]; + + bool m_valid; + + bool operator== (const endpoint_cluster_etc_params &other) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_color_unscaled[i] != other.m_color_unscaled[i]) + return false; + } + + if (m_inten_table[0] != other.m_inten_table[0]) + return false; + if (m_inten_table[1] != other.m_inten_table[1]) + return false; + + return true; + } + + bool operator< (const endpoint_cluster_etc_params &other) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_color_unscaled[i] < other.m_color_unscaled[i]) + return true; + else if (m_color_unscaled[i] != other.m_color_unscaled[i]) + return false; + } + + if (m_inten_table[0] < other.m_inten_table[0]) + return true; + else if (m_inten_table[0] == other.m_inten_table[0]) + { + if (m_inten_table[1] < other.m_inten_table[1]) + return true; + } + + return false; + } + }; + + typedef std::vector cluster_subblock_etc_params_vec; + cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; + + std::vector m_block_endpoint_clusters_indices; + + // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! 
+ std::vector m_selector_cluster_indices; + + std::vector m_optimized_cluster_selectors; + basist::etc1_global_selector_codebook_entry_id_vec m_optimized_cluster_selector_global_cb_ids; + bool_vec m_selector_cluster_uses_global_cb; + + std::vector m_block_selector_cluster_index; + + struct subblock_endpoint_quant_err + { + uint64_t m_total_err; + uint32_t m_cluster_index; + uint32_t m_cluster_subblock_index; + uint32_t m_block_index; + uint32_t m_subblock_index; + + bool operator< (const subblock_endpoint_quant_err &rhs) const + { + if (m_total_err < rhs.m_total_err) + return true; + else if (m_total_err == rhs.m_total_err) + { + if (m_block_index < rhs.m_block_index) + return true; + else if (m_block_index == rhs.m_block_index) + return m_subblock_index < rhs.m_subblock_index; + } + return false; + } + }; + + std::vector m_subblock_endpoint_quant_err_vec; + + //----------------------------------------------------------------------------- + + void init_etc1_images(); + void init_endpoint_training_vectors(); + void dump_endpoint_clusterization_visualization(const char *pFilename); + void generate_endpoint_clusters(); + void compute_endpoint_subblock_error_vec(); + void introduce_new_endpoint_clusters(); + void generate_endpoint_codebook(uint32_t step); + uint32_t refine_endpoint_clusterization(); + void eliminate_redundant_or_empty_endpoint_clusters(); + void generate_block_endpoint_clusters(); + void create_initial_packed_texture(); + void create_selector_clusters(); + void create_optimized_selector_codebook(uint32_t iter); + void find_optimal_selector_clusters_for_each_block(); + uint32_t refine_block_endpoints_given_selectors(); + void finalize(); + bool validate_output() const; + void optimize_selector_codebook(); + bool check_etc1s_constraints() const; + }; + +} // namespace basisu diff --git a/basisu_global_selector_palette_helpers.cpp b/basisu_global_selector_palette_helpers.cpp new file mode 100644 index 0000000..593287c --- /dev/null +++ 
b/basisu_global_selector_palette_helpers.cpp @@ -0,0 +1,71 @@ +// basiu_global_selector_palette_helpers.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_global_selector_palette_helpers.h" + +namespace basisu +{ + uint64_t etc1_global_selector_codebook_find_best_entry(const basist::etc1_global_selector_codebook &codebook, + uint32_t num_src_pixel_blocks, const pixel_block *pSrc_pixel_blocks, const etc_block *pBlock_endpoints, + uint32_t &palette_index, basist::etc1_global_palette_entry_modifier &palette_modifier, + bool perceptual, uint32_t max_pal_entries, uint32_t max_modifiers) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_pal_index = 0; + basist::etc1_global_palette_entry_modifier best_pal_modifier; + + if (!max_pal_entries) + max_pal_entries = codebook.size(); + + if (!max_modifiers) + max_modifiers = basist::etc1_global_palette_entry_modifier::cTotalValues; + + for (uint32_t pal_index = 0; pal_index < max_pal_entries; pal_index++) + { + for (uint32_t mod_index = 0; mod_index < max_modifiers; mod_index++) + { + const basist::etc1_global_palette_entry_modifier pal_modifier(mod_index); + + const basist::etc1_selector_palette_entry pal_entry(codebook.get_entry(pal_index, pal_modifier)); + + uint64_t trial_err = 0; + for (uint32_t block_index = 0; block_index < num_src_pixel_blocks; block_index++) + { + etc_block trial_block(pBlock_endpoints[block_index]); + + for 
(uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + trial_block.set_selector(x, y, pal_entry(x, y)); + + trial_err += trial_block.evaluate_etc1_error(reinterpret_cast(pSrc_pixel_blocks[block_index].get_ptr()), perceptual); + if (trial_err >= best_err) + break; + } + + if (trial_err < best_err) + { + best_err = trial_err; + best_pal_index = pal_index; + best_pal_modifier = pal_modifier; + } + } // mod_index + } // pal_index + + palette_index = best_pal_index; + palette_modifier = best_pal_modifier; + + return best_err; + } + +} // namespace basisu diff --git a/basisu_global_selector_palette_helpers.h b/basisu_global_selector_palette_helpers.h new file mode 100644 index 0000000..072d9ea --- /dev/null +++ b/basisu_global_selector_palette_helpers.h @@ -0,0 +1,46 @@ +// File: basisu_global_selector_palette_helpers.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "transcoder/basisu.h" +#include "basisu_etc.h" +#include "transcoder/basisu_global_selector_palette.h" + +namespace basisu +{ + const uint32_t cPixelBlockWidth = 4; + const uint32_t cPixelBlockHeight = 4; + const uint32_t cPixelBlockTotalPixels = cPixelBlockWidth * cPixelBlockHeight; + + struct pixel_block + { + color_rgba m_pixels[cPixelBlockHeight][cPixelBlockWidth]; // [y][x] + + const color_rgba &operator() (uint32_t x, uint32_t y) const { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + color_rgba &operator() (uint32_t x, uint32_t y) { assert((x < cPixelBlockWidth) && (y < cPixelBlockHeight)); return m_pixels[y][x]; } + + const color_rgba *get_ptr() const { return &m_pixels[0][0]; } + color_rgba *get_ptr() { return &m_pixels[0][0]; } + + void clear() { clear_obj(*this); } + }; + typedef std::vector pixel_block_vec; + + uint64_t etc1_global_selector_codebook_find_best_entry(const basist::etc1_global_selector_codebook &codebook, + uint32_t num_src_pixel_blocks, const pixel_block *pSrc_pixel_blocks, const etc_block *pBlock_endpoints, + uint32_t &palette_index, basist::etc1_global_palette_entry_modifier &palette_modifier, + bool perceptual, uint32_t max_pal_entries, uint32_t max_modifiers); + +} // namespace basisu \ No newline at end of file diff --git a/basisu_gpu_texture.cpp b/basisu_gpu_texture.cpp new file mode 100644 index 0000000..2a8cd9f --- /dev/null +++ b/basisu_gpu_texture.cpp @@ -0,0 +1,337 @@ +// basisu_gpu_texture.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_gpu_texture.h" +#include "detex/decompress_bc.h" +#include "detex/decompress_bc7.h" +#include "detex/decompress_eac.h" +#include "basisu_enc.h" +#include "basisu_pvrtc1_4.h" + +namespace basisu +{ + // Unpacks to RGBA, R, RG, or A + void unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) + { + switch (fmt) + { + case cBC1: + { + if (detexGetModeBC1((uint8_t*)pBlock)) + detexDecompressBlockBC1A((uint8_t*)pBlock, 0, (uint8_t*)pPixels); + else + detexDecompressBlockBC1((uint8_t*)pBlock, 0, (uint8_t*)pPixels); + break; + } + case cBC3: + { + detexDecompressBlockBC3((uint8_t*)pBlock, 0, (uint8_t*)pPixels); + break; + } + case cBC4: + { + // Unpack to R + detexDecompressBlockBC4((uint8_t*)pBlock, 0, (uint8_t*)pPixels, sizeof(color_rgba)); + break; + } + case cBC5: + { + // Unpack to RG + detexDecompressBlockBC4((uint8_t*)pBlock, 0, (uint8_t*)pPixels, sizeof(color_rgba)); + detexDecompressBlockBC4((uint8_t*)pBlock + sizeof(uint64_t), 0, (uint8_t*)pPixels + 1, sizeof(color_rgba)); + break; + } + case cBC7: + { + detexDecompressBlockBPTC((const uint8_t*)pBlock, UINT32_MAX, 0, (uint8_t*)pPixels); + break; + } + // Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color) + case cETC2_RGB: + case cETC1: + case cETC1S: + { + unpack_etc1(*static_cast(pBlock), pPixels); + break; + } + case cETC2_RGBA: + { + unpack_etc1(static_cast(pBlock)[1], pPixels); + detexDecompressBlockETC2_EAC((const uint8_t*)pBlock, (uint8_t*)pPixels + 3, 
sizeof(color_rgba)); + break; + } + case cETC2_ALPHA: + { + // Unpack to A + detexDecompressBlockETC2_EAC((const uint8_t*)pBlock, (uint8_t*)pPixels + 3, sizeof(color_rgba)); + break; + } + default: + { + assert(0); + // TODO + break; + } + } + } + + bool gpu_image::unpack(image& img, bool pvrtc_wrap_addressing) const + { + img.resize(get_width(), get_height()); + img.set_all(g_black_color); + + if (!img.get_width() || !img.get_height()) + return true; + + if ((m_fmt == cPVRTC1_4_RGB) || (m_fmt == cPVRTC1_4_RGBA)) + { + if (!is_pow2(m_width) || !is_pow2(m_height)) + { + // PVRTC1 images must use power of 2 dimensions + return false; + } + + pvrtc4_image pi(m_width, m_height, pvrtc_wrap_addressing); + + if (get_total_blocks() != pi.get_total_blocks()) + return false; + + memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes()); + + pi.deswizzle(); + + pi.unpack_all_pixels(img); + + return true; + } + + color_rgba pixels[cMaxBlockSize * cMaxBlockSize]; + for (uint32_t i = 0; i < cMaxBlockSize * cMaxBlockSize; i++) + pixels[i] = g_black_color; + + for (uint32_t by = 0; by < m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + const void* pBlock = get_block_ptr(bx, by); + + unpack_block(m_fmt, pBlock, pixels); + + img.set_block_clipped(pixels, bx * m_block_width, by * m_block_height, m_block_width, m_block_height); + } // bx + } // by + + return true; + } + + static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; + + // KTX/GL enums + enum + { + KTX_ENDIAN = 0x04030201, + KTX_OPPOSITE_ENDIAN = 0x01020304, + KTX_ETC1_RGB8_OES = 0x8D64, + KTX_RED = 0x1903, + KTX_RG = 0x8227, + KTX_RGB = 0x1907, + KTX_RGBA = 0x1908, + KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0, + KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3, + KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB, + KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD, + KTX_COMPRESSED_RGB8_ETC2 = 0x9274, + KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278, + 
KTX_COMPRESSED_RGBA_BPTC_UNORM_ARB, + KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00, + KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG, + }; + + struct ktx_header + { + uint8_t m_identifier[12]; + packed_uint<4> m_endianness; + packed_uint<4> m_glType; + packed_uint<4> m_glTypeSize; + packed_uint<4> m_glFormat; + packed_uint<4> m_glInternalFormat; + packed_uint<4> m_glBaseInternalFormat; + packed_uint<4> m_pixelWidth; + packed_uint<4> m_pixelHeight; + packed_uint<4> m_pixelDepth; + packed_uint<4> m_numberOfArrayElements; + packed_uint<4> m_numberOfFaces; + packed_uint<4> m_numberOfMipmapLevels; + packed_uint<4> m_bytesOfKeyValueData; + + void clear() { clear_obj(*this); } + }; + + bool create_ktx_texture_file(uint8_vec &ktx_data, const gpu_image_vec& g) + { + if (!g.size()) + { + assert(0); + return false; + } + + uint32_t internal_fmt = KTX_ETC1_RGB8_OES, base_internal_fmt = KTX_RGB; + + switch (g[0].get_format()) + { + case cBC1: + { + internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT; + break; + } + case cBC3: + { + internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT; + base_internal_fmt = KTX_RGBA; + break; + } + case cBC4: + { + internal_fmt = KTX_COMPRESSED_RED_RGTC1_EXT;// KTX_COMPRESSED_LUMINANCE_LATC1_EXT; + base_internal_fmt = KTX_RED; + break; + } + case cBC5: + { + internal_fmt = KTX_COMPRESSED_RED_GREEN_RGTC2_EXT; + base_internal_fmt = KTX_RG; + break; + } + case cETC1: + case cETC1S: + { + internal_fmt = KTX_ETC1_RGB8_OES; + break; + } + case cETC2_RGB: + { + internal_fmt = KTX_COMPRESSED_RGB8_ETC2; + break; + } + case cETC2_RGBA: + { + internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC; + base_internal_fmt = KTX_RGBA; + break; + } + case cBC7: + { + internal_fmt = KTX_COMPRESSED_RGBA_BPTC_UNORM_ARB; + base_internal_fmt = KTX_RGBA; + break; + } + case cPVRTC1_4_RGB: + { + internal_fmt = KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG; + break; + } + case cPVRTC1_4_RGBA: + { + internal_fmt = KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG; + base_internal_fmt = KTX_RGBA; + break; + } + default: + 
{ + // TODO + assert(0); + return false; + } + } + + ktx_header header; + header.clear(); + memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id)); + header.m_endianness = KTX_ENDIAN; + header.m_pixelWidth = g[0].get_width(); + header.m_pixelHeight = g[0].get_height(); + header.m_glInternalFormat = internal_fmt; + header.m_glBaseInternalFormat = base_internal_fmt; + header.m_numberOfMipmapLevels = (uint32_t)g.size(); + header.m_numberOfFaces = 1; + + append_vector(ktx_data, (uint8_t *)&header, sizeof(header)); + + for (uint32_t level = 0; level < g.size(); level++) + { + const gpu_image& img = g[level]; + + if (level) + { + if ( (img.get_format() != g[0].get_format()) || + (img.get_width() != maximum(1, g[0].get_width() >> level)) || + (img.get_height() != maximum(1, g[0].get_height() >> level)) ) + { + // Bad input + assert(0); + return false; + } + } + + packed_uint<4> img_size = (uint32_t)img.get_size_in_bytes(); + + assert(img_size && ((img_size & 3) == 0)); + + append_vector(ktx_data, (uint8_t *)&img_size, sizeof(img_size)); + + append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes()); + } + + return true; + } + + bool write_compressed_texture_file(const char* pFilename, const gpu_image_vec& g) + { + std::string extension(string_tolower(string_get_extension(pFilename))); + + uint8_vec filedata; + if (extension == "ktx") + { + if (!create_ktx_texture_file(filedata, g)) + return false; + } + else if (extension == "pvr") + { + // TODO + return false; + } + else if (extension == "dds") + { + // TODO + return false; + } + else + { + // unsupported texture format + assert(0); + return false; + } + + return basisu::write_vec_to_file(pFilename, filedata); + } + + bool write_compressed_texture_file(const char* pFilename, const gpu_image& g) + { + gpu_image_vec v; + v.push_back(g); + return write_compressed_texture_file(pFilename, v); + } + +} // basisu + diff --git a/basisu_gpu_texture.h b/basisu_gpu_texture.h new file mode 100644 index 
0000000..7d186bc --- /dev/null +++ b/basisu_gpu_texture.h @@ -0,0 +1,125 @@ +// basisu_gpu_texture.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "transcoder/basisu.h" +#include "basisu_etc.h" + +namespace basisu +{ + // GPU texture image + + class gpu_image + { + public: + enum { cMaxBlockSize = 12 }; + + gpu_image() + { + clear(); + } + + gpu_image(texture_format fmt, uint32_t width, uint32_t height) + { + init(fmt, width, height); + } + + void clear() + { + m_fmt = cInvalidTextureFormat; + m_width = 0; + m_height = 0; + m_block_width = 0; + m_block_height = 0; + m_blocks_x = 0; + m_blocks_y = 0; + m_qwords_per_block = 0; + m_blocks.clear(); + } + + inline texture_format get_format() const { return m_fmt; } + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_blocks_x() const { return m_blocks_x; } + inline uint32_t get_blocks_y() const { return m_blocks_y; } + inline uint32_t get_block_width() const { return m_block_width; } + inline uint32_t get_block_height() const { return m_block_height; } + inline uint32_t get_qwords_per_block() const { return m_qwords_per_block; } + inline uint32_t get_total_blocks() const { return m_blocks_x * m_blocks_y; } + + inline const uint64_vec &get_blocks() const { return m_blocks; } + + inline const uint64_t *get_ptr() const { return &m_blocks[0]; } + 
inline uint64_t *get_ptr() { return &m_blocks[0]; } + + inline uint32_t get_size_in_bytes() const { return get_total_blocks() * get_qwords_per_block() * sizeof(uint64_t); } + + inline const void *get_block_ptr(uint32_t block_x, uint32_t block_y, uint32_t element_index = 0) const + { + assert(block_x < m_blocks_x && block_y < m_blocks_y); + return &m_blocks[(block_x + block_y * m_blocks_x) * m_qwords_per_block + element_index]; + } + + inline void *get_block_ptr(uint32_t block_x, uint32_t block_y, uint32_t element_index = 0) + { + assert(block_x < m_blocks_x && block_y < m_blocks_y && element_index < m_qwords_per_block); + return &m_blocks[(block_x + block_y * m_blocks_x) * m_qwords_per_block + element_index]; + } + + void init(texture_format fmt, uint32_t width, uint32_t height) + { + m_fmt = fmt; + m_width = width; + m_height = height; + m_block_width = basisu::get_block_width(m_fmt); + m_block_height = basisu::get_block_height(m_fmt); + m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + m_qwords_per_block = basisu::get_qwords_per_block(m_fmt); + + m_blocks.resize(0); + m_blocks.resize(m_blocks_x * m_blocks_y * m_qwords_per_block); + } + + bool unpack(image& img, bool pvrtc_wrap_addressing = true) const; + + void override_dimensions(uint32_t w, uint32_t h) + { + m_width = w; + m_height = h; + } + + private: + texture_format m_fmt; + uint32_t m_width, m_height, m_blocks_x, m_blocks_y, m_block_width, m_block_height, m_qwords_per_block; + uint64_vec m_blocks; + }; + + typedef std::vector gpu_image_vec; + + // KTX file writing + + bool create_ktx_texture_file(uint8_vec &ktx_data, const gpu_image_vec& g); + + bool write_compressed_texture_file(const char *pFilename, const gpu_image& g); + bool write_compressed_texture_file(const char *pFilename, const gpu_image_vec& g); + + inline bool write_compressed_texture_file(const std::string &filename, const gpu_image &g) { return 
write_compressed_texture_file(filename.c_str(), g); } + inline bool write_compressed_texture_file(const std::string &filename, const gpu_image_vec &g) { return write_compressed_texture_file(filename.c_str(), g); } + + // GPU texture block unpacking + + void unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels); + +} // namespace basisu \ No newline at end of file diff --git a/basisu_pvrtc1_4.cpp b/basisu_pvrtc1_4.cpp new file mode 100644 index 0000000..e5dabd7 --- /dev/null +++ b/basisu_pvrtc1_4.cpp @@ -0,0 +1,287 @@ +// basisu_pvrtc1_4.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_pvrtc1_4.h"
+
+namespace basisu
+{
+	// Maps block coordinate (x, y) of a width x height grid to its swizzled index.
+	// The low bits of x and y (up to the smaller dimension) are bit-interleaved, y in the even bit
+	// positions and x in the odd ones; the remaining high bits of the coordinate along the larger
+	// dimension are placed above them. Both dimensions must be powers of two (asserted).
+	uint32_t pvrtc4_swizzle_uv(uint32_t width, uint32_t height, uint32_t x, uint32_t y)
+	{
+		assert((x < width) && (y < height) && basisu::is_pow2(height) && basisu::is_pow2(width));
+
+		// min_d is the smaller dimension; max_v is the coordinate along the larger one.
+		uint32_t min_d = width, max_v = y;
+		if (height < width)
+		{
+			min_d = height;
+			max_v = x;
+		}
+
+		// Interleave the XY LSB's
+		uint32_t shift_ofs = 0, swizzled = 0;
+		for (uint32_t s_bit = 1, d_bit = 1; s_bit < min_d; s_bit <<= 1, d_bit <<= 2, ++shift_ofs)
+		{
+			if (y & s_bit) swizzled |= d_bit;
+			if (x & s_bit) swizzled |= (2 * d_bit);
+		}
+
+		max_v >>= shift_ofs;
+
+		// OR in the rest of the bits from the largest dimension
+		swizzled |= (max_v << (2 * shift_ofs));
+
+		return swizzled;
+	}
+
+	// Decodes endpoint 0 or 1 from this block's packed m_endpoints word (16 bits per endpoint).
+	// Bit 15 of the endpoint selects the opaque layout; otherwise the translucent layout is used.
+	// Endpoint 0 stores blue with one bit less than endpoint 1 in both layouts.
+	// With unpack == true every component is expanded to 8 bits by bit replication; with
+	// unpack == false the raw field values are returned (alpha is 7 for opaque endpoints).
+	color_rgba pvrtc4_block::get_endpoint(uint32_t endpoint_index, bool unpack) const
+	{
+		assert(endpoint_index < 2);
+		const uint32_t packed = m_endpoints >> (endpoint_index * 16);
+
+		uint32_t r, g, b, a;
+		if (packed & 0x8000)
+		{
+			// opaque 554 or 555
+			if (!endpoint_index)
+			{
+				r = (packed >> 10) & 31;
+				g = (packed >> 5) & 31;
+				b = (packed >> 1) & 15;
+
+				if (unpack)
+				{
+					// 4 -> 5 bits
+					b = (b << 1) | (b >> 3);
+				}
+			}
+			else
+			{
+				r = (packed >> 10) & 31;
+				g = (packed >> 5) & 31;
+				b = packed & 31;
+			}
+
+			a = unpack ? 255 : 7;
+		}
+		else
+		{
+			// translucent 4433 or 4443
+			if (!endpoint_index)
+			{
+				a = (packed >> 12) & 7;
+				r = (packed >> 8) & 15;
+				g = (packed >> 4) & 15;
+				b = (packed >> 1) & 7;
+
+				if (unpack)
+				{
+					// alpha: 3 -> 4 bits (low bit zero), then 4 -> 8 bits by replication
+					a = (a << 1);
+					a = (a << 4) | a;
+
+					// color: widen each field to 5 bits
+					r = (r << 1) | (r >> 3);
+					g = (g << 1) | (g >> 3);
+					b = (b << 2) | (b >> 1);
+				}
+			}
+			else
+			{
+				a = (packed >> 12) & 7;
+				r = (packed >> 8) & 15;
+				g = (packed >> 4) & 15;
+				b = packed & 15;
+
+				if (unpack)
+				{
+					a = (a << 1);
+					a = (a << 4) | a;
+
+					r = (r << 1) | (r >> 3);
+					g = (g << 1) | (g >> 3);
+					b = (b << 1) | (b >> 3);
+				}
+			}
+		}
+
+		if (unpack)
+		{
+			// 5 -> 8 bits by bit replication
+			r = (r << 3) | (r >> 2);
+			g = (g << 3) | (g >> 2);
+			b = (b << 3) | (b >> 2);
+		}
+
+		assert((r < 256) && (g < 256) && (b < 256) && (a < 256));
+
+		return color_rgba(r, g, b, a);
+	}
+
+	// Decodes endpoint 0 or 1 to 5 bits per color component and 4 bits of alpha.
+	// Opaque endpoints get alpha 15; translucent alpha is the 3-bit field shifted left by one.
+	color_rgba pvrtc4_block::get_endpoint_5554(uint32_t endpoint_index) const
+	{
+		assert(endpoint_index < 2);
+		const uint32_t packed = m_endpoints >> (endpoint_index * 16);
+
+		uint32_t r, g, b, a;
+		if (packed & 0x8000)
+		{
+			// opaque 554 or 555
+			if (!endpoint_index)
+			{
+				r = (packed >> 10) & 31;
+				g = (packed >> 5) & 31;
+				b = (packed >> 1) & 15;
+
+				b = (b << 1) | (b >> 3);
+			}
+			else
+			{
+				r = (packed >> 10) & 31;
+				g = (packed >> 5) & 31;
+				b = packed & 31;
+			}
+
+			a = 15;
+		}
+		else
+		{
+			// translucent 4433 or 4443
+			if (!endpoint_index)
+			{
+				a = (packed >> 12) & 7;
+				r = (packed >> 8) & 15;
+				g = (packed >> 4) & 15;
+				b = (packed >> 1) & 7;
+
+				a = a << 1;
+
+				r = (r << 1) | (r >> 3);
+				g = (g << 1) | (g >> 3);
+				b = (b << 2) | (b >> 1);
+			}
+			else
+			{
+				a = (packed >> 12) & 7;
+				r = (packed >> 8) & 15;
+				g = (packed >> 4) & 15;
+				b = packed & 15;
+
+				a = a << 1;
+
+				r = (r << 1) | (r >> 3);
+				g = (g << 1) | (g >> 3);
+				b = (b << 1) | (b >> 3);
+			}
+		}
+
+		assert((r < 32) && (g < 32) && (b < 32) && (a < 16));
+
+		return color_rgba(r, g, b, a);
+	}
+
+	bool pvrtc4_image::get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const
+	{
+		assert((x < m_width)
&& (y < m_height)); + + int block_x0 = (static_cast(x) - 2) >> 2; + int block_x1 = block_x0 + 1; + int block_y0 = (static_cast(y) - 2) >> 2; + int block_y1 = block_y0 + 1; + if (m_wrap_addressing) + { + block_x0 = posmod(block_x0, m_block_width); + block_x1 = posmod(block_x1, m_block_width); + block_y0 = posmod(block_y0, m_block_height); + block_y1 = posmod(block_y1, m_block_height); + } + else + { + block_x0 = clamp(block_x0, 0, m_block_width - 1); + block_x1 = clamp(block_x1, 0, m_block_width - 1); + block_y0 = clamp(block_y0, 0, m_block_height - 1); + block_y1 = clamp(block_y1, 0, m_block_height - 1); + } + + pColors[0] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + pColors[3] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) + { + for (uint32_t c = 0; c < 4; c++) + { + uint32_t m = (pColors[0][c] + pColors[3][c]) / 2; + pColors[1][c] = static_cast(m); + pColors[2][c] = static_cast(m); + } + pColors[2][3] = 0; + return true; + } + + for (uint32_t c = 0; c < 4; c++) + { + pColors[1][c] = static_cast((pColors[0][c] * 5 + pColors[3][c] * 3) / 8); + pColors[2][c] = static_cast((pColors[0][c] * 3 + pColors[3][c] * 5) / 8); + } + + return false; + } + + color_rgba pvrtc4_image::get_pixel(uint32_t x, uint32_t y, uint32_t m) const + { + assert((x < m_width) && (y < m_height)); + + int block_x0 = (static_cast(x) - 2) >> 2; + int block_x1 = block_x0 + 1; + int block_y0 = (static_cast(y) - 2) >> 2; + int block_y1 = block_y0 + 1; + if (m_wrap_addressing) + { + block_x0 = posmod(block_x0, m_block_width); + block_x1 = posmod(block_x1, m_block_width); + block_y0 = 
posmod(block_y0, m_block_height); + block_y1 = posmod(block_y1, m_block_height); + } + else + { + block_x0 = clamp(block_x0, 0, m_block_width - 1); + block_x1 = clamp(block_x1, 0, m_block_width - 1); + block_y0 = clamp(block_y0, 0, m_block_height - 1); + block_y1 = clamp(block_y1, 0, m_block_height - 1); + } + + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) + { + if (m == 0) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + else if (m == 3) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); + color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); + + return color_rgba((l[0] + h[0]) / 2, (l[1] + h[1]) / 2, (l[2] + h[2]) / 2, (m == 2) ? 
0 : (l[3] + h[3]) / 2); + } + else + { + if (m == 0) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + else if (m == 3) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); + color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); + + if (m == 2) + return color_rgba((l[0] * 3 + h[0] * 5) / 8, (l[1] * 3 + h[1] * 5) / 8, (l[2] * 3 + h[2] * 5) / 8, (l[3] * 3 + h[3] * 5) / 8); + else + return color_rgba((l[0] * 5 + h[0] * 3) / 8, (l[1] * 5 + h[1] * 3) / 8, (l[2] * 5 + h[2] * 3) / 8, (l[3] * 5 + h[3] * 3) / 8); + } + } + +} // basisu \ No newline at end of file diff --git a/basisu_pvrtc1_4.h b/basisu_pvrtc1_4.h new file mode 100644 index 0000000..981701c --- /dev/null +++ b/basisu_pvrtc1_4.h @@ -0,0 +1,316 @@ +// basisu_pvrtc1_4.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_gpu_texture.h" + +namespace basisu +{ + enum + { + PVRTC2_MIN_WIDTH = 16, + PVRTC2_MIN_HEIGHT = 8, + PVRTC4_MIN_WIDTH = 8, + PVRTC4_MIN_HEIGHT = 8 + }; + + struct pvrtc4_block + { + uint32_t m_modulation; + uint32_t m_endpoints; + + pvrtc4_block() : m_modulation(0), m_endpoints(0) { } + + inline bool operator== (const pvrtc4_block& rhs) const + { + return (m_modulation == rhs.m_modulation) && (m_endpoints == rhs.m_endpoints); + } + + inline void clear() + { + m_modulation = 0; + m_endpoints = 0; + } + + inline bool get_block_uses_transparent_modulation() const + { + return (m_endpoints & 1) != 0; + } + + inline bool is_endpoint_opaque(uint32_t endpoint_index) const + { + static const uint32_t s_bitmasks[2] = { 0x8000U, 0x80000000U }; + return (m_endpoints & s_bitmasks[open_range_check(endpoint_index, 2U)]) != 0; + } + + // Returns raw endpoint or 8888 + color_rgba get_endpoint(uint32_t endpoint_index, bool unpack) const; + + color_rgba get_endpoint_5554(uint32_t endpoint_index) const; + + static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint) + { + static const uint32_t s_comp_prec[4][4] = + { + // R0 G0 B0 A0 R1 G1 B1 A1 + { 4, 4, 3, 3 }, { 4, 4, 4, 3 }, // transparent endpoint + + { 5, 5, 4, 0 }, { 5, 5, 5, 0 } // opaque endpoint + }; + return s_comp_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)][open_range_check(c, 4U)]; + } + + static color_rgba get_color_precision_in_bits(uint32_t endpoint_index, bool opaque_endpoint) + { + static const 
color_rgba s_color_prec[4] = + { + color_rgba(4, 4, 3, 3), color_rgba(4, 4, 4, 3), // transparent endpoint + color_rgba(5, 5, 4, 0), color_rgba(5, 5, 5, 0) // opaque endpoint + }; + return s_color_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)]; + } + + inline uint32_t get_modulation(uint32_t x, uint32_t y) const + { + assert((x < 4) && (y < 4)); + return (m_modulation >> ((y * 4 + x) * 2)) & 3; + } + + // Scaled by 8 + inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const + { + static const uint32_t s_block_scales[2][4] = { { 0, 3, 5, 8 }, { 0, 4, 4, 8 } }; + return s_block_scales[block_uses_transparent_modulation]; + } + + // Scaled by 8 + inline uint32_t get_scaled_modulation(uint32_t x, uint32_t y) const + { + return get_scaled_modulation_values(get_block_uses_transparent_modulation())[get_modulation(x, y)]; + } + + inline void byte_swap() + { + m_modulation = byteswap32(m_modulation); + m_endpoints = byteswap32(m_endpoints); + } + }; + + typedef vector2D pvrtc4_block_vector2D; + + uint32_t pvrtc4_swizzle_uv(uint32_t XSize, uint32_t YSize, uint32_t XPos, uint32_t YPos); + + class pvrtc4_image + { + public: + inline pvrtc4_image() : + m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_wrap_addressing(false), m_uses_alpha(false) + { + } + + inline pvrtc4_image(uint32_t width, uint32_t height, bool wrap_addressing = false) : + m_width(0), m_height(0), m_block_width(0), m_block_height(0), m_wrap_addressing(false), m_uses_alpha(false) + { + resize(width, height); + set_wrap_addressing(wrap_addressing); + } + + inline void clear() + { + m_width = 0; + m_height = 0; + m_block_width = 0; + m_block_height = 0; + m_blocks.clear(); + m_uses_alpha = false; + m_wrap_addressing = false; + } + + inline void resize(uint32_t width, uint32_t height) + { + if ((width == m_width) && (height == m_height)) + return; + + m_width = width; + m_height = height; + + m_block_width = (width + 3) >> 2; + 
m_block_height = (height + 3) >> 2; + + m_blocks.resize(m_block_width, m_block_height); + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + + inline uint32_t get_block_width() const { return m_block_width; } + inline uint32_t get_block_height() const { return m_block_height; } + + inline const pvrtc4_block_vector2D &get_blocks() const { return m_blocks; } + inline pvrtc4_block_vector2D &get_blocks() { return m_blocks; } + + inline uint32_t get_total_blocks() const { return m_block_width * m_block_height; } + + inline bool get_uses_alpha() const { return m_uses_alpha; } + inline void set_uses_alpha(bool uses_alpha) { m_uses_alpha = uses_alpha; } + + inline void set_wrap_addressing(bool wrapping) { m_wrap_addressing = wrapping; } + inline bool get_wrap_addressing() const { return m_wrap_addressing; } + + inline bool are_blocks_equal(const pvrtc4_image& rhs) const + { + return m_blocks == rhs.m_blocks; + } + + inline void set_to_black() + { + memset(m_blocks.get_ptr(), 0, m_blocks.size_in_bytes()); + } + + inline bool get_block_uses_transparent_modulation(uint32_t bx, uint32_t by) const + { + return m_blocks(bx, by).get_block_uses_transparent_modulation(); + } + + inline bool is_endpoint_opaque(uint32_t bx, uint32_t by, uint32_t endpoint_index) const + { + return m_blocks(bx, by).is_endpoint_opaque(endpoint_index); + } + + color_rgba get_endpoint(uint32_t bx, uint32_t by, uint32_t endpoint_index, bool unpack) const + { + assert((bx < m_block_width) && (by < m_block_height)); + return m_blocks(bx, by).get_endpoint(endpoint_index, unpack); + } + + inline uint32_t get_modulation(uint32_t x, uint32_t y) const + { + assert((x < m_width) && (y < m_height)); + return m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3); + } + + // Returns true if the block uses transparent modulation. 
+ bool get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const; + + color_rgba get_pixel(uint32_t x, uint32_t y, uint32_t m) const; + + inline color_rgba get_pixel(uint32_t x, uint32_t y) const + { + assert((x < m_width) && (y < m_height)); + return get_pixel(x, y, m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3)); + } + + void deswizzle() + { + pvrtc4_block_vector2D temp(m_blocks); + + for (uint32_t y = 0; y < m_block_height; y++) + for (uint32_t x = 0; x < m_block_width; x++) + m_blocks(x, y) = temp[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)]; + } + + void swizzle() + { + pvrtc4_block_vector2D temp(m_blocks); + + for (uint32_t y = 0; y < m_block_height; y++) + for (uint32_t x = 0; x < m_block_width; x++) + m_blocks[pvrtc4_swizzle_uv(m_block_width, m_block_height, x, y)] = temp(x, y); + } + + void unpack_all_pixels(image& img) const + { + img.crop(m_width, m_height); + + for (uint32_t y = 0; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + img(x, y) = get_pixel(x, y); + } + + void unpack_block(image &dst, uint32_t block_x, uint32_t block_y) + { + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + dst(x, y) = get_pixel(block_x * 4 + x, block_y * 4 + y); + } + + inline int wrap_or_clamp_x(int x) const + { + return m_wrap_addressing ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + } + + inline int wrap_or_clamp_y(int y) const + { + return m_wrap_addressing ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + } + + inline int wrap_or_clamp_block_x(int bx) const + { + return m_wrap_addressing ? posmod(bx, m_block_width) : clamp(bx, 0, m_block_width - 1); + } + + inline int wrap_or_clamp_block_y(int by) const + { + return m_wrap_addressing ? 
posmod(by, m_block_height) : clamp(by, 0, m_block_height - 1); + } + + inline vec2F get_interpolation_factors(uint32_t x, uint32_t y) const + { + // 0 1 2 3 + // 2 3 0 1 + // .5 .75 0 .25 + static const float s_interp[4] = { 2, 3, 0, 1 }; + return vec2F(s_interp[x & 3], s_interp[y & 3]); + } + + inline color_rgba interpolate(int x, int y, + const color_rgba& p, const color_rgba& q, + const color_rgba& r, const color_rgba& s) const + { + static const int s_interp[4] = { 2, 3, 0, 1 }; + const int u_interp = s_interp[x & 3]; + const int v_interp = s_interp[y & 3]; + + color_rgba result; + + for (uint32_t c = 0; c < 4; c++) + { + int t = p[c] * 4 + u_interp * ((int)q[c] - (int)p[c]); + int b = r[c] * 4 + u_interp * ((int)s[c] - (int)r[c]); + int v = t * 4 + v_interp * (b - t); + if (c < 3) + { + v >>= 1; + v += (v >> 5); + } + else + { + v += (v >> 4); + } + assert((v >= 0) && (v < 256)); + result[c] = static_cast(v); + } + + return result; + } + + uint32_t m_width, m_height; + pvrtc4_block_vector2D m_blocks; + uint32_t m_block_width, m_block_height; + + bool m_wrap_addressing; + bool m_uses_alpha; + }; + +} // namespace basisu diff --git a/basisu_resample_filters.cpp b/basisu_resample_filters.cpp new file mode 100644 index 0000000..1a8ce87 --- /dev/null +++ b/basisu_resample_filters.cpp @@ -0,0 +1,328 @@ +// basisu_resampler_filters.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_resampler_filters.h" + +#ifndef M_PI + #define M_PI 3.14159265358979323846 +#endif + +namespace basisu +{ +#define BOX_FILTER_SUPPORT (0.5f) + static float box_filter(float t) /* pulse/Fourier window */ + { + // make_clist() calls the filter function with t inverted (pos = left, neg = right) + if ((t >= -0.5f) && (t < 0.5f)) + return 1.0f; + else + return 0.0f; + } + +#define TENT_FILTER_SUPPORT (1.0f) + static float tent_filter(float t) /* box (*) box, bilinear/triangle */ + { + if (t < 0.0f) + t = -t; + + if (t < 1.0f) + return 1.0f - t; + else + return 0.0f; + } + +#define BELL_SUPPORT (1.5f) + static float bell_filter(float t) /* box (*) box (*) box */ + { + if (t < 0.0f) + t = -t; + + if (t < .5f) + return (.75f - (t * t)); + + if (t < 1.5f) + { + t = (t - 1.5f); + return (.5f * (t * t)); + } + + return (0.0f); + } + +#define B_SPLINE_SUPPORT (2.0f) + static float B_spline_filter(float t) /* box (*) box (*) box (*) box */ + { + float tt; + + if (t < 0.0f) + t = -t; + + if (t < 1.0f) + { + tt = t * t; + return ((.5f * tt * t) - tt + (2.0f / 3.0f)); + } + else if (t < 2.0f) + { + t = 2.0f - t; + return ((1.0f / 6.0f) * (t * t * t)); + } + + return (0.0f); + } + + // Dodgson, N., "Quadratic Interpolation for Image Resampling" +#define QUADRATIC_SUPPORT 1.5f + static float quadratic(float t, const float R) + { + if (t < 0.0f) + t = -t; + if (t < QUADRATIC_SUPPORT) + { + float tt = t * t; + if (t <= .5f) + return (-2.0f * R) * tt + .5f * (R + 1.0f); + else + return (R * tt) + (-2.0f * R - .5f) * t + (3.0f / 4.0f) * (R + 1.0f); + } + else + return 0.0f; + } + + static float quadratic_interp_filter(float t) + { + return quadratic(t, 1.0f); + } + + static float quadratic_approx_filter(float t) + { + return quadratic(t, .5f); + } + + static float quadratic_mix_filter(float t) + { + return quadratic(t, .8f); + } + + // Mitchell, D. and A. Netravali, "Reconstruction Filters in Computer Graphics." + // Computer Graphics, Vol. 22, No. 4, pp. 221-228. 
+ // (B, C) + // (1/3, 1/3) - Defaults recommended by Mitchell and Netravali + // (1, 0) - Equivalent to the Cubic B-Spline + // (0, 0.5) - Equivalent to the Catmull-Rom Spline + // (0, C) - The family of Cardinal Cubic Splines + // (B, 0) - Duff's tensioned B-Splines. + static float mitchell(float t, const float B, const float C) + { + float tt; + + tt = t * t; + + if (t < 0.0f) + t = -t; + + if (t < 1.0f) + { + t = (((12.0f - 9.0f * B - 6.0f * C) * (t * tt)) + ((-18.0f + 12.0f * B + 6.0f * C) * tt) + (6.0f - 2.0f * B)); + + return (t / 6.0f); + } + else if (t < 2.0f) + { + t = (((-1.0f * B - 6.0f * C) * (t * tt)) + ((6.0f * B + 30.0f * C) * tt) + ((-12.0f * B - 48.0f * C) * t) + (8.0f * B + 24.0f * C)); + + return (t / 6.0f); + } + + return (0.0f); + } + +#define MITCHELL_SUPPORT (2.0f) + static float mitchell_filter(float t) + { + return mitchell(t, 1.0f / 3.0f, 1.0f / 3.0f); + } + +#define CATMULL_ROM_SUPPORT (2.0f) + static float catmull_rom_filter(float t) + { + return mitchell(t, 0.0f, .5f); + } + + static double sinc(double x) + { + x = (x * M_PI); + + if ((x < 0.01f) && (x > -0.01f)) + return 1.0f + x * x * (-1.0f / 6.0f + x * x * 1.0f / 120.0f); + + return sin(x) / x; + } + + static float clean(double t) + { + const float EPSILON = .0000125f; + if (fabs(t) < EPSILON) + return 0.0f; + return (float)t; + } + + //static double blackman_window(double x) + //{ + // return .42f + .50f * cos(M_PI*x) + .08f * cos(2.0f*M_PI*x); + //} + + static double blackman_exact_window(double x) + { + return 0.42659071f + 0.49656062f * cos(M_PI * x) + 0.07684867f * cos(2.0f * M_PI * x); + } + +#define BLACKMAN_SUPPORT (3.0f) + static float blackman_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 3.0f) + //return clean(sinc(t) * blackman_window(t / 3.0f)); + return clean(sinc(t) * blackman_exact_window(t / 3.0f)); + else + return (0.0f); + } + +#define GAUSSIAN_SUPPORT (1.25f) + static float gaussian_filter(float t) // with blackman window + { + if (t < 0) + t = -t; + 
if (t < GAUSSIAN_SUPPORT) + return clean(exp(-2.0f * t * t) * sqrt(2.0f / M_PI) * blackman_exact_window(t / GAUSSIAN_SUPPORT)); + else + return 0.0f; + } + + // Windowed sinc -- see "Jimm Blinn's Corner: Dirty Pixels" pg. 26. +#define LANCZOS3_SUPPORT (3.0f) + static float lanczos3_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 3.0f) + return clean(sinc(t) * sinc(t / 3.0f)); + else + return (0.0f); + } + +#define LANCZOS4_SUPPORT (4.0f) + static float lanczos4_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 4.0f) + return clean(sinc(t) * sinc(t / 4.0f)); + else + return (0.0f); + } + +#define LANCZOS6_SUPPORT (6.0f) + static float lanczos6_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 6.0f) + return clean(sinc(t) * sinc(t / 6.0f)); + else + return (0.0f); + } + +#define LANCZOS12_SUPPORT (12.0f) + static float lanczos12_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 12.0f) + return clean(sinc(t) * sinc(t / 12.0f)); + else + return (0.0f); + } + + static double bessel0(double x) + { + const double EPSILON_RATIO = 1E-16; + double xh, sum, pow, ds; + int k; + + xh = 0.5 * x; + sum = 1.0; + pow = 1.0; + k = 0; + ds = 1.0; + while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety? 
+ { + ++k; + pow = pow * (xh / k); + ds = pow * pow; + sum = sum + ds; + } + + return sum; + } + + static const float KAISER_ALPHA = 4.0; + static double kaiser(double alpha, double half_width, double x) + { + const double ratio = (x / half_width); + return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha); + } + +#define KAISER_SUPPORT 3 + static float kaiser_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < KAISER_SUPPORT) + { + // db atten + const float att = 40.0f; + const float alpha = (float)(exp(log((double)0.58417 * (att - 20.96)) * 0.4) + 0.07886 * (att - 20.96)); + //const float alpha = KAISER_ALPHA; + return (float)clean(sinc(t) * kaiser(alpha, KAISER_SUPPORT, t)); + } + + return 0.0f; + } + + const resample_filter g_resample_filters[] = + { + { "box", box_filter, BOX_FILTER_SUPPORT }, { "tent", tent_filter, TENT_FILTER_SUPPORT }, { "bell", bell_filter, BELL_SUPPORT }, { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, + { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, { "blackman", blackman_filter, BLACKMAN_SUPPORT }, { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, + { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, { "kaiser", kaiser_filter, KAISER_SUPPORT }, { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, + { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, + }; + + const int g_num_resample_filters = BASISU_ARRAY_SIZE(g_resample_filters); + + int find_resample_filter(const char *pName) + { + for (int i = 0; i < g_num_resample_filters; i++) + if (strcmp(pName, g_resample_filters[i].name) == 0) + return i; + return -1; + } +} // namespace basisu diff --git a/basisu_resampler.cpp b/basisu_resampler.cpp new file mode 100644 
index 0000000..d09de6f --- /dev/null +++ b/basisu_resampler.cpp @@ -0,0 +1,852 @@ +// basisu_resampler.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_resampler.h" +#include "basisu_resampler_filters.h" + +#ifndef max +#define max(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef min +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#define RESAMPLER_DEBUG 0 + +namespace basisu +{ + static inline int resampler_range_check(int v, int h) + { + BASISU_NOTE_UNUSED(h); + assert((v >= 0) && (v < h)); + return v; + } + + // Float to int cast with truncation. + static inline int cast_to_int(Resample_Real i) + { + return (int)i; + } + + // Ensure that the contributing source sample is within bounds. If not, reflect, clamp, or wrap. 
+ int Resampler::reflect(const int j, const int src_x, const Boundary_Op boundary_op) + { + int n; + + if (j < 0) + { + if (boundary_op == BOUNDARY_REFLECT) + { + n = -j; + + if (n >= src_x) + n = src_x - 1; + } + else if (boundary_op == BOUNDARY_WRAP) + n = posmod(j, src_x); + else + n = 0; + } + else if (j >= src_x) + { + if (boundary_op == BOUNDARY_REFLECT) + { + n = (src_x - j) + (src_x - 1); + + if (n < 0) + n = 0; + } + else if (boundary_op == BOUNDARY_WRAP) + n = posmod(j, src_x); + else + n = src_x - 1; + } + else + n = j; + + return n; + } + + // The make_clist() method generates, for all destination samples, + // the list of all source samples with non-zero weighted contributions. + Resampler::Contrib_List * Resampler::make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real(*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs) + { + struct Contrib_Bounds + { + // The center of the range in DISCRETE coordinates (pixel center = 0.0f). + Resample_Real center; + int left, right; + }; + + int i, j, k, n, left, right; + Resample_Real total_weight; + Resample_Real xscale, center, half_width, weight; + Contrib_List* Pcontrib; + Contrib* Pcpool; + Contrib* Pcpool_next; + Contrib_Bounds* Pcontrib_bounds; + + if ((Pcontrib = (Contrib_List*)calloc(dst_x, sizeof(Contrib_List))) == NULL) + return NULL; + + Pcontrib_bounds = (Contrib_Bounds*)calloc(dst_x, sizeof(Contrib_Bounds)); + if (!Pcontrib_bounds) + { + free(Pcontrib); + return (NULL); + } + + const Resample_Real oo_filter_scale = 1.0f / filter_scale; + + const Resample_Real NUDGE = 0.5f; + xscale = dst_x / (Resample_Real)src_x; + + if (xscale < 1.0f) + { + int total; + (void)total; + + // Handle case when there are fewer destination samples than source samples (downsampling/minification). 
+ + // stretched half width of filter + half_width = (filter_support / xscale) * filter_scale; + + // Find the range of source sample(s) that will contribute to each destination sample. + + for (i = 0, n = 0; i < dst_x; i++) + { + // Convert from discrete to continuous coordinates, scale, then convert back to discrete. + center = ((Resample_Real)i + NUDGE) / xscale; + center -= NUDGE; + center += src_ofs; + + left = cast_to_int((Resample_Real)floor(center - half_width)); + right = cast_to_int((Resample_Real)ceil(center + half_width)); + + Pcontrib_bounds[i].center = center; + Pcontrib_bounds[i].left = left; + Pcontrib_bounds[i].right = right; + + n += (right - left + 1); + } + + // Allocate memory for contributors. + + if ((n == 0) || ((Pcpool = (Contrib*)calloc(n, sizeof(Contrib))) == NULL)) + { + free(Pcontrib); + free(Pcontrib_bounds); + return NULL; + } + total = n; + + Pcpool_next = Pcpool; + + // Create the list of source samples which contribute to each destination sample. + + for (i = 0; i < dst_x; i++) + { + int max_k = -1; + Resample_Real max_w = -1e+20f; + + center = Pcontrib_bounds[i].center; + left = Pcontrib_bounds[i].left; + right = Pcontrib_bounds[i].right; + + Pcontrib[i].n = 0; + Pcontrib[i].p = Pcpool_next; + Pcpool_next += (right - left + 1); + assert((Pcpool_next - Pcpool) <= total); + + total_weight = 0; + + for (j = left; j <= right; j++) + total_weight += (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale); + const Resample_Real norm = static_cast(1.0f / total_weight); + + total_weight = 0; + +#if RESAMPLER_DEBUG + printf("%i: ", i); +#endif + + for (j = left; j <= right; j++) + { + weight = (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale) * norm; + if (weight == 0.0f) + continue; + + n = reflect(j, src_x, boundary_op); + +#if RESAMPLER_DEBUG + printf("%i(%f), ", n, weight); +#endif + + // Increment the number of source samples which contribute to the current destination sample. 
+ + k = Pcontrib[i].n++; + + Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ + Pcontrib[i].p[k].weight = weight; /* store src sample weight */ + + total_weight += weight; /* total weight of all contributors */ + + if (weight > max_w) + { + max_w = weight; + max_k = k; + } + } + +#if RESAMPLER_DEBUG + printf("\n\n"); +#endif + + //assert(Pcontrib[i].n); + //assert(max_k != -1); + if ((max_k == -1) || (Pcontrib[i].n == 0)) + { + free(Pcpool); + free(Pcontrib); + free(Pcontrib_bounds); + return NULL; + } + + if (total_weight != 1.0f) + Pcontrib[i].p[max_k].weight += 1.0f - total_weight; + } + } + else + { + // Handle case when there are more destination samples than source samples (upsampling). + + half_width = filter_support * filter_scale; + + // Find the source sample(s) that contribute to each destination sample. + + for (i = 0, n = 0; i < dst_x; i++) + { + // Convert from discrete to continuous coordinates, scale, then convert back to discrete. + center = ((Resample_Real)i + NUDGE) / xscale; + center -= NUDGE; + center += src_ofs; + + left = cast_to_int((Resample_Real)floor(center - half_width)); + right = cast_to_int((Resample_Real)ceil(center + half_width)); + + Pcontrib_bounds[i].center = center; + Pcontrib_bounds[i].left = left; + Pcontrib_bounds[i].right = right; + + n += (right - left + 1); + } + + /* Allocate memory for contributors. */ + + int total = n; + if ((total == 0) || ((Pcpool = (Contrib*)calloc(total, sizeof(Contrib))) == NULL)) + { + free(Pcontrib); + free(Pcontrib_bounds); + return NULL; + } + + Pcpool_next = Pcpool; + + // Create the list of source samples which contribute to each destination sample. 
+ + for (i = 0; i < dst_x; i++) + { + int max_k = -1; + Resample_Real max_w = -1e+20f; + + center = Pcontrib_bounds[i].center; + left = Pcontrib_bounds[i].left; + right = Pcontrib_bounds[i].right; + + Pcontrib[i].n = 0; + Pcontrib[i].p = Pcpool_next; + Pcpool_next += (right - left + 1); + assert((Pcpool_next - Pcpool) <= total); + + total_weight = 0; + for (j = left; j <= right; j++) + total_weight += (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale); + + const Resample_Real norm = static_cast(1.0f / total_weight); + + total_weight = 0; + +#if RESAMPLER_DEBUG + printf("%i: ", i); +#endif + + for (j = left; j <= right; j++) + { + weight = (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale) * norm; + if (weight == 0.0f) + continue; + + n = reflect(j, src_x, boundary_op); + +#if RESAMPLER_DEBUG + printf("%i(%f), ", n, weight); +#endif + + // Increment the number of source samples which contribute to the current destination sample. + + k = Pcontrib[i].n++; + + Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ + Pcontrib[i].p[k].weight = weight; /* store src sample weight */ + + total_weight += weight; /* total weight of all contributors */ + + if (weight > max_w) + { + max_w = weight; + max_k = k; + } + } + +#if RESAMPLER_DEBUG + printf("\n\n"); +#endif + + //assert(Pcontrib[i].n); + //assert(max_k != -1); + + if ((max_k == -1) || (Pcontrib[i].n == 0)) + { + free(Pcpool); + free(Pcontrib); + free(Pcontrib_bounds); + return NULL; + } + + if (total_weight != 1.0f) + Pcontrib[i].p[max_k].weight += 1.0f - total_weight; + } + } + +#if RESAMPLER_DEBUG + printf("*******\n"); +#endif + + free(Pcontrib_bounds); + + return Pcontrib; + } + + void Resampler::resample_x(Sample * Pdst, const Sample * Psrc) + { + assert(Pdst); + assert(Psrc); + + int i, j; + Sample total; + Contrib_List* Pclist = m_Pclist_x; + Contrib* p; + + for (i = m_resample_dst_x; i > 0; i--, Pclist++) + { +#if BASISU_RESAMPLER_DEBUG_OPS + total_ops += Pclist->n; +#endif + + 
for (j = Pclist->n, p = Pclist->p, total = 0; j > 0; j--, p++) + total += Psrc[p->pixel] * p->weight; + + *Pdst++ = total; + } + } + + void Resampler::scale_y_mov(Sample * Ptmp, const Sample * Psrc, Resample_Real weight, int dst_x) + { + int i; + +#if BASISU_RESAMPLER_DEBUG_OPS + total_ops += dst_x; +#endif + + // Not += because temp buf wasn't cleared. + for (i = dst_x; i > 0; i--) + * Ptmp++ = *Psrc++ * weight; + } + + void Resampler::scale_y_add(Sample * Ptmp, const Sample * Psrc, Resample_Real weight, int dst_x) + { +#if BASISU_RESAMPLER_DEBUG_OPS + total_ops += dst_x; +#endif + + for (int i = dst_x; i > 0; i--) + (*Ptmp++) += *Psrc++ * weight; + } + + void Resampler::clamp(Sample * Pdst, int n) + { + while (n > 0) + { + Sample x = *Pdst; + *Pdst++ = clamp_sample(x); + n--; + } + } + + void Resampler::resample_y(Sample * Pdst) + { + int i, j; + Sample* Psrc; + Contrib_List* Pclist = &m_Pclist_y[m_cur_dst_y]; + + Sample* Ptmp = m_delay_x_resample ? m_Ptmp_buf : Pdst; + assert(Ptmp); + + /* Process each contributor. */ + + for (i = 0; i < Pclist->n; i++) + { + // locate the contributor's location in the scan buffer -- the contributor must always be found! + for (j = 0; j < MAX_SCAN_BUF_SIZE; j++) + if (m_Pscan_buf->scan_buf_y[j] == Pclist->p[i].pixel) + break; + + assert(j < MAX_SCAN_BUF_SIZE); + + Psrc = m_Pscan_buf->scan_buf_l[j]; + + if (!i) + scale_y_mov(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + else + scale_y_add(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + + /* If this source line doesn't contribute to any + * more destination lines then mark the scanline buffer slot + * which holds this source line as free. + * (The max. number of slots used depends on the Y + * axis sampling factor and the scaled filter width.) 
+ */ + + if (--m_Psrc_y_count[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] == 0) + { + m_Psrc_y_flag[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] = false; + m_Pscan_buf->scan_buf_y[j] = -1; + } + } + + /* Now generate the destination line */ + + if (m_delay_x_resample) // Was X resampling delayed until after Y resampling? + { + assert(Pdst != Ptmp); + resample_x(Pdst, Ptmp); + } + else + { + assert(Pdst == Ptmp); + } + + if (m_lo < m_hi) + clamp(Pdst, m_resample_dst_x); + } + + bool Resampler::put_line(const Sample * Psrc) + { + int i; + + if (m_cur_src_y >= m_resample_src_y) + return false; + + /* Does this source line contribute + * to any destination line? if not, + * exit now. + */ + + if (!m_Psrc_y_count[resampler_range_check(m_cur_src_y, m_resample_src_y)]) + { + m_cur_src_y++; + return true; + } + + /* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */ + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + if (m_Pscan_buf->scan_buf_y[i] == -1) + break; + + /* If the buffer is full, exit with an error. */ + + if (i == MAX_SCAN_BUF_SIZE) + { + m_status = STATUS_SCAN_BUFFER_FULL; + return false; + } + + m_Psrc_y_flag[resampler_range_check(m_cur_src_y, m_resample_src_y)] = true; + m_Pscan_buf->scan_buf_y[i] = m_cur_src_y; + + /* Does this slot have any memory allocated to it? */ + + if (!m_Pscan_buf->scan_buf_l[i]) + { + if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)malloc(m_intermediate_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return false; + } + } + + // Resampling on the X axis first? 
+ if (m_delay_x_resample) + { + assert(m_intermediate_x == m_resample_src_x); + + // Y-X resampling order + memcpy(m_Pscan_buf->scan_buf_l[i], Psrc, m_intermediate_x * sizeof(Sample)); + } + else + { + assert(m_intermediate_x == m_resample_dst_x); + + // X-Y resampling order + resample_x(m_Pscan_buf->scan_buf_l[i], Psrc); + } + + m_cur_src_y++; + + return true; + } + + const Resampler::Sample* Resampler::get_line() + { + int i; + + /* If all the destination lines have been + * generated, then always return NULL. + */ + + if (m_cur_dst_y == m_resample_dst_y) + return NULL; + + /* Check to see if all the required + * contributors are present, if not, + * return NULL. + */ + + for (i = 0; i < m_Pclist_y[m_cur_dst_y].n; i++) + if (!m_Psrc_y_flag[resampler_range_check(m_Pclist_y[m_cur_dst_y].p[i].pixel, m_resample_src_y)]) + return NULL; + + resample_y(m_Pdst_buf); + + m_cur_dst_y++; + + return m_Pdst_buf; + } + + Resampler::~Resampler() + { + int i; + +#if BASISU_RESAMPLER_DEBUG_OPS + printf("actual ops: %i\n", total_ops); +#endif + + free(m_Pdst_buf); + m_Pdst_buf = NULL; + + if (m_Ptmp_buf) + { + free(m_Ptmp_buf); + m_Ptmp_buf = NULL; + } + + /* Don't deallocate a contibutor list + * if the user passed us one of their own. 
+ */ + + if ((m_Pclist_x) && (!m_clist_x_forced)) + { + free(m_Pclist_x->p); + free(m_Pclist_x); + m_Pclist_x = NULL; + } + + if ((m_Pclist_y) && (!m_clist_y_forced)) + { + free(m_Pclist_y->p); + free(m_Pclist_y); + m_Pclist_y = NULL; + } + + free(m_Psrc_y_count); + m_Psrc_y_count = NULL; + + free(m_Psrc_y_flag); + m_Psrc_y_flag = NULL; + + if (m_Pscan_buf) + { + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + free(m_Pscan_buf->scan_buf_l[i]); + + free(m_Pscan_buf); + m_Pscan_buf = NULL; + } + } + + void Resampler::restart() + { + if (STATUS_OKAY != m_status) + return; + + m_cur_src_y = m_cur_dst_y = 0; + + int i, j; + for (i = 0; i < m_resample_src_y; i++) + { + m_Psrc_y_count[i] = 0; + m_Psrc_y_flag[i] = false; + } + + for (i = 0; i < m_resample_dst_y; i++) + { + for (j = 0; j < m_Pclist_y[i].n; j++) + m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + } + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + m_Pscan_buf->scan_buf_y[i] = -1; + + free(m_Pscan_buf->scan_buf_l[i]); + m_Pscan_buf->scan_buf_l[i] = NULL; + } + } + + Resampler::Resampler(int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op, + Resample_Real sample_low, Resample_Real sample_high, + const char* Pfilter_name, + Contrib_List * Pclist_x, + Contrib_List * Pclist_y, + Resample_Real filter_x_scale, + Resample_Real filter_y_scale, + Resample_Real src_x_ofs, + Resample_Real src_y_ofs) + { + int i, j; + Resample_Real support, (*func)(Resample_Real); + + assert(src_x > 0); + assert(src_y > 0); + assert(dst_x > 0); + assert(dst_y > 0); + +#if BASISU_RESAMPLER_DEBUG_OPS + total_ops = 0; +#endif + + m_lo = sample_low; + m_hi = sample_high; + + m_delay_x_resample = false; + m_intermediate_x = 0; + m_Pdst_buf = NULL; + m_Ptmp_buf = NULL; + m_clist_x_forced = false; + m_Pclist_x = NULL; + m_clist_y_forced = false; + m_Pclist_y = NULL; + m_Psrc_y_count = NULL; + m_Psrc_y_flag = NULL; + m_Pscan_buf = NULL; + m_status = STATUS_OKAY; + + m_resample_src_x = src_x; + 
m_resample_src_y = src_y; + m_resample_dst_x = dst_x; + m_resample_dst_y = dst_y; + + m_boundary_op = boundary_op; + + if ((m_Pdst_buf = (Sample*)malloc(m_resample_dst_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + // Find the specified filter. + + if (Pfilter_name == NULL) + Pfilter_name = BASISU_RESAMPLER_DEFAULT_FILTER; + + for (i = 0; i < g_num_resample_filters; i++) + if (strcmp(Pfilter_name, g_resample_filters[i].name) == 0) + break; + + if (i == g_num_resample_filters) + { + m_status = STATUS_BAD_FILTER_NAME; + return; + } + + func = g_resample_filters[i].func; + support = g_resample_filters[i].support; + + /* Create contributor lists, unless the user supplied custom lists. */ + + if (!Pclist_x) + { + m_Pclist_x = make_clist(m_resample_src_x, m_resample_dst_x, m_boundary_op, func, support, filter_x_scale, src_x_ofs); + if (!m_Pclist_x) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + else + { + m_Pclist_x = Pclist_x; + m_clist_x_forced = true; + } + + if (!Pclist_y) + { + m_Pclist_y = make_clist(m_resample_src_y, m_resample_dst_y, m_boundary_op, func, support, filter_y_scale, src_y_ofs); + if (!m_Pclist_y) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + else + { + m_Pclist_y = Pclist_y; + m_clist_y_forced = true; + } + + if ((m_Psrc_y_count = (int*)calloc(m_resample_src_y, sizeof(int))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + if ((m_Psrc_y_flag = (unsigned char*)calloc(m_resample_src_y, sizeof(unsigned char))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + // Count how many times each source line contributes to a destination line. 
+ + for (i = 0; i < m_resample_dst_y; i++) + for (j = 0; j < m_Pclist_y[i].n; j++) + m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + + if ((m_Pscan_buf = (Scan_Buf*)malloc(sizeof(Scan_Buf))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + m_Pscan_buf->scan_buf_y[i] = -1; + m_Pscan_buf->scan_buf_l[i] = NULL; + } + + m_cur_src_y = m_cur_dst_y = 0; + { + // Determine which axis to resample first by comparing the number of multiplies required + // for each possibility. + int x_ops = count_ops(m_Pclist_x, m_resample_dst_x); + int y_ops = count_ops(m_Pclist_y, m_resample_dst_y); + + // Hack 10/2000: Weight Y axis ops a little more than X axis ops. + // (Y axis ops use more cache resources.) + int xy_ops = x_ops * m_resample_src_y + + (4 * y_ops * m_resample_dst_x) / 3; + + int yx_ops = (4 * y_ops * m_resample_src_x) / 3 + + x_ops * m_resample_dst_y; + +#if BASISU_RESAMPLER_DEBUG_OPS + printf("src: %i %i\n", m_resample_src_x, m_resample_src_y); + printf("dst: %i %i\n", m_resample_dst_x, m_resample_dst_y); + printf("x_ops: %i\n", x_ops); + printf("y_ops: %i\n", y_ops); + printf("xy_ops: %i\n", xy_ops); + printf("yx_ops: %i\n", yx_ops); +#endif + + // Now check which resample order is better. In case of a tie, choose the order + // which buffers the least amount of data. 
+ if ((xy_ops > yx_ops) || + ((xy_ops == yx_ops) && (m_resample_src_x < m_resample_dst_x))) + { + m_delay_x_resample = true; + m_intermediate_x = m_resample_src_x; + } + else + { + m_delay_x_resample = false; + m_intermediate_x = m_resample_dst_x; + } +#if BASISU_RESAMPLER_DEBUG_OPS + printf("delaying: %i\n", m_delay_x_resample); +#endif + } + + if (m_delay_x_resample) + { + if ((m_Ptmp_buf = (Sample*)malloc(m_intermediate_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + } + + void Resampler::get_clists(Contrib_List * *ptr_clist_x, Contrib_List * *ptr_clist_y) + { + if (ptr_clist_x) + * ptr_clist_x = m_Pclist_x; + + if (ptr_clist_y) + * ptr_clist_y = m_Pclist_y; + } + + int Resampler::get_filter_num() + { + return g_num_resample_filters; + } + + const char* Resampler::get_filter_name(int filter_num) + { + if ((filter_num < 0) || (filter_num >= g_num_resample_filters)) + return NULL; + else + return g_resample_filters[filter_num].name; + } + +} // namespace basisu \ No newline at end of file diff --git a/basisu_resampler.h b/basisu_resampler.h new file mode 100644 index 0000000..715825d --- /dev/null +++ b/basisu_resampler.h @@ -0,0 +1,196 @@ +// basisu_resampler.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "transcoder/basisu.h" + +#define BASISU_RESAMPLER_DEBUG_OPS (0) +#define BASISU_RESAMPLER_DEFAULT_FILTER "lanczos4" +#define BASISU_RESAMPLER_MAX_DIMENSION (16384) + +namespace basisu +{ + // float or double + typedef float Resample_Real; + + class Resampler + { + public: + typedef Resample_Real Sample; + + struct Contrib + { + Resample_Real weight; + uint16_t pixel; + }; + + struct Contrib_List + { + uint16_t n; + Contrib *p; + }; + + enum Boundary_Op + { + BOUNDARY_WRAP = 0, + BOUNDARY_REFLECT = 1, + BOUNDARY_CLAMP = 2 + }; + + enum Status + { + STATUS_OKAY = 0, + STATUS_OUT_OF_MEMORY = 1, + STATUS_BAD_FILTER_NAME = 2, + STATUS_SCAN_BUFFER_FULL = 3 + }; + + // src_x/src_y - Input dimensions + // dst_x/dst_y - Output dimensions + // boundary_op - How to sample pixels near the image boundaries + // sample_low/sample_high - Clamp output samples to specified range, or disable clamping if sample_low >= sample_high + // Pclist_x/Pclist_y - Optional pointers to contributor lists from another instance of a Resampler + // src_x_ofs/src_y_ofs - Offset input image by specified amount (fractional values okay) + Resampler( + int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op = BOUNDARY_CLAMP, + Resample_Real sample_low = 0.0f, Resample_Real sample_high = 0.0f, + const char *Pfilter_name = BASISU_RESAMPLER_DEFAULT_FILTER, + Contrib_List *Pclist_x = NULL, + Contrib_List *Pclist_y = NULL, + Resample_Real filter_x_scale = 1.0f, + Resample_Real filter_y_scale = 1.0f, + Resample_Real src_x_ofs = 0.0f, + Resample_Real src_y_ofs = 0.0f); + + ~Resampler(); + + // Reinits resampler so it can handle another frame. + void restart(); + + // false on out of memory. + bool put_line(const Sample *Psrc); + + // NULL if no scanlines are currently available (give the resampler more scanlines!) + const Sample *get_line(); + + Status status() const + { + return m_status; + } + + // Returned contributor lists can be shared with another Resampler. 
+ void get_clists(Contrib_List **ptr_clist_x, Contrib_List **ptr_clist_y); + Contrib_List *get_clist_x() const + { + return m_Pclist_x; + } + Contrib_List *get_clist_y() const + { + return m_Pclist_y; + } + + // Filter accessors. + static int get_filter_num(); + static const char *get_filter_name(int filter_num); + + static Contrib_List *make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real(*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs); + + private: + Resampler(); + Resampler(const Resampler &o); + Resampler &operator=(const Resampler &o); + +#ifdef BASISU_RESAMPLER_DEBUG_OPS + int total_ops; +#endif + + int m_intermediate_x; + + int m_resample_src_x; + int m_resample_src_y; + int m_resample_dst_x; + int m_resample_dst_y; + + Boundary_Op m_boundary_op; + + Sample *m_Pdst_buf; + Sample *m_Ptmp_buf; + + Contrib_List *m_Pclist_x; + Contrib_List *m_Pclist_y; + + bool m_clist_x_forced; + bool m_clist_y_forced; + + bool m_delay_x_resample; + + int *m_Psrc_y_count; + uint8_t *m_Psrc_y_flag; + + // The maximum number of scanlines that can be buffered at one time. 
+ enum + { + MAX_SCAN_BUF_SIZE = BASISU_RESAMPLER_MAX_DIMENSION + }; + + struct Scan_Buf + { + int scan_buf_y[MAX_SCAN_BUF_SIZE]; + Sample *scan_buf_l[MAX_SCAN_BUF_SIZE]; + }; + + Scan_Buf *m_Pscan_buf; + + int m_cur_src_y; + int m_cur_dst_y; + + Status m_status; + + void resample_x(Sample *Pdst, const Sample *Psrc); + void scale_y_mov(Sample *Ptmp, const Sample *Psrc, Resample_Real weight, int dst_x); + void scale_y_add(Sample *Ptmp, const Sample *Psrc, Resample_Real weight, int dst_x); + void clamp(Sample *Pdst, int n); + void resample_y(Sample *Pdst); + + static int reflect(const int j, const int src_x, const Boundary_Op boundary_op); + + inline int count_ops(Contrib_List *Pclist, int k) + { + int i, t = 0; + for (i = 0; i < k; i++) + t += Pclist[i].n; + return (t); + } + + Resample_Real m_lo; + Resample_Real m_hi; + + inline Resample_Real clamp_sample(Resample_Real f) const + { + if (f < m_lo) + f = m_lo; + else if (f > m_hi) + f = m_hi; + return f; + } + }; + +} // namespace basisu diff --git a/basisu_resampler_filters.h b/basisu_resampler_filters.h new file mode 100644 index 0000000..c16024f --- /dev/null +++ b/basisu_resampler_filters.h @@ -0,0 +1,35 @@ +// basisu_resampler_filters.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "transcoder/basisu.h" + +namespace basisu +{ + typedef float (*resample_filter_func)(float t); + + struct resample_filter + { + const char *name; + resample_filter_func func; + float support; + }; + + extern const resample_filter g_resample_filters[]; + extern const int g_num_resample_filters; + + int find_resample_filter(const char *pName); + +} // namespace basisu diff --git a/basisu_ssim.cpp b/basisu_ssim.cpp new file mode 100644 index 0000000..4da3051 --- /dev/null +++ b/basisu_ssim.cpp @@ -0,0 +1,408 @@ +// basisu_ssim.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "basisu_ssim.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +namespace basisu +{ + float gauss(int x, int y, float sigma_sqr) + { + float pow = expf(-((x * x + y * y) / (2.0f * sigma_sqr))); + float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow; + return g; + } + + // size_x/y should be odd + void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags) + { + assert(size_x & size_y & 1); + + if (!(size_x | size_y)) + return; + + int mid_x = size_x / 2; + int mid_y = size_y / 2; + + double sum = 0; + for (int x = 0; x < size_x; x++) + { + for (int y = 0; y < size_y; y++) + { + float g; + if ((x > mid_x) && (y < mid_y)) + g = pDst[(size_x - x - 1) + y * size_x]; + else if ((x < mid_x) && (y > mid_y)) + g = pDst[x + (size_y - y - 1) * size_x]; + else if ((x > mid_x) && (y > mid_y)) + g = pDst[(size_x - x - 1) + (size_y - y - 1) * size_x]; + else + g = gauss(x - mid_x, y - mid_y, sigma_sqr); + + pDst[x + y * size_x] = g; + sum += g; + } + } + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + { + sum = pDst[mid_x + mid_y * size_x]; + } + + if (flags & (cComputeGaussianFlagNormalizeCenterToOne | cComputeGaussianFlagNormalize)) + { + double one_over_sum = 1.0f / sum; + for (int i = 0; i < size_x * size_y; i++) + pDst[i] = static_cast(pDst[i] * one_over_sum); + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + pDst[mid_x + mid_y * size_x] = 1.0f; + } + + if (flags & cComputeGaussianFlagPrint) + { + printf("{\n"); + for (int y = 0; y < size_y; y++) + { + printf(" "); + for (int x = 0; x < size_x; x++) + { + printf("%f, ", pDst[x + y * size_x]); + } + printf("\n"); + } + printf("}"); + } + } + + void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor) + { + assert(odd_filter_width && (odd_filter_width & 1)); + odd_filter_width |= 1; + + vector2D kernel(odd_filter_width, 
odd_filter_width); + compute_gaussian_kernel(kernel.get_ptr(), odd_filter_width, odd_filter_width, sigma_sqr, cComputeGaussianFlagNormalize); + + const int dst_width = orig_img.get_width() / width_divisor; + const int dst_height = orig_img.get_height() / height_divisor; + + const int H = odd_filter_width / 2; + const int L = -H; + + dst.crop(dst_width, dst_height); + +#pragma omp parallel for + for (int oy = 0; oy < dst_height; oy++) + { + for (int ox = 0; ox < dst_width; ox++) + { + vec4F c(0.0f); + + for (int yd = L; yd <= H; yd++) + { + int y = oy * height_divisor + (height_divisor >> 1) + yd; + + for (int xd = L; xd <= H; xd++) + { + int x = ox * width_divisor + (width_divisor >> 1) + xd; + + const vec4F &p = orig_img.get_clamped_or_wrapped(x, y, wrapping, wrapping); + + float w = kernel(xd + H, yd + H); + c[0] += p[0] * w; + c[1] += p[1] * w; + c[2] += p[2] * w; + c[3] += p[3] * w; + } + } + + dst(ox, oy).set(c[0], c[1], c[2], c[3]); + } + } + } + + void pow_image(const imagef &src, imagef &dst, const vec4F &power) + { + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &p = src(x, y); + + if ((power[0] == 2.0f) && (power[1] == 2.0f) && (power[2] == 2.0f) && (power[3] == 2.0f)) + dst(x, y).set(p[0] * p[0], p[1] * p[1], p[2] * p[2], p[3] * p[3]); + else + dst(x, y).set(powf(p[0], power[0]), powf(p[1], power[1]), powf(p[2], power[2]), powf(p[3], power[3])); + } + } + } + + void mul_image(const imagef &src, imagef &dst, const vec4F &mul) + { + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &p = src(x, y); + dst(x, y).set(p[0] * mul[0], p[1] * mul[1], p[2] * mul[2], p[3] * mul[3]); + } + } + } + + void scale_image(const imagef &src, imagef &dst, const vec4F &scale, const vec4F &shift) + { + dst.resize(src); + +#pragma omp parallel 
for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &p = src(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + d[c] = scale[c] * p[c] + shift[c]; + + dst(x, y).set(d[0], d[1], d[2], d[3]); + } + } + } + + void add_weighted_image(const imagef &src1, const vec4F &alpha, const imagef &src2, const vec4F &beta, const vec4F &gamma, imagef &dst) + { + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &s1 = src1(x, y); + const vec4F &s2 = src2(x, y); + + dst(x, y).set( + s1[0] * alpha[0] + s2[0] * beta[0] + gamma[0], + s1[1] * alpha[1] + s2[1] * beta[1] + gamma[1], + s1[2] * alpha[2] + s2[2] * beta[2] + gamma[2], + s1[3] * alpha[3] + s2[3] * beta[3] + gamma[3]); + } + } + } + + void add_image(const imagef &src1, const imagef &src2, imagef &dst) + { + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &s1 = src1(x, y); + const vec4F &s2 = src2(x, y); + + dst(x, y).set(s1[0] + s2[0], s1[1] + s2[1], s1[2] + s2[2], s1[3] + s2[3]); + } + } + } + + void adds_image(const imagef &src, const vec4F &value, imagef &dst) + { + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &p = src(x, y); + + dst(x, y).set(p[0] + value[0], p[1] + value[1], p[2] + value[2], p[3] + value[3]); + } + } + } + + void mul_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale) + { + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &s1 = src1(x, y); + const vec4F &s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v1 
= s1[c]; + float v2 = s2[c]; + d[c] = v1 * v2 * scale[c]; + } + + dst(x, y) = d; + } + } + } + + void div_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale) + { + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F &s1 = src1(x, y); + const vec4F &s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v = s2[c]; + if (v == 0.0f) + d[c] = 0.0f; + else + d[c] = (s1[c] * scale[c]) / v; + } + + dst(x, y) = d; + } + } + } + + vec4F avg_image(const imagef &src) + { + vec4F avg(0.0f); + + for (uint32_t y = 0; y < src.get_height(); y++) + { + for (uint32_t x = 0; x < src.get_width(); x++) + { + const vec4F &s = src(x, y); + + avg += vec4F(s[0], s[1], s[2], s[3]); + } + } + + avg /= static_cast(src.get_total_pixels()); + + return avg; + } + + // Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html + vec4F compute_ssim(const imagef &a, const imagef &b) + { + imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3; + + const float C1 = 6.50250f, C2 = 58.52250f; + + pow_image(a, a_sq, vec4F(2)); + pow_image(b, b_sq, vec4F(2)); + mul_image(a, b, axb, vec4F(1.0f)); + + gaussian_filter(mu1, a, 11, 1.5f * 1.5f); + gaussian_filter(mu2, b, 11, 1.5f * 1.5f); + + pow_image(mu1, mu1_sq, vec4F(2)); + pow_image(mu2, mu2_sq, vec4F(2)); + mul_image(mu1, mu2, mu1_mu2, vec4F(1.0f)); + + gaussian_filter(s1_sq, a_sq, 11, 1.5f * 1.5f); + add_weighted_image(s1_sq, vec4F(1), mu1_sq, vec4F(-1), vec4F(0), s1_sq); + + gaussian_filter(s2_sq, b_sq, 11, 1.5f * 1.5f); + add_weighted_image(s2_sq, vec4F(1), mu2_sq, vec4F(-1), vec4F(0), s2_sq); + + gaussian_filter(s12, axb, 11, 1.5f * 1.5f); + add_weighted_image(s12, vec4F(1), mu1_mu2, vec4F(-1), vec4F(0), s12); + + scale_image(mu1_mu2, t1, vec4F(2), vec4F(0)); + adds_image(t1, vec4F(C1), t1); + + scale_image(s12, t2, vec4F(2), 
vec4F(0)); + adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t3, vec4F(1)); + + add_image(mu1_sq, mu2_sq, t1); + adds_image(t1, vec4F(C1), t1); + + add_image(s1_sq, s2_sq, t2); + adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t1, vec4F(1)); + + div_image(t3, t1, smap, vec4F(1)); + + return avg_image(smap); + } + + vec4F compute_ssim(const image &a, const image &b, bool luma) + { + image ta(a), tb(b); + + if ((ta.get_width() != tb.get_width()) || (ta.get_height() != tb.get_height())) + { + debug_printf("compute_ssim: Cropping input images to equal dimensions\n"); + + const uint32_t w = minimum(a.get_width(), b.get_width()); + const uint32_t h = minimum(a.get_height(), b.get_height()); + ta.crop(w, h); + tb.crop(w, h); + } + + if (!ta.get_width() || !ta.get_height()) + { + assert(0); + return vec4F(0); + } + + if (luma) + { + for (uint32_t y = 0; y < ta.get_height(); y++) + { + for (uint32_t x = 0; x < ta.get_width(); x++) + { + ta(x, y).set(ta(x, y).get_709_luma(), ta(x, y).a); + tb(x, y).set(tb(x, y).get_709_luma(), tb(x, y).a); + } + } + } + + imagef fta, ftb; + + fta.set(ta); + ftb.set(tb); + + return compute_ssim(fta, ftb); + } + +} // namespace basisu \ No newline at end of file diff --git a/basisu_ssim.h b/basisu_ssim.h new file mode 100644 index 0000000..54a9bb5 --- /dev/null +++ b/basisu_ssim.h @@ -0,0 +1,44 @@ +// basisu_ssim.h +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once +#include "basisu_enc.h" + +namespace basisu +{ + float gauss(int x, int y, float sigma_sqr); + + enum + { + cComputeGaussianFlagNormalize = 1, + cComputeGaussianFlagPrint = 2, + cComputeGaussianFlagNormalizeCenterToOne = 4 + }; + + void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags = 0); + + void scale_image(const imagef &src, imagef &dst, const vec4F &scale, const vec4F &shift); + void add_weighted_image(const imagef &src1, const vec4F &alpha, const imagef &src2, const vec4F &beta, const vec4F &gamma, imagef &dst); + void add_image(const imagef &src1, const imagef &src2, imagef &dst); + void adds_image(const imagef &src, const vec4F &value, imagef &dst); + void mul_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale); + void div_image(const imagef &src1, const imagef &src2, imagef &dst, const vec4F &scale); + vec4F avg_image(const imagef &src); + + void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping = false, uint32_t width_divisor = 1, uint32_t height_divisor = 1); + + vec4F compute_ssim(const imagef &a, const imagef &b); + vec4F compute_ssim(const image &a, const image &b, bool luma); + +} // namespace basisu diff --git a/basisu_tool.cpp b/basisu_tool.cpp new file mode 100644 index 0000000..bf088f4 --- /dev/null +++ b/basisu_tool.cpp @@ -0,0 +1,843 @@ +// basisu_tool.cpp +// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include "transcoder/basisu.h" +#include "transcoder/basisu_transcoder_internal.h" +#include "basisu_enc.h" +#include "basisu_etc.h" +#include "basisu_gpu_texture.h" +#include "basisu_frontend.h" +#include "basisu_backend.h" +#include "transcoder/basisu_global_selector_palette.h" +#include "basisu_comp.h" +#include "transcoder/basisu_transcoder.h" +#include "basisu_ssim.h" +#if defined(_OPENMP) +#include +#endif + +using namespace basisu; + +#define BASISU_TOOL_VERSION "1.00.00" + +enum tool_mode +{ + cDefault, + cCompress, + cValidate, + cUnpack, + cCompare +}; + +static void print_usage() +{ + printf("\nUsage: basisu filename [filename ...] \n"); + + puts("\n" + "The default mode is compression of one or more PNG files to a .basis file. Alternate modes:\n" + " -unpack: Use transcoder to unpack .basis file to one or more .ktx/.png files\n" + " -validate: Validate and display information about a .basis file\n" + " -compare: Compare two PNG images specified with -file, output PSNR and SSIM statistics and RGB/A delta images\n" + "\n" + "Important: By default, the compressor assumes the input is not sRGB. 
If the input is sRGB (diffuse/albedo textures, images, etc), be sure to specify -srgb for much better compression.\n" + "\n" + "Options:\n" + " -file filename.png: Input image filename, multiple images are OK, use -file X for each input filename (prefixing input filenames with -file is now optional)\n" + " -alpha_file filename.png: Input alpha image filename, multiple images are OK, use -file X for each input filename (must be paired with -file), images converted to REC709 grayscale and used as input alpha\n" + " -multifile_printf: printf() format strint to use to compose multiple filenames\n" + " -multifile_first: The index of the first file to process, default is 0 (must specify -multifile_printf and -multifile_num))\n" + " -multifile_num: The total number of files to process\n" + " -srgb: Use perceptual colorspace metrics for significantly higher rate distortion performance on sRGB textures. Don't use on non-sRGB inputs.\n" + " -q X: Set quality level, 1-255, default is 128, lower=better compression/lower quality/faster, higher=less compression/higher quality/slower, default is 128\n" + " -output_file filename: Output .basis/.ktx filename\n" + " -output_path: Output .basis/.ktx files to specified directory\n" + " -debug_output: Enable codec debug print to stdout (slightly slower)\n" + " -debug_images: Enable codec debug images (much slower)\n" + " -compute_stats: Compute and display image quality metrics (slightly slower)\n" + " -slower: Enable optional stages in the compressor for slower but higher quality compression\n" + "\n" + "More options:\n" + " -max_endpoint_clusters X: Manually set the max number of color endpoint clusters from 1-8192, use instead of -q\n" + " -max_selector_clusters X: Manually set the max number of color selector clusters from 1-7936, use instead of -q\n" + " -y_flip: Flip input images vertically before compression\n" + " -normal_map: Tunes codec parameters for better quality on normal maps (no selector RDO, no sRGB)\n" + " -no_alpha: 
Always output non-alpha basis files, even if one or more inputs has alpha\n" + " -force_alpha: Always output alpha basis files, even if no inputs has alpha\n" + " -seperate_rg_to_color_alpha: Seperate input R and G channels to RGB and A (for tangent space XY normal maps)\n" + " -no_multithreading: Disable OpenMP multithreading\n" + "\n" + "Mipmap generation options:\n" + " -mipmap: Generate mipmaps for each source image\n" + " -mip_scale X: Set mipmap filter kernel's scale, lower=sharper, higher=more blurry, default is 1.0\n" + " -mip_filter X: Set mipmap filter kernel, default is kaiser, filters: box, tent, bell, blackman, catmullrom, mitchell, etc.\n" + " -mip_renorm: Renormalize normal map to unit length vectors after filtering\n" + " -mip_clamp: Use clamp addressing on borders, instead of wrapping\n" + " -mip_smallest X: Set smallest pixel dimension for generated mipmaps, default is 1\n" + " -mip_srgb: Convert image to linear before filtering, then back to sRGB\n" + "\n" + "Backend selector RDO codec options:\n" + " -no_selector_rdo: Disable backend's selector rate distortion optimizations (slightly faster, less noisy output, but lower quality per output bit)\n" + " -selector_rdo_thresh X: Set selector RDO quality threshold, default is 1.25, lower is higher quality but less quality per output bit (try 1.0-3.0)\n" + "\n" + "Hierarchical virtual selector codebook options:\n" + " -global_sel_pal: Always use vitual selector palettes (instead of custom palettes), slightly smaller files, but lower quality, slower encoding\n" + " -no_auto_global_sel_pal: Don't automatically use virtual selector palettes on small images\n" + " -no_hybrid_sel_cb: Don't automatically use hybrid virtual selector codebooks (for higher quality, only active when -global_sel_pal is specified)\n" + " -global_pal_bits X: Set virtual selector codebook palette bits, range is [0,12], default is 8, higher is slower/better quality\n" + " -global_mod_bits X: Set virtual selector codebook modifier 
bits, range is [0,15], defualt is 8, higher is slower/better quality\n" + " -no_endpoint_refinement: Disable endpoint codebook refinement stage (slightly faster, but lower quality)\n" + " -hybrid_sel_cb_quality_thresh X: Set hybrid selector codebook quality threshold, default is 2.0, try 1.5-3, higher is lower quality/smaller codebooks\n" + "\n" + "Various command line examples:\n" + "basisu -srgb -file x.png -mipmap -y_flip : Compress a mipmapped x.basis file from an sRGB image named x.png, Y flip each source image\n" + "basisu -validate -file x.basis : Validate x.basis (check header, check file CRC's, attempt to transcode all slices)\n" + "basisu -unpack -file x.basis : Validates, transcodes and unpacks x.basis to mipmapped .KTX and RGB/A .PNG files (transcodes to all supported GPU texture formats)\n" + "basisu -q 255 -srgb -file x.png -mipmap -debug_output -comput_stats : Compress sRGB x.png to x.basis at quality level 255 with compressor debug output/statistics\n" + "basisu -max_endpoint_clusters 8192 -max_selector_clusters 7936 -file x.png : Compress non-sRGB x.png to x.basis using the largest supported manually specified codebook sizes\n" + "basisu -global_sel_pal -no_hybrid_sel_cb -file x.png : Compress a non-sRGB image, use virtual selector codebooks for improved compression (but slower encoding)\n" + "basisu -global_sel_pal -file x.png: Compress a non-sRGB image, use hybrid selector codebooks for slightly improved compression (but slower encoding)\n" + "basisu -srgb -multifile_printf \"x%02u.png\" -multifile_first 1 -multifile_count 20 : Compress a 20 sRGB source image video sequence (x01.png, x02.png, x03.png, etc.) 
to x01.basis\n" + "basisu -srgb x.png : Compress sRGB image x.png to x.basis using default settings (multiple filenames OK)\n" + "basisu x.basis : Unpack x.basis to PNG/KTX files (multiple filenames OK)\n" + ); +} + +class command_line_params +{ +public: + command_line_params() : + m_mode(cDefault), + m_multifile_first(0), + m_multifile_num(0) + { + } + + bool parse(int arg_c, const char **arg_v) + { + int arg_index = 1; + while (arg_index < arg_c) + { + const char *pArg = arg_v[arg_index]; + const int num_remaining_args = arg_c - (arg_index + 1); + int arg_count = 1; + +#define REMAINING_ARGS_CHECK(n) if (num_remaining_args < (n)) { error_printf("Error: Expected %u values to follow %s!\n", n, pArg); return false; } + + if (strcasecmp(pArg, "-compress") == 0) + m_mode = cCompress; + else if (strcasecmp(pArg, "-compare") == 0) + m_mode = cCompare; + else if (strcasecmp(pArg, "-unpack") == 0) + m_mode = cUnpack; + else if (strcasecmp(pArg, "-validate") == 0) + m_mode = cValidate; + else if (strcasecmp(pArg, "-file") == 0) + { + REMAINING_ARGS_CHECK(1); + m_input_filenames.push_back(std::string(arg_v[arg_index + 1])); + arg_count++; + } + else if (strcasecmp(pArg, "-alpha_file") == 0) + { + REMAINING_ARGS_CHECK(1); + m_input_alpha_filenames.push_back(std::string(arg_v[arg_index + 1])); + arg_count++; + } + else if (strcasecmp(pArg, "-multifile_printf") == 0) + { + REMAINING_ARGS_CHECK(1); + m_multifile_printf = std::string(arg_v[arg_index + 1]); + arg_count++; + } + else if (strcasecmp(pArg, "-multifile_first") == 0) + { + REMAINING_ARGS_CHECK(1); + m_multifile_first = atoi(arg_v[arg_index + 1]); + arg_count++; + } + else if (strcasecmp(pArg, "-multifile_num") == 0) + { + REMAINING_ARGS_CHECK(1); + m_multifile_num = atoi(arg_v[arg_index + 1]); + arg_count++; + } + else if (strcasecmp(pArg, "-srgb") == 0) + m_comp_params.m_perceptual = true; + else if (strcasecmp(pArg, "-q") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_quality_level = 
clamp(atoi(arg_v[arg_index + 1]), BASISU_QUALITY_MIN, BASISU_QUALITY_MAX); + arg_count++; + } + else if (strcasecmp(pArg, "-output_file") == 0) + { + REMAINING_ARGS_CHECK(1); + m_output_filename = arg_v[arg_index + 1]; + arg_count++; + } + else if (strcasecmp(pArg, "-output_path") == 0) + { + REMAINING_ARGS_CHECK(1); + m_output_path = arg_v[arg_index + 1]; + arg_count++; + } + else if (strcasecmp(pArg, "-debug_output") == 0) + { + m_comp_params.m_debug = true; + enable_debug_printf(true); + } + else if (strcasecmp(pArg, "-debug_images") == 0) + m_comp_params.m_debug_images = true; + else if (strcasecmp(pArg, "-compute_stats") == 0) + m_comp_params.m_compute_stats = true; + else if (strcasecmp(pArg, "-slower") == 0) + m_comp_params.m_faster = false; + else if (strcasecmp(pArg, "-max_endpoint_clusters") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_max_endpoint_clusters = clamp(atoi(arg_v[arg_index + 1]), 1, BASISU_MAX_ENDPOINT_CLUSTERS); + arg_count++; + } + else if (strcasecmp(pArg, "-max_selector_clusters") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_max_selector_clusters = clamp(atoi(arg_v[arg_index + 1]), 1, BASISU_MAX_SELECTOR_CLUSTERS); + arg_count++; + } + else if (strcasecmp(pArg, "-y_flip") == 0) + m_comp_params.m_y_flip = true; + else if (strcasecmp(pArg, "-normal_map") == 0) + { + m_comp_params.m_perceptual = false; + m_comp_params.m_mip_srgb = false; + m_comp_params.m_no_selector_rdo = true; + } + else if (strcasecmp(pArg, "-no_alpha") == 0) + m_comp_params.m_check_for_alpha = false; + else if (strcasecmp(pArg, "-force_alpha") == 0) + m_comp_params.m_force_alpha = true; + else if (strcasecmp(pArg, "-seperate_rg_to_color_alpha") == 0) + m_comp_params.m_seperate_rg_to_color_alpha = true; + else if (strcasecmp(pArg, "-no_multithreading") == 0) + { +#if defined(_OPENMP) + omp_set_num_threads(1); +#endif + } + else if (strcasecmp(pArg, "-mipmap") == 0) + m_comp_params.m_mip_gen = true; + else if (strcasecmp(pArg, "-mip_scale") == 0) + { + 
REMAINING_ARGS_CHECK(1); + m_comp_params.m_mip_scale = (float)atof(arg_v[arg_index + 1]); + arg_count++; + } + else if (strcasecmp(pArg, "-mip_filter") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_mip_filter = arg_v[arg_index + 1]; + // TODO: Check filter + arg_count++; + } + else if (strcasecmp(pArg, "-mip_renorm") == 0) + m_comp_params.m_mip_renormalize = true; + else if (strcasecmp(pArg, "-mip_clamp") == 0) + m_comp_params.m_mip_wrapping = false; + else if (strcasecmp(pArg, "-mip_smallest") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_mip_smallest_dimension = atoi(arg_v[arg_index + 1]); + arg_count++; + } + else if (strcasecmp(pArg, "-mip_srgb") == 0) + m_comp_params.m_mip_srgb = true; + else if (strcasecmp(pArg, "-no_selector_rdo") == 0) + m_comp_params.m_no_selector_rdo = true; + else if (strcasecmp(pArg, "-selector_rdo_thresh") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_selector_rdo_thresh = (float)atof(arg_v[arg_index + 1]); + arg_count++; + } + else if (strcasecmp(pArg, "-global_sel_pal") == 0) + m_comp_params.m_global_sel_pal = true; + else if (strcasecmp(pArg, "-no_endpoint_refinement") == 0) + m_comp_params.m_no_endpoint_refinement = true; + else if (strcasecmp(pArg, "-no_auto_global_sel_pal") == 0) + m_comp_params.m_no_auto_global_sel_pal = true; + else if (strcasecmp(pArg, "-global_pal_bits") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_global_pal_bits = atoi(arg_v[arg_index + 1]); + arg_count++; + } + else if (strcasecmp(pArg, "-global_mod_bits") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_global_mod_bits = atoi(arg_v[arg_index + 1]); + arg_count++; + } + else if (strcasecmp(pArg, "-no_hybrid_sel_cb") == 0) + m_comp_params.m_no_hybrid_sel_cb = true; + else if (strcasecmp(pArg, "-hybrid_sel_cb_quality_thresh") == 0) + { + REMAINING_ARGS_CHECK(1); + m_comp_params.m_hybrid_sel_cb_quality_thresh = (float)atof(arg_v[arg_index + 1]); + arg_count++; + } + else + { + // Let's assume it's a source filename, 
so globbing works + //error_printf("Unrecognized command line option: %s\n", pArg); + m_input_filenames.push_back(pArg); + } + + arg_index += arg_count; + } + + if (m_comp_params.m_quality_level != -1) + { + m_comp_params.m_max_endpoint_clusters = 0; + m_comp_params.m_max_selector_clusters = 0; + } + else if ((!m_comp_params.m_max_endpoint_clusters) || (!m_comp_params.m_max_selector_clusters)) + { + m_comp_params.m_max_endpoint_clusters = 0; + m_comp_params.m_max_selector_clusters = 0; + + m_comp_params.m_quality_level = 128; + } + + return true; + } + + basis_compressor_params m_comp_params; + + tool_mode m_mode; + + std::vector m_input_filenames; + std::vector m_input_alpha_filenames; + + std::string m_output_filename; + std::string m_output_path; + + std::string m_multifile_printf; + uint32_t m_multifile_first; + uint32_t m_multifile_num; +}; + +static bool expand_multifile(command_line_params &opts) +{ + if (!opts.m_multifile_printf.size()) + return true; + + if (!opts.m_multifile_num) + { + error_printf("Error: -multifile_printf specified, but not -multifile_num\n"); + return false; + } + + std::string fmt(opts.m_multifile_printf); + size_t x = fmt.find_first_of('!'); + if (x != std::string::npos) + fmt[x] = '%'; + + if (string_find_right(fmt, '%') == -1) + { + error_printf("Error: Must include C-style printf() format character '%' in -multifile_printf string\n"); + return false; + } + + for (uint32_t i = opts.m_multifile_first; i < opts.m_multifile_first + opts.m_multifile_num; i++) + { + char buf[1024]; +#ifdef _WIN32 + sprintf_s(buf, sizeof(buf), fmt.c_str(), i); +#else + snprintf(buf, sizeof(buf), fmt.c_str(), i); +#endif + + if (buf[0]) + opts.m_input_filenames.push_back(buf); + } + + return true; +} + +static bool compress_mode(command_line_params &opts) +{ + basist::etc1_global_selector_codebook sel_codebook(basist::g_global_selector_cb_size, basist::g_global_selector_cb); + + if (!expand_multifile(opts)) + return false; + + if 
(!opts.m_input_filenames.size()) + { + error_printf("No input files to process!\n"); + return false; + } + + basis_compressor_params ¶ms = opts.m_comp_params; + + params.m_source_filenames = opts.m_input_filenames; + params.m_source_alpha_filenames = opts.m_input_alpha_filenames; + + params.m_read_source_images = true; + params.m_write_output_basis_files = true; + params.m_pSel_codebook = &sel_codebook; + + if (opts.m_output_filename.size()) + params.m_out_filename = opts.m_output_filename; + else + { + std::string filename; + + string_get_filename(opts.m_input_filenames[0].c_str(), filename); + string_remove_extension(filename); + filename += ".basis"; + + if (opts.m_output_path.size()) + string_combine_path(filename, opts.m_output_path.c_str(), filename.c_str()); + + params.m_out_filename = filename; + } + + basis_compressor c; + + if (!c.init(opts.m_comp_params)) + { + error_printf("basis_compressor::init() failed!\n"); + return false; + } + + basis_compressor::error_code ec = c.process(); + if (ec != basis_compressor::cECSuccess) + { + switch (ec) + { + case basis_compressor::cECFailedReadingSourceImages: + error_printf("Compressor failed reading a source image!\n"); + break; + case basis_compressor::cECFailedFrontEnd: + error_printf("Compressor frontend stage failed!\n"); + break; + case basis_compressor::cECFailedFontendExtract: + error_printf("Compressor frontend data extraction failed!\n"); + break; + case basis_compressor::cECFailedBackend: + error_printf("Compressor backend stage failed!\n"); + break; + case basis_compressor::cECFailedCreateBasisFile: + error_printf("Compressor failed creating Basis file data!\n"); + break; + case basis_compressor::cECFailedWritingOutput: + error_printf("Compressor failed writing to output Basis file!\n"); + break; + default: + error_printf("basis_compress::process() failed!\n"); + break; + } + + return false; + } + + printf("Compression succeeded\n"); + + return true; +} + +static bool 
unpack_and_validate_mode(command_line_params &opts, bool validate_flag) +{ + basist::etc1_global_selector_codebook sel_codebook(basist::g_global_selector_cb_size, basist::g_global_selector_cb); + + if (!opts.m_input_filenames.size()) + { + error_printf("No input files to process!\n"); + return false; + } + + for (uint32_t file_index = 0; file_index < opts.m_input_filenames.size(); file_index++) + { + const char *pInput_filename = opts.m_input_filenames[file_index].c_str(); + + std::string base_filename; + string_split_path(pInput_filename, nullptr, nullptr, &base_filename, nullptr); + + uint8_vec basis_data; + if (!basisu::read_file_to_vec(pInput_filename, basis_data)) + { + error_printf("Failed reading file \"%s\"\n", pInput_filename); + return false; + } + + printf("Input file \"%s\"\n", pInput_filename); + + if (!basis_data.size()) + { + error_printf("File is empty!\n"); + return false; + } + + if (basis_data.size() > UINT32_MAX) + { + error_printf("File is too large!\n"); + return false; + } + + basist::basisu_transcoder dec(&sel_codebook); + + // Validate the file - note this isn't necessary for transcoding + if (!dec.validate_file_checksums(&basis_data[0], (uint32_t)basis_data.size(), true)) + { + error_printf("File failed CRC checks!\n"); + return false; + } + + printf("File CRC checks succeeded\n"); + + basist::basisu_file_info fileinfo; + if (!dec.get_file_info(&basis_data[0], (uint32_t)basis_data.size(), fileinfo)) + { + error_printf("Failed retrieving Basis file information!\n"); + return false; + } + + assert(fileinfo.m_total_images == fileinfo.m_image_mipmap_levels.size()); + assert(fileinfo.m_total_images == dec.get_total_images(&basis_data[0], (uint32_t)basis_data.size())); + + printf("File info:\n"); + printf(" Version: %X\n", fileinfo.m_version); + printf(" Total header size: %u\n", fileinfo.m_total_header_size); + printf(" Total selectors: %u\n", fileinfo.m_total_selectors); + printf(" Selector codebook size: %u\n", 
fileinfo.m_selector_codebook_size); + printf(" Total endpoints: %u\n", fileinfo.m_total_endpoints); + printf(" Endpoint codebook size: %u\n", fileinfo.m_endpoint_codebook_size); + printf(" Tables size: %u\n", fileinfo.m_tables_size); + printf(" Slices size: %u\n", fileinfo.m_slices_size); + printf(" Total slices: %u\n", (uint32_t)fileinfo.m_slice_info.size()); + printf(" Total images: %i\n", fileinfo.m_total_images); + printf(" Image mipmap levels: "); + for (uint32_t i = 0; i < fileinfo.m_total_images; i++) + printf("%u ", fileinfo.m_image_mipmap_levels[i]); + printf("\n"); + printf(" Y Flipped: %u, Has alpha slices: %u\n", fileinfo.m_y_flipped, fileinfo.m_has_alpha_slices); + + if (!dec.start_decoding(&basis_data[0], (uint32_t)basis_data.size())) + { + error_printf("start_decoding() failed!\n"); + return false; + } + + std::vector< gpu_image_vec > gpu_images[basist::cTFTotalTextureFormats]; + + for (int format_iter = 0; format_iter < basist::cTFTotalTextureFormats; format_iter++) + { + basist::transcoder_texture_format tex_fmt = static_cast(format_iter); + + gpu_images[tex_fmt].resize(fileinfo.m_total_images); + + for (uint32_t image_index = 0; image_index < fileinfo.m_total_images; image_index++) + gpu_images[tex_fmt][image_index].resize(fileinfo.m_image_mipmap_levels[image_index]); + } + + bool pvrtc_nonpow2_warning = false; + + // Now transcode the file to all supported texture formats and save mipmapped KTX files + for (uint32_t image_index = 0; image_index < fileinfo.m_total_images; image_index++) + { + for (uint32_t level_index = 0; level_index < fileinfo.m_image_mipmap_levels[image_index]; level_index++) + { + basist::basisu_image_level_info level_info; + + if (!dec.get_image_level_info(&basis_data[0], (uint32_t)basis_data.size(), level_info, image_index, level_index)) + { + error_printf("Failed retrieving image level information (%u %u)!\n", image_index, level_index); + return false; + } + + for (int format_iter = 0; format_iter < 
basist::cTFTotalTextureFormats; format_iter++) + { + const basist::transcoder_texture_format transcoder_tex_fmt = static_cast(format_iter); + + if (transcoder_tex_fmt == basist::cTFPVRTC1_4_OPAQUE_ONLY) + { + if (!is_pow2(level_info.m_width) || !is_pow2(level_info.m_height)) + { + if (!pvrtc_nonpow2_warning) + { + pvrtc_nonpow2_warning = true; + + printf("Warning: Will not transcode image %u level %u res %ux%u to PVRTC1 (one or more dimension is not a power of 2)\n", image_index, level_index, level_info.m_width, level_info.m_height); + } + + // Can't transcode this image level to PVRTC because it's not a pow2 (we're going to support transcoding non-pow2 to the next larger pow2 soon) + continue; + } + } + + basisu::texture_format tex_fmt = basis_get_basisu_texture_format(transcoder_tex_fmt); + + gpu_image &gi = gpu_images[transcoder_tex_fmt][image_index][level_index]; + gi.init(tex_fmt, level_info.m_orig_width, level_info.m_orig_height); + + if (!dec.transcode_image_level(&basis_data[0], (uint32_t)basis_data.size(), image_index, level_index, gi.get_ptr(), gi.get_total_blocks(), transcoder_tex_fmt, 0)) + { + error_printf("Failed transcoding image level (%u %u %u)!\n", image_index, level_index, format_iter); + return false; + } + + printf("Transcode of image %u level %u res %ux%u format %s succeeded\n", image_index, level_index, level_info.m_orig_width, level_info.m_orig_height, basist::basis_get_format_name(transcoder_tex_fmt)); + + } // format_iter + + } // level_index + + } // image_info + + if (!validate_flag) + { + // Now write KTX files and unpack them to individual PNG's + + for (int format_iter = 0; format_iter < basist::cTFTotalTextureFormats; format_iter++) + { + const basist::transcoder_texture_format transcoder_tex_fmt = static_cast(format_iter); + + for (uint32_t image_index = 0; image_index < fileinfo.m_total_images; image_index++) + { + gpu_image_vec &gi = gpu_images[format_iter][image_index]; + + if (!gi.size()) + continue; + + uint32_t level; + for 
(level = 0; level < gi.size(); level++) + if (!gi[level].get_total_blocks()) + break; + + if (level < gi.size()) + continue; + + std::string ktx_filename(base_filename + string_format("_transcoded_%s_%u.ktx", basist::basis_get_format_name(transcoder_tex_fmt), image_index)); + if (!write_compressed_texture_file(ktx_filename, gi)) + { + error_printf("Failed writing KTX file \"%s\"!\n", ktx_filename.c_str()); + return false; + } + printf("Wrote KTX file \"%s\"\n", ktx_filename.c_str()); + + for (uint32_t level_index = 0; level_index < gi.size(); level_index++) + { + basist::basisu_image_level_info level_info; + + if (!dec.get_image_level_info(&basis_data[0], (uint32_t)basis_data.size(), level_info, image_index, level_index)) + { + error_printf("Failed retrieving image level information (%u %u)!\n", image_index, level_index); + return false; + } + + image u; + if (!gi[level_index].unpack(u)) + { + error_printf("Failed unpacking GPU texture data (%u %u %u)\n", format_iter, image_index, level_index); + return false; + } + //u.crop(level_info.m_orig_width, level_info.m_orig_height); + + std::string rgb_filename(base_filename + string_format("_unpacked_rgb_%s_%u_%u.png", basist::basis_get_format_name(transcoder_tex_fmt), image_index, level_index)); + if (!save_png(rgb_filename, u, cImageSaveIgnoreAlpha)) + { + error_printf("Failed writing to PNG file \"%s\"\n", rgb_filename.c_str()); + return false; + } + printf("Wrote PNG file \"%s\"\n", rgb_filename.c_str()); + + if (basis_transcoder_format_has_alpha(transcoder_tex_fmt)) + { + std::string a_filename(base_filename + string_format("_unpacked_a_%s_%u_%u.png", basist::basis_get_format_name(transcoder_tex_fmt), image_index, level_index)); + if (!save_png(a_filename, u, cImageSaveGrayscale, 3)) + { + error_printf("Failed writing to PNG file \"%s\"\n", a_filename.c_str()); + return false; + } + printf("Wrote PNG file \"%s\"\n", a_filename.c_str()); + } + + } // level_index + + } // image_index + + } // format_iter + } // if 
(!validate_flag) + + } // image_index + + printf("Success\n"); + + return true; +} + +static bool compare_mode(command_line_params &opts) +{ + if (opts.m_input_filenames.size() != 2) + { + error_printf("Must specify two PNG filenames using -file\n"); + return false; + } + + image a, b; + if (!load_png(opts.m_input_filenames[0].c_str(), a)) + { + error_printf("Failed loading image from file \"%s\"!\n", opts.m_input_filenames[0].c_str()); + return false; + } + + printf("Loaded \"%s\", %ux%u, has alpha: %u\n", opts.m_input_filenames[0].c_str(), a.get_width(), a.get_height(), a.has_alpha()); + + if (!load_png(opts.m_input_filenames[1].c_str(), b)) + { + error_printf("Failed loading image from file \"%s\"!\n", opts.m_input_filenames[1].c_str()); + return false; + } + + printf("Loaded \"%s\", %ux%u, has alpha: %u\n", opts.m_input_filenames[1].c_str(), b.get_width(), b.get_height(), b.has_alpha()); + + if ((a.get_width() != b.get_width()) || (a.get_height() != b.get_height())) + { + printf("Images don't have the same dimensions - cropping input images to smallest common dimensions\n"); + + uint32_t w = minimum(a.get_width(), b.get_width()); + uint32_t h = minimum(a.get_height(), b.get_height()); + + a.crop(w, h); + b.crop(w, h); + } + + printf("Comparison image res: %ux%u\n", a.get_width(), a.get_height()); + + image_metrics im; + im.calc(a, b, 0, 3); + im.print("RGB "); + + im.calc(a, b, 0, 1); + im.print("R "); + + im.calc(a, b, 1, 1); + im.print("G "); + + im.calc(a, b, 2, 1); + im.print("B "); + + im.calc(a, b, 0, 0); + im.print("Y " ); + + vec4F s_rgb(compute_ssim(a, b, false)); + + printf("R SSIM: %f\n", s_rgb[0]); + printf("G SSIM: %f\n", s_rgb[1]); + printf("B SSIM: %f\n", s_rgb[2]); + printf("RGB Avg SSIM: %f\n", (s_rgb[0] + s_rgb[1] + s_rgb[2]) / 3.0f); + printf("A SSIM: %f\n", s_rgb[3]); + + vec4F s_y(compute_ssim(a, b, true)); + printf("Y SSIM: %f\n", s_y[0]); + + image delta_img(a.get_width(), a.get_height()); + + const int X = 2; + +#pragma omp parallel for 
+ for (int y = 0; y < (int)a.get_height(); y++) + { + for (uint32_t x = 0; x < a.get_width(); x++) + { + color_rgba &d = delta_img(x, y); + + for (int c = 0; c < 4; c++) + d[c] = (uint8_t)clamp((a(x, y)[c] - b(x, y)[c]) * X + 128, 0, 255); + } // x + } // y + + save_png("a_rgb.png", a, cImageSaveIgnoreAlpha); + save_png("a_alpha.png", a, cImageSaveGrayscale, 3); + printf("Wrote a_rgb.png and a_alpha.png\n"); + + save_png("b_rgb.png", b, cImageSaveIgnoreAlpha); + save_png("b_alpha.png", b, cImageSaveGrayscale, 3); + printf("Wrote b_rgb.png and b_alpha.png\n"); + + save_png("delta_img_rgb.png", delta_img, cImageSaveIgnoreAlpha); + printf("Wrote delta_img_rgb.png\n"); + + save_png("delta_img_a.png", delta_img, cImageSaveGrayscale, 3); + printf("Wrote delta_img_a.png\n"); + + return true; +} + +int main(int argc, const char **argv) +{ + basisu_encoder_init(); + + printf("Basis Universal GPU Texture Compressor v" BASISU_TOOL_VERSION ", Copyright (C) 2017-2019 Binomial LLC, All rights reserved\n"); + + if (argc == 1) + { + print_usage(); + return EXIT_FAILURE; + } + + command_line_params opts; + if (!opts.parse(argc, argv)) + { + print_usage(); + return EXIT_FAILURE; + } + + if (opts.m_mode == cDefault) + { + for (size_t i = 0; i < opts.m_input_filenames.size(); i++) + { + std::string ext(string_get_extension(opts.m_input_filenames[i])); + if (strcasecmp(ext.c_str(), "basis") == 0) + { + // If they haven't specified any modes, and they give us a .basis file, then assume they want to unpack it. + opts.m_mode = cUnpack; + break; + } + } + } + + bool status = false; + + switch (opts.m_mode) + { + case cDefault: + case cCompress: + status = compress_mode(opts); + break; + case cValidate: + status = unpack_and_validate_mode(opts, true); + break; + case cUnpack: + status = unpack_and_validate_mode(opts, false); + break; + case cCompare: + status = compare_mode(opts); + break; + default: + assert(0); + break; + } + + return status ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/build_clang.sh b/build_clang.sh new file mode 100644 index 0000000..91e8a79 --- /dev/null +++ b/build_clang.sh @@ -0,0 +1 @@ +cmake -D CMAKE_C_COMPILER=/usr/bin/clang -D CMAKE_CXX_COMPILER=/usr/bin/clang++ . diff --git a/lodepng.cpp b/lodepng.cpp new file mode 100644 index 0000000..b4087d6 --- /dev/null +++ b/lodepng.cpp @@ -0,0 +1,5999 @@ +/* +LodePNG version 20190210 + +Copyright (c) 2005-2019 Lode Vandevenne + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + +/* +The manual and changelog are in the header file "lodepng.h" +Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C. 
+*/ + +#ifdef _MSC_VER +#pragma warning (disable : 4201) +#define _SECURE_SCL 0 +#define _HAS_ITERATOR_DEBUGGING 0 +#define _ITERATOR_DEBUG_LEVEL 0 +#endif + +#include "lodepng.h" + +#include /* LONG_MAX */ +#include /* file handling */ +#include /* allocations */ + +#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/ +#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/ +#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/ +#endif /*_MSC_VER */ + +const char* LODEPNG_VERSION_STRING = "20190210"; + +/* +This source file is built up in the following large parts. The code sections +with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way. +-Tools for C and common code for PNG and Zlib +-C Code for Zlib (huffman, deflate, ...) +-C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...) +-The C++ wrapper around all of the above +*/ + +/*The malloc, realloc and free functions defined here with "lodepng_" in front +of the name, so that you can easily change them to others related to your +platform if needed. Everything else in the code calls these. Pass +-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out +#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and +define them in your own project's source files without needing to change +lodepng source code. 
Don't forget to remove "static" if you copypaste them +from here.*/ + +#ifdef LODEPNG_COMPILE_ALLOCATORS +static void* lodepng_malloc(size_t size) { +#ifdef LODEPNG_MAX_ALLOC + if(size > LODEPNG_MAX_ALLOC) return 0; +#endif + return malloc(size); +} + +static void* lodepng_realloc(void* ptr, size_t new_size) { +#ifdef LODEPNG_MAX_ALLOC + if(new_size > LODEPNG_MAX_ALLOC) return 0; +#endif + return realloc(ptr, new_size); +} + +static void lodepng_free(void* ptr) { + free(ptr); +} +#else /*LODEPNG_COMPILE_ALLOCATORS*/ +void* lodepng_malloc(size_t size); +void* lodepng_realloc(void* ptr, size_t new_size); +void lodepng_free(void* ptr); +#endif /*LODEPNG_COMPILE_ALLOCATORS*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // Tools for C, and common code for PNG and Zlib. // */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b)) + +/* +Often in case of an error a value is assigned to a variable and then it breaks +out of a loop (to go to the cleanup phase of a function). This macro does that. +It makes the error handling code shorter and more readable. 
+ +Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83); +*/ +#define CERROR_BREAK(errorvar, code){\ + errorvar = code;\ + break;\ +} + +/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/ +#define ERROR_BREAK(code) CERROR_BREAK(error, code) + +/*Set error var to the error code, and return it.*/ +#define CERROR_RETURN_ERROR(errorvar, code){\ + errorvar = code;\ + return code;\ +} + +/*Try the code, if it returns error, also return the error.*/ +#define CERROR_TRY_RETURN(call){\ + unsigned error = call;\ + if(error) return error;\ +} + +/*Set error var to the error code, and return from the void function.*/ +#define CERROR_RETURN(errorvar, code){\ + errorvar = code;\ + return;\ +} + +/* +About uivector, ucvector and string: +-All of them wrap dynamic arrays or text strings in a similar way. +-LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version. +-The string tools are made to avoid problems with compilers that declare things like strncat as deprecated. +-They're not used in the interface, only internally in this file as static functions. +-As with many other structs in this file, the init and cleanup functions serve as ctor and dtor. +*/ + +#ifdef LODEPNG_COMPILE_ZLIB +/*dynamic vector of unsigned ints*/ +typedef struct uivector { + unsigned* data; + size_t size; /*size in number of unsigned longs*/ + size_t allocsize; /*allocated size in bytes*/ +} uivector; + +static void uivector_cleanup(void* p) { + ((uivector*)p)->size = ((uivector*)p)->allocsize = 0; + lodepng_free(((uivector*)p)->data); + ((uivector*)p)->data = NULL; +} + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned uivector_reserve(uivector* p, size_t allocsize) { + if(allocsize > p->allocsize) { + size_t newsize = (allocsize > p->allocsize * 2) ? 
allocsize : (allocsize * 3 / 2); + void* data = lodepng_realloc(p->data, newsize); + if(data) { + p->allocsize = newsize; + p->data = (unsigned*)data; + } + else return 0; /*error: not enough memory*/ + } + return 1; +} + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned uivector_resize(uivector* p, size_t size) { + if(!uivector_reserve(p, size * sizeof(unsigned))) return 0; + p->size = size; + return 1; /*success*/ +} + +/*resize and give all new elements the value*/ +static unsigned uivector_resizev(uivector* p, size_t size, unsigned value) { + size_t oldsize = p->size, i; + if(!uivector_resize(p, size)) return 0; + for(i = oldsize; i < size; ++i) p->data[i] = value; + return 1; +} + +static void uivector_init(uivector* p) { + p->data = NULL; + p->size = p->allocsize = 0; +} + +#ifdef LODEPNG_COMPILE_ENCODER +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned uivector_push_back(uivector* p, unsigned c) { + if(!uivector_resize(p, p->size + 1)) return 0; + p->data[p->size - 1] = c; + return 1; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ +#endif /*LODEPNG_COMPILE_ZLIB*/ + +/* /////////////////////////////////////////////////////////////////////////// */ + +/*dynamic vector of unsigned chars*/ +typedef struct ucvector { + unsigned char* data; + size_t size; /*used size*/ + size_t allocsize; /*allocated size*/ +} ucvector; + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned ucvector_reserve(ucvector* p, size_t allocsize) { + if(allocsize > p->allocsize) { + size_t newsize = (allocsize > p->allocsize * 2) ? 
allocsize : (allocsize * 3 / 2); + void* data = lodepng_realloc(p->data, newsize); + if(data) { + p->allocsize = newsize; + p->data = (unsigned char*)data; + } + else return 0; /*error: not enough memory*/ + } + return 1; +} + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned ucvector_resize(ucvector* p, size_t size) { + if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0; + p->size = size; + return 1; /*success*/ +} + +#ifdef LODEPNG_COMPILE_PNG + +static void ucvector_cleanup(void* p) { + ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0; + lodepng_free(((ucvector*)p)->data); + ((ucvector*)p)->data = NULL; +} + +static void ucvector_init(ucvector* p) { + p->data = NULL; + p->size = p->allocsize = 0; +} +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ZLIB +/*you can both convert from vector to buffer&size and vica versa. If you use +init_buffer to take over a buffer and size, it is not needed to use cleanup*/ +static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size) { + p->data = buffer; + p->allocsize = p->size = size; +} +#endif /*LODEPNG_COMPILE_ZLIB*/ + +#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER) +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned ucvector_push_back(ucvector* p, unsigned char c) { + if(!ucvector_resize(p, p->size + 1)) return 0; + p->data[p->size - 1] = c; + return 1; +} +#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/ + + +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_PNG +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +/*free string pointer and set it to NULL*/ +static void string_cleanup(char** out) { + lodepng_free(*out); + *out = NULL; +} + +/* dynamically allocates a new string with a copy of the null terminated input text */ +static char* alloc_string(const char* in) { + size_t insize = 
strlen(in); + char* out = (char*)lodepng_malloc(insize + 1); + if(out) { + size_t i; + for(i = 0; i != insize; ++i) { + out[i] = in[i]; + } + out[i] = 0; + } + return out; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +/* ////////////////////////////////////////////////////////////////////////// */ + +unsigned lodepng_read32bitInt(const unsigned char* buffer) { + return (unsigned)((buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]); +} + +#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER) +/*buffer must have at least 4 allocated bytes available*/ +static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) { + buffer[0] = (unsigned char)((value >> 24) & 0xff); + buffer[1] = (unsigned char)((value >> 16) & 0xff); + buffer[2] = (unsigned char)((value >> 8) & 0xff); + buffer[3] = (unsigned char)((value ) & 0xff); +} +#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/ + +#ifdef LODEPNG_COMPILE_ENCODER +static void lodepng_add32bitInt(ucvector* buffer, unsigned value) { + ucvector_resize(buffer, buffer->size + 4); /*todo: give error if resize failed*/ + lodepng_set32bitInt(&buffer->data[buffer->size - 4], value); +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / File IO / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_DISK + +/* returns negative value on error. This should be pure C compatible, so no fstat. */ +static long lodepng_filesize(const char* filename) { + FILE* file; + long size; + file = fopen(filename, "rb"); + if(!file) return -1; + + if(fseek(file, 0, SEEK_END) != 0) { + fclose(file); + return -1; + } + + size = ftell(file); + /* It may give LONG_MAX as directory size, this is invalid for us. 
*/ + if(size == LONG_MAX) size = -1; + + fclose(file); + return size; +} + +/* load file into buffer that already has the correct allocated size. Returns error code.*/ +static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename) { + FILE* file; + size_t readsize; + file = fopen(filename, "rb"); + if(!file) return 78; + + readsize = fread(out, 1, size, file); + fclose(file); + + if (readsize != size) return 78; + return 0; +} + +unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) { + long size = lodepng_filesize(filename); + if (size < 0) return 78; + *outsize = (size_t)size; + + *out = (unsigned char*)lodepng_malloc((size_t)size); + if(!(*out) && size > 0) return 83; /*the above malloc failed*/ + + return lodepng_buffer_file(*out, (size_t)size, filename); +} + +/*write given buffer to the file, overwriting the file, it doesn't append to it.*/ +unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) { + FILE* file; + file = fopen(filename, "wb" ); + if(!file) return 79; + fwrite(buffer, 1, buffersize, file); + fclose(file); + return 0; +} + +#endif /*LODEPNG_COMPILE_DISK*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // End of common code and tools. Begin of Zlib related code. 
// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_ENCODER +/*TODO: this ignores potential out of memory errors*/ +#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit){\ + /*add a new byte at the end*/\ + if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\ + /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\ + (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\ + ++(*bitpointer);\ +} + +static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) { + size_t i; + for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> i) & 1)); +} + +static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) { + size_t i; + for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> (nbits - 1 - i)) & 1)); +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +#define READBIT(bitpointer, bitstream) ((bitstream[bitpointer >> 3] >> (bitpointer & 0x7)) & (unsigned char)1) + +static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream) { + unsigned char result = (unsigned char)(READBIT(*bitpointer, bitstream)); + ++(*bitpointer); + return result; +} + +static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) { + unsigned result = 0, i; + for(i = 0; i != nbits; ++i) { + result += ((unsigned)READBIT(*bitpointer, bitstream)) << i; + ++(*bitpointer); + } + return result; +} +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Deflate - Huffman / */ +/* 
////////////////////////////////////////////////////////////////////////// */ + +#define FIRST_LENGTH_CODE_INDEX 257 +#define LAST_LENGTH_CODE_INDEX 285 +/*256 literals, the end code, some length codes, and 2 unused codes*/ +#define NUM_DEFLATE_CODE_SYMBOLS 288 +/*the distance codes have their own symbols, 30 used, 2 unused*/ +#define NUM_DISTANCE_SYMBOLS 32 +/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/ +#define NUM_CODE_LENGTH_CODES 19 + +/*the base lengths represented by codes 257-285*/ +static const unsigned LENGTHBASE[29] + = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258}; + +/*the extra bits used by codes 257-285 (added to base length)*/ +static const unsigned LENGTHEXTRA[29] + = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 0}; + +/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/ +static const unsigned DISTANCEBASE[30] + = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, + 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; + +/*the extra bits of backwards distances (added to base)*/ +static const unsigned DISTANCEEXTRA[30] + = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; + +/*the order in which "code length alphabet code lengths" are stored, out of this +the huffman tree of the dynamic huffman tree lengths is generated*/ +static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES] + = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +/* ////////////////////////////////////////////////////////////////////////// */ + +/* +Huffman tree struct, containing multiple representations of the tree +*/ +typedef struct HuffmanTree { + unsigned* tree2d; + unsigned* tree1d; + unsigned* lengths; /*the lengths of the 
codes of the 1d-tree*/ + unsigned maxbitlen; /*maximum number of bits a single code can get*/ + unsigned numcodes; /*number of symbols in the alphabet = number of codes*/ +} HuffmanTree; + +/*function used for debug purposes to draw the tree in ascii art with C++*/ +/* +static void HuffmanTree_draw(HuffmanTree* tree) { + std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl; + for(size_t i = 0; i != tree->tree1d.size; ++i) { + if(tree->lengths.data[i]) + std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl; + } + std::cout << std::endl; +}*/ + +static void HuffmanTree_init(HuffmanTree* tree) { + tree->tree2d = 0; + tree->tree1d = 0; + tree->lengths = 0; +} + +static void HuffmanTree_cleanup(HuffmanTree* tree) { + lodepng_free(tree->tree2d); + lodepng_free(tree->tree1d); + lodepng_free(tree->lengths); +} + +/*the tree representation used by the decoder. return value is error*/ +static unsigned HuffmanTree_make2DTree(HuffmanTree* tree) { + unsigned nodefilled = 0; /*up to which node it is filled*/ + unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/ + unsigned n, i; + + tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned)); + if(!tree->tree2d) return 83; /*alloc fail*/ + + /* + convert tree1d[] to tree2d[][]. In the 2D array, a value of 32767 means + uninited, a value >= numcodes is an address to another bit, a value < numcodes + is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as + many columns as codes - 1. + A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes. + Here, the internal nodes are stored (what their 0 and 1 option point to). 
+ There is only memory for such good tree currently, if there are more nodes + (due to too long length codes), error 55 will happen + */ + for(n = 0; n < tree->numcodes * 2; ++n) { + tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/ + } + + for(n = 0; n < tree->numcodes; ++n) /*the codes*/ { + for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code*/ { + unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1); + /*oversubscribed, see comment in lodepng_error_text*/ + if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55; + if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/ { + if(i + 1 == tree->lengths[n]) /*last bit*/ { + tree->tree2d[2 * treepos + bit] = n; /*put the current code in it*/ + treepos = 0; + } else { + /*put address of the next step in here, first that address has to be found of course + (it's just nodefilled + 1)...*/ + ++nodefilled; + /*addresses encoded with numcodes added to it*/ + tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes; + treepos = nodefilled; + } + } + else treepos = tree->tree2d[2 * treepos + bit] - tree->numcodes; + } + } + + for(n = 0; n < tree->numcodes * 2; ++n) { + if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/ + } + + return 0; +} + +/* +Second step for the ...makeFromLengths and ...makeFromFrequencies functions. +numcodes, lengths and maxbitlen must already be filled in correctly. return +value is error. 
+*/ +static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) { + uivector blcount; + uivector nextcode; + unsigned error = 0; + unsigned bits, n; + + uivector_init(&blcount); + uivector_init(&nextcode); + + tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned)); + if(!tree->tree1d) error = 83; /*alloc fail*/ + + if(!uivector_resizev(&blcount, tree->maxbitlen + 1, 0) + || !uivector_resizev(&nextcode, tree->maxbitlen + 1, 0)) + error = 83; /*alloc fail*/ + + if(!error) { + /*step 1: count number of instances of each code length*/ + for(bits = 0; bits != tree->numcodes; ++bits) ++blcount.data[tree->lengths[bits]]; + /*step 2: generate the nextcode values*/ + for(bits = 1; bits <= tree->maxbitlen; ++bits) { + nextcode.data[bits] = (nextcode.data[bits - 1] + blcount.data[bits - 1]) << 1; + } + /*step 3: generate all the codes*/ + for(n = 0; n != tree->numcodes; ++n) { + if(tree->lengths[n] != 0) tree->tree1d[n] = nextcode.data[tree->lengths[n]]++; + } + } + + uivector_cleanup(&blcount); + uivector_cleanup(&nextcode); + + if(!error) return HuffmanTree_make2DTree(tree); + else return error; +} + +/* +given the code lengths (as stored in the PNG file), generate the tree as defined +by Deflate. maxbitlen is the maximum bits that a code in the tree can have. +return value is error. 
+*/ +static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen, + size_t numcodes, unsigned maxbitlen) { + unsigned i; + tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned)); + if(!tree->lengths) return 83; /*alloc fail*/ + for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i]; + tree->numcodes = (unsigned)numcodes; /*number of symbols*/ + tree->maxbitlen = maxbitlen; + return HuffmanTree_makeFromLengths2(tree); +} + +#ifdef LODEPNG_COMPILE_ENCODER + +/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding", +Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/ + +/*chain node for boundary package merge*/ +typedef struct BPMNode { + int weight; /*the sum of all weights in this chain*/ + unsigned index; /*index of this leaf node (called "count" in the paper)*/ + struct BPMNode* tail; /*the next nodes in this chain (null if last)*/ + int in_use; +} BPMNode; + +/*lists of chains*/ +typedef struct BPMLists { + /*memory pool*/ + unsigned memsize; + BPMNode* memory; + unsigned numfree; + unsigned nextfree; + BPMNode** freelist; + /*two heads of lookahead chains per list*/ + unsigned listsize; + BPMNode** chains0; + BPMNode** chains1; +} BPMLists; + +/*creates a new chain node with the given parameters, from the memory in the lists */ +static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) { + unsigned i; + BPMNode* result; + + /*memory full, so garbage collect*/ + if(lists->nextfree >= lists->numfree) { + /*mark only those that are in use*/ + for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0; + for(i = 0; i != lists->listsize; ++i) { + BPMNode* node; + for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1; + for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1; + } + /*collect those that are free*/ + lists->numfree = 0; + for(i = 0; i != lists->memsize; ++i) { + 
if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i]; + } + lists->nextfree = 0; + } + + result = lists->freelist[lists->nextfree++]; + result->weight = weight; + result->index = index; + result->tail = tail; + return result; +} + +/*sort the leaves with stable mergesort*/ +static void bpmnode_sort(BPMNode* leaves, size_t num) { + BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num); + size_t width, counter = 0; + for(width = 1; width < num; width *= 2) { + BPMNode* a = (counter & 1) ? mem : leaves; + BPMNode* b = (counter & 1) ? leaves : mem; + size_t p; + for(p = 0; p < num; p += 2 * width) { + size_t q = (p + width > num) ? num : (p + width); + size_t r = (p + 2 * width > num) ? num : (p + 2 * width); + size_t i = p, j = q, k; + for(k = p; k < r; k++) { + if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++]; + else b[k] = a[j++]; + } + } + counter++; + } + if(counter & 1) memcpy(leaves, mem, sizeof(*leaves) * num); + lodepng_free(mem); +} + +/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/ +static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) { + unsigned lastindex = lists->chains1[c]->index; + + if(c == 0) { + if(lastindex >= numpresent) return; + lists->chains0[c] = lists->chains1[c]; + lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0); + } else { + /*sum of the weights of the head nodes of the previous lookahead chains.*/ + int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight; + lists->chains0[c] = lists->chains1[c]; + if(lastindex < numpresent && sum > leaves[lastindex].weight) { + lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail); + return; + } + lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]); + /*in the end we are only interested in the chain of the last list, so no + need 
to recurse if we're at the last one (this gives measurable speedup)*/ + if(num + 1 < (int)(2 * numpresent - 2)) { + boundaryPM(lists, leaves, numpresent, c - 1, num); + boundaryPM(lists, leaves, numpresent, c - 1, num); + } + } +} + +unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, + size_t numcodes, unsigned maxbitlen) { + unsigned error = 0; + unsigned i; + size_t numpresent = 0; /*number of symbols with non-zero frequency*/ + BPMNode* leaves; /*the symbols, only those with > 0 frequency*/ + + if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/ + if((1u << maxbitlen) < (unsigned)numcodes) return 80; /*error: represent all symbols*/ + + leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves)); + if(!leaves) return 83; /*alloc fail*/ + + for(i = 0; i != numcodes; ++i) { + if(frequencies[i] > 0) { + leaves[numpresent].weight = (int)frequencies[i]; + leaves[numpresent].index = i; + ++numpresent; + } + } + + for(i = 0; i != numcodes; ++i) lengths[i] = 0; + + /*ensure at least two present symbols. There should be at least one symbol + according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To + make these work as well ensure there are at least two symbols. The + Package-Merge code below also doesn't work correctly if there's only one + symbol, it'd give it the theoritical 0 bits but in practice zlib wants 1 bit*/ + if(numpresent == 0) { + lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/ + } else if(numpresent == 1) { + lengths[leaves[0].index] = 1; + lengths[leaves[0].index == 0 ? 
1 : 0] = 1; + } else { + BPMLists lists; + BPMNode* node; + + bpmnode_sort(leaves, numpresent); + + lists.listsize = maxbitlen; + lists.memsize = 2 * maxbitlen * (maxbitlen + 1); + lists.nextfree = 0; + lists.numfree = lists.memsize; + lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory)); + lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*)); + lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); + lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); + if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/ + + if(!error) { + for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i]; + + bpmnode_create(&lists, leaves[0].weight, 1, 0); + bpmnode_create(&lists, leaves[1].weight, 2, 0); + + for(i = 0; i != lists.listsize; ++i) { + lists.chains0[i] = &lists.memory[0]; + lists.chains1[i] = &lists.memory[1]; + } + + /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/ + for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i); + + for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) { + for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index]; + } + } + + lodepng_free(lists.memory); + lodepng_free(lists.freelist); + lodepng_free(lists.chains0); + lodepng_free(lists.chains1); + } + + lodepng_free(leaves); + return error; +} + +/*Create the Huffman tree given the symbol frequencies*/ +static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies, + size_t mincodes, size_t numcodes, unsigned maxbitlen) { + unsigned error = 0; + while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/ + tree->maxbitlen = maxbitlen; + tree->numcodes = (unsigned)numcodes; /*number of symbols*/ + tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * 
sizeof(unsigned)); + if(!tree->lengths) return 83; /*alloc fail*/ + /*initialize all lengths to 0*/ + memset(tree->lengths, 0, numcodes * sizeof(unsigned)); + + error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen); + if(!error) error = HuffmanTree_makeFromLengths2(tree); + return error; +} + +static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index) { + return tree->tree1d[index]; +} + +static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index) { + return tree->lengths[index]; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/ +static unsigned generateFixedLitLenTree(HuffmanTree* tree) { + unsigned i, error = 0; + unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); + if(!bitlen) return 83; /*alloc fail*/ + + /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/ + for(i = 0; i <= 143; ++i) bitlen[i] = 8; + for(i = 144; i <= 255; ++i) bitlen[i] = 9; + for(i = 256; i <= 279; ++i) bitlen[i] = 7; + for(i = 280; i <= 287; ++i) bitlen[i] = 8; + + error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15); + + lodepng_free(bitlen); + return error; +} + +/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/ +static unsigned generateFixedDistanceTree(HuffmanTree* tree) { + unsigned i, error = 0; + unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); + if(!bitlen) return 83; /*alloc fail*/ + + /*there are 32 distance codes, but 30-31 are unused*/ + for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5; + error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15); + + lodepng_free(bitlen); + return error; +} + +#ifdef LODEPNG_COMPILE_DECODER + +/* +returns the code, or (unsigned)(-1) if error happened 
+inbitlength is the length of the complete buffer, in bits (so its byte length times 8) +*/ +static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp, + const HuffmanTree* codetree, size_t inbitlength) { + unsigned treepos = 0, ct; + for(;;) { + if(*bp >= inbitlength) return (unsigned)(-1); /*error: end of input memory reached without endcode*/ + /* + decode the symbol from the tree. The "readBitFromStream" code is inlined in + the expression below because this is the biggest bottleneck while decoding + */ + ct = codetree->tree2d[(treepos << 1) + READBIT(*bp, in)]; + ++(*bp); + if(ct < codetree->numcodes) return ct; /*the symbol is decoded, return it*/ + else treepos = ct - codetree->numcodes; /*symbol not yet decoded, instead move tree position*/ + + if(treepos >= codetree->numcodes) return (unsigned)(-1); /*error: it appeared outside the codetree*/ + } +} +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Inflator (Decompressor) / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*get the tree of a deflated block with fixed tree, as specified in the deflate specification*/ +static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) { + /*TODO: check for out of memory errors*/ + generateFixedLitLenTree(tree_ll); + generateFixedDistanceTree(tree_d); +} + +/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/ +static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, + const unsigned char* in, size_t* bp, size_t inlength) { + /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/ + unsigned error = 0; + unsigned n, HLIT, HDIST, HCLEN, i; + size_t inbitlength = inlength * 8; + + /*see comments in deflateDynamic for explanation of the context and these variables, 
it is analogous*/ + unsigned* bitlen_ll = 0; /*lit,len code lengths*/ + unsigned* bitlen_d = 0; /*dist code lengths*/ + /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/ + unsigned* bitlen_cl = 0; + HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/ + + if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory*/ + + /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/ + HLIT = readBitsFromStream(bp, in, 5) + 257; + /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/ + HDIST = readBitsFromStream(bp, in, 5) + 1; + /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/ + HCLEN = readBitsFromStream(bp, in, 4) + 4; + + if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/ + + HuffmanTree_init(&tree_cl); + + while(!error) { + /*read the code length codes out of 3 * (amount of code length codes) bits*/ + + bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned)); + if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/); + + for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i) { + if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3); + else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/ + } + + error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7); + if(error) break; + + /*now we can use this tree to read the lengths for the tree that this function will return*/ + bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); + bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); + if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/); + for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0; + for(i = 0; i != 
NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0; + + /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/ + i = 0; + while(i < HLIT + HDIST) { + unsigned code = huffmanDecodeSymbol(in, bp, &tree_cl, inbitlength); + if(code <= 15) /*a length code*/ { + if(i < HLIT) bitlen_ll[i] = code; + else bitlen_d[i - HLIT] = code; + ++i; + } else if(code == 16) /*repeat previous*/ { + unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/ + unsigned value; /*set value to the previous code*/ + + if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/ + + if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ + replength += readBitsFromStream(bp, in, 2); + + if(i < HLIT + 1) value = bitlen_ll[i - 1]; + else value = bitlen_d[i - HLIT - 1]; + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/ + if(i < HLIT) bitlen_ll[i] = value; + else bitlen_d[i - HLIT] = value; + ++i; + } + } else if(code == 17) /*repeat "0" 3-10 times*/ { + unsigned replength = 3; /*read in the bits that indicate repeat length*/ + if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ + replength += readBitsFromStream(bp, in, 3); + + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/ + + if(i < HLIT) bitlen_ll[i] = 0; + else bitlen_d[i - HLIT] = 0; + ++i; + } + } else if(code == 18) /*repeat "0" 11-138 times*/ { + unsigned replength = 11; /*read in the bits that indicate repeat length*/ + if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ + replength += readBitsFromStream(bp, in, 7); + + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(15); 
/*error: i is larger than the amount of codes*/ + + if(i < HLIT) bitlen_ll[i] = 0; + else bitlen_d[i - HLIT] = 0; + ++i; + } + } else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ { + if(code == (unsigned)(-1)) { + /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol + (10=no endcode, 11=wrong jump outside of tree)*/ + error = (*bp) > inbitlength ? 10 : 11; + } + else error = 16; /*unexisting code, this can never happen*/ + break; + } + } + if(error) break; + + if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/ + + /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/ + error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15); + if(error) break; + error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15); + + break; /*end of error-while*/ + } + + lodepng_free(bitlen_cl); + lodepng_free(bitlen_ll); + lodepng_free(bitlen_d); + HuffmanTree_cleanup(&tree_cl); + + return error; +} + +/*inflate a block with dynamic of fixed Huffman tree*/ +static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size_t* bp, + size_t* pos, size_t inlength, unsigned btype) { + unsigned error = 0; + HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/ + HuffmanTree tree_d; /*the huffman tree for distance codes*/ + size_t inbitlength = inlength * 8; + + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + + if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d); + else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength); + + while(!error) /*decode all symbols until end reached, breaks at end code*/ { + /*code_ll is literal, length or end code*/ + unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength); + if(code_ll <= 255) /*literal symbol*/ { + /*ucvector_push_back would do the 
same, but for some reason the two lines below run 10% faster*/ + if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/); + out->data[*pos] = (unsigned char)code_ll; + ++(*pos); + } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ { + unsigned code_d, distance; + unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/ + size_t start, forward, backward, length; + + /*part 1: get length base*/ + length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX]; + + /*part 2: get extra bits and add the value of that to length*/ + numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX]; + if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ + length += readBitsFromStream(bp, in, numextrabits_l); + + /*part 3: get distance code*/ + code_d = huffmanDecodeSymbol(in, bp, &tree_d, inbitlength); + if(code_d > 29) { + if(code_d == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ { + /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol + (10=no endcode, 11=wrong jump outside of tree)*/ + error = (*bp) > inlength * 8 ? 
10 : 11; + } + else error = 18; /*error: invalid distance code (30-31 are never used)*/ + break; + } + distance = DISTANCEBASE[code_d]; + + /*part 4: get extra bits from distance*/ + numextrabits_d = DISTANCEEXTRA[code_d]; + if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ + distance += readBitsFromStream(bp, in, numextrabits_d); + + /*part 5: fill in all the out[n] values based on the length and dist*/ + start = (*pos); + if(distance > start) ERROR_BREAK(52); /*too long backward distance*/ + backward = start - distance; + + if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/); + if (distance < length) { + for(forward = 0; forward < length; ++forward) { + out->data[(*pos)++] = out->data[backward++]; + } + } else { + memcpy(out->data + *pos, out->data + backward, length); + *pos += length; + } + } else if(code_ll == 256) { + break; /*end code, break the loop*/ + } else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ { + /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol + (10=no endcode, 11=wrong jump outside of tree)*/ + error = ((*bp) > inlength * 8) ? 
10 : 11; + break; + } + } + + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + + return error; +} + +static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength) { + size_t p; + unsigned LEN, NLEN, n, error = 0; + + /*go to first boundary of byte*/ + while(((*bp) & 0x7) != 0) ++(*bp); + p = (*bp) / 8; /*byte position*/ + + /*read LEN (2 bytes) and NLEN (2 bytes)*/ + if(p + 4 >= inlength) return 52; /*error, bit pointer will jump past memory*/ + LEN = in[p] + 256u * in[p + 1]; p += 2; + NLEN = in[p] + 256u * in[p + 1]; p += 2; + + /*check if 16-bit NLEN is really the one's complement of LEN*/ + if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/ + + if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/ + + /*read the literal data: LEN bytes are now stored in the out buffer*/ + if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/ + for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++]; + + (*bp) = p * 8; + + return error; +} + +static unsigned lodepng_inflatev(ucvector* out, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + /*bit pointer in the "in" data, current byte is bp >> 3, current bit is bp & 0x7 (from lsb to msb of the byte)*/ + size_t bp = 0; + unsigned BFINAL = 0; + size_t pos = 0; /*byte position in the out buffer*/ + unsigned error = 0; + + (void)settings; + + while(!BFINAL) { + unsigned BTYPE; + if(bp + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/ + BFINAL = readBitFromStream(&bp, in); + BTYPE = 1u * readBitFromStream(&bp, in); + BTYPE += 2u * readBitFromStream(&bp, in); + + if(BTYPE == 3) return 20; /*error: invalid BTYPE*/ + else if(BTYPE == 0) error = inflateNoCompression(out, in, &bp, &pos, insize); /*no compression*/ + else error = inflateHuffmanBlock(out, in, &bp, &pos, insize, BTYPE); /*compression, BTYPE 01 or 10*/ + + if(error) return error; 
/*
Find the code index for a value in a sorted table of base values (such as the deflate
length or distance base tables): the index of the last entry that is smaller than or equal
to the given value. Index 0 is never probed; it is the answer whenever no entry at index 1
or higher qualifies.
array: the table, sorted in ascending order
array_size: number of entries in the table
value: the value to look up
*/
static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) {
  /*lower-bound binary search over the half-open index range [lo, hi)*/
  size_t lo = 1;
  size_t hi = array_size;

  while(lo < hi) {
    size_t probe = lo + ((hi - lo) >> 1);
    if(array[probe] < value) lo = probe + 1;
    else hi = probe;
  }

  /*lo is now the first index >= 1 whose entry is >= value, or array_size if there is none.
  That entry is the answer only on an exact hit, otherwise the one before it is.*/
  if(lo < array_size && array[lo] <= value) return lo;
  return lo - 1;
}
However for PNG + it's always going to be the zeros that dominate, so not important for PNG*/ + int* headz; /*similar to head, but for chainz*/ + unsigned short* chainz; /*those with same amount of zeros*/ + unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/ +} Hash; + +static unsigned hash_init(Hash* hash, unsigned windowsize) { + unsigned i; + hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES); + hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize); + hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + + hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1)); + hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + + if(!hash->head || !hash->chain || !hash->val || !hash->headz|| !hash->chainz || !hash->zeros) { + return 83; /*alloc fail*/ + } + + /*initialize hash table*/ + for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1; + for(i = 0; i != windowsize; ++i) hash->val[i] = -1; + for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/ + + for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1; + for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/ + + return 0; +} + +static void hash_cleanup(Hash* hash) { + lodepng_free(hash->head); + lodepng_free(hash->val); + lodepng_free(hash->chain); + + lodepng_free(hash->zeros); + lodepng_free(hash->headz); + lodepng_free(hash->chainz); +} + + + +static unsigned getHash(const unsigned char* data, size_t size, size_t pos) { + unsigned result = 0; + if(pos + 2 < size) { + /*A simple shift and xor hash is used. 
Since the data of PNGs is dominated + by zeroes due to the filters, a better hash does not have a significant + effect on speed in traversing the chain, and causes more time spend on + calculating the hash.*/ + result ^= (unsigned)(data[pos + 0] << 0u); + result ^= (unsigned)(data[pos + 1] << 4u); + result ^= (unsigned)(data[pos + 2] << 8u); + } else { + size_t amount, i; + if(pos >= size) return 0; + amount = size - pos; + for(i = 0; i != amount; ++i) result ^= (unsigned)(data[pos + i] << (i * 8u)); + } + return result & HASH_BIT_MASK; +} + +static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) { + const unsigned char* start = data + pos; + const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH; + if(end > data + size) end = data + size; + data = start; + while(data != end && *data == 0) ++data; + /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/ + return (unsigned)(data - start); +} + +/*wpos = pos & (windowsize - 1)*/ +static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) { + hash->val[wpos] = (int)hashval; + if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval]; + hash->head[hashval] = (int)wpos; + + hash->zeros[wpos] = numzeros; + if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros]; + hash->headz[numzeros] = (int)wpos; +} + +/* +LZ77-encode the data. Return value is error code. The input are raw bytes, the output +is in the form of unsigned integers with codes representing for example literal bytes, or +length/distance pairs. +It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a +sliding window (of windowsize) is used, and all past bytes in that window can be used as +the "dictionary". A brute force search through all possible distances would be slow, and +this hash technique is one out of several ways to speed this up. 
+*/ +static unsigned encodeLZ77(uivector* out, Hash* hash, + const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize, + unsigned minmatch, unsigned nicematch, unsigned lazymatching) { + size_t pos; + unsigned i, error = 0; + /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/ + unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8; + unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64; + + unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/ + unsigned numzeros = 0; + + unsigned offset; /*the offset represents the distance in LZ77 terminology*/ + unsigned length; + unsigned lazy = 0; + unsigned lazylength = 0, lazyoffset = 0; + unsigned hashval; + unsigned current_offset, current_length; + unsigned prev_offset; + const unsigned char *lastptr, *foreptr, *backptr; + unsigned hashpos; + + if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/ + if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/ + + if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH; + + for(pos = inpos; pos < insize; ++pos) { + size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/ + unsigned chainlength = 0; + + hashval = getHash(in, insize, pos); + + if(usezeros && hashval == 0) { + if(numzeros == 0) numzeros = countZeros(in, insize, pos); + else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; + } else { + numzeros = 0; + } + + updateHashChain(hash, wpos, hashval, numzeros); + + /*the length and offset found for the current position*/ + length = 0; + offset = 0; + + hashpos = hash->chain[wpos]; + + lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? 
insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH]; + + /*search for the longest string*/ + prev_offset = 0; + for(;;) { + if(chainlength++ >= maxchainlength) break; + current_offset = (unsigned)(hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize); + + if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/ + prev_offset = current_offset; + if(current_offset > 0) { + /*test the next characters*/ + foreptr = &in[pos]; + backptr = &in[pos - current_offset]; + + /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/ + if(numzeros >= 3) { + unsigned skip = hash->zeros[hashpos]; + if(skip > numzeros) skip = numzeros; + backptr += skip; + foreptr += skip; + } + + while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ { + ++backptr; + ++foreptr; + } + current_length = (unsigned)(foreptr - &in[pos]); + + if(current_length > length) { + length = current_length; /*the longest length*/ + offset = current_offset; /*the offset that is related to this longest length*/ + /*jump out once a length of max length is found (speed gain). 
This also jumps + out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/ + if(current_length >= nicematch) break; + } + } + + if(hashpos == hash->chain[hashpos]) break; + + if(numzeros >= 3 && length > numzeros) { + hashpos = hash->chainz[hashpos]; + if(hash->zeros[hashpos] != numzeros) break; + } else { + hashpos = hash->chain[hashpos]; + /*outdated hash value, happens if particular value was not encountered in whole last window*/ + if(hash->val[hashpos] != (int)hashval) break; + } + } + + if(lazymatching) { + if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) { + lazy = 1; + lazylength = length; + lazyoffset = offset; + continue; /*try the next byte*/ + } + if(lazy) { + lazy = 0; + if(pos == 0) ERROR_BREAK(81); + if(length > lazylength + 1) { + /*push the previous character as literal*/ + if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/); + } else { + length = lazylength; + offset = lazyoffset; + hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/ + hash->headz[numzeros] = -1; /*idem*/ + --pos; + } + } + } + if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/); + + /*encode it as length/distance pair or literal value*/ + if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ { + if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); + } else if(length < minmatch || (length == 3 && offset > 4096)) { + /*compensate for the fact that longer offsets have more extra bits, a + length of only 3 may be not worth it then*/ + if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); + } else { + addLengthDistance(out, length, offset); + for(i = 1; i < length; ++i) { + ++pos; + wpos = pos & (windowsize - 1); + hashval = getHash(in, insize, pos); + if(usezeros && hashval == 0) { + if(numzeros == 0) numzeros = countZeros(in, insize, pos); + else if(pos + numzeros > 
insize || in[pos + numzeros - 1] != 0) --numzeros; + } else { + numzeros = 0; + } + updateHashChain(hash, wpos, hashval, numzeros); + } + } + } /*end of the loop through each character of input*/ + + return error; +} + +/* /////////////////////////////////////////////////////////////////////////// */ + +static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) { + /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte, + 2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/ + + size_t i, j, numdeflateblocks = (datasize + 65534) / 65535; + unsigned datapos = 0; + for(i = 0; i != numdeflateblocks; ++i) { + unsigned BFINAL, BTYPE, LEN, NLEN; + unsigned char firstbyte; + + BFINAL = (i == numdeflateblocks - 1); + BTYPE = 0; + + firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1) << 1) + ((BTYPE & 2) << 1)); + ucvector_push_back(out, firstbyte); + + LEN = 65535; + if(datasize - datapos < 65535) LEN = (unsigned)datasize - datapos; + NLEN = 65535 - LEN; + + ucvector_push_back(out, (unsigned char)(LEN & 255)); + ucvector_push_back(out, (unsigned char)(LEN >> 8)); + ucvector_push_back(out, (unsigned char)(NLEN & 255)); + ucvector_push_back(out, (unsigned char)(NLEN >> 8)); + + /*Decompressed data*/ + for(j = 0; j < 65535 && datapos < datasize; ++j) { + ucvector_push_back(out, data[datapos++]); + } + } + + return 0; +} + +/* +write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman trees. +tree_ll: the tree for lit and len codes. +tree_d: the tree for distance codes. 
+*/ +static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encoded, + const HuffmanTree* tree_ll, const HuffmanTree* tree_d) { + size_t i = 0; + for(i = 0; i != lz77_encoded->size; ++i) { + unsigned val = lz77_encoded->data[i]; + addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, val), HuffmanTree_getLength(tree_ll, val)); + if(val > 256) /*for a length code, 3 more things have to be added*/ { + unsigned length_index = val - FIRST_LENGTH_CODE_INDEX; + unsigned n_length_extra_bits = LENGTHEXTRA[length_index]; + unsigned length_extra_bits = lz77_encoded->data[++i]; + + unsigned distance_code = lz77_encoded->data[++i]; + + unsigned distance_index = distance_code; + unsigned n_distance_extra_bits = DISTANCEEXTRA[distance_index]; + unsigned distance_extra_bits = lz77_encoded->data[++i]; + + addBitsToStream(bp, out, length_extra_bits, n_length_extra_bits); + addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_d, distance_code), + HuffmanTree_getLength(tree_d, distance_code)); + addBitsToStream(bp, out, distance_extra_bits, n_distance_extra_bits); + } + } +} + +/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/ +static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, + const unsigned char* data, size_t datapos, size_t dataend, + const LodePNGCompressSettings* settings, unsigned final) { + unsigned error = 0; + + /* + A block is compressed as follows: The PNG data is lz77 encoded, resulting in + literal bytes and length/distance pairs. This is then huffman compressed with + two huffman trees. One huffman tree is used for the lit and len values ("ll"), + another huffman tree is used for the dist values ("d"). These two trees are + stored using their code lengths, and to compress even more these code lengths + are also run-length encoded and huffman compressed. This gives a huffman tree + of code lengths "cl". 
The code lenghts used to describe this third tree are + the code length code lengths ("clcl"). + */ + + /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/ + uivector lz77_encoded; + HuffmanTree tree_ll; /*tree for lit,len values*/ + HuffmanTree tree_d; /*tree for distance codes*/ + HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/ + uivector frequencies_ll; /*frequency of lit,len codes*/ + uivector frequencies_d; /*frequency of dist codes*/ + uivector frequencies_cl; /*frequency of code length codes*/ + uivector bitlen_lld; /*lit,len,dist code lenghts (int bits), literally (without repeat codes).*/ + uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudemtary run length compression)*/ + /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl + (these are written as is in the file, it would be crazy to compress these using yet another huffman + tree that needs to be represented by yet another set of code lengths)*/ + uivector bitlen_cl; + size_t datasize = dataend - datapos; + + /* + Due to the huffman compression of huffman tree representations ("two levels"), there are some anologies: + bitlen_lld is to tree_cl what data is to tree_ll and tree_d. + bitlen_lld_e is to bitlen_lld what lz77_encoded is to data. + bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded. 
+ */ + + unsigned BFINAL = final; + size_t numcodes_ll, numcodes_d, i; + unsigned HLIT, HDIST, HCLEN; + + uivector_init(&lz77_encoded); + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + HuffmanTree_init(&tree_cl); + uivector_init(&frequencies_ll); + uivector_init(&frequencies_d); + uivector_init(&frequencies_cl); + uivector_init(&bitlen_lld); + uivector_init(&bitlen_lld_e); + uivector_init(&bitlen_cl); + + /*This while loop never loops due to a break at the end, it is here to + allow breaking out of it to the cleanup phase on error conditions.*/ + while(!error) { + if(settings->use_lz77) { + error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, + settings->minmatch, settings->nicematch, settings->lazymatching); + if(error) break; + } else { + if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/); + for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/ + } + + if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/); + if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/); + + /*Count the frequencies of lit, len and dist codes*/ + for(i = 0; i != lz77_encoded.size; ++i) { + unsigned symbol = lz77_encoded.data[i]; + ++frequencies_ll.data[symbol]; + if(symbol > 256) { + unsigned dist = lz77_encoded.data[i + 2]; + ++frequencies_d.data[dist]; + i += 3; + } + } + frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/ + + /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/ + error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15); + if(error) break; + /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/ + error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15); + if(error) break; + + 
numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286; + numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30; + /*store the code lengths of both generated trees in bitlen_lld*/ + for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i)); + for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i)); + + /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times), + 17 (3-10 zeroes), 18 (11-138 zeroes)*/ + for(i = 0; i != (unsigned)bitlen_lld.size; ++i) { + unsigned j = 0; /*amount of repititions*/ + while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j; + + if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/ { + ++j; /*include the first zero*/ + if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ { + uivector_push_back(&bitlen_lld_e, 17); + uivector_push_back(&bitlen_lld_e, j - 3); + } else /*repeat code 18 supports max 138 zeroes*/ { + if(j > 138) j = 138; + uivector_push_back(&bitlen_lld_e, 18); + uivector_push_back(&bitlen_lld_e, j - 11); + } + i += (j - 1); + } else if(j >= 3) /*repeat code for value other than zero*/ { + size_t k; + unsigned num = j / 6, rest = j % 6; + uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]); + for(k = 0; k < num; ++k) { + uivector_push_back(&bitlen_lld_e, 16); + uivector_push_back(&bitlen_lld_e, 6 - 3); + } + if(rest >= 3) { + uivector_push_back(&bitlen_lld_e, 16); + uivector_push_back(&bitlen_lld_e, rest - 3); + } + else j -= rest; + i += j; + } else /*too short to benefit from repeat code*/ { + uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]); + } + } + + /*generate tree_cl, the huffmantree of huffmantrees*/ + + if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/); + for(i = 0; i != bitlen_lld_e.size; ++i) { + 
++frequencies_cl.data[bitlen_lld_e.data[i]]; + /*after a repeat code come the bits that specify the number of repetitions, + those don't need to be in the frequencies_cl calculation*/ + if(bitlen_lld_e.data[i] >= 16) ++i; + } + + error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data, + frequencies_cl.size, frequencies_cl.size, 7); + if(error) break; + + if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/); + for(i = 0; i != tree_cl.numcodes; ++i) { + /*lenghts of code length tree is in the order as specified by deflate*/ + bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]); + } + while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4) { + /*remove zeros at the end, but minimum size must be 4*/ + if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/); + } + if(error) break; + + /* + Write everything into the output + + After the BFINAL and BTYPE, the dynamic block consists out of the following: + - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN + - (HCLEN+4)*3 bits code lengths of code length alphabet + - HLIT + 257 code lenghts of lit/length alphabet (encoded using the code length + alphabet, + possible repetition codes 16, 17, 18) + - HDIST + 1 code lengths of distance alphabet (encoded using the code length + alphabet, + possible repetition codes 16, 17, 18) + - compressed data + - 256 (end code) + */ + + /*Write block type*/ + addBitToStream(bp, out, BFINAL); + addBitToStream(bp, out, 0); /*first bit of BTYPE "dynamic"*/ + addBitToStream(bp, out, 1); /*second bit of BTYPE "dynamic"*/ + + /*write the HLIT, HDIST and HCLEN values*/ + HLIT = (unsigned)(numcodes_ll - 257); + HDIST = (unsigned)(numcodes_d - 1); + HCLEN = (unsigned)bitlen_cl.size - 4; + /*trim zeroes for HCLEN. 
HLIT and HDIST were already trimmed at tree creation*/ + while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN; + addBitsToStream(bp, out, HLIT, 5); + addBitsToStream(bp, out, HDIST, 5); + addBitsToStream(bp, out, HCLEN, 4); + + /*write the code lenghts of the code length alphabet*/ + for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3); + + /*write the lenghts of the lit/len AND the dist alphabet*/ + for(i = 0; i != bitlen_lld_e.size; ++i) { + addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]), + HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i])); + /*extra bits of repeat codes*/ + if(bitlen_lld_e.data[i] == 16) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 2); + else if(bitlen_lld_e.data[i] == 17) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 3); + else if(bitlen_lld_e.data[i] == 18) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 7); + } + + /*write the compressed data symbols*/ + writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d); + /*error: the length of the end code 256 must be larger than 0*/ + if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64); + + /*write the end code*/ + addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256)); + + break; /*end of error-while*/ + } + + /*cleanup*/ + uivector_cleanup(&lz77_encoded); + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + HuffmanTree_cleanup(&tree_cl); + uivector_cleanup(&frequencies_ll); + uivector_cleanup(&frequencies_d); + uivector_cleanup(&frequencies_cl); + uivector_cleanup(&bitlen_lld_e); + uivector_cleanup(&bitlen_lld); + uivector_cleanup(&bitlen_cl); + + return error; +} + +static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash, + const unsigned char* data, + size_t datapos, size_t dataend, + const LodePNGCompressSettings* settings, unsigned final) { + HuffmanTree tree_ll; /*tree for literal values and length codes*/ + HuffmanTree tree_d; /*tree for 
distance codes*/ + + unsigned BFINAL = final; + unsigned error = 0; + size_t i; + + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + + generateFixedLitLenTree(&tree_ll); + generateFixedDistanceTree(&tree_d); + + addBitToStream(bp, out, BFINAL); + addBitToStream(bp, out, 1); /*first bit of BTYPE*/ + addBitToStream(bp, out, 0); /*second bit of BTYPE*/ + + if(settings->use_lz77) /*LZ77 encoded*/ { + uivector lz77_encoded; + uivector_init(&lz77_encoded); + error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, + settings->minmatch, settings->nicematch, settings->lazymatching); + if(!error) writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d); + uivector_cleanup(&lz77_encoded); + } else /*no LZ77, but still will be Huffman compressed*/ { + for(i = datapos; i < dataend; ++i) { + addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, data[i]), HuffmanTree_getLength(&tree_ll, data[i])); + } + } + /*add END code*/ + if(!error) addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256)); + + /*cleanup*/ + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + + return error; +} + +static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + unsigned error = 0; + size_t i, blocksize, numdeflateblocks; + size_t bp = 0; /*the bit pointer*/ + Hash hash; + + if(settings->btype > 2) return 61; + else if(settings->btype == 0) return deflateNoCompression(out, in, insize); + else if(settings->btype == 1) blocksize = insize; + else /*if(settings->btype == 2)*/ { + /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/ + blocksize = insize / 8 + 8; + if(blocksize < 65536) blocksize = 65536; + if(blocksize > 262144) blocksize = 262144; + } + + numdeflateblocks = (insize + blocksize - 1) / blocksize; + if(numdeflateblocks == 0) numdeflateblocks = 1; + + error = hash_init(&hash, settings->windowsize); + 
if(error) return error; + + for(i = 0; i != numdeflateblocks && !error; ++i) { + unsigned final = (i == numdeflateblocks - 1); + size_t start = i * blocksize; + size_t end = start + blocksize; + if(end > insize) end = insize; + + if(settings->btype == 1) error = deflateFixed(out, &bp, &hash, in, start, end, settings, final); + else if(settings->btype == 2) error = deflateDynamic(out, &bp, &hash, in, start, end, settings, final); + } + + hash_cleanup(&hash); + + return error; +} + +unsigned lodepng_deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + unsigned error; + ucvector v; + ucvector_init_buffer(&v, *out, *outsize); + error = lodepng_deflatev(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + return error; +} + +static unsigned deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + if(settings->custom_deflate) { + return settings->custom_deflate(out, outsize, in, insize, settings); + } else { + return lodepng_deflate(out, outsize, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Adler32 */ +/* ////////////////////////////////////////////////////////////////////////// */ + +static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) { + unsigned s1 = adler & 0xffff; + unsigned s2 = (adler >> 16) & 0xffff; + + while(len > 0) { + /*at least 5552 sums can be done before the sums overflow, saving a lot of module divisions*/ + unsigned amount = len > 5552 ? 
5552 : len; + len -= amount; + while(amount > 0) { + s1 += (*data++); + s2 += s1; + --amount; + } + s1 %= 65521; + s2 %= 65521; + } + + return (s2 << 16) | s1; +} + +/*Return the adler32 of the bytes data[0..len-1]*/ +static unsigned adler32(const unsigned char* data, unsigned len) { + return update_adler32(1L, data, len); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Zlib / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_DECODER + +unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGDecompressSettings* settings) { + unsigned error = 0; + unsigned CM, CINFO, FDICT; + + if(insize < 2) return 53; /*error, size of zlib data too small*/ + /*read information from zlib header*/ + if((in[0] * 256 + in[1]) % 31 != 0) { + /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/ + return 24; + } + + CM = in[0] & 15; + CINFO = (in[0] >> 4) & 15; + /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/ + FDICT = (in[1] >> 5) & 1; + /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/ + + if(CM != 8 || CINFO > 7) { + /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/ + return 25; + } + if(FDICT != 0) { + /*error: the specification of PNG says about the zlib stream: + "The additional flags shall not specify a preset dictionary."*/ + return 26; + } + + error = inflate(out, outsize, in + 2, insize - 2, settings); + if(error) return error; + + if(!settings->ignore_adler32) { + unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]); + unsigned checksum = adler32(*out, (unsigned)(*outsize)); + if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/ + } + + return 0; /*no error*/ +} + +static unsigned zlib_decompress(unsigned char** out, size_t* 
outsize, const unsigned char* in, + size_t insize, const LodePNGDecompressSettings* settings) { + if(settings->custom_zlib) { + return settings->custom_zlib(out, outsize, in, insize, settings); + } else { + return lodepng_zlib_decompress(out, outsize, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER + +unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + /*initially, *out must be NULL and outsize 0, if you just give some random *out + that's pointing to a non allocated buffer, this'll crash*/ + ucvector outv; + size_t i; + unsigned error; + unsigned char* deflatedata = 0; + size_t deflatesize = 0; + + /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/ + unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/ + unsigned FLEVEL = 0; + unsigned FDICT = 0; + unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64; + unsigned FCHECK = 31 - CMFFLG % 31; + CMFFLG += FCHECK; + + /*ucvector-controlled version of the output buffer, for dynamic array*/ + ucvector_init_buffer(&outv, *out, *outsize); + + ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8)); + ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255)); + + error = deflate(&deflatedata, &deflatesize, in, insize, settings); + + if(!error) { + unsigned ADLER32 = adler32(in, (unsigned)insize); + for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]); + lodepng_free(deflatedata); + lodepng_add32bitInt(&outv, ADLER32); + } + + *out = outv.data; + *outsize = outv.size; + + return error; +} + +/* compress using the default or custom zlib function */ +static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + if(settings->custom_zlib) { + 
return settings->custom_zlib(out, outsize, in, insize, settings); + } else { + return lodepng_zlib_compress(out, outsize, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#else /*no LODEPNG_COMPILE_ZLIB*/ + +#ifdef LODEPNG_COMPILE_DECODER +static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGDecompressSettings* settings) { + if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ + return settings->custom_zlib(out, outsize, in, insize, settings); +} +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER +static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ + return settings->custom_zlib(out, outsize, in, insize, settings); +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#endif /*LODEPNG_COMPILE_ZLIB*/ + +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_ENCODER + +/*this is a good tradeoff between speed and compression ratio*/ +#define DEFAULT_WINDOWSIZE 2048 + +void lodepng_compress_settings_init(LodePNGCompressSettings* settings) { + /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/ + settings->btype = 2; + settings->use_lz77 = 1; + settings->windowsize = DEFAULT_WINDOWSIZE; + settings->minmatch = 3; + settings->nicematch = 128; + settings->lazymatching = 1; + + settings->custom_zlib = 0; + settings->custom_deflate = 0; + settings->custom_context = 0; +} + +const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0}; + + +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) { + settings->ignore_adler32 = 0; + + 
settings->custom_zlib = 0; + settings->custom_inflate = 0; + settings->custom_context = 0; +} + +const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0}; + +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // End of Zlib related code. Begin of PNG related code. // */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_PNG + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / CRC32 / */ +/* ////////////////////////////////////////////////////////////////////////// */ + + +#ifndef LODEPNG_NO_COMPILE_CRC +/* CRC polynomial: 0xedb88320 */ +static unsigned lodepng_crc32_table[256] = { + 0u, 1996959894u, 3993919788u, 2567524794u, 124634137u, 1886057615u, 3915621685u, 2657392035u, + 249268274u, 2044508324u, 3772115230u, 2547177864u, 162941995u, 2125561021u, 3887607047u, 2428444049u, + 498536548u, 1789927666u, 4089016648u, 2227061214u, 450548861u, 1843258603u, 4107580753u, 2211677639u, + 325883990u, 1684777152u, 4251122042u, 2321926636u, 335633487u, 1661365465u, 4195302755u, 2366115317u, + 997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u, + 901097722u, 1119000684u, 3686517206u, 2898065728u, 853044451u, 1172266101u, 3705015759u, 2882616665u, + 651767980u, 1373503546u, 3369554304u, 3218104598u, 565507253u, 1454621731u, 3485111705u, 3099436303u, + 671266974u, 1594198024u, 3322730930u, 2970347812u, 795835527u, 1483230225u, 3244367275u, 3060149565u, + 1994146192u, 31158534u, 2563907772u, 4023717930u, 1907459465u, 112637215u, 2680153253u, 3904427059u, + 2013776290u, 251722036u, 2517215374u, 3775830040u, 2137656763u, 141376813u, 2439277719u, 3865271297u, + 1802195444u, 
476864866u, 2238001368u, 4066508878u, 1812370925u, 453092731u, 2181625025u, 4111451223u, + 1706088902u, 314042704u, 2344532202u, 4240017532u, 1658658271u, 366619977u, 2362670323u, 4224994405u, + 1303535960u, 984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u, + 1131014506u, 879679996u, 2909243462u, 3663771856u, 1141124467u, 855842277u, 2852801631u, 3708648649u, + 1342533948u, 654459306u, 3188396048u, 3373015174u, 1466479909u, 544179635u, 3110523913u, 3462522015u, + 1591671054u, 702138776u, 2966460450u, 3352799412u, 1504918807u, 783551873u, 3082640443u, 3233442989u, + 3988292384u, 2596254646u, 62317068u, 1957810842u, 3939845945u, 2647816111u, 81470997u, 1943803523u, + 3814918930u, 2489596804u, 225274430u, 2053790376u, 3826175755u, 2466906013u, 167816743u, 2097651377u, + 4027552580u, 2265490386u, 503444072u, 1762050814u, 4150417245u, 2154129355u, 426522225u, 1852507879u, + 4275313526u, 2312317920u, 282753626u, 1742555852u, 4189708143u, 2394877945u, 397917763u, 1622183637u, + 3604390888u, 2714866558u, 953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u, + 3624741850u, 2936675148u, 906185462u, 1090812512u, 3747672003u, 2825379669u, 829329135u, 1181335161u, + 3412177804u, 3160834842u, 628085408u, 1382605366u, 3423369109u, 3138078467u, 570562233u, 1426400815u, + 3317316542u, 2998733608u, 733239954u, 1555261956u, 3268935591u, 3050360625u, 752459403u, 1541320221u, + 2607071920u, 3965973030u, 1969922972u, 40735498u, 2617837225u, 3943577151u, 1913087877u, 83908371u, + 2512341634u, 3803740692u, 2075208622u, 213261112u, 2463272603u, 3855990285u, 2094854071u, 198958881u, + 2262029012u, 4057260610u, 1759359992u, 534414190u, 2176718541u, 4139329115u, 1873836001u, 414664567u, + 2282248934u, 4279200368u, 1711684554u, 285281116u, 2405801727u, 4167216745u, 1634467795u, 376229701u, + 2685067896u, 3608007406u, 1308918612u, 956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u, + 2932959818u, 3654703836u, 
/*
Read one bit from bitstream at bit position *bitpointer and advance the position by one.
Within each byte the bits are consumed from the most significant (position 0) to the
least significant (position 7).
*/
static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) {
  size_t pos = *bitpointer;
  unsigned char byte = bitstream[pos >> 3];
  unsigned shift = 7u - (unsigned)(pos & 0x7);
  *bitpointer = pos + 1;
  return (unsigned char)((byte >> shift) & 1);
}

/*
Read nbits consecutive bits starting at *bitpointer and return them as one value; the
first bit read ends up as the most significant bit of the result. *bitpointer is
advanced by nbits.
*/
static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) {
  unsigned value = 0;
  size_t left;
  for(left = nbits; left != 0; --left) {
    value = (value << 1) | (unsigned)readBitFromReversedStream(bitpointer, bitstream);
  }
  return value;
}
/*
Chunk property flags. Each one is bit 5 (value 32) of one of the four chunk type bytes
stored at chunk[4..7]; that bit is set when the byte is a lowercase ASCII letter.
chunk must point to the start of a chunk (its 4-byte length field).
*/

/*Returns 1 if bit 5 of the 1st type byte is set (ancillary chunk), 0 otherwise.*/
unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) {
  return((chunk[4] & 32) != 0);
}

/*
Returns 1 if bit 5 of the 2nd type byte is set (private chunk), 0 otherwise.
The PNG specification assigns the private bit to the second type byte, chunk[5];
the third byte, chunk[6], carries the reserved bit instead.
*/
unsigned char lodepng_chunk_private(const unsigned char* chunk) {
  return((chunk[5] & 32) != 0);
}

/*Returns 1 if bit 5 of the 4th type byte is set (safe-to-copy chunk), 0 otherwise.*/
unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) {
  return((chunk[7] & 32) != 0);
}
/*
Walk the chunks starting at chunk and return the first one whose type equals the given
4-letter type, or 0 if none is found before end.
chunk: start of a chunk, or of the PNG signature (lodepng_chunk_next skips the signature).
end: one past the last byte of the buffer.
A chunk occupies at least 12 bytes (4 length + 4 type + 4 CRC), so the search stops as
soon as fewer than 12 bytes remain. A chunk that ends exactly at end is still examined;
this matters for an empty final chunk such as IEND.
*/
unsigned char* lodepng_chunk_find(unsigned char* chunk, const unsigned char* end, const char type[5]) {
  for(;;) {
    if(chunk >= end || end - chunk < 12) return 0; /*past the end: chunk + 12 > end*/
    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
    chunk = lodepng_chunk_next(chunk);
  }
}

/*Same as lodepng_chunk_find, for read-only buffers.*/
const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]) {
  for(;;) {
    if(chunk >= end || end - chunk < 12) return 0; /*past the end: chunk + 12 > end*/
    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
    chunk = lodepng_chunk_next_const(chunk);
  }
}
/*
Append a new chunk to the buffer *out of size *outlength, growing it with lodepng_realloc.
length: number of data bytes; type: the 4 type letters; data: the length data bytes.
The chunk is written as 4-byte length, 4 type bytes, the data, then the CRC generated by
lodepng_chunk_generate_crc.
Returns 0 on success, 77 if the new size overflows, 83 if the reallocation fails (in both
error cases *out and *outlength are left unchanged).
*/
unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
                              const char* type, const unsigned char* data) {
  unsigned i;
  unsigned char *chunk, *new_buffer;
  /*computed in size_t: "length + 12" in unsigned arithmetic could itself wrap around and
  slip through the overflow test where size_t is no wider than unsigned*/
  size_t chunk_size = (size_t)length + 12;
  size_t new_length = (*outlength) + chunk_size;
  if(chunk_size < 12 || new_length < chunk_size || new_length < (*outlength)) {
    return 77; /*integer overflow happened*/
  }
  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
  if(!new_buffer) return 83; /*alloc fail*/
  (*out) = new_buffer;
  (*outlength) = new_length;
  chunk = &(*out)[new_length - chunk_size];

  /*1: length*/
  lodepng_set32bitInt(chunk, (unsigned)length);

  /*2: chunk name (4 letters)*/
  chunk[4] = (unsigned char)type[0];
  chunk[5] = (unsigned char)type[1];
  chunk[6] = (unsigned char)type[2];
  chunk[7] = (unsigned char)type[3];

  /*3: the data*/
  for(i = 0; i != length; ++i) chunk[8 + i] = data[i];

  /*4: CRC (of the chunkname characters and the data)*/
  lodepng_chunk_generate_crc(chunk);

  return 0;
}
return 31; + } + return 0; /*allowed color type / bits combination*/ +} + +static unsigned getNumColorChannels(LodePNGColorType colortype) { + switch(colortype) { + case 0: return 1; /*gray*/ + case 2: return 3; /*RGB*/ + case 3: return 1; /*palette*/ + case 4: return 2; /*gray + alpha*/ + case 6: return 4; /*RGBA*/ + } + return 0; /*unexisting color type*/ +} + +static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) { + /*bits per pixel is amount of channels * bits per channel*/ + return getNumColorChannels(colortype) * bitdepth; +} + +/* ////////////////////////////////////////////////////////////////////////// */ + +void lodepng_color_mode_init(LodePNGColorMode* info) { + info->key_defined = 0; + info->key_r = info->key_g = info->key_b = 0; + info->colortype = LCT_RGBA; + info->bitdepth = 8; + info->palette = 0; + info->palettesize = 0; +} + +void lodepng_color_mode_cleanup(LodePNGColorMode* info) { + lodepng_palette_clear(info); +} + +unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) { + size_t i; + lodepng_color_mode_cleanup(dest); + *dest = *source; + if(source->palette) { + dest->palette = (unsigned char*)lodepng_malloc(1024); + if(!dest->palette && source->palettesize) return 83; /*alloc fail*/ + for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i]; + } + return 0; +} + +LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth) { + LodePNGColorMode result; + lodepng_color_mode_init(&result); + result.colortype = colortype; + result.bitdepth = bitdepth; + return result; +} + +static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) { + size_t i; + if(a->colortype != b->colortype) return 0; + if(a->bitdepth != b->bitdepth) return 0; + if(a->key_defined != b->key_defined) return 0; + if(a->key_defined) { + if(a->key_r != b->key_r) return 0; + if(a->key_g != b->key_g) return 0; + if(a->key_b != 
b->key_b) return 0; + } + if(a->palettesize != b->palettesize) return 0; + for(i = 0; i != a->palettesize * 4; ++i) { + if(a->palette[i] != b->palette[i]) return 0; + } + return 1; +} + +void lodepng_palette_clear(LodePNGColorMode* info) { + if(info->palette) lodepng_free(info->palette); + info->palette = 0; + info->palettesize = 0; +} + +unsigned lodepng_palette_add(LodePNGColorMode* info, + unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + unsigned char* data; + /*the same resize technique as C++ std::vectors is used, and here it's made so that for a palette with + the max of 256 colors, it'll have the exact alloc size*/ + if(!info->palette) /*allocate palette if empty*/ { + /*room for 256 colors with 4 bytes each*/ + data = (unsigned char*)lodepng_realloc(info->palette, 1024); + if(!data) return 83; /*alloc fail*/ + else info->palette = data; + } + info->palette[4 * info->palettesize + 0] = r; + info->palette[4 * info->palettesize + 1] = g; + info->palette[4 * info->palettesize + 2] = b; + info->palette[4 * info->palettesize + 3] = a; + ++info->palettesize; + return 0; +} + +/*calculate bits per pixel out of colortype and bitdepth*/ +unsigned lodepng_get_bpp(const LodePNGColorMode* info) { + return lodepng_get_bpp_lct(info->colortype, info->bitdepth); +} + +unsigned lodepng_get_channels(const LodePNGColorMode* info) { + return getNumColorChannels(info->colortype); +} + +unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) { + return info->colortype == LCT_GREY || info->colortype == LCT_GREY_ALPHA; +} + +unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) { + return (info->colortype & 4) != 0; /*4 or 6*/ +} + +unsigned lodepng_is_palette_type(const LodePNGColorMode* info) { + return info->colortype == LCT_PALETTE; +} + +unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) { + size_t i; + for(i = 0; i != info->palettesize; ++i) { + if(info->palette[i * 4 + 3] < 255) return 1; + } + return 0; +} + +unsigned 
/*
Multiply a by b, store the (possibly wrapped) product in *result, and report whether the
multiplication overflowed. Unsigned multiplication wraps in a well-defined way, so the
check has no undefined behavior a compiler could use to remove it.
Returns 1 on overflow, 0 otherwise.
*/
static int lodepng_mulofl(size_t a, size_t b, size_t* result) {
  size_t product = a * b;
  *result = product;
  if(a == 0) return 0; /*0 * b cannot overflow, and avoids dividing by zero below*/
  return (product / a != b) ? 1 : 0;
}

/*
Add a and b, store the (possibly wrapped) sum in *result, and report whether the addition
overflowed. Unsigned addition wraps in a well-defined way.
Returns 1 on overflow, 0 otherwise.
*/
static int lodepng_addofl(size_t a, size_t b, size_t* result) {
  size_t sum = a + b;
  *result = sum;
  return (sum < a) ? 1 : 0;
}
If this check indicates no overflow, +you can safely compute in a size_t (but not an unsigned): +-(size_t)w * (size_t)h * 8 +-amount of bytes in IDAT (including filter, padding and Adam7 bytes) +-amount of bytes in raw color model +Returns 1 if overflow possible, 0 if not. +*/ +static int lodepng_pixel_overflow(unsigned w, unsigned h, + const LodePNGColorMode* pngcolor, const LodePNGColorMode* rawcolor) { + size_t bpp = LODEPNG_MAX(lodepng_get_bpp(pngcolor), lodepng_get_bpp(rawcolor)); + size_t numpixels, total; + size_t line; /* bytes per line in worst case */ + + if(lodepng_mulofl((size_t)w, (size_t)h, &numpixels)) return 1; + if(lodepng_mulofl(numpixels, 8, &total)) return 1; /* bit pointer with 8-bit color, or 8 bytes per channel color */ + + /* Bytes per scanline with the expression "(w / 8) * bpp) + ((w & 7) * bpp + 7) / 8" */ + if(lodepng_mulofl((size_t)(w / 8), bpp, &line)) return 1; + if(lodepng_addofl(line, ((w & 7) * bpp + 7) / 8, &line)) return 1; + + if(lodepng_addofl(line, 5, &line)) return 1; /* 5 bytes overhead per line: 1 filterbyte, 4 for Adam7 worst case */ + if(lodepng_mulofl(line, h, &total)) return 1; /* Total bytes in worst case */ + + return 0; /* no overflow */ +} +#endif /*LODEPNG_COMPILE_DECODER*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +static void LodePNGUnknownChunks_init(LodePNGInfo* info) { + unsigned i; + for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0; + for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0; +} + +static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) { + unsigned i; + for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]); +} + +static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) { + unsigned i; + + LodePNGUnknownChunks_cleanup(dest); + + for(i = 0; i != 3; ++i) { + size_t j; + dest->unknown_chunks_size[i] = src->unknown_chunks_size[i]; + dest->unknown_chunks_data[i] = (unsigned 
char*)lodepng_malloc(src->unknown_chunks_size[i]); + if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/ + for(j = 0; j < src->unknown_chunks_size[i]; ++j) { + dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j]; + } + } + + return 0; +} + +/******************************************************************************/ + +static void LodePNGText_init(LodePNGInfo* info) { + info->text_num = 0; + info->text_keys = NULL; + info->text_strings = NULL; +} + +static void LodePNGText_cleanup(LodePNGInfo* info) { + size_t i; + for(i = 0; i != info->text_num; ++i) { + string_cleanup(&info->text_keys[i]); + string_cleanup(&info->text_strings[i]); + } + lodepng_free(info->text_keys); + lodepng_free(info->text_strings); +} + +static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) { + size_t i = 0; + dest->text_keys = 0; + dest->text_strings = 0; + dest->text_num = 0; + for(i = 0; i != source->text_num; ++i) { + CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i])); + } + return 0; +} + +void lodepng_clear_text(LodePNGInfo* info) { + LodePNGText_cleanup(info); +} + +unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) { + char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1))); + char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1))); + if(!new_keys || !new_strings) { + lodepng_free(new_keys); + lodepng_free(new_strings); + return 83; /*alloc fail*/ + } + + ++info->text_num; + info->text_keys = new_keys; + info->text_strings = new_strings; + + info->text_keys[info->text_num - 1] = alloc_string(key); + info->text_strings[info->text_num - 1] = alloc_string(str); + + return 0; +} + +/******************************************************************************/ + +static void LodePNGIText_init(LodePNGInfo* info) { + info->itext_num = 0; + 
info->itext_keys = NULL; + info->itext_langtags = NULL; + info->itext_transkeys = NULL; + info->itext_strings = NULL; +} + +static void LodePNGIText_cleanup(LodePNGInfo* info) { + size_t i; + for(i = 0; i != info->itext_num; ++i) { + string_cleanup(&info->itext_keys[i]); + string_cleanup(&info->itext_langtags[i]); + string_cleanup(&info->itext_transkeys[i]); + string_cleanup(&info->itext_strings[i]); + } + lodepng_free(info->itext_keys); + lodepng_free(info->itext_langtags); + lodepng_free(info->itext_transkeys); + lodepng_free(info->itext_strings); +} + +static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) { + size_t i = 0; + dest->itext_keys = 0; + dest->itext_langtags = 0; + dest->itext_transkeys = 0; + dest->itext_strings = 0; + dest->itext_num = 0; + for(i = 0; i != source->itext_num; ++i) { + CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i], + source->itext_transkeys[i], source->itext_strings[i])); + } + return 0; +} + +void lodepng_clear_itext(LodePNGInfo* info) { + LodePNGIText_cleanup(info); +} + +unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, + const char* transkey, const char* str) { + char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1))); + char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1))); + char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1))); + char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1))); + if(!new_keys || !new_langtags || !new_transkeys || !new_strings) { + lodepng_free(new_keys); + lodepng_free(new_langtags); + lodepng_free(new_transkeys); + lodepng_free(new_strings); + return 83; /*alloc fail*/ + } + + ++info->itext_num; + info->itext_keys = new_keys; + info->itext_langtags = new_langtags; + info->itext_transkeys = 
new_transkeys; + info->itext_strings = new_strings; + + info->itext_keys[info->itext_num - 1] = alloc_string(key); + info->itext_langtags[info->itext_num - 1] = alloc_string(langtag); + info->itext_transkeys[info->itext_num - 1] = alloc_string(transkey); + info->itext_strings[info->itext_num - 1] = alloc_string(str); + + return 0; +} + +/* same as set but does not delete */ +static unsigned lodepng_assign_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) { + info->iccp_name = alloc_string(name); + info->iccp_profile = (unsigned char*)lodepng_malloc(profile_size); + + if(!info->iccp_name || !info->iccp_profile) return 83; /*alloc fail*/ + + memcpy(info->iccp_profile, profile, profile_size); + info->iccp_profile_size = profile_size; + + return 0; /*ok*/ +} + +unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) { + if(info->iccp_name) lodepng_clear_icc(info); + info->iccp_defined = 1; + + return lodepng_assign_icc(info, name, profile, profile_size); +} + +void lodepng_clear_icc(LodePNGInfo* info) { + string_cleanup(&info->iccp_name); + lodepng_free(info->iccp_profile); + info->iccp_profile = NULL; + info->iccp_profile_size = 0; + info->iccp_defined = 0; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +void lodepng_info_init(LodePNGInfo* info) { + lodepng_color_mode_init(&info->color); + info->interlace_method = 0; + info->compression_method = 0; + info->filter_method = 0; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + info->background_defined = 0; + info->background_r = info->background_g = info->background_b = 0; + + LodePNGText_init(info); + LodePNGIText_init(info); + + info->time_defined = 0; + info->phys_defined = 0; + + info->gama_defined = 0; + info->chrm_defined = 0; + info->srgb_defined = 0; + info->iccp_defined = 0; + info->iccp_name = NULL; + info->iccp_profile = NULL; + + LodePNGUnknownChunks_init(info); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} + 
/*
Store one group of bits into a packed byte array.
out: byte array to write into; index: bitgroup index; bits: bitgroup size (1, 2 or 4);
in: bitgroup value, only its lowest "bits" bits are used.
Groups are packed from the most significant end of each byte. Writing the first group of
a byte overwrites the whole byte; later groups are OR-ed in.
*/
static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) {
  unsigned char* target = &out[index * bits / 8];
  unsigned last_slot; /*8 / bits - 1*/
  unsigned slot;      /*position of this group inside its byte, 0 = most significant*/
  unsigned value;

  switch(bits) {
    case 1: last_slot = 7; break;
    case 2: last_slot = 3; break;
    default: last_slot = 1; break;
  }

  slot = (unsigned)(index & last_slot);
  /*mask off any other bits of the input value, then move the group into place*/
  value = (in & ((1u << bits) - 1u)) << (bits * (last_slot - slot));

  if(slot != 0) *target = (unsigned char)(*target | value);
  else *target = (unsigned char)value;
}
+*/ +struct ColorTree { + ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/ + int index; /*the payload. Only has a meaningful value if this is in the last level*/ +}; + +static void color_tree_init(ColorTree* tree) { + int i; + for(i = 0; i != 16; ++i) tree->children[i] = 0; + tree->index = -1; +} + +static void color_tree_cleanup(ColorTree* tree) { + int i; + for(i = 0; i != 16; ++i) { + if(tree->children[i]) { + color_tree_cleanup(tree->children[i]); + lodepng_free(tree->children[i]); + } + } +} + +/*returns -1 if color not present, its index otherwise*/ +static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + int bit = 0; + for(bit = 0; bit < 8; ++bit) { + int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); + if(!tree->children[i]) return -1; + else tree = tree->children[i]; + } + return tree ? tree->index : -1; +} + +#ifdef LODEPNG_COMPILE_ENCODER +static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + return color_tree_get(tree, r, g, b, a) >= 0; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/*color is not allowed to already exist. 
+Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/ +static void color_tree_add(ColorTree* tree, + unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) { + int bit; + for(bit = 0; bit < 8; ++bit) { + int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); + if(!tree->children[i]) { + tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree)); + color_tree_init(tree->children[i]); + } + tree = tree->children[i]; + } + tree->index = (int)index; +} + +/*put a pixel, given its RGBA color, into image of any color type*/ +static unsigned rgba8ToPixel(unsigned char* out, size_t i, + const LodePNGColorMode* mode, ColorTree* tree /*for palette*/, + unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + if(mode->colortype == LCT_GREY) { + unsigned char gray = r; /*((unsigned short)r + g + b) / 3;*/ + if(mode->bitdepth == 8) out[i] = gray; + else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = gray; + else { + /*take the most significant bits of gray*/ + gray = (gray >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1); + addColorBits(out, i, mode->bitdepth, gray); + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + out[i * 3 + 0] = r; + out[i * 3 + 1] = g; + out[i * 3 + 2] = b; + } else { + out[i * 6 + 0] = out[i * 6 + 1] = r; + out[i * 6 + 2] = out[i * 6 + 3] = g; + out[i * 6 + 4] = out[i * 6 + 5] = b; + } + } else if(mode->colortype == LCT_PALETTE) { + int index = color_tree_get(tree, r, g, b, a); + if(index < 0) return 82; /*color not in palette*/ + if(mode->bitdepth == 8) out[i] = index; + else addColorBits(out, i, mode->bitdepth, (unsigned)index); + } else if(mode->colortype == LCT_GREY_ALPHA) { + unsigned char gray = r; /*((unsigned short)r + g + b) / 3;*/ + if(mode->bitdepth == 8) { + out[i * 2 + 0] = gray; + out[i * 2 + 1] = a; + } else if(mode->bitdepth == 16) { + out[i * 4 + 0] = out[i * 4 + 1] = 
gray; + out[i * 4 + 2] = out[i * 4 + 3] = a; + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + out[i * 4 + 0] = r; + out[i * 4 + 1] = g; + out[i * 4 + 2] = b; + out[i * 4 + 3] = a; + } else { + out[i * 8 + 0] = out[i * 8 + 1] = r; + out[i * 8 + 2] = out[i * 8 + 3] = g; + out[i * 8 + 4] = out[i * 8 + 5] = b; + out[i * 8 + 6] = out[i * 8 + 7] = a; + } + } + + return 0; /*no error*/ +} + +/*put a pixel, given its RGBA16 color, into image of any color 16-bitdepth type*/ +static void rgba16ToPixel(unsigned char* out, size_t i, + const LodePNGColorMode* mode, + unsigned short r, unsigned short g, unsigned short b, unsigned short a) { + if(mode->colortype == LCT_GREY) { + unsigned short gray = r; /*((unsigned)r + g + b) / 3;*/ + out[i * 2 + 0] = (gray >> 8) & 255; + out[i * 2 + 1] = gray & 255; + } else if(mode->colortype == LCT_RGB) { + out[i * 6 + 0] = (r >> 8) & 255; + out[i * 6 + 1] = r & 255; + out[i * 6 + 2] = (g >> 8) & 255; + out[i * 6 + 3] = g & 255; + out[i * 6 + 4] = (b >> 8) & 255; + out[i * 6 + 5] = b & 255; + } else if(mode->colortype == LCT_GREY_ALPHA) { + unsigned short gray = r; /*((unsigned)r + g + b) / 3;*/ + out[i * 4 + 0] = (gray >> 8) & 255; + out[i * 4 + 1] = gray & 255; + out[i * 4 + 2] = (a >> 8) & 255; + out[i * 4 + 3] = a & 255; + } else if(mode->colortype == LCT_RGBA) { + out[i * 8 + 0] = (r >> 8) & 255; + out[i * 8 + 1] = r & 255; + out[i * 8 + 2] = (g >> 8) & 255; + out[i * 8 + 3] = g & 255; + out[i * 8 + 4] = (b >> 8) & 255; + out[i * 8 + 5] = b & 255; + out[i * 8 + 6] = (a >> 8) & 255; + out[i * 8 + 7] = a & 255; + } +} + +/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.*/ +static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, + unsigned char* b, unsigned char* a, + const unsigned char* in, size_t i, + const LodePNGColorMode* mode) { + if(mode->colortype == LCT_GREY) { + if(mode->bitdepth == 8) { + *r = *g = *b = in[i]; + if(mode->key_defined && *r == 
mode->key_r) *a = 0; + else *a = 255; + } else if(mode->bitdepth == 16) { + *r = *g = *b = in[i * 2 + 0]; + if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; + else *a = 255; + } else { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = i * mode->bitdepth; + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + *r = *g = *b = (value * 255) / highest; + if(mode->key_defined && value == mode->key_r) *a = 0; + else *a = 255; + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2]; + if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0; + else *a = 255; + } else { + *r = in[i * 6 + 0]; + *g = in[i * 6 + 2]; + *b = in[i * 6 + 4]; + if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; + else *a = 255; + } + } else if(mode->colortype == LCT_PALETTE) { + unsigned index; + if(mode->bitdepth == 8) index = in[i]; + else { + size_t j = i * mode->bitdepth; + index = readBitsFromReversedStream(&j, in, mode->bitdepth); + } + + if(index >= mode->palettesize) { + /*This is an error according to the PNG spec, but common PNG decoders make it black instead. 
+ Done here too, slightly faster due to no error handling needed.*/ + *r = *g = *b = 0; + *a = 255; + } else { + *r = mode->palette[index * 4 + 0]; + *g = mode->palette[index * 4 + 1]; + *b = mode->palette[index * 4 + 2]; + *a = mode->palette[index * 4 + 3]; + } + } else if(mode->colortype == LCT_GREY_ALPHA) { + if(mode->bitdepth == 8) { + *r = *g = *b = in[i * 2 + 0]; + *a = in[i * 2 + 1]; + } else { + *r = *g = *b = in[i * 4 + 0]; + *a = in[i * 4 + 2]; + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + *r = in[i * 4 + 0]; + *g = in[i * 4 + 1]; + *b = in[i * 4 + 2]; + *a = in[i * 4 + 3]; + } else { + *r = in[i * 8 + 0]; + *g = in[i * 8 + 2]; + *b = in[i * 8 + 4]; + *a = in[i * 8 + 6]; + } + } +} + +/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color +mode test cases, optimized to convert the colors much faster, when converting +to RGBA or RGB with 8 bit per cannel. buffer must be RGBA or RGB output with +enough memory, if has_alpha is true the output is RGBA. mode has the color mode +of the input buffer.*/ +static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels, + unsigned has_alpha, const unsigned char* in, + const LodePNGColorMode* mode) { + unsigned num_channels = has_alpha ? 4 : 3; + size_t i; + if(mode->colortype == LCT_GREY) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i]; + if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255; + } + } else if(mode->bitdepth == 16) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2]; + if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 
0 : 255; + } + } else { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; + if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255; + } + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 3 + 0]; + buffer[1] = in[i * 3 + 1]; + buffer[2] = in[i * 3 + 2]; + if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r + && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 6 + 0]; + buffer[1] = in[i * 6 + 2]; + buffer[2] = in[i * 6 + 4]; + if(has_alpha) buffer[3] = mode->key_defined + && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255; + } + } + } else if(mode->colortype == LCT_PALETTE) { + unsigned index; + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + if(mode->bitdepth == 8) index = in[i]; + else index = readBitsFromReversedStream(&j, in, mode->bitdepth); + + if(index >= mode->palettesize) { + /*This is an error according to the PNG spec, but most PNG decoders make it black instead. 
+ Done here too, slightly faster due to no error handling needed.*/ + buffer[0] = buffer[1] = buffer[2] = 0; + if(has_alpha) buffer[3] = 255; + } else { + buffer[0] = mode->palette[index * 4 + 0]; + buffer[1] = mode->palette[index * 4 + 1]; + buffer[2] = mode->palette[index * 4 + 2]; + if(has_alpha) buffer[3] = mode->palette[index * 4 + 3]; + } + } + } else if(mode->colortype == LCT_GREY_ALPHA) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0]; + if(has_alpha) buffer[3] = in[i * 2 + 1]; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; + if(has_alpha) buffer[3] = in[i * 4 + 2]; + } + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 4 + 0]; + buffer[1] = in[i * 4 + 1]; + buffer[2] = in[i * 4 + 2]; + if(has_alpha) buffer[3] = in[i * 4 + 3]; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 8 + 0]; + buffer[1] = in[i * 8 + 2]; + buffer[2] = in[i * 8 + 4]; + if(has_alpha) buffer[3] = in[i * 8 + 6]; + } + } + } +} + +/*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with +given color type, but the given color type must be 16-bit itself.*/ +static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a, + const unsigned char* in, size_t i, const LodePNGColorMode* mode) { + if(mode->colortype == LCT_GREY) { + *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1]; + if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; + else *a = 65535; + } else if(mode->colortype == LCT_RGB) { + *r = 256u * in[i * 6 + 0] + in[i * 6 + 1]; + *g = 256u * in[i * 6 + 2] + in[i * 6 + 3]; + *b = 256u * in[i * 6 + 4] + in[i * 6 + 5]; + if(mode->key_defined + && 256u * in[i * 6 + 0] + 
in[i * 6 + 1] == mode->key_r + && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; + else *a = 65535; + } else if(mode->colortype == LCT_GREY_ALPHA) { + *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1]; + *a = 256u * in[i * 4 + 2] + in[i * 4 + 3]; + } else if(mode->colortype == LCT_RGBA) { + *r = 256u * in[i * 8 + 0] + in[i * 8 + 1]; + *g = 256u * in[i * 8 + 2] + in[i * 8 + 3]; + *b = 256u * in[i * 8 + 4] + in[i * 8 + 5]; + *a = 256u * in[i * 8 + 6] + in[i * 8 + 7]; + } +} + +unsigned lodepng_convert(unsigned char* out, const unsigned char* in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, + unsigned w, unsigned h) { + size_t i; + ColorTree tree; + size_t numpixels = (size_t)w * (size_t)h; + unsigned error = 0; + + if(lodepng_color_mode_equal(mode_out, mode_in)) { + size_t numbytes = lodepng_get_raw_size(w, h, mode_in); + for(i = 0; i != numbytes; ++i) out[i] = in[i]; + return 0; + } + + if(mode_out->colortype == LCT_PALETTE) { + size_t palettesize = mode_out->palettesize; + const unsigned char* palette = mode_out->palette; + size_t palsize = (size_t)1u << mode_out->bitdepth; + /*if the user specified output palette but did not give the values, assume + they want the values of the input color type (assuming that one is palette). + Note that we never create a new palette ourselves.*/ + if(palettesize == 0) { + palettesize = mode_in->palettesize; + palette = mode_in->palette; + /*if the input was also palette with same bitdepth, then the color types are also + equal, so copy literally. 
This to preserve the exact indices that were in the PNG + even in case there are duplicate colors in the palette.*/ + if (mode_in->colortype == LCT_PALETTE && mode_in->bitdepth == mode_out->bitdepth) { + size_t numbytes = lodepng_get_raw_size(w, h, mode_in); + for(i = 0; i != numbytes; ++i) out[i] = in[i]; + return 0; + } + } + if(palettesize < palsize) palsize = palettesize; + color_tree_init(&tree); + for(i = 0; i != palsize; ++i) { + const unsigned char* p = &palette[i * 4]; + color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned)i); + } + } + + if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) { + for(i = 0; i != numpixels; ++i) { + unsigned short r = 0, g = 0, b = 0, a = 0; + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + rgba16ToPixel(out, i, mode_out, r, g, b, a); + } + } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) { + getPixelColorsRGBA8(out, numpixels, 1, in, mode_in); + } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) { + getPixelColorsRGBA8(out, numpixels, 0, in, mode_in); + } else { + unsigned char r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); + error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a); + if (error) break; + } + } + + if(mode_out->colortype == LCT_PALETTE) { + color_tree_cleanup(&tree); + } + + return error; +} + + +/* Converts a single rgb color without alpha from one type to another, color bits truncated to +their bitdepth. In case of single channel (gray or palette), only the r channel is used. Slow +function, do not use to process all pixels of an image. Alpha channel not supported on purpose: +this is for bKGD, supporting alpha may prevent it from finding a color in the palette, from the +specification it looks like bKGD should ignore the alpha values of the palette since it can use +any palette index but doesn't have an alpha channel. Idem with ignoring color key. 
*/ +unsigned lodepng_convert_rgb( + unsigned* r_out, unsigned* g_out, unsigned* b_out, + unsigned r_in, unsigned g_in, unsigned b_in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in) { + unsigned r = 0, g = 0, b = 0; + unsigned mul = 65535 / ((1u << mode_in->bitdepth) - 1u); /*65535, 21845, 4369, 257, 1*/ + unsigned shift = 16 - mode_out->bitdepth; + + if(mode_in->colortype == LCT_GREY || mode_in->colortype == LCT_GREY_ALPHA) { + r = g = b = r_in * mul; + } else if(mode_in->colortype == LCT_RGB || mode_in->colortype == LCT_RGBA) { + r = r_in * mul; + g = g_in * mul; + b = b_in * mul; + } else if(mode_in->colortype == LCT_PALETTE) { + if(r_in >= mode_in->palettesize) return 82; + r = mode_in->palette[r_in * 4 + 0] * 257u; + g = mode_in->palette[r_in * 4 + 1] * 257u; + b = mode_in->palette[r_in * 4 + 2] * 257u; + } else { + return 31; + } + + /* now convert to output format */ + if(mode_out->colortype == LCT_GREY || mode_out->colortype == LCT_GREY_ALPHA) { + *r_out = r >> shift ; + } else if(mode_out->colortype == LCT_RGB || mode_out->colortype == LCT_RGBA) { + *r_out = r >> shift ; + *g_out = g >> shift ; + *b_out = b >> shift ; + } else if(mode_out->colortype == LCT_PALETTE) { + unsigned i; + /* a 16-bit color cannot be in the palette */ + if((r >> 8) != (r & 255) || (g >> 8) != (g & 255) || (b >> 8) != (b & 255)) return 82; + for(i = 0; i < mode_out->palettesize; i++) { + unsigned j = i * 4; + if((r >> 8) == mode_out->palette[j + 0] && (g >> 8) == mode_out->palette[j + 1] && + (b >> 8) == mode_out->palette[j + 2]) { + *r_out = i; + return 0; + } + } + return 82; + } else { + return 31; + } + + return 0; +} + +#ifdef LODEPNG_COMPILE_ENCODER + +void lodepng_color_profile_init(LodePNGColorProfile* profile) { + profile->colored = 0; + profile->key = 0; + profile->key_r = profile->key_g = profile->key_b = 0; + profile->alpha = 0; + profile->numcolors = 0; + profile->bits = 1; + profile->numpixels = 0; +} + +/*function used for debug purposes with 
C++*/
/*void printColorProfile(LodePNGColorProfile* p) {
  std::cout << "colored: " << (int)p->colored << ", ";
  std::cout << "key: " << (int)p->key << ", ";
  std::cout << "key_r: " << (int)p->key_r << ", ";
  std::cout << "key_g: " << (int)p->key_g << ", ";
  std::cout << "key_b: " << (int)p->key_b << ", ";
  std::cout << "alpha: " << (int)p->alpha << ", ";
  std::cout << "numcolors: " << (int)p->numcolors << ", ";
  std::cout << "bits: " << (int)p->bits << std::endl;
}*/

/*Returns how many bits needed to represent given value (max 8 bit)
The result is 1 for 0 and 255, 2 for the other multiples of 85, 4 for the other multiples of 17,
and 8 for everything else.*/
static unsigned getValueRequiredBits(unsigned char value) {
  if(value == 0 || value == 255) return 1;
  /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/
  if(value % 17 == 0) return value % 85 == 0 ? 2 : 4;
  return 8;
}

/*profile must already have been inited.
It's ok to set some parameters of profile to done already.
Scans the w * h raw image in (color mode mode_in) and updates profile with what it finds:
whether any pixel is colored, whether alpha or a color key is needed, the number of distinct
colors (with the first 256 stored in profile->palette) and the bits per channel required.
numpixels is added to profile->numpixels, so calls for several images accumulate.
The local error is never assigned after its initialization, so this currently always returns 0.*/
unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
                                   const unsigned char* in, unsigned w, unsigned h,
                                   const LodePNGColorMode* mode_in) {
  unsigned error = 0;
  size_t i;
  ColorTree tree;
  size_t numpixels = (size_t)w * (size_t)h;

  /* mark things as done already if it would be impossible to have a more expensive case */
  unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0;
  unsigned alpha_done = lodepng_can_have_alpha(mode_in) ? 0 : 1;
  unsigned numcolors_done = 0;
  unsigned bpp = lodepng_get_bpp(mode_in);
  unsigned bits_done = (profile->bits == 1 && bpp == 1) ? 1 : 0;
  unsigned sixteen = 0; /* whether the input image is 16 bit */
  unsigned maxnumcolors = 257;
  /*with at most 8 bits per pixel the input cannot add more than 2^bpp new colors*/
  if(bpp <= 8) maxnumcolors = LODEPNG_MIN(257, profile->numcolors + (1u << bpp));

  profile->numpixels += numpixels;

  color_tree_init(&tree);

  /*If the profile was already filled in from previous data, fill its palette in tree
  and mark things as done already if we know they are the most expensive case already*/
  if(profile->alpha) alpha_done = 1;
  if(profile->colored) colored_done = 1;
  if(profile->bits == 16) numcolors_done = 1;
  if(profile->bits >= bpp) bits_done = 1;
  if(profile->numcolors >= maxnumcolors) numcolors_done = 1;

  if(!numcolors_done) {
    for(i = 0; i < profile->numcolors; i++) {
      const unsigned char* color = &profile->palette[i * 4];
      /*NOTE(review): nothing here reacts to color_tree_add failing to allocate a node - confirm
      how allocation failure is meant to be handled*/
      color_tree_add(&tree, color[0], color[1], color[2], color[3], (unsigned int)i);
    }
  }

  /*Check if the 16-bit input is truly 16-bit*/
  if(mode_in->bitdepth == 16 && !sixteen) {
    unsigned short r, g, b, a;
    for(i = 0; i != numpixels; ++i) {
      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
      if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
         (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ {
        profile->bits = 16;
        sixteen = 1;
        bits_done = 1;
        numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/
        break;
      }
    }
  }

  if(sixteen) {
    unsigned short r = 0, g = 0, b = 0, a = 0;

    /*first pass over the 16-bit samples: update colored / alpha / key*/
    for(i = 0; i != numpixels; ++i) {
      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);

      if(!colored_done && (r != g || r != b)) {
        profile->colored = 1;
        colored_done = 1;
      }

      if(!alpha_done) {
        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
        /*translucent pixel, or a transparent pixel whose color differs from the current key:
        full alpha is needed*/
        if(a != 65535 && (a != 0 || (profile->key && !matchkey))) {
          profile->alpha = 1;
          profile->key = 0;
          alpha_done = 1;
        } else if(a == 0 && !profile->alpha && !profile->key) {
          /*first fully transparent pixel: remember its color as the candidate key*/
          profile->key = 1;
          profile->key_r = r;
          profile->key_g = g;
          profile->key_b = b;
        } else if(a == 65535 && profile->key && matchkey) {
          /* Color key cannot be used if an opaque pixel also has that RGB color. */
          profile->alpha = 1;
          profile->key = 0;
          alpha_done = 1;
        }
      }
      if(alpha_done && numcolors_done && colored_done && bits_done) break;
    }

    /*second pass: a pixel seen before the key was chosen may be non-transparent with the key color*/
    if(profile->key && !profile->alpha) {
      for(i = 0; i != numpixels; ++i) {
        getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b) {
          /* Color key cannot be used if an opaque pixel also has that RGB color. */
          profile->alpha = 1;
          profile->key = 0;
          alpha_done = 1;
        }
      }
    }
  } else /* < 16-bit */ {
    unsigned char r = 0, g = 0, b = 0, a = 0;
    /*first pass over the 8-bit view of the pixels: update bits / colored / alpha / key / colors*/
    for(i = 0; i != numpixels; ++i) {
      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);

      if(!bits_done && profile->bits < 8) {
        /*only r is checked, < 8 bits is only relevant for grayscale*/
        unsigned bits = getValueRequiredBits(r);
        if(bits > profile->bits) profile->bits = bits;
      }
      bits_done = (profile->bits >= bpp);

      if(!colored_done && (r != g || r != b)) {
        profile->colored = 1;
        colored_done = 1;
        if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/
      }

      if(!alpha_done) {
        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
        /*same three cases as in the 16-bit branch, with 255 as the opaque value*/
        if(a != 255 && (a != 0 || (profile->key && !matchkey))) {
          profile->alpha = 1;
          profile->key = 0;
          alpha_done = 1;
          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
        } else if(a == 0 && !profile->alpha && !profile->key) {
          profile->key = 1;
          profile->key_r = r;
          profile->key_g = g;
          profile->key_b = b;
        } else if(a == 255 && profile->key && matchkey) {
          /* Color key cannot be used if an opaque pixel also has that RGB color. */
          profile->alpha = 1;
          profile->key = 0;
          alpha_done = 1;
          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
        }
      }

      if(!numcolors_done) {
        if(!color_tree_has(&tree, r, g, b, a)) {
          /*new color: register it in the tree, and in the profile palette while there is room*/
          color_tree_add(&tree, r, g, b, a, profile->numcolors);
          if(profile->numcolors < 256) {
            unsigned char* p = profile->palette;
            unsigned n = profile->numcolors;
            p[n * 4 + 0] = r;
            p[n * 4 + 1] = g;
            p[n * 4 + 2] = b;
            p[n * 4 + 3] = a;
          }
          ++profile->numcolors;
          numcolors_done = profile->numcolors >= maxnumcolors;
        }
      }

      if(alpha_done && numcolors_done && colored_done && bits_done) break;
    }

    /*second pass: a pixel seen before the key was chosen may be non-transparent with the key color*/
    if(profile->key && !profile->alpha) {
      for(i = 0; i != numpixels; ++i) {
        getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
        if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b) {
          /* Color key cannot be used if an opaque pixel also has that RGB color. */
          profile->alpha = 1;
          profile->key = 0;
          alpha_done = 1;
          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
        }
      }
    }

    /*make the profile's key always 16-bit for consistency - repeat each byte twice*/
    /*NOTE(review): this runs on every call that takes the < 16-bit branch, also when the key
    was stored (already widened) by an earlier call on the same profile, and the comparisons
    above test 8-bit samples against that stored key - verify the repeated-call case*/
    profile->key_r += (profile->key_r << 8);
    profile->key_g += (profile->key_g << 8);
    profile->key_b += (profile->key_b << 8);
  }

  color_tree_cleanup(&tree);
  return error;
}

#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
/*Adds a single color to the color profile. The profile must already have been inited. The color must be given as 16-bit
(with 2 bytes repeating for 8-bit and 65535 for opaque alpha channel). This function is expensive, do not call it for
all pixels of an image but only for a few additional values.
*/ +static unsigned lodepng_color_profile_add(LodePNGColorProfile* profile, + unsigned r, unsigned g, unsigned b, unsigned a) { + unsigned error = 0; + unsigned char image[8]; + LodePNGColorMode mode; + lodepng_color_mode_init(&mode); + image[0] = r >> 8; image[1] = r; image[2] = g >> 8; image[3] = g; + image[4] = b >> 8; image[5] = b; image[6] = a >> 8; image[7] = a; + mode.bitdepth = 16; + mode.colortype = LCT_RGBA; + error = lodepng_get_color_profile(profile, image, 1, 1, &mode); + lodepng_color_mode_cleanup(&mode); + return error; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/*Autochoose color model given the computed profile. mode_in is to copy palette order from +when relevant.*/ +static unsigned auto_choose_color_from_profile(LodePNGColorMode* mode_out, + const LodePNGColorMode* mode_in, + const LodePNGColorProfile* prof) { + unsigned error = 0; + unsigned palettebits, palette_ok; + size_t i, n; + size_t numpixels = prof->numpixels; + + unsigned alpha = prof->alpha; + unsigned key = prof->key; + unsigned bits = prof->bits; + + mode_out->key_defined = 0; + + if(key && numpixels <= 16) { + alpha = 1; /*too few pixels to justify tRNS chunk overhead*/ + key = 0; + if(bits < 8) bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } + n = prof->numcolors; + palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 
4 : 8)); + palette_ok = n <= 256 && bits <= 8; + if(numpixels < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/ + if(!prof->colored && bits <= palettebits) palette_ok = 0; /*gray is less overhead*/ + + if(palette_ok) { + const unsigned char* p = prof->palette; + lodepng_palette_clear(mode_out); /*remove potential earlier palette*/ + for(i = 0; i != prof->numcolors; ++i) { + error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]); + if(error) break; + } + + mode_out->colortype = LCT_PALETTE; + mode_out->bitdepth = palettebits; + + if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize + && mode_in->bitdepth == mode_out->bitdepth) { + /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/ + lodepng_color_mode_cleanup(mode_out); + lodepng_color_mode_copy(mode_out, mode_in); + } + } else /*8-bit or 16-bit per channel*/ { + mode_out->bitdepth = bits; + mode_out->colortype = alpha ? (prof->colored ? LCT_RGBA : LCT_GREY_ALPHA) + : (prof->colored ? LCT_RGB : LCT_GREY); + + if(key) { + unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/ + mode_out->key_r = prof->key_r & mask; + mode_out->key_g = prof->key_g & mask; + mode_out->key_b = prof->key_b & mask; + mode_out->key_defined = 1; + } + } + + return error; +} + +/*Automatically chooses color type that gives smallest amount of bits in the +output image, e.g. gray if there are only grayscale pixels, palette if there +are less than 256 colors, color key if only single transparent color, ... +Updates values of mode with a potentially smaller color model. 
mode_out should +contain the user chosen color model, but will be overwritten with the new chosen one.*/ +unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, + const unsigned char* image, unsigned w, unsigned h, + const LodePNGColorMode* mode_in) { + unsigned error = 0; + LodePNGColorProfile prof; + lodepng_color_profile_init(&prof); + error = lodepng_get_color_profile(&prof, image, w, h, mode_in); + if(error) return error; + return auto_choose_color_from_profile(mode_out, mode_in, &prof); +} + +#endif /* #ifdef LODEPNG_COMPILE_ENCODER */ + +/* +Paeth predicter, used by PNG filter type 4 +The parameters are of type short, but should come from unsigned chars, the shorts +are only needed to make the paeth calculation correct. +*/ +static unsigned char paethPredictor(short a, short b, short c) { + short pa = abs(b - c); + short pb = abs(a - c); + short pc = abs(a + b - c - c); + + if(pc < pa && pc < pb) return (unsigned char)c; + else if(pb < pa) return (unsigned char)b; + else return (unsigned char)a; +} + +/*shared values used by multiple Adam7 related functions*/ + +static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/ +static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/ +static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/ +static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/ + +/* +Outputs various dimensions and positions in the image related to the Adam7 reduced images. 
+passw: output containing the width of the 7 passes +passh: output containing the height of the 7 passes +filter_passstart: output containing the index of the start and end of each + reduced image with filter bytes +padded_passstart output containing the index of the start and end of each + reduced image when without filter bytes but with padded scanlines +passstart: output containing the index of the start and end of each reduced + image without padding between scanlines, but still padding between the images +w, h: width and height of non-interlaced image +bpp: bits per pixel +"padded" is only relevant if bpp is less than 8 and a scanline or image does not + end at a full byte +*/ +static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8], + size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) { + /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/ + unsigned i; + + /*calculate width and height in pixels of each pass*/ + for(i = 0; i != 7; ++i) { + passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i]; + passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i]; + if(passw[i] == 0) passh[i] = 0; + if(passh[i] == 0) passw[i] = 0; + } + + filter_passstart[0] = padded_passstart[0] = passstart[0] = 0; + for(i = 0; i != 7; ++i) { + /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/ + filter_passstart[i + 1] = filter_passstart[i] + + ((passw[i] && passh[i]) ? 
passh[i] * (1 + (passw[i] * bpp + 7) / 8) : 0); + /*bits padded if needed to fill full byte at end of each scanline*/ + padded_passstart[i + 1] = padded_passstart[i] + passh[i] * ((passw[i] * bpp + 7) / 8); + /*only padded at end of reduced image*/ + passstart[i + 1] = passstart[i] + (passh[i] * passw[i] * bpp + 7) / 8; + } +} + +#ifdef LODEPNG_COMPILE_DECODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / PNG Decoder / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*read the information from the header and store it in the LodePNGInfo. return value is error*/ +unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, + const unsigned char* in, size_t insize) { + unsigned width, height; + LodePNGInfo* info = &state->info_png; + if(insize == 0 || in == 0) { + CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/ + } + if(insize < 33) { + CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/ + } + + /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/ + /* TODO: remove this. 
One should use a new LodePNGState for new sessions */ + lodepng_info_cleanup(info); + lodepng_info_init(info); + + if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71 + || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) { + CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/ + } + if(lodepng_chunk_length(in + 8) != 13) { + CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/ + } + if(!lodepng_chunk_type_equals(in + 8, "IHDR")) { + CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/ + } + + /*read the values given in the header*/ + width = lodepng_read32bitInt(&in[16]); + height = lodepng_read32bitInt(&in[20]); + info->color.bitdepth = in[24]; + info->color.colortype = (LodePNGColorType)in[25]; + info->compression_method = in[26]; + info->filter_method = in[27]; + info->interlace_method = in[28]; + + if(width == 0 || height == 0) { + CERROR_RETURN_ERROR(state->error, 93); + } + + if(w) *w = width; + if(h) *h = height; + + if(!state->decoder.ignore_crc) { + unsigned CRC = lodepng_read32bitInt(&in[29]); + unsigned checksum = lodepng_crc32(&in[12], 17); + if(CRC != checksum) { + CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/ + } + } + + /*error: only compression method 0 is allowed in the specification*/ + if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32); + /*error: only filter method 0 is allowed in the specification*/ + if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33); + /*error: only interlace methods 0 and 1 exist in the specification*/ + if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34); + + state->error = checkColorValidity(info->color.colortype, info->color.bitdepth); + return state->error; +} + +static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon, + size_t bytewidth, unsigned char filterType, size_t length) 
{ + /* + For PNG filter method 0 + unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte, + the filter works byte per byte (bytewidth = 1) + precon is the previous unfiltered scanline, recon the result, scanline the current one + the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead + recon and scanline MAY be the same memory address! precon must be disjoint. + precon may be null (no previous scanline); filter types 2, 3 and 4 then only use scanline/recon bytes. + length is the number of bytes to reconstruct; every sum is stored back into an unsigned char, so it wraps modulo 256. + returns 0 on success, or 36 when filterType is not one of 0..4. + */ + + size_t i; + switch(filterType) { + case 0: /*type 0 ("None" in the PNG specification): bytes are copied unchanged*/ + for(i = 0; i != length; ++i) recon[i] = scanline[i]; + break; + case 1: /*type 1 ("Sub"): add the already reconstructed byte bytewidth positions to the left; the first bytewidth bytes have no left neighbour*/ + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth]; + break; + case 2: /*type 2 ("Up"): add the byte at the same position in the previous scanline, if there is one*/ + if(precon) { + for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i]; + } else { + for(i = 0; i != length; ++i) recon[i] = scanline[i]; + } + break; + case 3: /*type 3 ("Average"): add half (rounded down) of the sum of the left byte and the byte above; missing neighbours count as 0*/ + if(precon) { + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1); + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1); + } else { + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1); + } + break; + case 4: /*type 4 ("Paeth"): add paethPredictor(left, above, upper-left)*/ + if(precon) { + for(i = 0; i != bytewidth; ++i) { + recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/ + } + for(i = bytewidth; i < length; ++i) { + recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth])); + } + } else { + for(i = 0; i != bytewidth; ++i) { + recon[i] = scanline[i]; + } + for(i = bytewidth; i < length; ++i) { + /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/ + recon[i] = (scanline[i] + recon[i - bytewidth]); + } + } + break; + default: return 36; /*error: unexisting filter type given*/ + } + return 0; +} + +static unsigned unfilter(unsigned char* out, const unsigned 
char* in, unsigned w, unsigned h, unsigned bpp) { + /* + For PNG filter method 0 + this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times) + out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline + w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel + in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes) + returns 0 on success; a failing unfilterScanline call is propagated through CERROR_TRY_RETURN + */ + + unsigned y; + unsigned char* prevline = 0; /*the first row has no previous scanline*/ + + /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ + size_t bytewidth = (bpp + 7) / 8; + /*bytes per scanline, rounded up to a whole byte. The product is formed in size_t: in plain unsigned arithmetic + w * bpp + 7 can wrap around for very wide rows, which would make every row offset below wrong*/ + size_t linebytes = ((size_t)w * bpp + 7u) / 8u; + + for(y = 0; y < h; ++y) { + size_t outindex = linebytes * y; + size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + unsigned char filterType = in[inindex]; + + CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes)); + + prevline = &out[outindex]; /*the row just reconstructed is the reference for the next one*/ + } + + return 0; +} + +/* +in: Adam7 interlaced image, with no padding bits between scanlines, but between + reduced images so that each reduced image starts at a byte. +out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h +bpp: bits per pixel +out has the following size in bits: w * h * bpp. +in is possibly bigger due to padding bits between reduced images. 
+out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation +(because that's likely a little bit faster) +NOTE: comments about padding bits are only relevant if bpp < 8 +*/ +static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + /*fills all five arrays; only passw, passh and passstart are read in this function*/ + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + if(bpp >= 8) { + /*whole-byte pixels: copy bytewidth bytes per pixel from its place in pass i to its place in the w-wide output*/ + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + size_t bytewidth = bpp / 8; + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth; + /*output row is ADAM7_IY[i] + y * ADAM7_DY[i], output column is ADAM7_IX[i] + x * ADAM7_DX[i]*/ + size_t pixeloutstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; + for(b = 0; b < bytewidth; ++b) { + out[pixeloutstart + b] = in[pixelinstart + b]; + } + } + } + } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + unsigned ilinebits = bpp * passw[i]; + unsigned olinebits = bpp * w; + size_t obp, ibp; /*bit pointers (for out and in buffer)*/ + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + /*same row/column mapping as the byte case above, but measured in bits; pass i starts at byte passstart[i]*/ + ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp); + obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; + for(b = 0; b < bpp; ++b) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/ + setBitOfReversedStream0(&obp, out, bit); + } + } + } + } +} + +static void removePaddingBits(unsigned char* out, const unsigned char* in, + size_t olinebits, size_t ilinebits, unsigned h) { + /* + After filtering there are still padding bits if scanlines have non multiple of 8 bit amounts. 
They need + to be removed (except at last scanline of (Adam7-reduced) image) before working with pure image buffers + for the Adam7 code, the color convert code and the output to the user. + in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must + have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits + also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7 + only useful if (ilinebits - olinebits) is a value in the range 1..7 + */ + unsigned y; + size_t diff = ilinebits - olinebits; + size_t ibp = 0, obp = 0; /*input and output bit pointers*/ + for(y = 0; y < h; ++y) { + size_t x; + for(x = 0; x < olinebits; ++x) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + ibp += diff; + } +} + +/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from +the IDAT chunks (with filter index bytes and possible padding bits) +return value is error*/ +static unsigned postProcessScanlines(unsigned char* out, unsigned char* in, + unsigned w, unsigned h, const LodePNGInfo* info_png) { + /* + This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype. + Steps: + *) if no Adam7: 1) unfilter 2) remove padding bits (= posible extra bits per scanline if bpp < 8) + *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace + NOTE: the in buffer will be overwritten with intermediate data! 
+ */ + unsigned bpp = lodepng_get_bpp(&info_png->color); + if(bpp == 0) return 31; /*error: invalid colortype*/ + + if(info_png->interlace_method == 0) { + if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) { + CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); + removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h); + } + /*we can immediately filter into the out buffer, no other steps needed*/ + else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp)); + } else /*interlace_method is 1 (Adam7)*/ { + unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + for(i = 0; i != 7; ++i) { + CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp)); + /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline, + move bytes instead of bits or move not at all*/ + if(bpp < 8) { + /*remove padding bits in scanlines; after this there still may be padding + bits between the different reduced images: each reduced image still starts nicely at a byte*/ + removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp, + ((passw[i] * bpp + 7) / 8) * 8, passh[i]); + } + } + + Adam7_deinterlace(out, in, w, h, bpp); + } + + return 0; +} + +static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) { + unsigned pos = 0, i; + if(color->palette) lodepng_free(color->palette); + color->palettesize = chunkLength / 3; + color->palette = (unsigned char*)lodepng_malloc(4 * color->palettesize); + if(!color->palette && color->palettesize) { + color->palettesize = 0; + return 83; /*alloc fail*/ + } + if(color->palettesize > 256) return 38; /*error: palette too big*/ + + for(i = 0; i != color->palettesize; ++i) { + color->palette[4 * i + 0] = data[pos++]; /*R*/ + color->palette[4 * i + 1] = data[pos++]; 
/*G*/ + color->palette[4 * i + 2] = data[pos++]; /*B*/ + color->palette[4 * i + 3] = 255; /*alpha*/ + } + + return 0; /* OK */ +} + +/*transparency chunk (tRNS): per-entry alpha values for LCT_PALETTE, or a single color key for LCT_GREY / LCT_RGB. +Returns 0 on success, 39 / 30 / 41 for a chunk length that does not fit the color type, 42 for any other color type.*/ +static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) { + unsigned i; + if(color->colortype == LCT_PALETTE) { + /*error: more alpha values given than there are palette entries*/ + if(chunkLength > color->palettesize) return 39; + + /*overwrite the alpha byte (offset 3 of each 4-byte entry) of the first chunkLength palette entries*/ + for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i]; + } else if(color->colortype == LCT_GREY) { + /*error: this chunk must be 2 bytes for grayscale image*/ + if(chunkLength != 2) return 30; + + color->key_defined = 1; + /*one 16-bit big-endian value, stored in all three key channels*/ + color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1]; + } else if(color->colortype == LCT_RGB) { + /*error: this chunk must be 6 bytes for RGB image*/ + if(chunkLength != 6) return 41; + + color->key_defined = 1; + /*three 16-bit big-endian values: R, G, B*/ + color->key_r = 256u * data[0] + data[1]; + color->key_g = 256u * data[2] + data[3]; + color->key_b = 256u * data[4] + data[5]; + } + else return 42; /*error: tRNS chunk not allowed for other color models*/ + + return 0; /* OK */ +} + + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*background color chunk (bKGD)*/ +static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(info->color.colortype == LCT_PALETTE) { + /*error: this chunk must be 1 byte for indexed color image*/ + if(chunkLength != 1) return 43; + + /*error: invalid palette index, or maybe this chunk appeared before PLTE*/ + if(data[0] >= info->color.palettesize) return 103; + + info->background_defined = 1; + info->background_r = info->background_g = info->background_b = data[0]; + } else if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) { + /*error: this chunk must be 2 bytes for grayscale image*/ + if(chunkLength != 2) return 44; + + /*the values are truncated to bitdepth in the PNG file*/ + info->background_defined = 1; + info->background_r = info->background_g 
= info->background_b = 256u * data[0] + data[1]; + } else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) { + /*error: this chunk must be 6 bytes for grayscale image*/ + if(chunkLength != 6) return 45; + + /*the values are truncated to bitdepth in the PNG file*/ + info->background_defined = 1; + info->background_r = 256u * data[0] + data[1]; + info->background_g = 256u * data[2] + data[3]; + info->background_b = 256u * data[4] + data[5]; + } + + return 0; /* OK */ +} + +/*text chunk (tEXt)*/ +static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + char *key = 0, *str = 0; + unsigned i; + + while(!error) /*not really a while loop, only used to break on error*/ { + unsigned length, string2_begin; + + length = 0; + while(length < chunkLength && data[length] != 0) ++length; + /*even though it's not allowed by the standard, no error is thrown if + there's no null termination char, if the text is empty*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + key[length] = 0; + for(i = 0; i != length; ++i) key[i] = (char)data[i]; + + string2_begin = length + 1; /*skip keyword null terminator*/ + + length = (unsigned)(chunkLength < string2_begin ? 
0 : chunkLength - string2_begin); + str = (char*)lodepng_malloc(length + 1); + if(!str) CERROR_BREAK(error, 83); /*alloc fail*/ + + str[length] = 0; + for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i]; + + error = lodepng_add_text(info, key, str); + + break; + } + + lodepng_free(key); + lodepng_free(str); + + return error; +} + +/*compressed text chunk (zTXt)*/ +static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + unsigned i; + + unsigned length, string2_begin; + char *key = 0; + ucvector decoded; + + ucvector_init(&decoded); + + while(!error) /*not really a while loop, only used to break on error*/ { + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + key[length] = 0; + for(i = 0; i != length; ++i) key[i] = (char)data[i]; + + if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ + + string2_begin = length + 2; + if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ + + length = (unsigned)chunkLength - string2_begin; + /*will fail if zlib error, e.g. 
if length is too small*/ + error = zlib_decompress(&decoded.data, &decoded.size, + (unsigned char*)(&data[string2_begin]), + length, zlibsettings); + if(error) break; + ucvector_push_back(&decoded, 0); + + error = lodepng_add_text(info, key, (char*)decoded.data); + + break; + } + + lodepng_free(key); + ucvector_cleanup(&decoded); + + return error; +} + +/*international text chunk (iTXt)*/ +static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + unsigned i; + + unsigned length, begin, compressed; + char *key = 0, *langtag = 0, *transkey = 0; + ucvector decoded; + ucvector_init(&decoded); /* TODO: only use in case of compressed text */ + + while(!error) /*not really a while loop, only used to break on error*/ { + /*Quick check if the chunk length isn't too small. Even without check + it'd still fail with other error checks below if it's too short. This just gives a different error code.*/ + if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/ + + /*read the key*/ + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + key[length] = 0; + for(i = 0; i != length; ++i) key[i] = (char)data[i]; + + /*read the compression method*/ + compressed = data[length + 1]; + if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ + + /*even though it's not allowed by the standard, no error is thrown if + there's no null termination char, if the text is empty for the next 3 texts*/ + + /*read the langtag*/ + begin = length + 3; + length = 0; + for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; + + langtag 
= (char*)lodepng_malloc(length + 1); + if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/ + + langtag[length] = 0; + for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i]; + + /*read the transkey*/ + begin += length + 1; + length = 0; + for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; + + transkey = (char*)lodepng_malloc(length + 1); + if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/ + + transkey[length] = 0; + for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i]; + + /*read the actual text*/ + begin += length + 1; + + /*if begin lies past the end of the chunk, the text is treated as empty*/ + length = (unsigned)chunkLength < begin ? 0 : (unsigned)chunkLength - begin; + + if(compressed) { + /*will fail if zlib error, e.g. if length is too small*/ + error = zlib_decompress(&decoded.data, &decoded.size, + (unsigned char*)(&data[begin]), + length, zlibsettings); + if(error) break; + /*bring allocsize in line with the size written by zlib_decompress before appending the terminating 0*/ + if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size; + ucvector_push_back(&decoded, 0); + } else { + if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/); + + decoded.data[length] = 0; + for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i]; + } + + error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data); + + break; + } + + /*the temporary buffers are released here on success and on error alike*/ + lodepng_free(key); + lodepng_free(langtag); + lodepng_free(transkey); + ucvector_cleanup(&decoded); + + return error; +} + +/*modification time chunk (tIME): requires exactly 7 bytes: a 16-bit big-endian year followed by +month, day, hour, minute and second as single bytes. The values are stored as read, without range checks. +Returns 0 on success or 73 for any other chunk length.*/ +static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 7) return 73; /*invalid tIME chunk size*/ + + info->time_defined = 1; + info->time.year = 256u * data[0] + data[1]; + info->time.month = data[2]; + info->time.day = data[3]; + info->time.hour = data[4]; + info->time.minute = data[5]; + info->time.second = data[6]; + + return 0; /* OK */ +} + +/*physical pixel dimensions chunk (pHYs): requires exactly 9 bytes, otherwise error 74*/ +static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/ + + info->phys_defined = 1; + 
info->phys_x = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; + info->phys_y = 16777216u * data[4] + 65536u * data[5] + 256u * data[6] + data[7]; + info->phys_unit = data[8]; /*last byte is stored unmodified*/ + + return 0; /* OK */ +} + +/*gamma chunk (gAMA): requires exactly 4 bytes, read as one 32-bit big-endian unsigned value into gama_gamma. +Returns 0 on success or 96 for any other chunk length.*/ +static unsigned readChunk_gAMA(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 4) return 96; /*invalid gAMA chunk size*/ + + info->gama_defined = 1; + info->gama_gamma = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; + + return 0; /* OK */ +} + +/*chromaticity chunk (cHRM): requires exactly 32 bytes = eight 32-bit big-endian unsigned values, stored in the order +white x, white y, red x, red y, green x, green y, blue x, blue y. Returns 0 on success or 97 for any other chunk length.*/ +static unsigned readChunk_cHRM(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 32) return 97; /*invalid cHRM chunk size*/ + + info->chrm_defined = 1; + info->chrm_white_x = 16777216u * data[ 0] + 65536u * data[ 1] + 256u * data[ 2] + data[ 3]; + info->chrm_white_y = 16777216u * data[ 4] + 65536u * data[ 5] + 256u * data[ 6] + data[ 7]; + info->chrm_red_x = 16777216u * data[ 8] + 65536u * data[ 9] + 256u * data[10] + data[11]; + info->chrm_red_y = 16777216u * data[12] + 65536u * data[13] + 256u * data[14] + data[15]; + info->chrm_green_x = 16777216u * data[16] + 65536u * data[17] + 256u * data[18] + data[19]; + info->chrm_green_y = 16777216u * data[20] + 65536u * data[21] + 256u * data[22] + data[23]; + info->chrm_blue_x = 16777216u * data[24] + 65536u * data[25] + 256u * data[26] + data[27]; + info->chrm_blue_y = 16777216u * data[28] + 65536u * data[29] + 256u * data[30] + data[31]; + + return 0; /* OK */ +} + +/*sRGB chunk: requires exactly 1 byte, stored unvalidated as srgb_intent. +Returns 0 on success or 98 for any other chunk length.*/ +static unsigned readChunk_sRGB(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 1) return 98; /*invalid sRGB chunk size (this one is never ignored)*/ + + info->srgb_defined = 1; + info->srgb_intent = data[0]; + + return 0; /* OK */ +} + +static unsigned readChunk_iCCP(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + unsigned i; + + unsigned length, string2_begin; + 
ucvector decoded; + + info->iccp_defined = 1; + if(info->iccp_name) lodepng_clear_icc(info); + + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 2 >= chunkLength) return 75; /*no null termination, corrupt?*/ + if(length < 1 || length > 79) return 89; /*keyword too short or long*/ + + info->iccp_name = (char*)lodepng_malloc(length + 1); + if(!info->iccp_name) return 83; /*alloc fail*/ + + info->iccp_name[length] = 0; + for(i = 0; i != length; ++i) info->iccp_name[i] = (char)data[i]; + + if(data[length + 1] != 0) return 72; /*the 0 byte indicating compression must be 0*/ + + string2_begin = length + 2; + if(string2_begin > chunkLength) return 75; /*no null termination, corrupt?*/ + + length = (unsigned)chunkLength - string2_begin; + ucvector_init(&decoded); + error = zlib_decompress(&decoded.data, &decoded.size, + (unsigned char*)(&data[string2_begin]), + length, zlibsettings); + if(!error) { + info->iccp_profile_size = (unsigned int)decoded.size; + info->iccp_profile = (unsigned char*)lodepng_malloc(decoded.size); + if(info->iccp_profile) { + memcpy(info->iccp_profile, decoded.data, decoded.size); + } else { + error = 83; /* alloc fail */ + } + } + ucvector_cleanup(&decoded); + return error; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos, + const unsigned char* in, size_t insize) { + const unsigned char* chunk = in + pos; + unsigned chunkLength; + const unsigned char* data; + unsigned unhandled = 0; + unsigned error = 0; + + if (pos + 4 > insize) return 30; + chunkLength = lodepng_chunk_length(chunk); + if(chunkLength > 2147483647) return 63; + data = lodepng_chunk_data_const(chunk); + if(data + chunkLength + 4 > in + insize) return 30; + + if(lodepng_chunk_type_equals(chunk, "PLTE")) { + error = readChunk_PLTE(&state->info_png.color, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tRNS")) { + error = readChunk_tRNS(&state->info_png.color, data, 
chunkLength); +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + } else if(lodepng_chunk_type_equals(chunk, "bKGD")) { + error = readChunk_bKGD(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tEXt")) { + error = readChunk_tEXt(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "zTXt")) { + error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "iTXt")) { + error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tIME")) { + error = readChunk_tIME(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "pHYs")) { + error = readChunk_pHYs(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "gAMA")) { + error = readChunk_gAMA(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "cHRM")) { + error = readChunk_cHRM(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "sRGB")) { + error = readChunk_sRGB(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "iCCP")) { + error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else { + /* unhandled chunk is ok (is not an error) */ + unhandled = 1; + } + + if(!error && !unhandled && !state->decoder.ignore_crc) { + if(lodepng_chunk_check_crc(chunk)) return 57; /*invalid CRC*/ + } + + return error; +} + +/*read a PNG, the result will be in the same color type as the PNG (hence "generic")*/ +static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize) { + unsigned char IEND = 0; + const unsigned char* chunk; + size_t i; + ucvector idat; /*the data from idat chunks*/ + ucvector scanlines; + size_t predict; + 
size_t outsize = 0; + + /*for unknown chunk order*/ + unsigned unknown = 0; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + + + /* safe output values in case error happens */ + *out = 0; + *w = *h = 0; + + state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/ + if(state->error) return; + + if(lodepng_pixel_overflow(*w, *h, &state->info_png.color, &state->info_raw)) { + CERROR_RETURN(state->error, 92); /*overflow possible due to amount of pixels*/ + } + + ucvector_init(&idat); + chunk = &in[33]; /*first byte of the first chunk after the header*/ + + /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk. + IDAT data is put at the start of the in buffer*/ + while(!IEND && !state->error) { + unsigned chunkLength; + const unsigned char* data; /*the data in the chunk*/ + + /*error: size of the in buffer too small to contain next chunk*/ + if((size_t)((chunk - in) + 12) > insize || chunk < in) { + if(state->decoder.ignore_end) break; /*other errors may still happen though*/ + CERROR_BREAK(state->error, 30); + } + + /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/ + chunkLength = lodepng_chunk_length(chunk); + /*error: chunk length larger than the max PNG chunk size*/ + if(chunkLength > 2147483647) { + if(state->decoder.ignore_end) break; /*other errors may still happen though*/ + CERROR_BREAK(state->error, 63); + } + + if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in) { + CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/ + } + + data = lodepng_chunk_data_const(chunk); + + unknown = 0; + + /*IDAT chunk, containing compressed image data*/ + if(lodepng_chunk_type_equals(chunk, "IDAT")) { + size_t oldsize = idat.size; + size_t newsize; + 
if(lodepng_addofl(oldsize, chunkLength, &newsize)) CERROR_BREAK(state->error, 95); + if(!ucvector_resize(&idat, newsize)) CERROR_BREAK(state->error, 83 /*alloc fail*/); + for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i]; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + critical_pos = 3; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else if(lodepng_chunk_type_equals(chunk, "IEND")) { + /*IEND chunk*/ + IEND = 1; + } else if(lodepng_chunk_type_equals(chunk, "PLTE")) { + /*palette chunk (PLTE)*/ + state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength); + if(state->error) break; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + critical_pos = 2; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else if(lodepng_chunk_type_equals(chunk, "tRNS")) { + /*palette transparency chunk (tRNS). Even though this one is an ancillary chunk , it is still compiled + in without 'LODEPNG_COMPILE_ANCILLARY_CHUNKS' because it contains essential color information that + affects the alpha channel of pixels. 
*/ + state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength); + if(state->error) break; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*background color chunk (bKGD)*/ + } else if(lodepng_chunk_type_equals(chunk, "bKGD")) { + state->error = readChunk_bKGD(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "tEXt")) { + /*text chunk (tEXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_tEXt(&state->info_png, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "zTXt")) { + /*compressed text chunk (zTXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "iTXt")) { + /*international text chunk (iTXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "tIME")) { + state->error = readChunk_tIME(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "pHYs")) { + state->error = readChunk_pHYs(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "gAMA")) { + state->error = readChunk_gAMA(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "cHRM")) { + state->error = readChunk_cHRM(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "sRGB")) { + state->error = readChunk_sRGB(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "iCCP")) { + state->error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); + 
if(state->error) break; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else /*it's not an implemented chunk type, so ignore it: skip over the data*/ { + /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/ + if(!state->decoder.ignore_critical && !lodepng_chunk_ancillary(chunk)) { + CERROR_BREAK(state->error, 69); + } + + unknown = 1; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(state->decoder.remember_unknown_chunks) { + state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1], + &state->info_png.unknown_chunks_size[critical_pos - 1], chunk); + if(state->error) break; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } + + if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/ { + if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/ + } + + if(!IEND) chunk = lodepng_chunk_next_const(chunk); + } + + ucvector_init(&scanlines); + /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation. 
+ If the decompressed size does not match the prediction, the image must be corrupt.*/ + if(state->info_png.interlace_method == 0) { + predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color); + } else { + /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/ + const LodePNGColorMode* color = &state->info_png.color; + predict = 0; + predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color); + if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color); + predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color); + if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color); + predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color); + if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color); + predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color); + } + if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/ + if(!state->error) { + state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data, + idat.size, &state->decoder.zlibsettings); + if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/ + } + ucvector_cleanup(&idat); + + if(!state->error) { + outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color); + *out = (unsigned char*)lodepng_malloc(outsize); + if(!*out) state->error = 83; /*alloc fail*/ + } + if(!state->error) { + for(i = 0; i < outsize; i++) (*out)[i] = 0; + state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png); + } + ucvector_cleanup(&scanlines); +} + +unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize) { + *out = 0; + decodeGeneric(out, w, h, state, in, insize); + if(state->error) return state->error; + if(!state->decoder.color_convert || 
lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) { + /*same color type, no copying or converting of data needed*/ + /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype + the raw image has to the end user*/ + if(!state->decoder.color_convert) { + state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color); + if(state->error) return state->error; + } + } else { + /*color conversion needed; sort of copy of the data*/ + unsigned char* data = *out; + size_t outsize; + + /*TODO: check if this works according to the statement in the documentation: "The converter can convert + from grayscale input color type, to 8-bit grayscale or grayscale with alpha"*/ + if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA) + && !(state->info_raw.bitdepth == 8)) { + return 56; /*unsupported color mode conversion*/ + } + + outsize = lodepng_get_raw_size(*w, *h, &state->info_raw); + *out = (unsigned char*)lodepng_malloc(outsize); + if(!(*out)) { + state->error = 83; /*alloc fail*/ + } + else state->error = lodepng_convert(*out, data, &state->info_raw, + &state->info_png.color, *w, *h); + lodepng_free(data); + } + return state->error; +} + +unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, + size_t insize, LodePNGColorType colortype, unsigned bitdepth) { + unsigned error; + LodePNGState state; + lodepng_state_init(&state); + state.info_raw.colortype = colortype; + state.info_raw.bitdepth = bitdepth; + error = lodepng_decode(out, w, h, &state, in, insize); + lodepng_state_cleanup(&state); + return error; +} + +unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) { + return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8); +} + +unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) { + return 
lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename, + LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer = 0; + size_t buffersize; + unsigned error; + /* safe output values in case error happens */ + *out = 0; + *w = *h = 0; + error = lodepng_load_file(&buffer, &buffersize, filename); + if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth); + lodepng_free(buffer); + return error; +} + +unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) { + return lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8); +} + +unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) { + return lodepng_decode_file(out, w, h, filename, LCT_RGB, 8); +} +#endif /*LODEPNG_COMPILE_DISK*/ + +void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings) { + settings->color_convert = 1; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + settings->read_text_chunks = 1; + settings->remember_unknown_chunks = 0; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + settings->ignore_crc = 0; + settings->ignore_critical = 0; + settings->ignore_end = 0; + lodepng_decompress_settings_init(&settings->zlibsettings); +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) + +void lodepng_state_init(LodePNGState* state) { +#ifdef LODEPNG_COMPILE_DECODER + lodepng_decoder_settings_init(&state->decoder); +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER + lodepng_encoder_settings_init(&state->encoder); +#endif /*LODEPNG_COMPILE_ENCODER*/ + lodepng_color_mode_init(&state->info_raw); + lodepng_info_init(&state->info_png); + state->error = 1; +} + +void lodepng_state_cleanup(LodePNGState* state) { + lodepng_color_mode_cleanup(&state->info_raw); + 
lodepng_info_cleanup(&state->info_png); +} + +void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source) { + lodepng_state_cleanup(dest); + *dest = *source; + lodepng_color_mode_init(&dest->info_raw); + lodepng_info_init(&dest->info_png); + dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw); if(dest->error) return; + dest->error = lodepng_info_copy(&dest->info_png, &source->info_png); if(dest->error) return; +} + +#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ + +#ifdef LODEPNG_COMPILE_ENCODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / PNG Encoder / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*chunkName must be string of 4 characters*/ +static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length) { + CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, (unsigned)length, chunkName, data)); + out->allocsize = out->size; /*fix the allocsize again*/ + return 0; +} + +static void writeSignature(ucvector* out) { + /*8 bytes PNG signature, aka the magic bytes*/ + ucvector_push_back(out, 137); + ucvector_push_back(out, 80); + ucvector_push_back(out, 78); + ucvector_push_back(out, 71); + ucvector_push_back(out, 13); + ucvector_push_back(out, 10); + ucvector_push_back(out, 26); + ucvector_push_back(out, 10); +} + +static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) { + unsigned error = 0; + ucvector header; + ucvector_init(&header); + + lodepng_add32bitInt(&header, w); /*width*/ + lodepng_add32bitInt(&header, h); /*height*/ + ucvector_push_back(&header, (unsigned char)bitdepth); /*bit depth*/ + ucvector_push_back(&header, (unsigned char)colortype); /*color type*/ + ucvector_push_back(&header, 0); /*compression method*/ + ucvector_push_back(&header, 0); /*filter method*/ 
+ ucvector_push_back(&header, interlace_method); /*interlace method*/ + + error = addChunk(out, "IHDR", header.data, header.size); + ucvector_cleanup(&header); + + return error; +} + +static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) { + unsigned error = 0; + size_t i; + ucvector PLTE; + ucvector_init(&PLTE); + for(i = 0; i != info->palettesize * 4; ++i) { + /*add all channels except alpha channel*/ + if(i % 4 != 3) ucvector_push_back(&PLTE, info->palette[i]); + } + error = addChunk(out, "PLTE", PLTE.data, PLTE.size); + ucvector_cleanup(&PLTE); + + return error; +} + +static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) { + unsigned error = 0; + size_t i; + ucvector tRNS; + ucvector_init(&tRNS); + if(info->colortype == LCT_PALETTE) { + size_t amount = info->palettesize; + /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/ + for(i = info->palettesize; i != 0; --i) { + if(info->palette[4 * (i - 1) + 3] == 255) --amount; + else break; + } + /*add only alpha channel*/ + for(i = 0; i != amount; ++i) ucvector_push_back(&tRNS, info->palette[4 * i + 3]); + } else if(info->colortype == LCT_GREY) { + if(info->key_defined) { + ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255)); + } + } else if(info->colortype == LCT_RGB) { + if(info->key_defined) { + ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_g >> 8)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_g & 255)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_b >> 8)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_b & 255)); + } + } + + error = addChunk(out, "tRNS", tRNS.data, tRNS.size); + ucvector_cleanup(&tRNS); + + return error; +} + +static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t 
datasize, + LodePNGCompressSettings* zlibsettings) { + ucvector zlibdata; + unsigned error = 0; + + /*compress with the Zlib compressor*/ + ucvector_init(&zlibdata); + error = zlib_compress(&zlibdata.data, &zlibdata.size, data, datasize, zlibsettings); + if(!error) error = addChunk(out, "IDAT", zlibdata.data, zlibdata.size); + ucvector_cleanup(&zlibdata); + + return error; +} + +static unsigned addChunk_IEND(ucvector* out) { + unsigned error = 0; + error = addChunk(out, "IEND", 0, 0); + return error; +} + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) { + unsigned error = 0; + size_t i; + ucvector text; + ucvector_init(&text); + for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]); + if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ + ucvector_push_back(&text, 0); /*0 termination char*/ + for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]); + error = addChunk(out, "tEXt", text.data, text.size); + ucvector_cleanup(&text); + + return error; +} + +static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring, + LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + ucvector data, compressed; + size_t i, textsize = strlen(textstring); + + ucvector_init(&data); + ucvector_init(&compressed); + for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]); + if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ + ucvector_push_back(&data, 0); /*0 termination char*/ + ucvector_push_back(&data, 0); /*compression method: 0*/ + + error = zlib_compress(&compressed.data, &compressed.size, + (unsigned char*)textstring, textsize, zlibsettings); + if(!error) { + for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]); + error = addChunk(out, "zTXt", data.data, data.size); + } + + ucvector_cleanup(&compressed); + 
ucvector_cleanup(&data); + return error; +} + +static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag, + const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + ucvector data; + size_t i, textsize = strlen(textstring); + + ucvector_init(&data); + + for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]); + if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ + ucvector_push_back(&data, 0); /*null termination char*/ + ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/ + ucvector_push_back(&data, 0); /*compression method*/ + for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]); + ucvector_push_back(&data, 0); /*null termination char*/ + for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]); + ucvector_push_back(&data, 0); /*null termination char*/ + + if(compressed) { + ucvector compressed_data; + ucvector_init(&compressed_data); + error = zlib_compress(&compressed_data.data, &compressed_data.size, + (unsigned char*)textstring, textsize, zlibsettings); + if(!error) { + for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]); + } + ucvector_cleanup(&compressed_data); + } else /*not compressed*/ { + for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]); + } + + if(!error) error = addChunk(out, "iTXt", data.data, data.size); + ucvector_cleanup(&data); + return error; +} + +static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) { + unsigned error = 0; + ucvector bKGD; + ucvector_init(&bKGD); + if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) { + ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); + } else if(info->color.colortype == 
LCT_RGB || info->color.colortype == LCT_RGBA) { + ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_g >> 8)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_g & 255)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_b >> 8)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_b & 255)); + } else if(info->color.colortype == LCT_PALETTE) { + ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); /*palette index*/ + } + + error = addChunk(out, "bKGD", bKGD.data, bKGD.size); + ucvector_cleanup(&bKGD); + + return error; +} + +static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) { + unsigned error = 0; + unsigned char* data = (unsigned char*)lodepng_malloc(7); + if(!data) return 83; /*alloc fail*/ + data[0] = (unsigned char)(time->year >> 8); + data[1] = (unsigned char)(time->year & 255); + data[2] = (unsigned char)time->month; + data[3] = (unsigned char)time->day; + data[4] = (unsigned char)time->hour; + data[5] = (unsigned char)time->minute; + data[6] = (unsigned char)time->second; + error = addChunk(out, "tIME", data, 7); + lodepng_free(data); + return error; +} + +static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) { + unsigned error = 0; + ucvector data; + ucvector_init(&data); + + lodepng_add32bitInt(&data, info->phys_x); + lodepng_add32bitInt(&data, info->phys_y); + ucvector_push_back(&data, info->phys_unit); + + error = addChunk(out, "pHYs", data.data, data.size); + ucvector_cleanup(&data); + + return error; +} + +static unsigned addChunk_gAMA(ucvector* out, const LodePNGInfo* info) { + unsigned error = 0; + ucvector data; + ucvector_init(&data); + + lodepng_add32bitInt(&data, info->gama_gamma); + + error = addChunk(out, "gAMA", data.data, data.size); + ucvector_cleanup(&data); + + return error; +} + +static unsigned 
addChunk_cHRM(ucvector* out, const LodePNGInfo* info) { + unsigned error = 0; + ucvector data; + ucvector_init(&data); + + lodepng_add32bitInt(&data, info->chrm_white_x); + lodepng_add32bitInt(&data, info->chrm_white_y); + lodepng_add32bitInt(&data, info->chrm_red_x); + lodepng_add32bitInt(&data, info->chrm_red_y); + lodepng_add32bitInt(&data, info->chrm_green_x); + lodepng_add32bitInt(&data, info->chrm_green_y); + lodepng_add32bitInt(&data, info->chrm_blue_x); + lodepng_add32bitInt(&data, info->chrm_blue_y); + + error = addChunk(out, "cHRM", data.data, data.size); + ucvector_cleanup(&data); + + return error; +} + +static unsigned addChunk_sRGB(ucvector* out, const LodePNGInfo* info) { + unsigned char data = info->srgb_intent; + return addChunk(out, "sRGB", &data, 1); +} + +static unsigned addChunk_iCCP(ucvector* out, const LodePNGInfo* info, LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + ucvector data, compressed; + size_t i; + + ucvector_init(&data); + ucvector_init(&compressed); + for(i = 0; info->iccp_name[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)info->iccp_name[i]); + if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ + ucvector_push_back(&data, 0); /*0 termination char*/ + ucvector_push_back(&data, 0); /*compression method: 0*/ + + error = zlib_compress(&compressed.data, &compressed.size, + info->iccp_profile, info->iccp_profile_size, zlibsettings); + if(!error) { + for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]); + error = addChunk(out, "iCCP", data.data, data.size); + } + + ucvector_cleanup(&compressed); + ucvector_cleanup(&data); + return error; +} + +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline, + size_t length, size_t bytewidth, unsigned char filterType) { + size_t i; + switch(filterType) { + case 0: /*None*/ + for(i = 0; i != length; ++i) out[i] = scanline[i]; + 
break; + case 1: /*Sub*/ + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - scanline[i - bytewidth]; + break; + case 2: /*Up*/ + if(prevline) { + for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i]; + } else { + for(i = 0; i != length; ++i) out[i] = scanline[i]; + } + break; + case 3: /*Average*/ + if(prevline) { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1); + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1); + } else { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1); + } + break; + case 4: /*Paeth*/ + if(prevline) { + /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/ + for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]); + for(i = bytewidth; i < length; ++i) { + out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth])); + } + } else { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ + for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]); + } + break; + default: return; /*unexisting filter type given*/ + } +} + +/* log2 approximation. A slight bit faster than std::log. 
*/ +static float flog2(float f) { + float result = 0; + while(f > 32) { result += 4; f /= 16; } + while(f > 2) { ++result; f /= 2; } + return result + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f); +} + +static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, + const LodePNGColorMode* info, const LodePNGEncoderSettings* settings) { + /* + For PNG filter method 0 + out must be a buffer with as size: h + (w * h * bpp + 7) / 8, because there are + the scanlines with 1 extra byte per scanline + */ + + unsigned bpp = lodepng_get_bpp(info); + /*the width of a scanline in bytes, not including the filter type*/ + size_t linebytes = (w * bpp + 7) / 8; + /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ + size_t bytewidth = (bpp + 7) / 8; + const unsigned char* prevline = 0; + unsigned x, y; + unsigned error = 0; + LodePNGFilterStrategy strategy = settings->filter_strategy; + + /* + There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard: + * If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e. + use fixed filtering, with the filter None). + * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is + not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply + all five filters and select the filter that produces the smallest sum of absolute values per row. + This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true. + + If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed, + but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum + heuristic is used. 
+ */ + if(settings->filter_palette_zero && + (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO; + + if(bpp == 0) return 31; /*error: invalid color type*/ + + if(strategy == LFS_ZERO) { + for(y = 0; y != h; ++y) { + size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + size_t inindex = linebytes * y; + out[outindex] = 0; /*filter type byte*/ + filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0); + prevline = &in[inindex]; + } + } else if(strategy == LFS_MINSUM) { + /*adaptive filtering*/ + size_t sum[5]; + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t smallest = 0; + unsigned char type, bestType = 0; + + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) return 83; /*alloc fail*/ + } + + if(!error) { + for(y = 0; y != h; ++y) { + /*try the 5 filter types*/ + for(type = 0; type != 5; ++type) { + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + + /*calculate the sum of the result*/ + sum[type] = 0; + if(type == 0) { + for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]); + } else { + for(x = 0; x != linebytes; ++x) { + /*For differences, each byte should be treated as signed, values above 127 are negative + (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. + This means filtertype 0 is almost never chosen, but that is justified.*/ + unsigned char s = attempt[type][x]; + sum[type] += s < 128 ? 
s : (255U - s); + } + } + + /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || sum[type] < smallest) { + bestType = type; + smallest = sum[type]; + } + } + + prevline = &in[y * linebytes]; + + /*now fill the out values*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + } + + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } else if(strategy == LFS_ENTROPY) { + float sum[5]; + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + float smallest = 0; + unsigned type, bestType = 0; + unsigned count[256]; + + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) return 83; /*alloc fail*/ + } + + for(y = 0; y != h; ++y) { + /*try the 5 filter types*/ + for(type = 0; type != 5; ++type) { + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + for(x = 0; x != 256; ++x) count[x] = 0; + for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; + ++count[type]; /*the filter type itself is part of the scanline*/ + sum[type] = 0; + for(x = 0; x != 256; ++x) { + float p = count[x] / (float)(linebytes + 1); + sum[type] += count[x] == 0 ? 
0 : flog2(1 / p) * p; + } + /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || sum[type] < smallest) { + bestType = type; + smallest = sum[type]; + } + } + + prevline = &in[y * linebytes]; + + /*now fill the out values*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } else if(strategy == LFS_PREDEFINED) { + for(y = 0; y != h; ++y) { + size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + size_t inindex = linebytes * y; + unsigned char type = settings->predefined_filters[y]; + out[outindex] = type; /*filter type byte*/ + filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); + prevline = &in[inindex]; + } + } else if(strategy == LFS_BRUTE_FORCE) { + /*brute force filter chooser. + deflate the scanline after every filter attempt to see which one deflates best. + This is very slow and gives only slightly smaller, sometimes even larger, result*/ + size_t size[5]; + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t smallest = 0; + unsigned type = 0, bestType = 0; + unsigned char* dummy; + LodePNGCompressSettings zlibsettings = settings->zlibsettings; + /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose, + to simulate the true case where the tree is the same for the whole image. Sometimes it gives + better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare + cases better compression. 
It does make this a bit less slow, so it's worth doing this.*/ + zlibsettings.btype = 1; + /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG + images only, so disable it*/ + zlibsettings.custom_zlib = 0; + zlibsettings.custom_deflate = 0; + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) return 83; /*alloc fail*/ + } + for(y = 0; y != h; ++y) /*try the 5 filter types*/ { + for(type = 0; type != 5; ++type) { + unsigned testsize = (unsigned)linebytes; + /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/ + + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + size[type] = 0; + dummy = 0; + zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings); + lodepng_free(dummy); + /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || size[type] < smallest) { + bestType = type; + smallest = size[type]; + } + } + prevline = &in[y * linebytes]; + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } + else return 88; /* unknown filter strategy */ + + return error; +} + +static void addPaddingBits(unsigned char* out, const unsigned char* in, + size_t olinebits, size_t ilinebits, unsigned h) { + /*The opposite of the removePaddingBits function + olinebits must be >= ilinebits*/ + unsigned y; + size_t diff = olinebits - ilinebits; + size_t obp = 0, ibp = 0; /*bit pointers*/ + for(y = 0; y != h; ++y) { + size_t x; + for(x = 0; x < ilinebits; ++x) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + /*obp += diff; --> no, fill in some value in the padding 
bits too, to avoid + "Use of uninitialised value of size ###" warning from valgrind*/ + for(x = 0; x != diff; ++x) setBitOfReversedStream(&obp, out, 0); + } +} + +/* +in: non-interlaced image with size w*h +out: the same pixels, but re-ordered according to PNG's Adam7 interlacing, with + no padding bits between scanlines, but between reduced images so that each + reduced image starts at a byte. +bpp: bits per pixel +there are no padding bits, not between scanlines, not between reduced images +in has the following size in bits: w * h * bpp. +out is possibly bigger due to padding bits between reduced images +NOTE: comments about padding bits are only relevant if bpp < 8 +*/ +static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + if(bpp >= 8) { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + size_t bytewidth = bpp / 8; + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + size_t pixelinstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; + size_t pixeloutstart = passstart[i] + (y * passw[i] + x) * bytewidth; + for(b = 0; b < bytewidth; ++b) { + out[pixeloutstart + b] = in[pixelinstart + b]; + } + } + } + } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + unsigned ilinebits = bpp * passw[i]; + unsigned olinebits = bpp * w; + size_t obp, ibp; /*bit pointers (for out and in buffer)*/ + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + ibp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; + obp = (8 * passstart[i]) + (y * ilinebits + x * bpp); + for(b = 0; b < bpp; ++b) { + unsigned char bit = 
readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + } + } + } +} + +/*out must be buffer big enough to contain uncompressed IDAT chunk data, and in must contain the full image. +return value is error**/ +static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in, + unsigned w, unsigned h, + const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings) { + /* + This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. Steps: + *) if no Adam7: 1) add padding bits (= posible extra bits per scanline if bpp < 8) 2) filter + *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter + */ + unsigned bpp = lodepng_get_bpp(&info_png->color); + unsigned error = 0; + + if(info_png->interlace_method == 0) { + *outsize = h + (h * ((w * bpp + 7) / 8)); /*image size plus an extra byte per scanline + possible padding bits*/ + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!(*out) && (*outsize)) error = 83; /*alloc fail*/ + + if(!error) { + /*non multiple of 8 bits per scanline, padding bits needed per scanline*/ + if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) { + unsigned char* padded = (unsigned char*)lodepng_malloc(h * ((w * bpp + 7) / 8)); + if(!padded) error = 83; /*alloc fail*/ + if(!error) { + addPaddingBits(padded, in, ((w * bpp + 7) / 8) * 8, w * bpp, h); + error = filter(*out, padded, w, h, &info_png->color, settings); + } + lodepng_free(padded); + } else { + /*we can immediately filter into the out buffer, no other steps needed*/ + error = filter(*out, in, w, h, &info_png->color, settings); + } + } + } else /*interlace_method is 1 (Adam7)*/ { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned char* adam7; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + *outsize = filter_passstart[7]; /*image size plus an extra byte per 
scanline + possible padding bits*/ + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!(*out)) error = 83; /*alloc fail*/ + + adam7 = (unsigned char*)lodepng_malloc(passstart[7]); + if(!adam7 && passstart[7]) error = 83; /*alloc fail*/ + + if(!error) { + unsigned i; + + Adam7_interlace(adam7, in, w, h, bpp); + for(i = 0; i != 7; ++i) { + if(bpp < 8) { + unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]); + if(!padded) ERROR_BREAK(83); /*alloc fail*/ + addPaddingBits(padded, &adam7[passstart[i]], + ((passw[i] * bpp + 7) / 8) * 8, passw[i] * bpp, passh[i]); + error = filter(&(*out)[filter_passstart[i]], padded, + passw[i], passh[i], &info_png->color, settings); + lodepng_free(padded); + } else { + error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]], + passw[i], passh[i], &info_png->color, settings); + } + + if(error) break; + } + } + + lodepng_free(adam7); + } + + return error; +} + +/* +palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA... +returns 0 if the palette is opaque, +returns 1 if the palette has a single color with alpha 0 ==> color key +returns 2 if the palette is semi-translucent. 
/*
Classifies the transparency of a palette.

palette must have 4 * palettesize bytes, in the format RGBARGBARGBA...
returns 0 if every entry has alpha 255 (opaque palette),
returns 1 if the transparency can be expressed as a color key: the only
          non-opaque entries have alpha 0, they all share one RGB value, and no
          opaque entry uses that RGB value,
returns 2 otherwise (semi-translucent, or keying is not possible).
*/
static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize) {
  size_t i;
  unsigned key = 0;
  unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/

  /*pass 1: any alpha other than 0 or 255 rules out keying; remember the first alpha-0 color*/
  for(i = 0; i != palettesize; ++i) {
    unsigned char a = palette[4 * i + 3];
    if(a == 0) {
      if(!key) {
        r = palette[4 * i + 0]; g = palette[4 * i + 1]; b = palette[4 * i + 2];
        key = 1;
      }
    }
    else if(a != 255) return 2;
  }
  if(!key) return 0;

  /*pass 2: from the beginning, so opaque colors before the key entry are checked too*/
  for(i = 0; i != palettesize; ++i) {
    unsigned same = (r == palette[4 * i + 0] && g == palette[4 * i + 1] && b == palette[4 * i + 2]);
    if(palette[4 * i + 3] == 0) {
      /*a second transparent color with another RGB cannot be covered by one key*/
      if(!same) return 2;
    }
    /*when key, no opaque RGB may have key's RGB*/
    else if(same) return 2;
  }
  return 1;
}
/*
Returns 1 if bytes 16-19 of the ICC profile spell "RGB " (with the trailing
space), 0 otherwise, including when the profile is shorter than 20 bytes. No
other part of the profile is inspected.
*/
static unsigned isRGBICCProfile(const unsigned char* profile, unsigned size) {
  static const unsigned char tag[4] = {'R', 'G', 'B', ' '};
  unsigned k;

  if(size < 20) return 0;
  for(k = 0; k != 4; ++k) {
    if(profile[16 + k] != tag[k]) return 0;
  }
  return 1;
}
state->info_png.background_r; + unsigned bg_g = state->info_png.background_g; + unsigned bg_b = state->info_png.background_b; + unsigned r = 0, g = 0, b = 0; + LodePNGColorProfile prof; + LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGB, 16); + lodepng_convert_rgb(&r, &g, &b, bg_r, bg_g, bg_b, &mode16, &state->info_png.color); + lodepng_color_profile_init(&prof); + state->error = lodepng_get_color_profile(&prof, image, w, h, &state->info_raw); + if(state->error) goto cleanup; + lodepng_color_profile_add(&prof, r, g, b, 65535); + state->error = auto_choose_color_from_profile(&info.color, &state->info_raw, &prof); + if(state->error) goto cleanup; + if(lodepng_convert_rgb(&info.background_r, &info.background_g, &info.background_b, + bg_r, bg_g, bg_b, &info.color, &state->info_png.color)) { + state->error = 104; + goto cleanup; + } + } + else +#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */ + { + state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw); + if(state->error) goto cleanup; + } + } +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(state->info_png.iccp_defined) { + unsigned gray_icc = isGrayICCProfile(state->info_png.iccp_profile, state->info_png.iccp_profile_size); + unsigned gray_png = info.color.colortype == LCT_GREY || info.color.colortype == LCT_GREY_ALPHA; + /* TODO: perhaps instead of giving errors or less optimal compression, we can automatically modify + the ICC profile here to say "GRAY" or "RGB " to match the PNG color type, unless this will require + non trivial changes to the rest of the ICC profile */ + if(!gray_icc && !isRGBICCProfile(state->info_png.iccp_profile, state->info_png.iccp_profile_size)) { + state->error = 100; /* Disallowed profile color type for PNG */ + goto cleanup; + } + if(!state->encoder.auto_convert && gray_icc != gray_png) { + /* Non recoverable: encoder not allowed to convert color type, and requested color type not + compatible with ICC color type */ + state->error = 101; + goto cleanup; + 
} + if(gray_icc && !gray_png) { + /* Non recoverable: trying to set grayscale ICC profile while colored pixels were given */ + state->error = 102; + goto cleanup; + /* NOTE: this relies on the fact that lodepng_auto_choose_color never returns palette for grayscale pixels */ + } + if(!gray_icc && gray_png) { + /* Recoverable but an unfortunate loss in compression density: We have grayscale pixels but + are forced to store them in more expensive RGB format that will repeat each value 3 times + because the PNG spec does not allow an RGB ICC profile with internal grayscale color data */ + if(info.color.colortype == LCT_GREY) info.color.colortype = LCT_RGB; + if(info.color.colortype == LCT_GREY_ALPHA) info.color.colortype = LCT_RGBA; + if(info.color.bitdepth < 8) info.color.bitdepth = 8; + } + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) { + unsigned char* converted; + size_t size = ((size_t)w * (size_t)h * (size_t)lodepng_get_bpp(&info.color) + 7) / 8; + + converted = (unsigned char*)lodepng_malloc(size); + if(!converted && size) state->error = 83; /*alloc fail*/ + if(!state->error) { + state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h); + } + if(!state->error) preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder); + lodepng_free(converted); + if(state->error) goto cleanup; + } + else preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder); + + /* output all PNG chunks */ { +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + size_t i; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*write signature and chunks*/ + writeSignature(&outv); + /*IHDR*/ + addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method); +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*unknown chunks between IHDR and PLTE*/ + if(info.unknown_chunks_data[0]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], 
info.unknown_chunks_size[0]); + if(state->error) goto cleanup; + } + /*color profile chunks must come before PLTE */ + if(info.iccp_defined) addChunk_iCCP(&outv, &info, &state->encoder.zlibsettings); + if(info.srgb_defined) addChunk_sRGB(&outv, &info); + if(info.gama_defined) addChunk_gAMA(&outv, &info); + if(info.chrm_defined) addChunk_cHRM(&outv, &info); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*PLTE*/ + if(info.color.colortype == LCT_PALETTE) { + addChunk_PLTE(&outv, &info.color); + } + if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) { + addChunk_PLTE(&outv, &info.color); + } + /*tRNS*/ + if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0) { + addChunk_tRNS(&outv, &info.color); + } + if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined) { + addChunk_tRNS(&outv, &info.color); + } +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*bKGD (must come between PLTE and the IDAt chunks*/ + if(info.background_defined) { + state->error = addChunk_bKGD(&outv, &info); + if(state->error) goto cleanup; + } + /*pHYs (must come before the IDAT chunks)*/ + if(info.phys_defined) addChunk_pHYs(&outv, &info); + + /*unknown chunks between PLTE and IDAT*/ + if(info.unknown_chunks_data[1]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]); + if(state->error) goto cleanup; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*IDAT (multiple IDAT chunks must be consecutive)*/ + state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*tIME*/ + if(info.time_defined) addChunk_tIME(&outv, &info.time); + /*tEXt and/or zTXt*/ + for(i = 0; i != info.text_num; ++i) { + if(strlen(info.text_keys[i]) > 79) { + state->error = 66; /*text chunk too large*/ + goto cleanup; + } + 
if(strlen(info.text_keys[i]) < 1) { + state->error = 67; /*text chunk too small*/ + goto cleanup; + } + if(state->encoder.text_compression) { + addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings); + } else { + addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]); + } + } + /*LodePNG version id in text chunk*/ + if(state->encoder.add_id) { + unsigned already_added_id_text = 0; + for(i = 0; i != info.text_num; ++i) { + if(!strcmp(info.text_keys[i], "LodePNG")) { + already_added_id_text = 1; + break; + } + } + if(already_added_id_text == 0) { + addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/ + } + } + /*iTXt*/ + for(i = 0; i != info.itext_num; ++i) { + if(strlen(info.itext_keys[i]) > 79) { + state->error = 66; /*text chunk too large*/ + goto cleanup; + } + if(strlen(info.itext_keys[i]) < 1) { + state->error = 67; /*text chunk too small*/ + goto cleanup; + } + addChunk_iTXt(&outv, state->encoder.text_compression, + info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i], + &state->encoder.zlibsettings); + } + + /*unknown chunks between IDAT and IEND*/ + if(info.unknown_chunks_data[2]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]); + if(state->error) goto cleanup; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + addChunk_IEND(&outv); + } + +cleanup: + lodepng_info_cleanup(&info); + lodepng_free(data); + + /*instead of cleaning the vector up, give it to the output*/ + *out = outv.data; + *outsize = outv.size; + + return state->error; +} + +unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image, + unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) { + unsigned error; + LodePNGState state; + lodepng_state_init(&state); + state.info_raw.colortype = colortype; + state.info_raw.bitdepth = bitdepth; + 
state.info_png.color.colortype = colortype;
+  state.info_png.color.bitdepth = bitdepth;
+  lodepng_encode(out, outsize, image, w, h, &state);
+  error = state.error;
+  lodepng_state_cleanup(&state);
+  return error;
+}
+
+/*8-bit RGBA input, forwarded to lodepng_encode_memory*/
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) {
+  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8);
+}
+
+/*8-bit RGB input, forwarded to lodepng_encode_memory*/
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) {
+  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*Encodes to a temporary memory buffer and, only if that succeeded, writes the buffer to filename.*/
+unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h,
+                             LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* png;
+  size_t pngsize;
+  unsigned result = lodepng_encode_memory(&png, &pngsize, image, w, h, colortype, bitdepth);
+  if(result == 0) result = lodepng_save_file(png, pngsize, filename);
+  lodepng_free(png); /*released on success and on failure*/
+  return result;
+}
+
+/*8-bit RGBA input, forwarded to lodepng_encode_file*/
+unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) {
+  return lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8);
+}
+
+/*8-bit RGB input, forwarded to lodepng_encode_file*/
+unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) {
+  return lodepng_encode_file(filename, image, w, h, LCT_RGB, 8);
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/*Fills settings with the encoder defaults.*/
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) {
+  /*the zlib settings have their own initializer*/
+  lodepng_compress_settings_init(&settings->zlibsettings);
+  /*color handling: automatic color type choice on, forced PLTE off*/
+  settings->auto_convert = 1;
+  settings->force_palette = 0;
+  /*filter related defaults*/
+  settings->filter_strategy = LFS_MINSUM;
+  settings->filter_palette_zero = 1;
+  settings->predefined_filters = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*text chunks: compressed, without the LodePNG id chunk*/
+  settings->text_compression = 1;
+  settings->add_id = 0;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef 
LODEPNG_COMPILE_ERROR_TEXT
+/*
+This returns the description of a numerical error code in English. This is also
+the documentation of all the error codes.
+Codes without a case below yield "unknown error code".
+*/
+const char* lodepng_error_text(unsigned code) {
+  switch(code) {
+    case 0: return "no error, everything went ok";
+    case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/
+    case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/
+    case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/
+    case 13: return "problem while processing dynamic deflate block";
+    case 14: return "problem while processing dynamic deflate block";
+    case 15: return "problem while processing dynamic deflate block";
+    case 16: return "unexisting code while processing dynamic deflate block";
+    case 17: return "end of out buffer memory reached while inflating";
+    case 18: return "invalid distance code while inflating";
+    case 19: return "end of out buffer memory reached while inflating";
+    case 20: return "invalid deflate block BTYPE encountered while decoding";
+    case 21: return "NLEN is not ones complement of LEN in a deflate block";
+
+    /*end of out buffer memory reached while inflating:
+    This can happen if the inflated deflate data is longer than the amount of bytes required to fill up
+    all the pixels of the image, given the color depth and image dimensions. Something that doesn't
+    happen in a normal, well encoded, PNG image.*/
+    case 22: return "end of out buffer memory reached while inflating";
+    case 23: return "end of in buffer memory reached while inflating";
+    case 24: return "invalid FCHECK in zlib header";
+    case 25: return "invalid compression method in zlib header";
+    case 26: return "FDICT encountered in zlib header while it's not used for PNG";
+    case 27: return "PNG file is smaller than a PNG header";
+    /*Checks the magic file header, the first 8 bytes of the PNG file*/
+    case 28: return "incorrect PNG signature, it's no PNG or corrupted";
+    case 29: return "first chunk is not the header chunk";
+    case 30: return "chunk length too large, chunk broken off at end of file";
+    case 31: return "illegal PNG color type or bpp";
+    case 32: return "illegal PNG compression method";
+    case 33: return "illegal PNG filter method";
+    case 34: return "illegal PNG interlace method";
+    case 35: return "chunk length of a chunk is too large or the chunk too small";
+    case 36: return "illegal PNG filter type encountered";
+    case 37: return "illegal bit depth for this color type given";
+    case 38: return "the palette is too big"; /*more than 256 colors*/
+    case 39: return "tRNS chunk before PLTE or has more entries than palette size";
+    case 40: return "tRNS chunk has wrong size for grayscale image";
+    case 41: return "tRNS chunk has wrong size for RGB image";
+    case 42: return "tRNS chunk appeared while it was not allowed for this color type";
+    case 43: return "bKGD chunk has wrong size for palette image";
+    case 44: return "bKGD chunk has wrong size for grayscale image";
+    case 45: return "bKGD chunk has wrong size for RGB image";
+    case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?";
+    case 49: return "jumped past memory while generating dynamic huffman tree";
+    case 50: return "jumped past memory while generating dynamic huffman tree";
+    case 51: return "jumped past memory while inflating huffman block";
+    case 52: return "jumped past memory while inflating";
+    case 53: return "size of zlib data too small";
+    case 54: return "repeat symbol in tree while there was no value symbol yet";
+    /*jumped past tree while generating huffman tree, this could be when the
+    tree will have more leaves than symbols after generating it out of the
+    given lengths. They call this an oversubscribed dynamic bit lengths tree in zlib.*/
+    case 55: return "jumped past tree while generating huffman tree";
+    case 56: return "given output image colortype or bitdepth not supported for color conversion";
+    case 57: return "invalid CRC encountered (checking CRC can be disabled)";
+    case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)";
+    case 59: return "requested color conversion not supported";
+    case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)";
+    case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)";
+    /*LodePNG leaves the choice of RGB to grayscale conversion formula to the user.*/
+    case 62: return "conversion from color to grayscale not supported";
+    /*(2^31-1)*/
+    case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk";
+    /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/
+    case 64: return "the length of the END symbol 256 in the Huffman tree is 0";
+    case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes";
+    case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte";
+    case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors";
+    case 69: return "unknown chunk type with 'critical' flag encountered by the decoder";
+    case 71: return "unexisting interlace mode given to encoder (must be 0 or 1)";
+    case 72: return "while decoding, unexisting compression method encountering in zTXt or iTXt chunk (it must be 0)";
+    case 73: return "invalid tIME chunk size";
+    case 74: return "invalid pHYs chunk size";
+    /*length could be wrong, or data chopped off*/
+    case 75: return "no null termination char found while decoding text chunk";
+    case 76: return "iTXt chunk too short to contain required bytes";
+    case 77: return "integer overflow in buffer size";
+    case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/
+    case 79: return "failed to open file for writing";
+    case 80: return "tried creating a tree of 0 symbols";
+    case 81: return "lazy matching at pos 0 is impossible";
+    case 82: return "color conversion to palette requested while a color isn't in palette, or index out of bounds";
+    case 83: return "memory allocation failed";
+    case 84: return "given image too small to contain all pixels to be encoded";
+    case 86: return "impossible offset in lz77 encoding (internal bug)";
+    case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined";
+    case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy";
+    case 89: return "text chunk keyword too short or long: must have size 1-79";
+    /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/
+    case 90: return "windowsize must be a power of two";
+    case 91: return "invalid decompressed idat size";
+    case 92: return "integer overflow due to too many pixels";
+    case 93: return "zero width or height is invalid";
+    case 94: return "header chunk must have a size of 13 bytes";
+    case 95: return "integer overflow with combined idat chunk size";
+    case 96: return "invalid gAMA chunk size";
+    case 97: return "invalid cHRM chunk size";
+    case 98: return "invalid sRGB chunk size";
+    case 99: return "invalid sRGB rendering intent";
+    case 100: return "invalid ICC profile color type, the PNG specification only allows RGB or GRAY";
+    case 101: return "PNG specification does not allow RGB ICC profile on gray color types and vice versa";
+    case 102: return "not allowed to set grayscale ICC profile with colored pixels by PNG specification";
+    case 103: return "invalid palette index in bKGD chunk. Maybe it came before PLTE chunk?";
+    case 104: return "invalid bKGD color while encoding (e.g. palette index out of range)";
+  }
+  return "unknown error code";
+}
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // C++ Wrapper                                                          // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng {
+
+#ifdef LODEPNG_COMPILE_DISK
+/*Reads the whole file into buffer; returns 78 when the file size cannot be determined.*/
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename) {
+  long size = lodepng_filesize(filename.c_str());
+  if(size < 0) return 78;
+  buffer.resize((size_t)size);
+  return size == 0 ? 
0 : lodepng_buffer_file(&buffer[0], (size_t)size, filename.c_str());
+}
+
+/*write given buffer to the file, overwriting the file, it doesn't append to it.*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename) {
+  return lodepng_save_file(buffer.empty() ? 0 : &buffer[0], buffer.size(), filename.c_str());
+}
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+/*Decompresses in[0..insize) and appends the result to out; returns the zlib_decompress error code.*/
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                    const LodePNGDecompressSettings& settings) {
+  unsigned char* buffer = 0;
+  size_t buffersize = 0;
+  unsigned error = zlib_decompress(&buffer, &buffersize, in, insize, &settings);
+  if(buffer) {
+    /*whatever was produced is appended, also when error is non-zero*/
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+/*Vector overload; an empty input is passed on as a null pointer with size 0.*/
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                    const LodePNGDecompressSettings& settings) {
+  return decompress(out, in.empty() ? 0 : &in[0], in.size(), settings);
+}
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Compresses in[0..insize) and appends the result to out; returns the zlib_compress error code.*/
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                  const LodePNGCompressSettings& settings) {
+  unsigned char* buffer = 0;
+  size_t buffersize = 0;
+  unsigned error = zlib_compress(&buffer, &buffersize, in, insize, &settings);
+  if(buffer) {
+    /*whatever was produced is appended, also when error is non-zero*/
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+/*Vector overload; an empty input is passed on as a null pointer with size 0.*/
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                  const LodePNGCompressSettings& settings) {
+  return compress(out, in.empty() ? 
0 : &in[0], in.size(), settings);
+}
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+
+
+#ifdef LODEPNG_COMPILE_PNG
+
+State::State() {
+  lodepng_state_init(this);
+}
+
+State::State(const State& other) {
+  /*initialize first so that the copy works on a valid destination*/
+  lodepng_state_init(this);
+  lodepng_state_copy(this, &other);
+}
+
+State::~State() {
+  lodepng_state_cleanup(this);
+}
+
+State& State::operator=(const State& other) {
+  lodepng_state_copy(this, &other);
+  return *this;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+Decodes the PNG in in[0..insize) to raw pixels of the requested colortype/bitdepth and
+appends them to out. w and h receive the image size. Returns the LodePNG error code.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
+                size_t insize, LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* buffer = 0;
+  unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth);
+  if(buffer && !error) {
+    State state;
+    state.info_raw.colortype = colortype;
+    state.info_raw.bitdepth = bitdepth;
+    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+  }
+  /*freed on every path, like the State overload below, so a buffer left by a failed decode is not leaked*/
+  lodepng_free(buffer);
+  return error;
+}
+
+/*Vector overload; the input size is forwarded as size_t without narrowing.*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::vector<unsigned char>& in, LodePNGColorType colortype, unsigned bitdepth) {
+  return decode(out, w, h, in.empty() ? 0 : &in[0], in.size(), colortype, bitdepth);
+}
+
+/*Decodes using the settings in state; the raw format is taken from state.info_raw.*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const unsigned char* in, size_t insize) {
+  unsigned char* buffer = NULL;
+  unsigned error = lodepng_decode(&buffer, &w, &h, &state, in, insize);
+  if(buffer && !error) {
+    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+  }
+  lodepng_free(buffer);
+  return error;
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const std::vector<unsigned char>& in) {
+  return decode(out, w, h, state, in.empty() ? 
0 : &in[0], in.size());
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*Loads filename into memory and decodes it; w and h are zeroed first so they are defined on failure.*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const std::string& filename,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  std::vector<unsigned char> buffer;
+  /* safe output values in case error happens */
+  w = h = 0;
+  unsigned error = load_file(buffer, filename);
+  if(error) return error;
+  return decode(out, w, h, buffer, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Encodes raw pixels of the given colortype/bitdepth and appends the PNG bytes to out.*/
+unsigned encode(std::vector<unsigned char>& out, const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  unsigned char* buffer = 0; /*defined value for the test below, whatever the callee does*/
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth);
+  if(buffer) {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+/*Vector overload; returns 84 when in holds fewer bytes than w x h pixels need.*/
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84;
+  return encode(out, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
+}
+
+/*Encodes using the settings in state and appends the PNG bytes to out.*/
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                State& state) {
+  unsigned char* buffer = 0; /*defined value for the test below, whatever the callee does*/
+  size_t buffersize = 0;
+  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
+  if(buffer) {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+/*Vector overload; returns 84 when in holds fewer bytes than state.info_raw requires.*/
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                State& state) {
+  if(lodepng_get_raw_size(w, h, &state.info_raw) > in.size()) return 84;
+  return encode(out, in.empty() ? 
0 : &in[0], w, h, state);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+/*Encodes to a temporary vector and writes it to filename only when encoding succeeded.*/
+unsigned encode(const std::string& filename,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  std::vector<unsigned char> buffer;
+  unsigned error = encode(buffer, in, w, h, colortype, bitdepth);
+  if(!error) error = save_file(buffer, filename);
+  return error;
+}
+
+/*Vector overload; returns 84 when in holds fewer bytes than w x h pixels need.*/
+unsigned encode(const std::string& filename,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth) {
+  if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84;
+  return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_PNG */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
diff --git a/lodepng.h b/lodepng.h
new file mode 100644
index 0000000..476a206
--- /dev/null
+++ b/lodepng.h
@@ -0,0 +1,1930 @@
+/*
+LodePNG version 20190210
+
+Copyright (c) 2005-2019 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#ifndef LODEPNG_H
+#define LODEPNG_H
+
+#include <string.h> /*for size_t*/
+
+extern const char* LODEPNG_VERSION_STRING;
+
+/*
+The following #defines are used to create code sections. They can be disabled
+to disable code sections, which can give faster compile time and smaller binary.
+The "NO_COMPILE" defines are designed to be used to pass as defines to the
+compiler command to disable them without modifying this header, e.g.
+-DLODEPNG_NO_COMPILE_ZLIB for gcc.
+In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
+allow implementing a custom lodepng_crc32.
+*/
+/*deflate & zlib. If disabled, you must specify alternative zlib functions in
+the custom_zlib field of the compress and decompress settings*/
+#ifndef LODEPNG_NO_COMPILE_ZLIB
+#define LODEPNG_COMPILE_ZLIB
+#endif
+
+/*png encoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_PNG
+#define LODEPNG_COMPILE_PNG
+#endif
+
+/*deflate&zlib decoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_DECODER
+#define LODEPNG_COMPILE_DECODER
+#endif
+
+/*deflate&zlib encoder and png encoder*/
+#ifndef LODEPNG_NO_COMPILE_ENCODER
+#define LODEPNG_COMPILE_ENCODER
+#endif
+
+/*the optional built in harddisk file loading and saving functions*/
+#ifndef LODEPNG_NO_COMPILE_DISK
+#define LODEPNG_COMPILE_DISK
+#endif
+
+/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
+#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
+#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
+#endif
+
+/*ability to convert error numerical codes to English text string*/
+#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
+#define LODEPNG_COMPILE_ERROR_TEXT
+#endif
+
+/*Compile the default allocators (C's free, malloc and realloc). 
If you disable this,
+you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
+source files with custom allocators.*/
+#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
+#define LODEPNG_COMPILE_ALLOCATORS
+#endif
+
+/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
+#ifdef __cplusplus
+#ifndef LODEPNG_NO_COMPILE_CPP
+#define LODEPNG_COMPILE_CPP
+#endif
+#endif
+
+#ifdef LODEPNG_COMPILE_CPP
+#include <vector>
+#include <string>
+#endif /*LODEPNG_COMPILE_CPP*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*The PNG color types (also used for raw).*/
+typedef enum LodePNGColorType {
+  LCT_GREY = 0, /*grayscale: 1,2,4,8,16 bit*/
+  LCT_RGB = 2, /*RGB: 8,16 bit*/
+  LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/
+  LCT_GREY_ALPHA = 4, /*grayscale with alpha: 8,16 bit*/
+  LCT_RGBA = 6 /*RGB with alpha: 8,16 bit*/
+} LodePNGColorType;
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Converts PNG data in memory to raw pixel data.
+out: Output parameter. Pointer to buffer that will contain the raw pixel data.
+     After decoding, its size is w * h * (bytes per pixel) bytes larger than
+     initially. Bytes per pixel depends on colortype and bitdepth.
+     Must be freed after usage with free(*out).
+     Note: for 16-bit per channel colors, uses big endian format like PNG does.
+w: Output parameter. Pointer to width of pixel data.
+h: Output parameter. Pointer to height of pixel data.
+in: Memory buffer with the PNG file.
+insize: size of the in buffer.
+colortype: the desired color type for the raw output image. See explanation on PNG color types.
+bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/ +unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/ +unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize); + +/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/ +unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize); + +#ifdef LODEPNG_COMPILE_DISK +/* +Load PNG from disk, from file with given name. +Same as the other decode functions, but instead takes a filename as input. +*/ +unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/ +unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename); + +/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/ +unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename); +#endif /*LODEPNG_COMPILE_DISK*/ +#endif /*LODEPNG_COMPILE_DECODER*/ + + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Converts raw pixel data into a PNG image in memory. The colortype and bitdepth + of the output PNG image cannot be chosen, they are automatically determined + by the colortype, bitdepth and content of the input pixel data. + Note: for 16-bit per channel colors, needs big endian format like PNG does. +out: Output parameter. Pointer to buffer that will contain the PNG image data. + Must be freed after usage with free(*out). +outsize: Output parameter. Pointer to the size in bytes of the out buffer. +image: The raw pixel data to encode. 
The size of this buffer should be + w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth. +w: width of the raw pixel data in pixels. +h: height of the raw pixel data in pixels. +colortype: the color type of the raw input image. See explanation on PNG color types. +bitdepth: the bit depth of the raw input image. See explanation on PNG color types. +Return value: LodePNG error code (0 means no error). +*/ +unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/ +unsigned lodepng_encode32(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h); + +/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/ +unsigned lodepng_encode24(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h); + +#ifdef LODEPNG_COMPILE_DISK +/* +Converts raw pixel data into a PNG file on disk. +Same as the other encode functions, but instead takes a filename as output. +NOTE: This overwrites existing files without warning! +*/ +unsigned lodepng_encode_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/ +unsigned lodepng_encode32_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h); + +/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/ +unsigned lodepng_encode24_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h); +#endif /*LODEPNG_COMPILE_DISK*/ +#endif /*LODEPNG_COMPILE_ENCODER*/ + + +#ifdef LODEPNG_COMPILE_CPP +namespace lodepng { +#ifdef LODEPNG_COMPILE_DECODER +/*Same as lodepng_decode_memory, but decodes to an std::vector. 
The colortype +is the format to output the pixels to. Default is RGBA 8-bit per channel.*/ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const unsigned char* in, size_t insize, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const std::vector& in, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#ifdef LODEPNG_COMPILE_DISK +/* +Converts PNG file from disk to raw pixel data in memory. +Same as the other decode functions, but instead takes a filename as input. +*/ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const std::string& filename, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_DECODER */ + +#ifdef LODEPNG_COMPILE_ENCODER +/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype +is that of the raw input data. The output PNG color type will be auto chosen.*/ +unsigned encode(std::vector& out, + const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#ifdef LODEPNG_COMPILE_DISK +/* +Converts 32-bit RGBA raw pixel data into a PNG file on disk. +Same as the other encode functions, but instead takes a filename as output. +NOTE: This overwrites existing files without warning! 
+*/ +unsigned encode(const std::string& filename, + const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned encode(const std::string& filename, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_ENCODER */ +} /* namespace lodepng */ +#endif /*LODEPNG_COMPILE_CPP*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ERROR_TEXT +/*Returns an English description of the numerical error code.*/ +const char* lodepng_error_text(unsigned code); +#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ + +#ifdef LODEPNG_COMPILE_DECODER +/*Settings for zlib decompression*/ +typedef struct LodePNGDecompressSettings LodePNGDecompressSettings; +struct LodePNGDecompressSettings { + /* Check LodePNGDecoderSettings for more ignorable errors such as ignore_crc */ + unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/ + + /*use custom zlib decoder instead of built in one (default: null)*/ + unsigned (*custom_zlib)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGDecompressSettings*); + /*use custom deflate decoder instead of built in one (default: null) + if custom_zlib is used, custom_deflate is ignored since only the built in + zlib function will call custom_deflate*/ + unsigned (*custom_inflate)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGDecompressSettings*); + + const void* custom_context; /*optional custom settings for custom functions*/ +}; + +extern const LodePNGDecompressSettings lodepng_default_decompress_settings; +void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Settings for zlib compression. Tweaking these settings tweaks the balance +between speed and compression ratio. 
+*/ +typedef struct LodePNGCompressSettings LodePNGCompressSettings; +struct LodePNGCompressSettings /*deflate = compress*/ { + /*LZ77 related settings*/ + unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/ + unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/ + unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/ + unsigned minmatch; /*minimum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0 (NOTE(review): a default of 0 looks inconsistent with "3 is normally best" - confirm against lodepng_compress_settings_init)*/ + unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/ + unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/ + + /*use custom zlib encoder instead of built in one (default: null); + the signature matches lodepng_zlib_compress*/ + unsigned (*custom_zlib)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGCompressSettings*); + /*use custom deflate encoder instead of built in one (default: null); + the signature matches lodepng_deflate. + if custom_zlib is used, custom_deflate is ignored since only the built in + zlib function will call custom_deflate*/ + unsigned (*custom_deflate)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGCompressSettings*); + + const void* custom_context; /*optional custom settings for custom functions*/ +}; + +extern const LodePNGCompressSettings lodepng_default_compress_settings; +void lodepng_compress_settings_init(LodePNGCompressSettings* settings); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_PNG +/* +Color mode of an image. Contains all information required to decode the pixel +bits to RGBA colors. This information is the same as used in the PNG file +format, and is used both for PNG and raw image data in LodePNG. 
+*/ +typedef struct LodePNGColorMode { + /*header (IHDR)*/ + LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/ + unsigned bitdepth; /*bits per sample, see PNG standard or documentation further in this header file*/ + + /* + palette (PLTE and tRNS) + + Dynamically allocated with the colors of the palette, including alpha. + When encoding a PNG, to store your colors in the palette of the LodePNGColorMode, first use + lodepng_palette_clear, then for each color use lodepng_palette_add. + If you encode an image without alpha with palette, don't forget to put value 255 in each A byte of the palette. + + When decoding, by default you can ignore this palette, since LodePNG already + fills the palette colors in the pixels of the raw RGBA output. + + The palette is only supported for color type 3. + */ + unsigned char* palette; /*palette in RGBARGBA... order. The pointer must be either 0, or point to an allocated buffer of size 1024*/ + size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/ + + /* + transparent color key (tRNS) + + This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit. + For grayscale PNGs, r, g and b will all 3 be set to the same value. + + When decoding, by default you can ignore this information, since LodePNG sets + pixels with this key to transparent already in the raw RGBA output. + + The color key is only supported for color types 0 and 2. + */ + unsigned key_defined; /*is a transparent color key given? 
0 = false, 1 = true*/ + unsigned key_r; /*red/grayscale component of color key*/ + unsigned key_g; /*green component of color key*/ + unsigned key_b; /*blue component of color key*/ +} LodePNGColorMode; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_color_mode_init(LodePNGColorMode* info); +void lodepng_color_mode_cleanup(LodePNGColorMode* info); +/*return value is error code (0 means no error)*/ +unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source); +/* Makes a temporary LodePNGColorMode that does not need cleanup (no palette) */ +LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth); + +/*clear the palette; use this first when storing your own colors, then add each color with lodepng_palette_add*/ +void lodepng_palette_clear(LodePNGColorMode* info); +/*add 1 color (r, g, b, a) to the palette*/ +unsigned lodepng_palette_add(LodePNGColorMode* info, + unsigned char r, unsigned char g, unsigned char b, unsigned char a); + +/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/ +unsigned lodepng_get_bpp(const LodePNGColorMode* info); +/*get the amount of color channels used, based on colortype in the struct. +If a palette is used, it counts as 1 channel.*/ +unsigned lodepng_get_channels(const LodePNGColorMode* info); +/*is it a grayscale type? (only colortype 0 or 4)*/ +unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info); +/*has it got an alpha channel? (only colortype 4 or 6)*/ +unsigned lodepng_is_alpha_type(const LodePNGColorMode* info); +/*has it got a palette? (only colortype 3)*/ +unsigned lodepng_is_palette_type(const LodePNGColorMode* info); +/*only returns true if there is a palette and there is a value in the palette with alpha < 255. +Loops through the palette to check this.*/ +unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info); +/* +Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image. 
+Returns true if the image can have translucent or invisible pixels (it may still be opaque if it doesn't use such pixels). +Returns false if the image can only have opaque pixels. +In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values, +or if "key_defined" is true. +*/ +unsigned lodepng_can_have_alpha(const LodePNGColorMode* info); +/*Returns the byte size of a raw image buffer with given width, height and color mode*/ +size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color); + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*The information of a time chunk (tIME) in PNG.*/ +typedef struct LodePNGTime { + unsigned year; /*2 bytes used (0-65535)*/ + unsigned month; /*1-12*/ + unsigned day; /*1-31*/ + unsigned hour; /*0-23*/ + unsigned minute; /*0-59*/ + unsigned second; /*0-60 (to allow for leap seconds)*/ +} LodePNGTime; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/*Information about the PNG image, except pixels, width and height.*/ +typedef struct LodePNGInfo { + /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/ + unsigned compression_method;/*compression method of the original file. Always 0.*/ + unsigned filter_method; /*filter method of the original file*/ + unsigned interlace_method; /*interlace method of the original file: 0=none, 1=Adam7*/ + LodePNGColorMode color; /*color type and bits, palette and transparency of the PNG file*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /* + Suggested background color chunk (bKGD) + + This uses the same color mode and bit depth as the PNG (except no alpha channel), + with values truncated to the bit depth in the unsigned integer. + + For grayscale and palette PNGs, the value is stored in background_r. The values + in background_g and background_b are then unused. + + So when decoding, you may get these in a different color mode than the one you requested + for the raw pixels. 
+ + When encoding with auto_convert, you must use the color model defined in info_png.color for + these values. The encoder normally ignores info_png.color when auto_convert is on, but will + use it to interpret these values (and convert copies of them to its chosen color model). + + When encoding, avoid setting this to an expensive color, such as a non-gray value + when the image is gray, or the compression will be worse since it will be forced to + write the PNG with a more expensive color mode (when auto_convert is on). + + The decoder does not use this background color to edit the color of pixels. This is a + completely optional metadata feature. + */ + unsigned background_defined; /*is a suggested background color given?*/ + unsigned background_r; /*red/gray/palette component of suggested background color*/ + unsigned background_g; /*green component of suggested background color*/ + unsigned background_b; /*blue component of suggested background color*/ + + /* + non-international text chunks (tEXt and zTXt) + + The char** arrays each contain text_num strings. The actual messages are in + text_strings, while text_keys are keywords that give a short description of what + the actual text represents, e.g. Title, Author, Description, or anything else. + + All the string fields below including keys, names and language tags are null terminated. + The PNG specification uses null characters for the keys, names and tags, and forbids null + characters to appear in the main text which is why we can use null termination everywhere here. + + A keyword is minimum 1 character and maximum 79 characters long. It's + discouraged to use a single line length longer than 79 characters for texts. + + Don't allocate these text buffers yourself. Use the init/cleanup functions + correctly and use lodepng_add_text and lodepng_clear_text. 
+ */ + size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/ + char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/ + char** text_strings; /*the actual text*/ + + /* + international text chunks (iTXt) + Similar to the non-international text chunks, but with additional strings + "langtags" and "transkeys". + */ + size_t itext_num; /*the amount of international texts in this PNG*/ + char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/ + char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/ + char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/ + char** itext_strings; /*the actual international text - UTF-8 string*/ + + /*time chunk (tIME)*/ + unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/ + LodePNGTime time; /*the date and time values of the tIME chunk*/ + + /*phys chunk (pHYs)*/ + unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined; if 1, there is one*/ + unsigned phys_x; /*pixels per unit in x direction*/ + unsigned phys_y; /*pixels per unit in y direction*/ + unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/ + + /* + Color profile related chunks: gAMA, cHRM, sRGB, iCCP + + LodePNG does not apply any color conversions on pixels in the encoder or decoder and does not interpret these color + profile values. It merely passes on the information. If you wish to use color profiles and convert colors, please + use these values with a color management library. + + See the PNG, ICC and sRGB specifications for more information about the meaning of these values. + */ + + /* gAMA chunk: optional, overridden by sRGB or iCCP if those are present. */ + unsigned gama_defined; /* Whether a gAMA chunk is present (0 = not present, 1 = present). 
*/ + unsigned gama_gamma; /* Gamma exponent times 100000 */ + + /* cHRM chunk: optional, overridden by sRGB or iCCP if those are present. */ + unsigned chrm_defined; /* Whether a cHRM chunk is present (0 = not present, 1 = present). */ + unsigned chrm_white_x; /* White Point x times 100000 */ + unsigned chrm_white_y; /* White Point y times 100000 */ + unsigned chrm_red_x; /* Red x times 100000 */ + unsigned chrm_red_y; /* Red y times 100000 */ + unsigned chrm_green_x; /* Green x times 100000 */ + unsigned chrm_green_y; /* Green y times 100000 */ + unsigned chrm_blue_x; /* Blue x times 100000 */ + unsigned chrm_blue_y; /* Blue y times 100000 */ + + /* + sRGB chunk: optional. May not appear at the same time as iCCP. + If gAMA is also present gAMA must contain value 45455. + If cHRM is also present cHRM must contain respectively 31270,32900,64000,33000,30000,60000,15000,6000 + (presumably in the order of the chrm_ fields above: white x/y, red x/y, green x/y, blue x/y). + */ + unsigned srgb_defined; /* Whether an sRGB chunk is present (0 = not present, 1 = present). */ + unsigned srgb_intent; /* Rendering intent: 0=perceptual, 1=rel. colorimetric, 2=saturation, 3=abs. colorimetric */ + + /* + iCCP chunk: optional. May not appear at the same time as sRGB. + + LodePNG does not parse or use the ICC profile (except its color space header field for an edge case), a + separate library (not included in LodePNG) that handles the ICC data format is needed to use it for color + management and conversions. + + For encoding, if iCCP is present, gAMA and cHRM are recommended to be added as well with values that match the ICC + profile as closely as possible, if you wish to do this you should provide the correct values for gAMA and cHRM and + enable their '_defined' flags since LodePNG will not automatically compute them from the ICC profile. + + For encoding, the ICC profile is required by the PNG specification to be an "RGB" profile for non-gray + PNG color types and a "GRAY" profile for gray PNG color types. 
If you disable auto_convert, you must ensure + the ICC profile type matches your requested color type, else the encoder gives an error. If auto_convert is + enabled (the default), and the ICC profile is not a good match for the pixel data, this will result in an encoder + error if the pixel data has non-gray pixels for a GRAY profile, or a silent less-optimal compression of the pixel + data if the pixels could be encoded as grayscale but the ICC profile is RGB. + + To avoid this, do not set an ICC profile in the image unless there is a good reason for it, and when doing so + make sure you compute it carefully to avoid the above problems. + */ + unsigned iccp_defined; /* Whether an iCCP chunk is present (0 = not present, 1 = present). */ + char* iccp_name; /* Null terminated string with profile name, 1-79 bytes */ + /* + The ICC profile in iccp_profile_size bytes. + Don't allocate this buffer yourself. Use the init/cleanup functions + correctly and use lodepng_set_icc and lodepng_clear_icc. + */ + unsigned char* iccp_profile; + unsigned iccp_profile_size; /* The size of iccp_profile in bytes */ + + /* End of color profile related chunks */ + + + /* + unknown chunks: chunks not known by LodePNG, passed on byte for byte. + + There are 3 buffers, one for each position in the PNG where unknown chunks can appear. + Each buffer contains all unknown chunks for that position consecutively. + The 3 positions are: + 0: between IHDR and PLTE, 1: between PLTE and IDAT, 2: between IDAT and IEND. + + For encoding, do not store critical chunks or known chunks that are enabled with a "_defined" flag + above in here, since the encoder will blindly follow this and could then encode an invalid PNG file + (such as one with two IHDR chunks or the disallowed combination of sRGB with iCCP). But do use + this if you wish to store an ancillary chunk that is not supported by LodePNG (such as sPLT or hIST), + or any non-standard PNG chunk. + + Do not allocate or traverse this data yourself. 
Use the chunk traversing functions declared + later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct. + */ + unsigned char* unknown_chunks_data[3]; /*one buffer per position (0, 1, 2) described above*/ + size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGInfo; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_info_init(LodePNGInfo* info); +void lodepng_info_cleanup(LodePNGInfo* info); +/*return value is error code (0 means no error)*/ +unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source); + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts (key and string) at once*/ +void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/ + +unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, + const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/ +void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/ + +/*sets the ICC profile (name and profile_size bytes of profile data); replaces if exists*/ +unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size); +void lodepng_clear_icc(LodePNGInfo* info); /*use this to clear the ICC profile again after you set it*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/* +Converts raw buffer from one color type to another color type, based on +LodePNGColorMode structs to describe the input and output color type. +See the reference manual at the end of this header file to see which color conversions are supported. 
+return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported) +The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel +of the output color type (lodepng_get_bpp). 
+For < 8 bpp images, there should not be padding bits at the end of scanlines. +For 16-bit per channel colors, uses big endian format like PNG does. +Return value is LodePNG error code +*/ +unsigned lodepng_convert(unsigned char* out, const unsigned char* in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, + unsigned w, unsigned h); + +#ifdef LODEPNG_COMPILE_DECODER +/* +Settings for the decoder. This contains settings for the PNG and the Zlib +decoder, but not the Info settings from the Info structs. +*/ +typedef struct LodePNGDecoderSettings { + LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/ + + /* Check LodePNGDecompressSettings for more ignorable errors such as ignore_adler32 */ + unsigned ignore_crc; /*ignore CRC checksums*/ + unsigned ignore_critical; /*ignore unknown critical chunks*/ + unsigned ignore_end; /*ignore issues at end of file if possible (missing IEND chunk, too large chunk, ...)*/ + /* TODO: make a system involving warnings with levels and a strict mode instead. Other potentially recoverable + errors: srgb rendering intent value, size of content of ancillary chunks, more than 79 characters for some + strings, placement/combination rules for ancillary chunks, crc of unknown chunks, allowed characters + in string keys, etc... */ + + unsigned color_convert; /*whether to convert the PNG to the color type you want. 
Default: yes*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + unsigned read_text_chunks; /*whether to read the text chunks; if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/ + /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/ + unsigned remember_unknown_chunks; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGDecoderSettings; + +void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/*Strategy for choosing the PNG filter type of each scanline when encoding. +Selected through filter_strategy in LodePNGEncoderSettings.*/ +typedef enum LodePNGFilterStrategy { + /*every filter at zero*/ + LFS_ZERO, + /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/ + LFS_MINSUM, + /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending + on the image, this is better or worse than minsum.*/ + LFS_ENTROPY, + /* + Brute-force-search PNG filters by compressing each filter for each scanline. + Experimental, very slow, and only rarely gives better compression than MINSUM. + */ + LFS_BRUTE_FORCE, + /*use predefined_filters buffer: you specify the filter type for each scanline*/ + LFS_PREDEFINED +} LodePNGFilterStrategy; + +/*Gives characteristics about the integer RGBA colors of the image (count, alpha channel usage, bit depth, ...), +which helps decide which color model to use for encoding. +Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms. +NOTE: This is not related to the ICC color profile, search "iccp_profile" instead to find the ICC/chromaticity/... +fields in this header file.*/ +typedef struct LodePNGColorProfile { + unsigned colored; /*not grayscale*/ + unsigned key; /*image is not opaque and color key is possible instead of full alpha*/ + unsigned short key_r; /*key values, always as 16-bit, in 8-bit case the byte is duplicated, e.g. 
65535 means 255*/ + unsigned short key_g; /*see key_r*/ + unsigned short key_b; /*see key_r*/ + unsigned alpha; /*image is not opaque and alpha channel or alpha palette required*/ + unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/ + unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/ + unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for grayscale only. 16 if 16-bit per channel required.*/ + size_t numpixels; +} LodePNGColorProfile; + +void lodepng_color_profile_init(LodePNGColorProfile* profile); + +/*Get a LodePNGColorProfile of the image. The profile must already have been initialized +(see lodepng_color_profile_init). +NOTE: This is not related to the ICC color profile, search "iccp_profile" instead to find the ICC/chromaticity/... +fields in this header file.*/ +unsigned lodepng_get_color_profile(LodePNGColorProfile* profile, + const unsigned char* image, unsigned w, unsigned h, + const LodePNGColorMode* mode_in); +/*The function LodePNG uses internally to decide the PNG color with auto_convert. +Chooses an optimal color model, e.g. gray if only gray pixels, palette if < 256 colors, ...*/ +unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, + const unsigned char* image, unsigned w, unsigned h, + const LodePNGColorMode* mode_in); + +/*Settings for the encoder.*/ +typedef struct LodePNGEncoderSettings { + LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/ + + unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/ + + /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than + 8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to + completely follow the official PNG heuristic, filter_palette_zero must be true and + filter_strategy must be LFS_MINSUM*/ + unsigned filter_palette_zero; + /*Which filter strategy to use when not using zeroes due to filter_palette_zero. 
+ Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/ + LodePNGFilterStrategy filter_strategy; + /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with + the same length as the amount of scanlines in the image, and each value must be <= 5. You + have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero + must be set to 0 to ensure this is also used on palette or low bitdepth images.*/ + const unsigned char* predefined_filters; + + /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette). + If colortype is 3, PLTE is _always_ created.*/ + unsigned force_palette; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*add LodePNG identifier and version as a text chunk, for debugging*/ + unsigned add_id; + /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/ + unsigned text_compression; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGEncoderSettings; + +void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings); +#endif /*LODEPNG_COMPILE_ENCODER*/ + + +#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) +/*The settings, state and information for extended encoding and decoding.*/ +typedef struct LodePNGState { +#ifdef LODEPNG_COMPILE_DECODER + LodePNGDecoderSettings decoder; /*the decoding settings*/ +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER + LodePNGEncoderSettings encoder; /*the encoding settings*/ +#endif /*LODEPNG_COMPILE_ENCODER*/ + LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/ + LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/ + unsigned error; /*presumably the error code of the last operation that used this state (0 means no error) - TODO confirm*/ +#ifdef LODEPNG_COMPILE_CPP + /* For the lodepng::State subclass. 
NOTE(review): this member is C++ only; confirm LODEPNG_COMPILE_CPP is never defined in C builds. */ + virtual ~LodePNGState(){} +#endif /*LODEPNG_COMPILE_CPP*/ +} LodePNGState; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_state_init(LodePNGState* state); +void lodepng_state_cleanup(LodePNGState* state); +void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source); +#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ + +#ifdef LODEPNG_COMPILE_DECODER +/* +Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and +getting much more information about the PNG image and color mode. +*/ +unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize); + +/* +Read the PNG header, but not the actual data. This returns only the information +that is in the IHDR chunk of the PNG, such as width, height and color type. The +information is placed in the info_png field of the LodePNGState. +*/ +unsigned lodepng_inspect(unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize); +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* +Reads one metadata chunk (other than IHDR) of the PNG file and outputs what it +read in the state. Returns error code on failure. +Use lodepng_inspect first with a new state, then e.g. lodepng_chunk_find_const +to find the desired chunk type, and if non null use lodepng_inspect_chunk (with +chunk_pointer - start_of_file as pos). +Supports most metadata chunks from the PNG standard (gAMA, bKGD, tEXt, ...). +Ignores unsupported, unknown, non-metadata or IHDR chunks (without error). +Requirements: &in[pos] must point to start of a chunk, must use regular +lodepng_inspect first since format of most other chunks depends on IHDR, and if +there is a PLTE chunk, that one must be inspected before tRNS or bKGD. 
+*/ +unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos, + const unsigned char* in, size_t insize); + +#ifdef LODEPNG_COMPILE_ENCODER +/*Encoding counterpart of lodepng_decode: takes a LodePNGState in addition to the raw image. +This function allocates the out buffer with standard malloc and stores the size in *outsize.*/ +unsigned lodepng_encode(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h, + LodePNGState* state); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/* +The lodepng_chunk functions are normally not needed, except to traverse the +unknown chunks stored in the LodePNGInfo struct, or add new ones to it. +They also allow traversing the chunks of an encoded PNG file yourself. + +The chunk pointer always points to the beginning of the chunk itself, that is +the first byte of the 4 length bytes. + +In the PNG file format, chunks have the following format: +-4 bytes length: length of the data of the chunk in bytes (chunk itself is 12 bytes longer) +-4 bytes chunk type (ASCII a-z,A-Z only, see below) +-length bytes of data (may be 0 bytes if length was 0) +-4 bytes of CRC, computed on chunk name + data + +The first chunk starts at the 8th byte of the PNG file, the entire rest of the file +consists of concatenated chunks with the above format. + +PNG standard chunk ASCII naming conventions: +-First byte: uppercase = critical, lowercase = ancillary +-Second byte: uppercase = public, lowercase = private +-Third byte: must be uppercase +-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy +*/ + +/* +Gets the length of the data of the chunk. Total chunk length has 12 bytes more. +There must be at least 4 bytes to read from. If the result value is too large, +it may be corrupt data. 
+*/ +unsigned lodepng_chunk_length(const unsigned char* chunk); + +/*puts the 4-byte chunk type in type as a null terminated string (type has room for 5 chars)*/ +void lodepng_chunk_type(char type[5], const unsigned char* chunk); + +/*check if the type of the chunk is the given type*/ +unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type); + +/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/ +unsigned char lodepng_chunk_ancillary(const unsigned char* chunk); + +/*0: public, 1: private (see PNG standard)*/ +unsigned char lodepng_chunk_private(const unsigned char* chunk); + +/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/ +unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk); + +/*get pointer to the data of the chunk, where the input points to the header of the chunk. +The _const variant is the same for a const chunk pointer.*/ +unsigned char* lodepng_chunk_data(unsigned char* chunk); +const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk); + +/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/ +unsigned lodepng_chunk_check_crc(const unsigned char* chunk); + +/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/ +void lodepng_chunk_generate_crc(unsigned char* chunk); + +/* +Iterates to the next chunk, allows iterating through all chunks of the PNG file. +Input must be at the beginning of a chunk (result of a previous lodepng_chunk_next call, +or the 8th byte of a PNG file which always has the first chunk), or alternatively may +point to the first byte of the PNG file (which is not a chunk but the magic header, the +function will then skip over it and return the first real chunk). +Expects at least 8 readable bytes of memory in the input pointer. +Will output pointer to the start of the next chunk or the end of the file if there +is no more chunk after this. Start this process at the 8th byte of the PNG file. +In a non-corrupt PNG file, the last chunk should have name "IEND". 
+*/ +unsigned char* lodepng_chunk_next(unsigned char* chunk); +const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk); + +/*Finds the first chunk with the given type in the range [chunk, end), or returns NULL if not found.*/ +unsigned char* lodepng_chunk_find(unsigned char* chunk, const unsigned char* end, const char type[5]); +const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]); + +/* +Appends chunk to the data in out. The given chunk should already have its chunk header. +The out variable and outlength are updated to reflect the new reallocated buffer. +Returns error code (0 if it went ok) +*/ +unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk); + +/* +Appends new chunk to out. The chunk to append is given by giving its length, type +and data separately. The type is a 4-letter string. +The out variable and outlength are updated to reflect the new reallocated buffer. +Returns error code (0 if it went ok) +*/ +unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length, + const char* type, const unsigned char* data); + + +/*Calculate CRC32 of buffer*/ +unsigned lodepng_crc32(const unsigned char* buf, size_t len); +#endif /*LODEPNG_COMPILE_PNG*/ + + +#ifdef LODEPNG_COMPILE_ZLIB +/* +This zlib part can be used independently to zlib compress and decompress a +buffer. It cannot be used to create gzip files however, and it only supports the +part of zlib that is required for PNG, it does not support dictionaries. +*/ + +#ifdef LODEPNG_COMPILE_DECODER +/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/ +unsigned lodepng_inflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings); + +/* +Decompresses Zlib data. Reallocates the out buffer and appends the data.
The +data must conform to the zlib specification. +Either *out must be NULL and *outsize must be 0, or *out must be a valid +buffer and *outsize its size in bytes. out must be freed by user after usage. +*/ +unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Compresses data with Zlib. Reallocates the out buffer and appends the data. +Zlib adds a small header and trailer around the deflate data. +The data is output in the format of the zlib specification. +Either *out must be NULL and *outsize must be 0, or *out must be a valid +buffer and *outsize its size in bytes. out must be freed by user after usage. +*/ +unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings); + +/* +Find length-limited Huffman code for given frequencies. This function is in the +public interface only for tests; it is used internally by lodepng_deflate. +*/ +unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, + size_t numcodes, unsigned maxbitlen); + +/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/ +unsigned lodepng_deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings); + +#endif /*LODEPNG_COMPILE_ENCODER*/ +#endif /*LODEPNG_COMPILE_ZLIB*/ + +#ifdef LODEPNG_COMPILE_DISK +/* +Load a file from disk into buffer. The function allocates the out buffer, and +after usage you should free it. +out: output parameter, contains pointer to loaded buffer.
+outsize: output parameter, size of the allocated out buffer +filename: the path to the file to load +return value: error code (0 means ok) +*/ +unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename); + +/* +Save a file from buffer to disk. Warning, if it exists, this function overwrites +the file without warning! +buffer: the buffer to write +buffersize: size of the buffer to write +filename: the path to the file to save to +return value: error code (0 means ok) +*/ +unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename); +#endif /*LODEPNG_COMPILE_DISK*/ + +#ifdef LODEPNG_COMPILE_CPP +/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */ +namespace lodepng { +#ifdef LODEPNG_COMPILE_PNG +class State : public LodePNGState { + public: + State(); + State(const State& other); + virtual ~State(); + State& operator=(const State& other); +}; + +#ifdef LODEPNG_COMPILE_DECODER +/* Same as other lodepng::decode, but using a State for more settings and information. */ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const unsigned char* in, size_t insize); +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const std::vector& in); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* Same as other lodepng::encode, but using a State for more settings and information. */ +unsigned encode(std::vector& out, + const unsigned char* in, unsigned w, unsigned h, + State& state); +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + State& state); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DISK +/* +Load a file from disk into an std::vector. +return value: error code (0 means ok) +*/ +unsigned load_file(std::vector& buffer, const std::string& filename); + +/* +Save the binary data in an std::vector to a file on disk. 
The file is overwritten +without warning. +*/ +unsigned save_file(const std::vector& buffer, const std::string& filename); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_PNG */ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_DECODER +/* Zlib-decompress an unsigned char buffer */ +unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); + +/* Zlib-decompress an std::vector */ +unsigned decompress(std::vector& out, const std::vector& in, + const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); +#endif /* LODEPNG_COMPILE_DECODER */ + +#ifdef LODEPNG_COMPILE_ENCODER +/* Zlib-compress an unsigned char buffer */ +unsigned compress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGCompressSettings& settings = lodepng_default_compress_settings); + +/* Zlib-compress an std::vector */ +unsigned compress(std::vector& out, const std::vector& in, + const LodePNGCompressSettings& settings = lodepng_default_compress_settings); +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_ZLIB */ +} /* namespace lodepng */ +#endif /*LODEPNG_COMPILE_CPP*/ + +/* +TODO: +[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often +[.] 
check compatibility with various compilers - done but needs to be redone for every newer version +[X] converting color to 16-bit per channel types +[X] support color profile chunk types (but never let them touch RGB values by default) +[ ] support all public PNG chunk types (almost done except sBIT, sPLT and hIST) +[ ] make sure encoder generates no chunks with size > (2^31)-1 +[ ] partial decoding (stream processing) +[X] let the "isFullyOpaque" function check color keys and transparent palettes too +[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl" +[ ] allow treating some errors like warnings, when image is recoverable (e.g. 69, 57, 58) +[ ] make warnings like: oob palette, checksum fail, data after iend, wrong/unknown crit chunk, no null terminator in text, ... +[ ] error messages with line numbers (and version) +[ ] errors in state instead of as return code? +[ ] new errors/warnings like suspiciously big decompressed ztxt or iccp chunk +[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes +[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ... +[ ] allow user to give data (void*) to custom allocator +[ ] provide alternatives for C library functions not present on some platforms (memcpy, ...) +[ ] rename "grey" to "gray" everywhere since "color" also uses US spelling (keep "grey" copies for backwards compatibility) +*/ + +#endif /*LODEPNG_H inclusion guard*/ + +/* +LodePNG Documentation +--------------------- + +0. table of contents +-------------------- + + 1. about + 1.1. supported features + 1.2. features not supported + 2. C and C++ version + 3. security + 4. decoding + 5. encoding + 6. color conversions + 6.1. PNG color types + 6.2. color conversions + 6.3. padding bits + 6.4. A note about 16-bits per channel and endianness + 7. error values + 8. chunks and PNG editing + 9. compiler support + 10. 
examples + 10.1. decoder C++ example + 10.2. decoder C example + 11. state settings reference + 12. changes + 13. contact information + + +1. about +-------- + +PNG is a file format to store raster images losslessly with good compression, +supporting different color types and alpha channel. + +LodePNG is a PNG codec according to the Portable Network Graphics (PNG) +Specification (Second Edition) - W3C Recommendation 10 November 2003. + +The specifications used are: + +*) Portable Network Graphics (PNG) Specification (Second Edition): + http://www.w3.org/TR/2003/REC-PNG-20031110 +*) RFC 1950 ZLIB Compressed Data Format version 3.3: + http://www.gzip.org/zlib/rfc-zlib.html +*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3: + http://www.gzip.org/zlib/rfc-deflate.html + +The most recent version of LodePNG can currently be found at +http://lodev.org/lodepng/ + +LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds +extra functionality. + +LodePNG consists of two files: +-lodepng.h: the header file for both C and C++ +-lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage + +If you want to start using LodePNG right away without reading this doc, get the +examples from the LodePNG website to see how to use it in code, or check the +smaller examples in chapter 10 here. + +LodePNG is simple but only supports the basic requirements. To achieve +simplicity, the following design choices were made: There are no dependencies +on any external library. There are functions to decode and encode a PNG with +a single function call, and extended versions of these functions taking a +LodePNGState struct allowing you to specify or get more information. By default +the colors of the raw image are always RGB or RGBA, no matter what color type +the PNG file uses. To read and write files, there are simple functions to +convert the files to/from buffers in memory.
+ +This all makes LodePNG suitable for loading textures in games, demos and small +programs, ... It's less suitable for full fledged image editors, loading PNGs +over network (it requires all the image data to be available before decoding can +begin), life-critical systems, ... + +1.1. supported features +----------------------- + +The following features are supported by the decoder: + +*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image, + or the same color type as the PNG +*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image +*) Adam7 interlace and deinterlace for any color type +*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk +*) support for alpha channels, including RGBA color model, translucent palettes and color keying +*) zlib decompression (inflate) +*) zlib compression (deflate) +*) CRC32 and ADLER32 checksums +*) colorimetric color profile conversions: currently experimentally available in lodepng_util.cpp only, + plus alternatively ability to pass on chroma/gamma/ICC profile information to other color management system. +*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks. +*) the following chunks are supported by both encoder and decoder: + IHDR: header information + PLTE: color palette + IDAT: pixel data + IEND: the final chunk + tRNS: transparency for palettized images + tEXt: textual information + zTXt: compressed textual information + iTXt: international textual information + bKGD: suggested background color + pHYs: physical dimensions + tIME: modification time + cHRM: RGB chromaticities + gAMA: RGB gamma correction + iCCP: ICC color profile + sRGB: rendering intent + +1.2. features not supported +--------------------------- + +The following features are _not_ supported: + +*) some features needed to make a conformant PNG-Editor might be still missing. 
+*) partial loading/stream processing. All data must be available and is processed in one call. +*) The following public chunks are not (yet) supported but treated as unknown chunks by LodePNG: + sBIT + hIST + sPLT + + +2. C and C++ version +-------------------- + +The C version uses buffers allocated with alloc that you need to free() +yourself. You need to use init and cleanup functions for each struct whenever +using a struct from the C version to avoid exploits and memory leaks. + +The C++ version has extra functions with std::vectors in the interface and the +lodepng::State class which is a LodePNGState with constructor and destructor. + +These files work without modification for both C and C++ compilers because all +the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers +ignore it, and the C code is made to compile both with strict ISO C90 and C++. + +To use the C++ version, you need to rename the source file to lodepng.cpp +(instead of lodepng.c), and compile it with a C++ compiler. + +To use the C version, you need to rename the source file to lodepng.c (instead +of lodepng.cpp), and compile it with a C compiler. + + +3. Security +----------- + +Even if carefully designed, it's always possible that LodePNG contains possible +exploits. If you discover one, please let me know, and it will be fixed. + +When using LodePNG, care has to be taken with the C version of LodePNG, as well +as the C-style structs when working with C++. The following conventions are used +for all C-style structs: + +-if a struct has a corresponding init function, always call the init function when making a new one +-if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks +-if a struct has a corresponding copy function, use the copy function instead of "=". + The destination must also be inited already. + + +4. Decoding +----------- + +Decoding converts a PNG compressed image to a raw pixel buffer. 
+ +Most documentation on using the decoder is at its declarations in the header +above. For C, simple decoding can be done with functions such as +lodepng_decode32, and more advanced decoding can be done with the struct +LodePNGState and lodepng_decode. For C++, all decoding can be done with the +various lodepng::decode functions, and lodepng::State can be used for advanced +features. + +When using the LodePNGState, it uses the following fields for decoding: +*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here +*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get +*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use + +LodePNGInfo info_png +-------------------- + +After decoding, this contains extra information of the PNG image, except the actual +pixels, width and height because these are already gotten directly from the decoder +functions. + +It contains for example the original color type of the PNG image, text comments, +suggested background color, etc... More details about the LodePNGInfo struct are +at its declaration documentation. + +LodePNGColorMode info_raw +------------------------- + +When decoding, here you can specify which color type you want +the resulting raw image to be. If this is different from the colortype of the +PNG, then the decoder will automatically convert the result. This conversion +always works, except if you want it to convert a color PNG to grayscale or to +a palette with missing colors. + +By default, 32-bit color is used for the result. + +LodePNGDecoderSettings decoder +------------------------------ + +The settings can be used to ignore the errors created by invalid CRC and Adler32 +chunks, and to disable the decoding of tEXt chunks. + +There's also a setting color_convert, true by default. 
If false, no conversion +is done, the resulting data will be as it was in the PNG (after decompression) +and you'll have to puzzle the colors of the pixels together yourself using the +color type information in the LodePNGInfo. + + +5. Encoding +----------- + +Encoding converts a raw pixel buffer to a PNG compressed image. + +Most documentation on using the encoder is at its declarations in the header +above. For C, simple encoding can be done with functions such as +lodepng_encode32, and more advanced encoding can be done with the struct +LodePNGState and lodepng_encode. For C++, all encoding can be done with the +various lodepng::encode functions, and lodepng::State can be used for advanced +features. + +Like the decoder, the encoder can also give errors. However, it gives fewer errors +since the encoder input is trusted, whereas the decoder input (a PNG image that could +be forged by anyone) is not trusted. + +When using the LodePNGState, it uses the following fields for encoding: +*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be. +*) LodePNGColorMode info_raw: here you say what color type the raw image (the input) has +*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use + +LodePNGInfo info_png +-------------------- + +When encoding, you use this the opposite way as when decoding: for encoding, +you fill in the values you want the PNG to have before encoding. By default it's +not needed to specify a color type for the PNG since it's automatically chosen, +but it's possible to choose it yourself given the right settings. + +The encoder will not always exactly match the LodePNGInfo struct you give, +it tries to match it as closely as possible. Some things are ignored by the encoder. The +encoder uses, for example, the following settings from it when applicable: +colortype and bitdepth, text chunks, time chunk, the color key, the palette, the +background color, the interlace method, unknown chunks, ...
+ +When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk. +If the palette contains any colors for which the alpha channel is not 255 (so +there are translucent colors in the palette), it'll add a tRNS chunk. + +LodePNGColorMode info_raw +------------------------- + +You specify the color type of the raw image that you give to the input here, +including a possible transparent color key and palette you happen to be using in +your raw image data. + +By default, 32-bit color is assumed, meaning your input has to be in RGBA +format with 4 bytes (unsigned chars) per pixel. + +LodePNGEncoderSettings encoder +------------------------------ + +The following settings are supported (some are in sub-structs): +*) auto_convert: when this option is enabled, the encoder will +automatically choose the smallest possible color mode (including color key) that +can encode the colors of all pixels without information loss. +*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree, + 2 = dynamic huffman tree (best compression). Should be 2 for proper + compression. +*) use_lz77: whether or not to use LZ77 for compressed block types. Should be + true for proper compression. +*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value + 2048 by default, but can be set to 32768 for better, but slow, compression. +*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE + chunk if force_palette is true. This can be used as a suggested palette to convert + to by viewers that don't support more than 256 colors (if those still exist) +*) add_id: add text chunk "Encoder: LodePNG <version>" to the image. +*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks. + zTXt chunks use zlib compression on the text. This gives a smaller result on + large texts but a larger result on small texts (such as a single program name).
+ It's all tEXt or all zTXt though, there's no separate setting per text yet. + + +6. color conversions +-------------------- + +An important thing to note about LodePNG, is that the color type of the PNG, and +the color type of the raw image, are completely independent. By default, when +you decode a PNG, you get the result as a raw image in the color type you want, +no matter whether the PNG was encoded with a palette, grayscale or RGBA color. +And if you encode an image, by default LodePNG will automatically choose the PNG +color type that gives good compression based on the values of colors and amount +of colors in the image. It can be configured to let you control it instead as +well, though. + +To be able to do this, LodePNG does conversions from one color mode to another. +It can convert from almost any color type to any other color type, except the +following conversions: RGB to grayscale is not supported, and converting to a +palette when the palette doesn't have a required color is not supported. This is +not supported on purpose: this is information loss which requires a color +reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to gray +is easy, but there are multiple ways if you want to give some channels more +weight). + +By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB +color, no matter what color type the PNG has. And by default when encoding, +LodePNG automatically picks the best color model for the output PNG, and expects +the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control +the color format of the images yourself, you can skip this chapter. + +6.1. PNG color types +-------------------- + +A PNG image can have many color types, ranging from 1-bit color to 64-bit color, +as well as palettized color modes. After the zlib decompression and unfiltering +in the PNG image is done, the raw pixel data will have that color type and thus +a certain amount of bits per pixel.
If you want the output raw image after +decoding to have another color type, a conversion is done by LodePNG. + +The PNG specification gives the following color types: + +0: grayscale, bit depths 1, 2, 4, 8, 16 +2: RGB, bit depths 8 and 16 +3: palette, bit depths 1, 2, 4 and 8 +4: grayscale with alpha, bit depths 8 and 16 +6: RGBA, bit depths 8 and 16 + +Bit depth is the amount of bits per pixel per color channel. So the total amount +of bits per pixel is: amount of channels * bitdepth. + +6.2. color conversions +---------------------- + +As explained in the sections about the encoder and decoder, you can specify +color types and bit depths in info_png and info_raw to change the default +behaviour. + +If, when decoding, you want the raw image to be something else than the default, +you need to set the color type and bit depth you want in the LodePNGColorMode, +or the parameters colortype and bitdepth of the simple decoding function. + +If, when encoding, you use another color type than the default in the raw input +image, you need to specify its color type and bit depth in the LodePNGColorMode +of the raw image, or use the parameters colortype and bitdepth of the simple +encoding function. + +If, when encoding, you don't want LodePNG to choose the output PNG color type +but control it yourself, you need to set auto_convert in the encoder settings +to false, and specify the color type you want in the LodePNGInfo of the +encoder (including palette: it can generate a palette if auto_convert is true, +otherwise not). + +If the input and output color type differ (whether user chosen or auto chosen), +LodePNG will do a color conversion, which follows the rules below, and may +sometimes result in an error. 
+ +To avoid some confusion: +-the decoder converts from PNG to raw image +-the encoder converts from raw image to PNG +-the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image +-the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG +-when encoding, the color type in LodePNGInfo is ignored if auto_convert + is enabled, it is automatically generated instead +-when decoding, the color type in LodePNGInfo is set by the decoder to that of the original + PNG image, but it can be ignored since the raw image has the color type you requested instead +-if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion + between the color types is done if the color types are supported. If it is not + supported, an error is returned. If the types are the same, no conversion is done. +-even though some conversions aren't supported, LodePNG supports loading PNGs from any + colortype and saving PNGs to any colortype, sometimes it just requires preparing + the raw image correctly before encoding. +-both encoder and decoder use the same color converter. + +The function lodepng_convert does the color conversion. It is available in the +interface but normally isn't needed since the encoder and decoder already call +it. + +Non supported color conversions: +-color to grayscale when non-gray pixels are present: no error is thrown, but +the result will look ugly because only the red channel is taken (it assumes all +three channels are the same in this case so ignores green and blue). The reason +no error is given is to allow converting from three-channel grayscale images to +one-channel even if there are numerical imprecisions. 
+-anything to palette when the palette does not have an exact match for a from-color +in it: in this case an error is thrown + +Supported color conversions: +-anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA +-any gray or gray+alpha, to gray or gray+alpha +-anything to a palette, as long as the palette has the requested colors in it +-removing alpha channel +-higher to smaller bitdepth, and vice versa + +If you want no color conversion to be done (e.g. for speed or control): +-In the encoder, you can make it save a PNG with any color type by giving the +raw color mode and LodePNGInfo the same color mode, and setting auto_convert to +false. +-In the decoder, you can make it store the pixel data in the same color type +as the PNG has, by setting the color_convert setting to false. Settings in +info_raw are then ignored. + +6.3. padding bits +----------------- + +In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines +have a bit amount that isn't a multiple of 8, then padding bits are used so that each +scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output. +The raw input image you give to the encoder, and the raw output image you get from the decoder +will NOT have these padding bits, e.g. in the case of a 1-bit image with a width +of 7 pixels, the first pixel of the second scanline will be the 8th bit of the first byte, +not the first bit of a new byte. + +6.4. A note about 16-bits per channel and endianness +---------------------------------------------------- + +LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like +for any other color format. The 16-bit values are stored in big endian (most +significant byte first) in these arrays. This is the opposite order of the +little endian used by x86 CPUs. + +LodePNG always uses big endian because the PNG file format does so internally.
+Conversions to other formats than PNG uses internally are not supported by +LodePNG on purpose, there are myriads of formats, including endianness of 16-bit +colors, the order in which you store R, G, B and A, and so on. Supporting and +converting to/from all that is outside the scope of LodePNG. + +This may mean that, depending on your use case, you may want to convert the big +endian output of LodePNG to little endian with a for loop. This is certainly not +always needed, many applications and libraries support big endian 16-bit colors +anyway, but it means you cannot simply cast the unsigned char* buffer to an +unsigned short* buffer on x86 CPUs. + + +7. error values +--------------- + +All functions in LodePNG that return an error code, return 0 if everything went +OK, or a non-zero code if there was an error. + +The meaning of the LodePNG error values can be retrieved with the function +lodepng_error_text: given the numerical error code, it returns a description +of the error in English as a string. + +Check the implementation of lodepng_error_text to see the meaning of each code. + + +8. chunks and PNG editing +------------------------- + +If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG +editor that should follow the rules about handling of unknown chunks, or if your +program is able to read other types of chunks than the ones handled by LodePNG, +then that's possible with the chunk functions of LodePNG. + +A PNG chunk has the following layout: + +4 bytes length +4 bytes type name +length bytes data +4 bytes CRC + +8.1. iterating through chunks +----------------------------- + +If you have a buffer containing the PNG image data, then the first chunk (the +IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the +signature of the PNG and are not part of a chunk. But if you start at byte 8 +then you have a chunk, and can check the following things of it. 
+ +NOTE: none of these functions check for memory buffer boundaries. To avoid +exploits, always make sure the buffer contains all the data of the chunks. +When using lodepng_chunk_next, make sure the returned value is within the +allocated memory. + +unsigned lodepng_chunk_length(const unsigned char* chunk): + +Get the length of the chunk's data. The total chunk length is this length + 12. + +void lodepng_chunk_type(char type[5], const unsigned char* chunk): +unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type): + +Get the type of the chunk or compare if it's a certain type. + +unsigned char lodepng_chunk_ancillary(const unsigned char* chunk): +unsigned char lodepng_chunk_private(const unsigned char* chunk): +unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk): + +Check if the chunk is ancillary, i.e. not critical (in the PNG standard only IHDR, PLTE, IDAT and IEND are critical). +Check if the chunk is private (public chunks are part of the standard, private ones not). +Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical +chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your +program doesn't handle that type of unknown chunk. + +unsigned char* lodepng_chunk_data(unsigned char* chunk): +const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk): + +Get a pointer to the start of the data of the chunk. + +unsigned lodepng_chunk_check_crc(const unsigned char* chunk): +void lodepng_chunk_generate_crc(unsigned char* chunk): + +Check if the crc is correct or generate a correct one. + +unsigned char* lodepng_chunk_next(unsigned char* chunk): +const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk): + +Iterate to the next chunk. This works if you have a buffer with consecutive chunks.
Note that these +functions do no boundary checking of the allocated data whatsoever, so make sure there is enough +data available in the buffer to be able to go to the next chunk. + +unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk): +unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length, + const char* type, const unsigned char* data): + +These functions are used to create new chunks that are appended to the data in *out that has +length *outlength. The append function appends an existing chunk to the new data. The create +function creates a new chunk with the given parameters and appends it. Type is the 4-letter +name of the chunk. + +8.2. chunks in info_png +----------------------- + +The LodePNGInfo struct contains fields with the unknown chunks in it. It has 3 +buffers (each with size) to contain 3 types of unknown chunks: +the ones that come before the PLTE chunk, the ones that come between the PLTE +and the IDAT chunks, and the ones that come after the IDAT chunks. +It's necessary to make the distinction between these 3 cases because the PNG +standard requires keeping the ordering of unknown chunks compared to the critical +chunks, but does not force any other ordering rules. + +info_png.unknown_chunks_data[0] is the chunks before PLTE +info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT +info_png.unknown_chunks_data[2] is the chunks after IDAT + +The chunks in these 3 buffers can be iterated through and read by using the same +way described in the previous subchapter. + +When using the decoder to decode a PNG, you can make it store all unknown chunks +if you set the option settings.remember_unknown_chunks to 1. By default, this +option is off (0). + +The encoder will always encode unknown chunks that are stored in the info_png.
+If you need it to add a particular chunk that isn't known by LodePNG, you can +use lodepng_chunk_append or lodepng_chunk_create to add it to the chunk data in +info_png.unknown_chunks_data[x]. + +Chunks that are known by LodePNG should not be added in that way. E.g. to make +LodePNG add a bKGD chunk, set background_defined to true and add the correct +parameters there instead. + + +9. compiler support +------------------- + +No libraries other than the current standard C library are needed to compile +LodePNG. For the C++ version, only the standard C++ library is needed on top. +Add the files lodepng.c(pp) and lodepng.h to your project, include +lodepng.h where needed, and your program can read/write PNG files. + +It is compatible with C90 and up, and C++03 and up. + +If performance is important, use optimization when compiling! For both the +encoder and decoder, this makes a large difference. + +Make sure that LodePNG is compiled with the same compiler of the same version +and with the same settings as the rest of the program, or the interfaces with +std::vectors and std::strings in C++ can be incompatible. + +CHAR_BIT must be 8 or higher, because LodePNG uses unsigned chars for octets. + +*) gcc and g++ + +LodePNG is developed in gcc so this compiler is natively supported. It gives no +warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++ +version 4.7.1 on Linux, 32-bit and 64-bit. + +*) Clang + +Fully supported and warning-free. + +*) Mingw + +The Mingw compiler (a port of gcc for Windows) should be fully supported by +LodePNG. + +*) Visual Studio and Visual C++ Express Edition + +LodePNG should be warning-free with warning level W4. Two warnings were disabled +with pragmas though: warning 4244 about implicit conversions, and warning 4996 +where it wants to use a non-standard function fopen_s instead of the standard C +fopen.
+ +Visual Studio may want "stdafx.h" files to be included in each source file and +give an error "unexpected end of file while looking for precompiled header". +This is not standard C++ and will not be added to the stock LodePNG. You can +disable it for lodepng.cpp only by right clicking it, Properties, C/C++, +Precompiled Headers, and set it to Not Using Precompiled Headers there. + +NOTE: Modern versions of VS should be fully supported, but old versions, e.g. +VS6, are not guaranteed to work. + +*) Compilers on Macintosh + +LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for +C and C++. + +*) Other Compilers + +If you encounter problems with any compiler, feel free to let me know and I may +try to fix it if the compiler is modern and standards compliant. + + +10. examples +------------ + +This decoder example shows the most basic usage of LodePNG. More complex +examples can be found on the LodePNG website. + +10.1. decoder C++ example +------------------------- + +#include "lodepng.h" +#include <iostream> + +int main(int argc, char *argv[]) { + const char* filename = argc > 1 ? argv[1] : "test.png"; + + //load and decode + std::vector<unsigned char> image; + unsigned width, height; + unsigned error = lodepng::decode(image, width, height, filename); + + //if there's an error, display it + if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl; + + //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ... +} + +10.2. decoder C example +----------------------- + +#include "lodepng.h" + +int main(int argc, char *argv[]) { + unsigned error; + unsigned char* image; + unsigned width, height; + const char* filename = argc > 1 ? argv[1] : "test.png"; + + error = lodepng_decode32_file(&image, &width, &height, filename); + + if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error)); + + / * use image here * / + + free(image); + return 0; +} + +11.
state settings reference +---------------------------- + +A quick reference of some settings to set on the LodePNGState + +For decoding: + +state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums +state.decoder.zlibsettings.custom_...: use custom inflate function +state.decoder.ignore_crc: ignore CRC checksums +state.decoder.ignore_critical: ignore unknown critical chunks +state.decoder.ignore_end: ignore missing IEND chunk. May fail if this corruption causes other errors +state.decoder.color_convert: convert internal PNG color to chosen one +state.decoder.read_text_chunks: whether to read in text metadata chunks +state.decoder.remember_unknown_chunks: whether to read in unknown chunks +state.info_raw.colortype: desired color type for decoded image +state.info_raw.bitdepth: desired bit depth for decoded image +state.info_raw....: more color settings, see struct LodePNGColorMode +state.info_png....: no settings for decoder but output, see struct LodePNGInfo + +For encoding: + +state.encoder.zlibsettings.btype: disable compression by setting it to 0 +state.encoder.zlibsettings.use_lz77: use LZ77 in compression +state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize +state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match +state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching +state.encoder.zlibsettings.lazymatching: try one more LZ77 matching +state.encoder.zlibsettings.custom_...: use custom deflate function +state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png +state.encoder.filter_palette_zero: PNG filter strategy for palette +state.encoder.filter_strategy: PNG filter strategy to encode with +state.encoder.force_palette: add palette even if not encoding to one +state.encoder.add_id: add LodePNG identifier and version as a text chunk +state.encoder.text_compression: use compressed text chunks for metadata +state.info_raw.colortype: color type of raw input image you provide
+state.info_raw.bitdepth: bit depth of raw input image you provide +state.info_raw: more color settings, see struct LodePNGColorMode +state.info_png.color.colortype: desired color type if auto_convert is false +state.info_png.color.bitdepth: desired bit depth if auto_convert is false +state.info_png.color....: more color settings, see struct LodePNGColorMode +state.info_png....: more PNG related settings, see struct LodePNGInfo + + +12. changes +----------- + +The version number of LodePNG is the date of the change given in the format +yyyymmdd. + +Some changes aren't backwards compatible. Those are indicated with a (!) +symbol. + +*) 30 dec 2018: code style changes only: removed newlines before opening braces. +*) 10 sep 2018: added way to inspect metadata chunks without full decoding. +*) 19 aug 2018 (!): fixed color mode bKGD is encoded with and made it use + palette index in case of palette. +*) 10 aug 2018 (!): added support for gAMA, cHRM, sRGB and iCCP chunks. This + change is backwards compatible unless you relied on unknown_chunks for those. +*) 11 jun 2018: less restrictive check for pixel size integer overflow +*) 14 jan 2018: allow optionally ignoring a few more recoverable errors +*) 17 sep 2017: fix memory leak for some encoder input error cases +*) 27 nov 2016: grey+alpha auto color model detection bugfix +*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort). +*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within + the limits of pure C90). +*) 08 dec 2015: Made load_file function return error if file can't be opened. +*) 24 okt 2015: Bugfix with decoding to palette output. +*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding. +*) 23 aug 2014: Reduced needless memory usage of decoder. +*) 28 jun 2014: Removed fix_png setting, always support palette OOB for + simplicity. Made ColorProfile public. +*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization. 
+*) 22 dec 2013: Power of two windowsize required for optimization. +*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key. +*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png). +*) 11 mar 2013 (!): Bugfix with custom free. Changed from "my" to "lodepng_" + prefix for the custom allocators and made it possible with a new #define to + use custom ones in your project without needing to change lodepng's code. +*) 28 jan 2013: Bugfix with color key. +*) 27 okt 2012: Tweaks in text chunk keyword length error handling. +*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode. + (no palette). Better deflate tree encoding. New compression tweak settings. + Faster color conversions while decoding. Some internal cleanups. +*) 23 sep 2012: Reduced warnings in Visual Studio a little bit. +*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions + and made it work with function pointers instead. +*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc + and free functions and toggle #defines from compiler flags. Small fixes. +*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible. +*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed + redundant C++ codec classes. Reduced amount of structs. Everything changed, + but it is cleaner now imho and functionality remains the same. Also fixed + several bugs and shrunk the implementation code. Made new samples. +*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best + PNG color model and bit depth, based on the amount and type of colors of the + raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color. +*) 9 okt 2011: simpler hash chain implementation for the encoder. +*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching. +*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking. 
+ A bug with the PNG filtertype heuristic was fixed, so that it chooses much + better ones (it's quite significant). A setting to do an experimental, slow, + brute force search for PNG filter types is added. +*) 17 aug 2011 (!): changed some C zlib related function names. +*) 16 aug 2011: made the code less wide (max 120 characters per line). +*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors. +*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled. +*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman + to optimize long sequences of zeros. +*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and + LodePNG_InfoColor_canHaveAlpha functions for convenience. +*) 7 nov 2010: added LodePNG_error_text function to get error code description. +*) 30 okt 2010: made decoding slightly faster +*) 26 okt 2010: (!) changed some C function and struct names (more consistent). + Reorganized the documentation and the declaration order in the header. +*) 08 aug 2010: only changed some comments and external samples. +*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version. +*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers. +*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could + read by ignoring the problem but windows apps couldn't. +*) 06 jun 2008: added more error checks for out of memory cases. +*) 26 apr 2008: added a few more checks here and there to ensure more safety. +*) 06 mar 2008: crash with encoding of strings fixed +*) 02 feb 2008: support for international text chunks added (iTXt) +*) 23 jan 2008: small cleanups, and #defines to divide code in sections +*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor. +*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder. 
+*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added + Also various fixes, such as in the deflate and the padding bits code. +*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved + filtering code of encoder. +*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A + C++ wrapper around this provides an interface almost identical to before. + Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code + are together in these files but it works both for C and C++ compilers. +*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks +*) 30 aug 2007: bug fixed which makes this Borland C++ compatible +*) 09 aug 2007: some VS2005 warnings removed again +*) 21 jul 2007: deflate code placed in new namespace separate from zlib code +*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images +*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing + invalid std::vector element [0] fixed, and level 3 and 4 warnings removed +*) 02 jun 2007: made the encoder add a tag with version by default +*) 27 may 2007: zlib and png code separated (but still in the same file), + simple encoder/decoder functions added for more simple usage cases +*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69), + moved some examples from here to lodepng_examples.cpp +*) 12 may 2007: palette decoding bug fixed +*) 24 apr 2007: changed the license from BSD to the zlib license +*) 11 mar 2007: very simple addition: ability to encode bKGD chunks. +*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding + palettized PNG images. Plus little interface change with palette and texts. +*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes. + Fixed a bug where the end code of a block had length 0 in the Huffman tree. 
+*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented + and supported by the encoder, resulting in smaller PNGs at the output. +*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone. +*) 24 jan 2007: gave encoder an error interface. Added color conversion from any + greyscale type to 8-bit greyscale with or without alpha. +*) 21 jan 2007: (!) Totally changed the interface. It allows more color types + to convert to and is more uniform. See the manual for how it works now. +*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days: + encode/decode custom tEXt chunks, separate classes for zlib & deflate, and + at last made the decoder give errors for incorrect Adler32 or Crc. +*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel. +*) 29 dec 2006: Added support for encoding images without alpha channel, and + cleaned out code as well as making certain parts faster. +*) 28 dec 2006: Added "Settings" to the encoder. +*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now. + Removed some code duplication in the decoder. Fixed little bug in an example. +*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter. + Fixed a bug of the decoder with 16-bit per color. +*) 15 okt 2006: Changed documentation structure +*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the + given image buffer, however for now it's not compressed. +*) 08 sep 2006: (!) Changed to interface with a Decoder class +*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different + way. Renamed decodePNG to decodePNGGeneric. +*) 29 jul 2006: (!) Changed the interface: image info is now returned as a + struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy. +*) 28 jul 2006: Cleaned the code and added new error checks. + Corrected terminology "deflate" into "inflate". 
+*) 23 jun 2006: Added SDL example in the documentation in the header, this + example allows easy debugging by displaying the PNG and its transparency. +*) 22 jun 2006: (!) Changed way to obtain error value. Added + loadFile function for convenience. Made decodePNG32 faster. +*) 21 jun 2006: (!) Changed type of info vector to unsigned. + Changed position of palette in info vector. Fixed an important bug that + happened on PNGs with an uncompressed block. +*) 16 jun 2006: Internally changed unsigned into unsigned where + needed, and performed some optimizations. +*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them + in LodePNG namespace. Changed the order of the parameters. Rewrote the + documentation in the header. Renamed files to lodepng.cpp and lodepng.h +*) 22 apr 2006: Optimized and improved some code +*) 07 sep 2005: (!) Changed to std::vector interface +*) 12 aug 2005: Initial release (C++, decoder only) + + +13. contact information +----------------------- + +Feel free to contact me with suggestions, problems, comments, ... concerning +LodePNG. If you encounter a PNG image that doesn't work properly with this +decoder, feel free to send it and I'll use it to find and fix the problem. + +My email address is (puzzle the account and domain together with an @ symbol): +Domain: gmail dot com. +Account: lode dot vandevenne. + + +Copyright (c) 2005-2019 Lode Vandevenne +*/