mirror of
https://github.com/BinomialLLC/basis_universal.git
synced 2026-06-08 08:33:53 +00:00
129 lines
4.6 KiB
JavaScript
129 lines
4.6 KiB
JavaScript
/**
 * Transcodes DXT into RGB565.
 * This is an optimized version of dxtToRgb565Unoptimized() below.
 * Optimizations:
 * 1. Use integer math to compute c2 and c3 instead of floating point
 *    math.  Specifically:
 *      c2 = 5/8 * c0 + 3/8 * c1
 *      c3 = 3/8 * c0 + 5/8 * c1
 *    This is about a 40% performance improvement.  It also appears to
 *    match what hardware DXT decoders do, as the colors produced
 *    by this integer math match what hardware produces, while the
 *    floating point in dxtToRgb565Unoptimized() produce slightly
 *    different colors (for one GPU this was tested on).
 * 2. Unroll the inner loop.  Another ~10% improvement.
 * 3. Compute r0, g0, b0, r1, g1, b1 only once instead of twice.
 *    Another 10% improvement.
 * 4. Use a Uint16Array instead of a Uint8Array.  Another 10% improvement.
 *
 * Each block occupies four 16-bit words of src: two color endpoints
 * followed by two words of 2-bit selectors (rows 0-1, then rows 2-3).
 * Blocks are read in row-major order, ceil(width / 4) blocks per row.
 * When width or height is not a multiple of 4, the pixels of the edge
 * blocks that fall outside the image are discarded.
 *
 * @param {Uint16Array} src The src DXT bits as a Uint16Array.
 * @param {number} src16Offset Index (in 16-bit words, not bytes) of the
 *     first block within src.
 * @param {number} width Output width in pixels.
 * @param {number} height Output height in pixels.
 * @return {Uint16Array} dst A new array of width * height pixels in
 *     row-major order.
 */
function dxtToRgb565(src, src16Offset, width, height) {
  var c = new Uint16Array(4);
  var dst = new Uint16Array(width * height);

  var m = 0;
  var dstI = 0;
  var i = 0;
  var r0 = 0, g0 = 0, b0 = 0, r1 = 0, g1 = 0, b1 = 0;
  var row = 0;
  var col = 0;
  var rowsInBlock = 0;
  var colsInBlock = 0;

  // Round up so that partial blocks on the right/bottom edges are counted.
  // A fractional block count would corrupt both the source index and the
  // destination rows.
  var blockWidth = Math.ceil(width / 4);
  var blockHeight = Math.ceil(height / 4);
  for (var blockY = 0; blockY < blockHeight; blockY++) {
    rowsInBlock = Math.min(4, height - blockY * 4);
    for (var blockX = 0; blockX < blockWidth; blockX++) {
      colsInBlock = Math.min(4, width - blockX * 4);
      i = src16Offset + 4 * (blockY * blockWidth + blockX);
      c[0] = src[i];
      c[1] = src[i + 1];

      // Keep each channel in its original bit position so that the
      // interpolated values can simply be masked and OR-ed back together.
      r0 = c[0] & 0x1f;
      g0 = c[0] & 0x7e0;
      b0 = c[0] & 0xf800;
      r1 = c[1] & 0x1f;
      g1 = c[1] & 0x7e0;
      b1 = c[1] & 0xf800;
      // Interpolate between c0 and c1 to get c2 and c3.
      // Note that we approximate 1/3 as 3/8 and 2/3 as 5/8 for
      // speed.  This also appears to be what the hardware DXT
      // decoder in many GPUs does :)

      // rg FIXME: This is most likely leading to wrong results vs. a GPU
      // NOTE(review): every block is decoded in four-color mode; the
      // three-color mode used when color0 <= color1 is not handled here.
      // Confirm that the inputs never rely on it.

      c[2] = ((5 * r0 + 3 * r1) >> 3)
          | (((5 * g0 + 3 * g1) >> 3) & 0x7e0)
          | (((5 * b0 + 3 * b1) >> 3) & 0xf800);
      c[3] = ((5 * r1 + 3 * r0) >> 3)
          | (((5 * g1 + 3 * g0) >> 3) & 0x7e0)
          | (((5 * b1 + 3 * b0) >> 3) & 0xf800);

      dstI = (blockY * 4) * width + blockX * 4;
      if (colsInBlock === 4 && rowsInBlock === 4) {
        // Fast path: the whole block lies inside the image, so the
        // sixteen writes are fully unrolled.
        m = src[i + 2];
        dst[dstI] = c[m & 0x3];
        dst[dstI + 1] = c[(m >> 2) & 0x3];
        dst[dstI + 2] = c[(m >> 4) & 0x3];
        dst[dstI + 3] = c[(m >> 6) & 0x3];
        dstI += width;
        dst[dstI] = c[(m >> 8) & 0x3];
        dst[dstI + 1] = c[(m >> 10) & 0x3];
        dst[dstI + 2] = c[(m >> 12) & 0x3];
        dst[dstI + 3] = c[(m >> 14) & 0x3];
        m = src[i + 3];
        dstI += width;
        dst[dstI] = c[m & 0x3];
        dst[dstI + 1] = c[(m >> 2) & 0x3];
        dst[dstI + 2] = c[(m >> 4) & 0x3];
        dst[dstI + 3] = c[(m >> 6) & 0x3];
        dstI += width;
        dst[dstI] = c[(m >> 8) & 0x3];
        dst[dstI + 1] = c[(m >> 10) & 0x3];
        dst[dstI + 2] = c[(m >> 12) & 0x3];
        dst[dstI + 3] = c[(m >> 14) & 0x3];
      } else {
        // Edge block: write only the pixels that are inside the image so
        // nothing spills into the next row or past the end of dst.
        for (row = 0; row < rowsInBlock; row++) {
          // Rows 0-1 live in the first selector word, rows 2-3 in the
          // second; odd rows use the high byte of their word.
          m = src[i + 2 + (row >> 1)] >> ((row & 1) << 3);
          for (col = 0; col < colsInBlock; col++) {
            dst[dstI + col] = c[(m >> (col << 1)) & 0x3];
          }
          dstI += width;
        }
      }
    }
  }
  return dst;
}
|
|
|
|
|
|
/**
 * An unoptimized version of dxtToRgb565.  Also, the floating
 * point math used to compute the colors actually results in
 * slightly different colors compared to hardware DXT decoders.
 *
 * Each block occupies eight bytes of src: two little-endian 16-bit color
 * endpoints followed by four selector bytes, one per pixel row, holding
 * four 2-bit selectors each.  Blocks are read in row-major order,
 * ceil(width / 4) blocks per row.  When width or height is not a
 * multiple of 4, the pixels of the edge blocks that fall outside the
 * image are discarded.
 *
 * @param {Uint8Array} src The src DXT bits as bytes.
 * @param {number} srcByteOffset Byte index of the first block within src.
 * @param {number} width Output width in pixels.
 * @param {number} height Output height in pixels.
 * @return {Uint16Array} dst A new array of width * height pixels in
 *     row-major order.
 */
function dxtToRgb565Unoptimized(src, srcByteOffset, width, height) {
  var c = new Uint16Array(4);
  var dst = new Uint16Array(width * height);

  // Round up so that partial blocks on the right/bottom edges are counted.
  // A fractional block count would corrupt both the source index and the
  // destination rows.
  var blockWidth = Math.ceil(width / 4);
  var blockHeight = Math.ceil(height / 4);
  for (var blockY = 0; blockY < blockHeight; blockY++) {
    var rowsInBlock = Math.min(4, height - blockY * 4);
    for (var blockX = 0; blockX < blockWidth; blockX++) {
      var colsInBlock = Math.min(4, width - blockX * 4);
      var i = srcByteOffset + 8 * (blockY * blockWidth + blockX);
      c[0] = src[i] | (src[i + 1] << 8);
      c[1] = src[i + 2] | (src[i + 3] << 8);
      // c2 = 2/3 * c0 + 1/3 * c1 and c3 = 2/3 * c1 + 1/3 * c0, computed per
      // channel with each channel left in its bit position.  The bitwise
      // operators truncate the fractional results to integers.
      c[2] = (2 * (c[0] & 0x1f) + 1 * (c[1] & 0x1f)) / 3
          | (((2 * (c[0] & 0x7e0) + 1 * (c[1] & 0x7e0)) / 3) & 0x7e0)
          | (((2 * (c[0] & 0xf800) + 1 * (c[1] & 0xf800)) / 3) & 0xf800);
      c[3] = (2 * (c[1] & 0x1f) + 1 * (c[0] & 0x1f)) / 3
          | (((2 * (c[1] & 0x7e0) + 1 * (c[0] & 0x7e0)) / 3) & 0x7e0)
          | (((2 * (c[1] & 0xf800) + 1 * (c[0] & 0xf800)) / 3) & 0xf800);
      // Write only the rows and columns that are inside the image so that
      // edge blocks never spill into the next row or past the end of dst.
      for (var row = 0; row < rowsInBlock; row++) {
        var m = src[i + 4 + row];
        var dstI = (blockY * 4 + row) * width + blockX * 4;
        for (var col = 0; col < colsInBlock; col++) {
          dst[dstI + col] = c[(m >> (2 * col)) & 0x3];
        }
      }
    }
  }
  return dst;
}
|
|
|