1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 // File: vogl_ryg_dxt.cpp
28 // RYG's real-time DXT compressor - Public domain.
29 #include "vogl_core.h"
30 #include "vogl_ryg_types.hpp"
31 #include "vogl_ryg_dxt.hpp"
34 #pragma warning(disable : 4244) // conversion from 'a' to 'b', possible loss of data
39 // Couple of tables...
44 sU8 OMatch5_3[256][2];
45 sU8 OMatch6_3[256][2];
46 sU8 QuantRBTab[256 + 16];
47 sU8 QuantGTab[256 + 16];
49 static sInt Mul8Bit(sInt a, sInt b)
52 return (t + (t >> 8)) >> 8;
63 void From16Bit(sU16 v)
65 sInt rv = (v & 0xf800) >> 11;
66 sInt gv = (v & 0x07e0) >> 5;
67 sInt bv = (v & 0x001f) >> 0;
77 return (Mul8Bit(r, 31) << 11) + (Mul8Bit(g, 63) << 5) + Mul8Bit(b, 31);
80 void LerpRGB(const Pixel &p1, const Pixel &p2, sInt f)
82 r = p1.r + Mul8Bit(p2.r - p1.r, f);
83 g = p1.g + Mul8Bit(p2.g - p1.g, f);
84 b = p1.b + Mul8Bit(p2.b - p1.b, f);
88 /****************************************************************************/
90 static void PrepareOptTable4(sU8 *Table, const sU8 *expand, sInt size)
92 for (sInt i = 0; i < 256; i++)
96 for (sInt min = 0; min < size; min++)
98 for (sInt max = 0; max < size; max++)
100 sInt mine = expand[min];
101 sInt maxe = expand[max];
102 //sInt err = sAbs(maxe + Mul8Bit(mine-maxe,0x55) - i);
103 sInt err = sAbs(((maxe * 2 + mine) / 3) - i);
104 err += ((sAbs(maxe - mine) * 8) >> 8); // approx. .03f
108 Table[i * 2 + 0] = max;
109 Table[i * 2 + 1] = min;
117 static void PrepareOptTable3(sU8 *Table, const sU8 *expand, sInt size)
119 for (sInt i = 0; i < 256; i++)
123 for (sInt min = 0; min < size; min++)
125 for (sInt max = 0; max < size; max++)
127 sInt mine = expand[min];
128 sInt maxe = expand[max];
129 sInt err = sAbs(((mine + maxe) >> 1) - i);
130 err += ((sAbs(maxe - mine) * 8) >> 8); // approx. .03f
134 Table[i * 2 + 0] = max;
135 Table[i * 2 + 1] = min;
143 static inline void EvalColors(Pixel *color, sU16 c0, sU16 c1)
145 color[0].From16Bit(c0);
146 color[1].From16Bit(c1);
147 color[2].LerpRGB(color[0], color[1], 0x55);
148 color[3].LerpRGB(color[0], color[1], 0xaa);
151 // Block dithering function. Simply dithers a block to 565 RGB.
153 static void DitherBlock(Pixel *dest, const Pixel *block)
155 sInt err[8], *ep1 = err, *ep2 = err + 4;
157 // process channels seperately
158 for (sInt ch = 0; ch < 3; ch++)
160 sU8 *bp = (sU8 *)block;
161 sU8 *dp = (sU8 *)dest;
162 sU8 *quant = (ch == 1) ? QuantGTab + 8 : QuantRBTab + 8;
166 sSetMem(err, 0, sizeof(err));
168 for (sInt y = 0; y < 4; y++)
171 dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)];
172 ep1[0] = bp[0] - dp[0];
175 dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)];
176 ep1[1] = bp[4] - dp[4];
179 dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)];
180 ep1[2] = bp[8] - dp[8];
183 dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)];
184 ep1[3] = bp[12] - dp[12];
186 // advance to next line
194 // The color matching function
195 static sU32 MatchColorsBlock(const Pixel *block, const Pixel *color, sBool dither)
198 sInt dirr = color[0].r - color[1].r;
199 sInt dirg = color[0].g - color[1].g;
200 sInt dirb = color[0].b - color[1].b;
203 for (sInt i = 0; i < 16; i++)
204 dots[i] = block[i].r * dirr + block[i].g * dirg + block[i].b * dirb;
207 for (sInt i = 0; i < 4; i++)
208 stops[i] = color[i].r * dirr + color[i].g * dirg + color[i].b * dirb;
210 sInt c0Point = (stops[1] + stops[3]) >> 1;
211 sInt halfPoint = (stops[3] + stops[2]) >> 1;
212 sInt c3Point = (stops[2] + stops[0]) >> 1;
216 // the version without dithering is straightforward
217 for (sInt i = 15; i >= 0; i--)
223 mask |= (dot < c0Point) ? 1 : 3;
225 mask |= (dot < c3Point) ? 2 : 0;
230 // with floyd-steinberg dithering (see above)
231 sInt err[8], *ep1 = err, *ep2 = err + 4;
237 for (sInt i = 0; i < 8; i++)
240 for (sInt y = 0; y < 4; y++)
242 sInt dot, lmask, step;
245 dot = (dp[0] << 4) + (3 * ep2[1] + 5 * ep2[0]);
247 step = (dot < c0Point) ? 1 : 3;
249 step = (dot < c3Point) ? 2 : 0;
251 ep1[0] = dp[0] - stops[step];
255 dot = (dp[1] << 4) + (7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]);
257 step = (dot < c0Point) ? 1 : 3;
259 step = (dot < c3Point) ? 2 : 0;
261 ep1[1] = dp[1] - stops[step];
265 dot = (dp[2] << 4) + (7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]);
267 step = (dot < c0Point) ? 1 : 3;
269 step = (dot < c3Point) ? 2 : 0;
271 ep1[2] = dp[2] - stops[step];
275 dot = (dp[3] << 4) + (7 * ep1[2] + 5 * ep2[3] + ep2[2]);
277 step = (dot < c0Point) ? 1 : 3;
279 step = (dot < c3Point) ? 2 : 0;
281 ep1[3] = dp[3] - stops[step];
284 // advance to next line
287 mask |= lmask << (y * 8);
294 // The color optimization function. (Clever code, part 1)
295 static void OptimizeColorsBlock(const Pixel *block, sU16 &max16, sU16 &min16)
297 static const sInt nIterPower = 4;
299 // determine color distribution
300 sInt mu[3], min[3], max[3];
302 for (sInt ch = 0; ch < 3; ch++)
304 const sU8 *bp = ((const sU8 *)block) + ch;
305 sInt muv, minv, maxv;
307 muv = minv = maxv = bp[0];
308 for (sInt i = 4; i < 64; i += 4)
311 minv = sMin<sInt>(minv, bp[i]);
312 maxv = sMax<sInt>(maxv, bp[i]);
315 mu[ch] = (muv + 8) >> 4;
320 // determine covariance matrix
322 for (sInt i = 0; i < 6; i++)
325 for (sInt i = 0; i < 16; i++)
327 sInt r = block[i].r - mu[2];
328 sInt g = block[i].g - mu[1];
329 sInt b = block[i].b - mu[0];
339 // convert covariance matrix to float, find principal axis via power iter
340 sF32 covf[6], vfr, vfg, vfb;
341 for (sInt i = 0; i < 6; i++)
342 covf[i] = cov[i] / 255.0f;
344 vfr = max[2] - min[2];
345 vfg = max[1] - min[1];
346 vfb = max[0] - min[0];
348 for (sInt iter = 0; iter < nIterPower; iter++)
350 sF32 r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
351 sF32 g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
352 sF32 b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
359 sF32 magn = sMax(sMax(sFAbs(vfr), sFAbs(vfg)), sFAbs(vfb));
362 if (magn < 4.0f) // too small, default to luminance
370 magn = 512.0f / magn;
376 // Pick colors at extreme points
377 sInt mind = 0x7fffffff, maxd = -0x7fffffff;
382 for (sInt i = 0; i < 16; i++)
384 sInt dot = block[i].r * v_r + block[i].g * v_g + block[i].b * v_b;
399 // Reduce to 16 bit colors
400 max16 = maxp.As16Bit();
401 min16 = minp.As16Bit();
404 // The refinement function. (Clever code, part 2)
405 // Tries to optimize colors to suit block contents better.
406 // (By solving a least squares system via normal equations+Cramer's rule)
407 static sBool RefineBlock(const Pixel *block, sU16 &max16, sU16 &min16, sU32 mask)
409 static const sInt w1Tab[4] = { 3, 0, 2, 1 };
410 static const sInt prods[4] = { 0x090000, 0x000900, 0x040102, 0x010402 };
411 // ^some magic to save a lot of multiplies in the accumulating loop...
414 sInt At1_r, At1_g, At1_b;
415 sInt At2_r, At2_g, At2_b;
418 At1_r = At1_g = At1_b = 0;
419 At2_r = At2_g = At2_b = 0;
420 for (sInt i = 0; i < 16; i++, cm >>= 2)
423 sInt w1 = w1Tab[step];
437 At2_r = 3 * At2_r - At1_r;
438 At2_g = 3 * At2_g - At1_g;
439 At2_b = 3 * At2_b - At1_b;
441 // extract solutions and decide solvability
442 sInt xx = akku >> 16;
443 sInt yy = (akku >> 8) & 0xff;
444 sInt xy = (akku >> 0) & 0xff;
446 if (!yy || !xx || xx * yy == xy * xy)
449 sF32 frb = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
450 sF32 fg = frb * 63.0f / 31.0f;
456 max16 = sClamp<sInt>((At1_r * yy - At2_r * xy) * frb + 0.5f, 0, 31) << 11;
457 max16 |= sClamp<sInt>((At1_g * yy - At2_g * xy) * fg + 0.5f, 0, 63) << 5;
458 max16 |= sClamp<sInt>((At1_b * yy - At2_b * xy) * frb + 0.5f, 0, 31) << 0;
460 min16 = sClamp<sInt>((At2_r * xx - At1_r * xy) * frb + 0.5f, 0, 31) << 11;
461 min16 |= sClamp<sInt>((At2_g * xx - At1_g * xy) * fg + 0.5f, 0, 63) << 5;
462 min16 |= sClamp<sInt>((At2_b * xx - At1_b * xy) * frb + 0.5f, 0, 31) << 0;
464 return oldMin != min16 || oldMax != max16;
467 // Color block compression
468 static void CompressColorBlock(sU8 *dest, const sU32 *src, sInt quality)
470 const Pixel *block = (const Pixel *)src;
471 Pixel dblock[16], color[4];
473 // check if block is constant
475 min = max = block[0].v;
477 for (sInt i = 1; i < 16; i++)
479 min = sMin(min, block[i].v);
480 max = sMax(max, block[i].v);
483 // perform block compression
487 if (min != max) // no constant color
489 // first step: compute dithered version for PCA if desired
491 DitherBlock(dblock, block);
493 // second step: pca+map along principal axis
494 OptimizeColorsBlock(quality ? dblock : block, max16, min16);
497 EvalColors(color, max16, min16);
498 mask = MatchColorsBlock(block, color, quality != 0);
503 // third step: refine
504 if (RefineBlock(quality ? dblock : block, max16, min16, mask))
508 EvalColors(color, max16, min16);
509 mask = MatchColorsBlock(block, color, quality != 0);
515 else // constant color
522 max16 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0];
523 min16 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1];
526 // write the color block
533 ((sU16 *)dest)[0] = max16;
534 ((sU16 *)dest)[1] = min16;
535 ((sU32 *)dest)[1] = mask;
538 // Alpha block compression (this is easy for a change)
539 static void CompressAlphaBlock(sU8 *dest, const sU32 *src, sInt quality)
541 VOGL_NOTE_UNUSED(quality);
542 const Pixel *block = (const Pixel *)src;
544 // find min/max color
546 min = max = block[0].a;
548 for (sInt i = 1; i < 16; i++)
550 min = sMin<sInt>(min, block[i].a);
551 max = sMax<sInt>(max, block[i].a);
558 // determine bias and emit color indices
559 sInt dist = max - min;
560 sInt bias = min * 7 - (dist >> 1);
561 sInt dist4 = dist * 4;
562 sInt dist2 = dist * 2;
563 sInt bits = 0, mask = 0;
565 for (sInt i = 0; i < 16; i++)
567 sInt a = block[i].a * 7 - bias;
570 // select index (hooray for bit magic)
571 t = (dist4 - a) >> 31;
574 t = (dist2 - a) >> 31;
577 t = (dist - a) >> 31;
585 if ((bits += 3) >= 8)
594 /****************************************************************************/
598 for (sInt i = 0; i < 32; i++)
599 Expand5[i] = (i << 3) | (i >> 2);
601 for (sInt i = 0; i < 64; i++)
602 Expand6[i] = (i << 2) | (i >> 4);
604 for (sInt i = 0; i < 256 + 16; i++)
606 sInt v = sClamp(i - 8, 0, 255);
607 QuantRBTab[i] = Expand5[Mul8Bit(v, 31)];
608 QuantGTab[i] = Expand6[Mul8Bit(v, 63)];
611 PrepareOptTable4(&OMatch5[0][0], Expand5, 32);
612 PrepareOptTable4(&OMatch6[0][0], Expand6, 64);
614 PrepareOptTable3(&OMatch5_3[0][0], Expand5, 32);
615 PrepareOptTable3(&OMatch6_3[0][0], Expand6, 64);
618 void sCompressDXTBlock(sU8 *dest, const sU32 *src, sBool alpha, sInt quality)
620 VOGL_ASSERT(Expand5[1]);
622 // if alpha specified, compress alpha as well
625 CompressAlphaBlock(dest, src, quality);
629 // compress the color part
630 CompressColorBlock(dest, src, quality);
633 void sCompressDXT5ABlock(sU8 *dest, const sU32 *src, sInt quality)
635 VOGL_ASSERT(Expand5[1]);
637 CompressAlphaBlock(dest, src, quality);
640 } // namespace ryg_dxt