git.cworth.org Git - vogl/blob - src/voglcore/vogl_ryg_dxt.cpp

   1 /**************************************************************************
   2  *
   3  * Copyright 2013-2014 RAD Game Tools and Valve Software
   4  * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
   5  * All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a copy
   8  * of this software and associated documentation files (the "Software"), to deal
   9  * in the Software without restriction, including without limitation the rights
  10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11  * copies of the Software, and to permit persons to whom the Software is
  12  * furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included in
  15  * all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23  * THE SOFTWARE.
  24  *
  25  **************************************************************************/
  26
  27 // File: vogl_ryg_dxt.cpp
  28 // RYG's real-time DXT compressor - Public domain.
  29 #include "vogl_core.h"
  30 #include "vogl_ryg_types.hpp"
  31 #include "vogl_ryg_dxt.hpp"
  32
  33 #ifdef _MSC_VER
  34 #pragma warning(disable : 4244) // conversion from 'a' to 'b', possible loss of data
  35 #endif
  36
  37 namespace ryg_dxt
  38 {
  39     // Couple of tables...
  40     sU8 Expand5[32];
  41     sU8 Expand6[64];
  42     sU8 OMatch5[256][2];
  43     sU8 OMatch6[256][2];
  44     sU8 OMatch5_3[256][2];
  45     sU8 OMatch6_3[256][2];
  46     sU8 QuantRBTab[256 + 16];
  47     sU8 QuantGTab[256 + 16];
  48
  49     static sInt Mul8Bit(sInt a, sInt b)
  50     {
  51         sInt t = a * b + 128;
  52         return (t + (t >> 8)) >> 8;
  53     }
  54
  55     union Pixel
  56     {
  57         struct
  58         {
  59             sU8 b, g, r, a;
  60         };
  61         sU32 v;
  62
  63         void From16Bit(sU16 v)
  64         {
  65             sInt rv = (v & 0xf800) >> 11;
  66             sInt gv = (v & 0x07e0) >> 5;
  67             sInt bv = (v & 0x001f) >> 0;
  68
  69             a = 0;
  70             r = Expand5[rv];
  71             g = Expand6[gv];
  72             b = Expand5[bv];
  73         }
  74
  75         sU16 As16Bit() const
  76         {
  77             return (Mul8Bit(r, 31) << 11) + (Mul8Bit(g, 63) << 5) + Mul8Bit(b, 31);
  78         }
  79
  80         void LerpRGB(const Pixel &p1, const Pixel &p2, sInt f)
  81         {
  82             r = p1.r + Mul8Bit(p2.r - p1.r, f);
  83             g = p1.g + Mul8Bit(p2.g - p1.g, f);
  84             b = p1.b + Mul8Bit(p2.b - p1.b, f);
  85         }
  86     };
  87
  88     /****************************************************************************/
  89
  90     static void PrepareOptTable4(sU8 *Table, const sU8 *expand, sInt size)
  91     {
  92         for (sInt i = 0; i < 256; i++)
  93         {
  94             sInt bestErr = 256;
  95
  96             for (sInt min = 0; min < size; min++)
  97             {
  98                 for (sInt max = 0; max < size; max++)
  99                 {
 100                     sInt mine = expand[min];
 101                     sInt maxe = expand[max];
 102                     //sInt err = sAbs(maxe + Mul8Bit(mine-maxe,0x55) - i);
 103                     sInt err = sAbs(((maxe * 2 + mine) / 3) - i);
 104                     err += ((sAbs(maxe - mine) * 8) >> 8); // approx. .03f
 105
 106                     if (err < bestErr)
 107                     {
 108                         Table[i * 2 + 0] = max;
 109                         Table[i * 2 + 1] = min;
 110                         bestErr = err;
 111                     }
 112                 }
 113             }
 114         }
 115     }
 116
 117     static void PrepareOptTable3(sU8 *Table, const sU8 *expand, sInt size)
 118     {
 119         for (sInt i = 0; i < 256; i++)
 120         {
 121             sInt bestErr = 256;
 122
 123             for (sInt min = 0; min < size; min++)
 124             {
 125                 for (sInt max = 0; max < size; max++)
 126                 {
 127                     sInt mine = expand[min];
 128                     sInt maxe = expand[max];
 129                     sInt err = sAbs(((mine + maxe) >> 1) - i);
 130                     err += ((sAbs(maxe - mine) * 8) >> 8); // approx. .03f
 131
 132                     if (err < bestErr)
 133                     {
 134                         Table[i * 2 + 0] = max;
 135                         Table[i * 2 + 1] = min;
 136                         bestErr = err;
 137                     }
 138                 }
 139             }
 140         }
 141     }
 142
 143     static inline void EvalColors(Pixel *color, sU16 c0, sU16 c1)
 144     {
 145         color[0].From16Bit(c0);
 146         color[1].From16Bit(c1);
 147         color[2].LerpRGB(color[0], color[1], 0x55);
 148         color[3].LerpRGB(color[0], color[1], 0xaa);
 149     }
 150
 151     // Block dithering function. Simply dithers a block to 565 RGB.
 152     // (Floyd-Steinberg)
 153     static void DitherBlock(Pixel *dest, const Pixel *block)
 154     {
 155         sInt err[8], *ep1 = err, *ep2 = err + 4;
 156
 157         // process channels seperately
 158         for (sInt ch = 0; ch < 3; ch++)
 159         {
 160             sU8 *bp = (sU8 *)block;
 161             sU8 *dp = (sU8 *)dest;
 162             sU8 *quant = (ch == 1) ? QuantGTab + 8 : QuantRBTab + 8;
 163
 164             bp += ch;
 165             dp += ch;
 166             sSetMem(err, 0, sizeof(err));
 167
 168             for (sInt y = 0; y < 4; y++)
 169             {
 170                 // pixel 0
 171                 dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)];
 172                 ep1[0] = bp[0] - dp[0];
 173
 174                 // pixel 1
 175                 dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)];
 176                 ep1[1] = bp[4] - dp[4];
 177
 178                 // pixel 2
 179                 dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)];
 180                 ep1[2] = bp[8] - dp[8];
 181
 182                 // pixel 3
 183                 dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)];
 184                 ep1[3] = bp[12] - dp[12];
 185
 186                 // advance to next line
 187                 sSwap(ep1, ep2);
 188                 bp += 16;
 189                 dp += 16;
 190             }
 191         }
 192     }
 193
 194     // The color matching function
 195     static sU32 MatchColorsBlock(const Pixel *block, const Pixel *color, sBool dither)
 196     {
 197         sU32 mask = 0;
 198         sInt dirr = color[0].r - color[1].r;
 199         sInt dirg = color[0].g - color[1].g;
 200         sInt dirb = color[0].b - color[1].b;
 201
 202         sInt dots[16];
 203         for (sInt i = 0; i < 16; i++)
 204             dots[i] = block[i].r * dirr + block[i].g * dirg + block[i].b * dirb;
 205
 206         sInt stops[4];
 207         for (sInt i = 0; i < 4; i++)
 208             stops[i] = color[i].r * dirr + color[i].g * dirg + color[i].b * dirb;
 209
 210         sInt c0Point = (stops[1] + stops[3]) >> 1;
 211         sInt halfPoint = (stops[3] + stops[2]) >> 1;
 212         sInt c3Point = (stops[2] + stops[0]) >> 1;
 213
 214         if (!dither)
 215         {
 216             // the version without dithering is straightforward
 217             for (sInt i = 15; i >= 0; i--)
 218             {
 219                 mask <<= 2;
 220                 sInt dot = dots[i];
 221
 222                 if (dot < halfPoint)
 223                     mask |= (dot < c0Point) ? 1 : 3;
 224                 else
 225                     mask |= (dot < c3Point) ? 2 : 0;
 226             }
 227         }
 228         else
 229         {
 230             // with floyd-steinberg dithering (see above)
 231             sInt err[8], *ep1 = err, *ep2 = err + 4;
 232             sInt *dp = dots;
 233
 234             c0Point <<= 4;
 235             halfPoint <<= 4;
 236             c3Point <<= 4;
 237             for (sInt i = 0; i < 8; i++)
 238                 err[i] = 0;
 239
 240             for (sInt y = 0; y < 4; y++)
 241             {
 242                 sInt dot, lmask, step;
 243
 244                 // pixel 0
 245                 dot = (dp[0] << 4) + (3 * ep2[1] + 5 * ep2[0]);
 246                 if (dot < halfPoint)
 247                     step = (dot < c0Point) ? 1 : 3;
 248                 else
 249                     step = (dot < c3Point) ? 2 : 0;
 250
 251                 ep1[0] = dp[0] - stops[step];
 252                 lmask = step;
 253
 254                 // pixel 1
 255                 dot = (dp[1] << 4) + (7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]);
 256                 if (dot < halfPoint)
 257                     step = (dot < c0Point) ? 1 : 3;
 258                 else
 259                     step = (dot < c3Point) ? 2 : 0;
 260
 261                 ep1[1] = dp[1] - stops[step];
 262                 lmask |= step << 2;
 263
 264                 // pixel 2
 265                 dot = (dp[2] << 4) + (7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]);
 266                 if (dot < halfPoint)
 267                     step = (dot < c0Point) ? 1 : 3;
 268                 else
 269                     step = (dot < c3Point) ? 2 : 0;
 270
 271                 ep1[2] = dp[2] - stops[step];
 272                 lmask |= step << 4;
 273
 274                 // pixel 3
 275                 dot = (dp[3] << 4) + (7 * ep1[2] + 5 * ep2[3] + ep2[2]);
 276                 if (dot < halfPoint)
 277                     step = (dot < c0Point) ? 1 : 3;
 278                 else
 279                     step = (dot < c3Point) ? 2 : 0;
 280
 281                 ep1[3] = dp[3] - stops[step];
 282                 lmask |= step << 6;
 283
 284                 // advance to next line
 285                 sSwap(ep1, ep2);
 286                 dp += 4;
 287                 mask |= lmask << (y * 8);
 288             }
 289         }
 290
 291         return mask;
 292     }
 293
 294     // The color optimization function. (Clever code, part 1)
 295     static void OptimizeColorsBlock(const Pixel *block, sU16 &max16, sU16 &min16)
 296     {
 297         static const sInt nIterPower = 4;
 298
 299         // determine color distribution
 300         sInt mu[3], min[3], max[3];
 301
 302         for (sInt ch = 0; ch < 3; ch++)
 303         {
 304             const sU8 *bp = ((const sU8 *)block) + ch;
 305             sInt muv, minv, maxv;
 306
 307             muv = minv = maxv = bp[0];
 308             for (sInt i = 4; i < 64; i += 4)
 309             {
 310                 muv += bp[i];
 311                 minv = sMin<sInt>(minv, bp[i]);
 312                 maxv = sMax<sInt>(maxv, bp[i]);
 313             }
 314
 315             mu[ch] = (muv + 8) >> 4;
 316             min[ch] = minv;
 317             max[ch] = maxv;
 318         }
 319
 320         // determine covariance matrix
 321         sInt cov[6];
 322         for (sInt i = 0; i < 6; i++)
 323             cov[i] = 0;
 324
 325         for (sInt i = 0; i < 16; i++)
 326         {
 327             sInt r = block[i].r - mu[2];
 328             sInt g = block[i].g - mu[1];
 329             sInt b = block[i].b - mu[0];
 330
 331             cov[0] += r * r;
 332             cov[1] += r * g;
 333             cov[2] += r * b;
 334             cov[3] += g * g;
 335             cov[4] += g * b;
 336             cov[5] += b * b;
 337         }
 338
 339         // convert covariance matrix to float, find principal axis via power iter
 340         sF32 covf[6], vfr, vfg, vfb;
 341         for (sInt i = 0; i < 6; i++)
 342             covf[i] = cov[i] / 255.0f;
 343
 344         vfr = max[2] - min[2];
 345         vfg = max[1] - min[1];
 346         vfb = max[0] - min[0];
 347
 348         for (sInt iter = 0; iter < nIterPower; iter++)
 349         {
 350             sF32 r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
 351             sF32 g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
 352             sF32 b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
 353
 354             vfr = r;
 355             vfg = g;
 356             vfb = b;
 357         }
 358
 359         sF32 magn = sMax(sMax(sFAbs(vfr), sFAbs(vfg)), sFAbs(vfb));
 360         sInt v_r, v_g, v_b;
 361
 362         if (magn < 4.0f) // too small, default to luminance
 363         {
 364             v_r = 148;
 365             v_g = 300;
 366             v_b = 58;
 367         }
 368         else
 369         {
 370             magn = 512.0f / magn;
 371             v_r = vfr * magn;
 372             v_g = vfg * magn;
 373             v_b = vfb * magn;
 374         }
 375
 376         // Pick colors at extreme points
 377         sInt mind = 0x7fffffff, maxd = -0x7fffffff;
 378         Pixel minp, maxp;
 379         minp.v = 0;
 380         maxp.v = 0;
 381
 382         for (sInt i = 0; i < 16; i++)
 383         {
 384             sInt dot = block[i].r * v_r + block[i].g * v_g + block[i].b * v_b;
 385
 386             if (dot < mind)
 387             {
 388                 mind = dot;
 389                 minp = block[i];
 390             }
 391
 392             if (dot > maxd)
 393             {
 394                 maxd = dot;
 395                 maxp = block[i];
 396             }
 397         }
 398
 399         // Reduce to 16 bit colors
 400         max16 = maxp.As16Bit();
 401         min16 = minp.As16Bit();
 402     }
 403
 404     // The refinement function. (Clever code, part 2)
 405     // Tries to optimize colors to suit block contents better.
 406     // (By solving a least squares system via normal equations+Cramer's rule)
 407     static sBool RefineBlock(const Pixel *block, sU16 &max16, sU16 &min16, sU32 mask)
 408     {
 409         static const sInt w1Tab[4] = { 3, 0, 2, 1 };
 410         static const sInt prods[4] = { 0x090000, 0x000900, 0x040102, 0x010402 };
 411         // ^some magic to save a lot of multiplies in the accumulating loop...
 412
 413         sInt akku = 0;
 414         sInt At1_r, At1_g, At1_b;
 415         sInt At2_r, At2_g, At2_b;
 416         sU32 cm = mask;
 417
 418         At1_r = At1_g = At1_b = 0;
 419         At2_r = At2_g = At2_b = 0;
 420         for (sInt i = 0; i < 16; i++, cm >>= 2)
 421         {
 422             sInt step = cm & 3;
 423             sInt w1 = w1Tab[step];
 424             sInt r = block[i].r;
 425             sInt g = block[i].g;
 426             sInt b = block[i].b;
 427
 428             akku += prods[step];
 429             At1_r += w1 * r;
 430             At1_g += w1 * g;
 431             At1_b += w1 * b;
 432             At2_r += r;
 433             At2_g += g;
 434             At2_b += b;
 435         }
 436
 437         At2_r = 3 * At2_r - At1_r;
 438         At2_g = 3 * At2_g - At1_g;
 439         At2_b = 3 * At2_b - At1_b;
 440
 441         // extract solutions and decide solvability
 442         sInt xx = akku >> 16;
 443         sInt yy = (akku >> 8) & 0xff;
 444         sInt xy = (akku >> 0) & 0xff;
 445
 446         if (!yy || !xx || xx * yy == xy * xy)
 447             return sFALSE;
 448
 449         sF32 frb = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
 450         sF32 fg = frb * 63.0f / 31.0f;
 451
 452         sU16 oldMin = min16;
 453         sU16 oldMax = max16;
 454
 455         // solve.
 456         max16 = sClamp<sInt>((At1_r * yy - At2_r * xy) * frb + 0.5f, 0, 31) << 11;
 457         max16 |= sClamp<sInt>((At1_g * yy - At2_g * xy) * fg + 0.5f, 0, 63) << 5;
 458         max16 |= sClamp<sInt>((At1_b * yy - At2_b * xy) * frb + 0.5f, 0, 31) << 0;
 459
 460         min16 = sClamp<sInt>((At2_r * xx - At1_r * xy) * frb + 0.5f, 0, 31) << 11;
 461         min16 |= sClamp<sInt>((At2_g * xx - At1_g * xy) * fg + 0.5f, 0, 63) << 5;
 462         min16 |= sClamp<sInt>((At2_b * xx - At1_b * xy) * frb + 0.5f, 0, 31) << 0;
 463
 464         return oldMin != min16 || oldMax != max16;
 465     }
 466
 467     // Color block compression
 468     static void CompressColorBlock(sU8 *dest, const sU32 *src, sInt quality)
 469     {
 470         const Pixel *block = (const Pixel *)src;
 471         Pixel dblock[16], color[4];
 472
 473         // check if block is constant
 474         sU32 min, max;
 475         min = max = block[0].v;
 476
 477         for (sInt i = 1; i < 16; i++)
 478         {
 479             min = sMin(min, block[i].v);
 480             max = sMax(max, block[i].v);
 481         }
 482
 483         // perform block compression
 484         sU16 min16, max16;
 485         sU32 mask;
 486
 487         if (min != max) // no constant color
 488         {
 489             // first step: compute dithered version for PCA if desired
 490             if (quality)
 491                 DitherBlock(dblock, block);
 492
 493             // second step: pca+map along principal axis
 494             OptimizeColorsBlock(quality ? dblock : block, max16, min16);
 495             if (max16 != min16)
 496             {
 497                 EvalColors(color, max16, min16);
 498                 mask = MatchColorsBlock(block, color, quality != 0);
 499             }
 500             else
 501                 mask = 0;
 502
 503             // third step: refine
 504             if (RefineBlock(quality ? dblock : block, max16, min16, mask))
 505             {
 506                 if (max16 != min16)
 507                 {
 508                     EvalColors(color, max16, min16);
 509                     mask = MatchColorsBlock(block, color, quality != 0);
 510                 }
 511                 else
 512                     mask = 0;
 513             }
 514         }
 515         else // constant color
 516         {
 517             sInt r = block[0].r;
 518             sInt g = block[0].g;
 519             sInt b = block[0].b;
 520
 521             mask = 0xaaaaaaaa;
 522             max16 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0];
 523             min16 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1];
 524         }
 525
 526         // write the color block
 527         if (max16 < min16)
 528         {
 529             sSwap(max16, min16);
 530             mask ^= 0x55555555;
 531         }
 532
 533         ((sU16 *)dest)[0] = max16;
 534         ((sU16 *)dest)[1] = min16;
 535         ((sU32 *)dest)[1] = mask;
 536     }
 537
 538     // Alpha block compression (this is easy for a change)
 539     static void CompressAlphaBlock(sU8 *dest, const sU32 *src, sInt quality)
 540     {
 541         VOGL_NOTE_UNUSED(quality);
 542         const Pixel *block = (const Pixel *)src;
 543
 544         // find min/max color
 545         sInt min, max;
 546         min = max = block[0].a;
 547
 548         for (sInt i = 1; i < 16; i++)
 549         {
 550             min = sMin<sInt>(min, block[i].a);
 551             max = sMax<sInt>(max, block[i].a);
 552         }
 553
 554         // encode them
 555         *dest++ = max;
 556         *dest++ = min;
 557
 558         // determine bias and emit color indices
 559         sInt dist = max - min;
 560         sInt bias = min * 7 - (dist >> 1);
 561         sInt dist4 = dist * 4;
 562         sInt dist2 = dist * 2;
 563         sInt bits = 0, mask = 0;
 564
 565         for (sInt i = 0; i < 16; i++)
 566         {
 567             sInt a = block[i].a * 7 - bias;
 568             sInt ind, t;
 569
 570             // select index (hooray for bit magic)
 571             t = (dist4 - a) >> 31;
 572             ind = t & 4;
 573             a -= dist4 & t;
 574             t = (dist2 - a) >> 31;
 575             ind += t & 2;
 576             a -= dist2 & t;
 577             t = (dist - a) >> 31;
 578             ind += t & 1;
 579
 580             ind = -ind & 7;
 581             ind ^= (2 > ind);
 582
 583             // write index
 584             mask |= ind << bits;
 585             if ((bits += 3) >= 8)
 586             {
 587                 *dest++ = mask;
 588                 mask >>= 8;
 589                 bits -= 8;
 590             }
 591         }
 592     }
 593
 594     /****************************************************************************/
 595
 596     void sInitDXT()
 597     {
 598         for (sInt i = 0; i < 32; i++)
 599             Expand5[i] = (i << 3) | (i >> 2);
 600
 601         for (sInt i = 0; i < 64; i++)
 602             Expand6[i] = (i << 2) | (i >> 4);
 603
 604         for (sInt i = 0; i < 256 + 16; i++)
 605         {
 606             sInt v = sClamp(i - 8, 0, 255);
 607             QuantRBTab[i] = Expand5[Mul8Bit(v, 31)];
 608             QuantGTab[i] = Expand6[Mul8Bit(v, 63)];
 609         }
 610
 611         PrepareOptTable4(&OMatch5[0][0], Expand5, 32);
 612         PrepareOptTable4(&OMatch6[0][0], Expand6, 64);
 613
 614         PrepareOptTable3(&OMatch5_3[0][0], Expand5, 32);
 615         PrepareOptTable3(&OMatch6_3[0][0], Expand6, 64);
 616     }
 617
 618     void sCompressDXTBlock(sU8 *dest, const sU32 *src, sBool alpha, sInt quality)
 619     {
 620         VOGL_ASSERT(Expand5[1]);
 621
 622         // if alpha specified, compress alpha as well
 623         if (alpha)
 624         {
 625             CompressAlphaBlock(dest, src, quality);
 626             dest += 8;
 627         }
 628
 629         // compress the color part
 630         CompressColorBlock(dest, src, quality);
 631     }
 632
 633     void sCompressDXT5ABlock(sU8 *dest, const sU32 *src, sInt quality)
 634     {
 635         VOGL_ASSERT(Expand5[1]);
 636
 637         CompressAlphaBlock(dest, src, quality);
 638     }
 639
 640 } // namespace ryg_dxt