git.cworth.org Git - apitrace/blob - thirdparty/directxtex/DirectXTex/BC.h

   1 //-------------------------------------------------------------------------------------
   2 // BC.h
   3 //
   4 // Block-compression (BC) functionality
   5 //
   6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
   7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
   8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
   9 // PARTICULAR PURPOSE.
  10 //
  11 // Copyright (c) Microsoft Corporation. All rights reserved.
  12 //
  13 // http://go.microsoft.com/fwlink/?LinkId=248926
  14 //-------------------------------------------------------------------------------------
  15
  16 #if defined(_MSC_VER) && (_MSC_VER > 1000)
  17 #pragma once
  18 #endif
  19
  20 #include <assert.h>
  21
  22 #ifdef USE_XNAMATH
  23 #include <xnamath.h>
  24 #else
  25 #include <directxmath.h>
  26 #include <directxpackedvector.h>
  27 #endif
  28
  29 #include <float.h>
  30
  31 #pragma warning(push)
  32 #pragma warning(disable : 4005)
  33 #include <stdint.h>
  34 #pragma warning(pop)
  35
  36 namespace DirectX
  37 {
  38
  39 #ifndef USE_XNAMATH
  40 typedef PackedVector::HALF HALF;
  41 typedef PackedVector::XMHALF4 XMHALF4;
  42 typedef PackedVector::XMU565 XMU565;
  43 #endif
  44
  45 //-------------------------------------------------------------------------------------
  46 // Constants
  47 //-------------------------------------------------------------------------------------
  48
  49 const uint16_t F16S_MASK    = 0x8000;   // f16 sign mask
  50 const uint16_t F16EM_MASK   = 0x7fff;   // f16 exp & mantissa mask
  51 const uint16_t F16MAX       = 0x7bff;   // MAXFLT bit pattern for XMHALF
  52
  53 #define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x))
  54
  55 // Because these are used in SAL annotations, they need to remain macros rather than const values
  56 #define NUM_PIXELS_PER_BLOCK 16
  57 #define BC6H_MAX_REGIONS 2
  58 #define BC6H_MAX_INDICES 16
  59 #define BC7_MAX_REGIONS 3
  60 #define BC7_MAX_INDICES 16
  61
  62 const size_t BC6H_NUM_CHANNELS = 3;
  63 const size_t BC6H_MAX_SHAPES = 32;
  64
  65 const size_t BC7_NUM_CHANNELS = 4;
  66 const size_t BC7_MAX_SHAPES = 64;
  67
  68 const uint32_t BC67_WEIGHT_MAX = 64;
  69 const uint32_t BC67_WEIGHT_SHIFT = 6;
  70 const uint32_t BC67_WEIGHT_ROUND = 32;
  71
  72 extern const int g_aWeights2[4];
  73 extern const int g_aWeights3[8];
  74 extern const int g_aWeights4[16];
  75
  76 enum BC_FLAGS
  77 {
  78     BC_FLAGS_NONE       = 0x0,
  79     BC_FLAGS_DITHER_RGB = 0x10000,  // Enables dithering for RGB colors for BC1-3
  80     BC_FLAGS_DITHER_A   = 0x20000,  // Enables dithering for Alpha channel for BC1-3
  81     BC_FLAGS_UNIFORM    = 0x40000,  // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
  82 };
  83
  84 //-------------------------------------------------------------------------------------
  85 // Structures
  86 //-------------------------------------------------------------------------------------
  87 class HDRColorA;
  88
  89 class LDRColorA
  90 {
  91 public:
  92     uint8_t r, g, b, a;
  93
  94     LDRColorA() {}
  95     LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) : r(_r), g(_g), b(_b), a(_a) {}
  96
  97     const uint8_t& operator [] (_In_range_(0,3) size_t uElement) const
  98     {
  99         switch(uElement)
 100         {
 101         case 0: return r;
 102         case 1: return g;
 103         case 2: return b;
 104         case 3: return a;
 105         default: assert(false); return r;
 106         }
 107     }
 108
 109     uint8_t& operator [] (_In_range_(0,3) size_t uElement)
 110     {
 111         switch(uElement)
 112         {
 113         case 0: return r;
 114         case 1: return g;
 115         case 2: return b;
 116         case 3: return a;
 117         default: assert(false); return r;
 118         }
 119     }
 120
 121     LDRColorA operator = (_In_ const HDRColorA& c);
 122
 123     static void InterpolateRGB(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wcprec, _Out_ LDRColorA& out)
 124     {
 125         const int* aWeights = nullptr;
 126         switch(wcprec)
 127         {
 128         case 2: aWeights = g_aWeights2; assert( wc < 4 ); __analysis_assume( wc < 4 ); break;
 129         case 3: aWeights = g_aWeights3; assert( wc < 8 ); __analysis_assume( wc < 8 ); break;
 130         case 4: aWeights = g_aWeights4; assert( wc < 16 ); __analysis_assume( wc < 16 ); break;
 131         default: assert(false); out.r = out.g = out.b = 0; return;
 132         }
 133         out.r = uint8_t((uint32_t(c0.r) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.r) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
 134         out.g = uint8_t((uint32_t(c0.g) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.g) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
 135         out.b = uint8_t((uint32_t(c0.b) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.b) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
 136     }
 137
 138     static void InterpolateA(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wa, _In_ size_t waprec, _Out_ LDRColorA& out)
 139     {
 140         const int* aWeights = nullptr;
 141         switch(waprec)
 142         {
 143         case 2: aWeights = g_aWeights2; assert( wa < 4 ); __analysis_assume( wa < 4 ); break;
 144         case 3: aWeights = g_aWeights3; assert( wa < 8 ); __analysis_assume( wa < 8 ); break;
 145         case 4: aWeights = g_aWeights4; assert( wa < 16 ); __analysis_assume( wa < 16 ); break;
 146         default: assert(false); out.a = 0; return;
 147         }
 148         out.a = uint8_t((uint32_t(c0.a) * uint32_t(BC67_WEIGHT_MAX - aWeights[wa]) + uint32_t(c1.a) * uint32_t(aWeights[wa]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
 149     }
 150
 151     static void Interpolate(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wa, _In_ size_t wcprec, _In_ size_t waprec, _Out_ LDRColorA& out)
 152     {
 153         InterpolateRGB(c0, c1, wc, wcprec, out);
 154         InterpolateA(c0, c1, wa, waprec, out);
 155     }
 156 };
 157
 158 class HDRColorA
 159 {
 160 public:
 161     float r, g, b, a;
 162
 163 public:
 164     HDRColorA() {}
 165     HDRColorA(float _r, float _g, float _b, float _a) : r(_r), g(_g), b(_b), a(_a) {}
 166     HDRColorA(const HDRColorA& c) : r(c.r), g(c.g), b(c.b), a(c.a) {}
 167     HDRColorA(const LDRColorA& c)
 168     {
 169         r = float(c.r) * (1.0f/255.0f);
 170         g = float(c.g) * (1.0f/255.0f);
 171         b = float(c.b) * (1.0f/255.0f);
 172         a = float(c.a) * (1.0f/255.0f);
 173     }
 174
 175     // binary operators
 176     HDRColorA operator + ( _In_ const HDRColorA& c ) const
 177     {
 178         return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a);
 179     }
 180
 181     HDRColorA operator - ( _In_ const HDRColorA& c ) const
 182     {
 183         return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a);
 184     }
 185
 186     HDRColorA operator * ( _In_ float f ) const
 187     {
 188         return HDRColorA(r * f, g * f, b * f, a * f);
 189     }
 190
 191     HDRColorA operator / ( _In_ float f ) const
 192     {
 193         float fInv = 1.0f / f;
 194         return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv);
 195     }
 196
 197     float operator * ( _In_ const HDRColorA& c ) const
 198     {
 199         return r * c.r + g * c.g + b * c.b + a * c.a;
 200     }
 201
 202     // assignment operators
 203     HDRColorA& operator += ( _In_ const HDRColorA& c )
 204     {
 205         r += c.r;
 206         g += c.g;
 207         b += c.b;
 208         a += c.a;
 209         return *this;
 210     }
 211
 212     HDRColorA& operator -= ( _In_ const HDRColorA& c )
 213     {
 214         r -= c.r;
 215         g -= c.g;
 216         b -= c.b;
 217         a -= c.a;
 218         return *this;
 219     }
 220
 221     HDRColorA& operator *= ( _In_ float f )
 222     {
 223         r *= f;
 224         g *= f;
 225         b *= f;
 226         a *= f;
 227         return *this;
 228     }
 229
 230     HDRColorA& operator /= ( _In_ float f )
 231     {
 232         float fInv = 1.0f / f;
 233         r *= fInv;
 234         g *= fInv;
 235         b *= fInv;
 236         a *= fInv;
 237         return *this;
 238     }
 239
 240     HDRColorA& operator = (_In_ const LDRColorA& c)
 241     {
 242         r = (float) c.r;
 243         g = (float) c.g;
 244         b = (float) c.b;
 245         a = (float) c.a;
 246         return *this;
 247     }
 248
 249     HDRColorA& Clamp(_In_ float fMin, _In_ float fMax)
 250     {
 251         r = std::min<float>(fMax, std::max<float>(fMin, r));
 252         g = std::min<float>(fMax, std::max<float>(fMin, g));
 253         b = std::min<float>(fMax, std::max<float>(fMin, b));
 254         a = std::min<float>(fMax, std::max<float>(fMin, a));
 255         return *this;
 256     }
 257
 258     LDRColorA ToLDRColorA() const
 259     {
 260         return LDRColorA((uint8_t) (r + 0.01f), (uint8_t) (g + 0.01f), (uint8_t) (b + 0.01f), (uint8_t) (a + 0.01f));
 261     }
 262 };
 263
 264 inline LDRColorA LDRColorA::operator = (_In_ const HDRColorA& c)
 265 {
 266     LDRColorA ret;
 267     HDRColorA tmp(c);
 268     tmp = tmp.Clamp(0.0f, 1.0f) * 255.0f;
 269     ret.r = uint8_t(tmp.r + 0.001f);
 270     ret.g = uint8_t(tmp.g + 0.001f);
 271     ret.b = uint8_t(tmp.b + 0.001f);
 272     ret.a = uint8_t(tmp.a + 0.001f);
 273     return ret;
 274 }
 275
 276 struct LDREndPntPair
 277 {
 278     LDRColorA A;
 279     LDRColorA B;
 280 };
 281
 282 struct HDREndPntPair
 283 {
 284     HDRColorA A;
 285     HDRColorA B;
 286 };
 287
 288 inline HDRColorA* HDRColorALerp(_Out_ HDRColorA *pOut, _In_ const HDRColorA *pC1, _In_ const HDRColorA *pC2, _In_ float s)
 289 {
 290     pOut->r = pC1->r + s * (pC2->r - pC1->r);
 291     pOut->g = pC1->g + s * (pC2->g - pC1->g);
 292     pOut->b = pC1->b + s * (pC2->b - pC1->b);
 293     pOut->a = pC1->a + s * (pC2->a - pC1->a);
 294     return pOut;
 295 }
 296
 297 #pragma pack(push,1)
 298 // BC1/DXT1 compression (4 bits per texel)
 299 struct D3DX_BC1
 300 {
 301     uint16_t    rgb[2]; // 565 colors
 302     uint32_t    bitmap; // 2bpp rgb bitmap
 303 };
 304
 305 // BC2/DXT2/3 compression (8 bits per texel)
 306 struct D3DX_BC2
 307 {
 308     uint32_t    bitmap[2];  // 4bpp alpha bitmap
 309     D3DX_BC1    bc1;        // BC1 rgb data
 310 };
 311
 312 // BC3/DXT4/5 compression (8 bits per texel)
 313 struct D3DX_BC3
 314 {
 315     uint8_t     alpha[2];   // alpha values
 316     uint8_t     bitmap[6];  // 3bpp alpha bitmap
 317     D3DX_BC1    bc1;        // BC1 rgb data
 318 };
 319 #pragma pack(pop)
 320
 321 class INTColor
 322 {
 323 public:
 324     int r, g, b;
 325
 326 public:
 327     INTColor() {}
 328     INTColor(int nr, int ng, int nb) {r = nr; g = ng; b = nb;}
 329     INTColor(const INTColor& c) {r = c.r; g = c.g; b = c.b;}
 330
 331     INTColor operator - ( _In_ const INTColor& c ) const
 332     {
 333         return INTColor(r - c.r, g - c.g, b - c.b);
 334     }
 335
 336     INTColor& operator += ( _In_ const INTColor& c )
 337     {
 338         r += c.r;
 339         g += c.g;
 340         b += c.b;
 341         return *this;
 342     }
 343
 344     INTColor& operator -= ( _In_ const INTColor& c )
 345     {
 346         r -= c.r;
 347         g -= c.g;
 348         b -= c.b;
 349         return *this;
 350     }
 351
 352     INTColor& operator &= ( _In_ const INTColor& c )
 353     {
 354         r &= c.r;
 355         g &= c.g;
 356         b &= c.b;
 357         return *this;
 358     }
 359
 360     int& operator [] ( _In_ uint8_t i )
 361     {
 362         assert(i < sizeof(INTColor) / sizeof(int));
 363         __analysis_assume(i < sizeof(INTColor) / sizeof(int));
 364         return ((int*) this)[i];
 365     }
 366
 367     void Set(_In_ const HDRColorA& c, _In_ bool bSigned)
 368     {
 369         XMHALF4 aF16;
 370
 371         XMVECTOR v = XMLoadFloat4( (const XMFLOAT4*)& c );
 372         XMStoreHalf4( &aF16, v );
 373
 374         r = F16ToINT(aF16.x, bSigned);
 375         g = F16ToINT(aF16.y, bSigned);
 376         b = F16ToINT(aF16.z, bSigned);
 377     }
 378
 379     INTColor& Clamp(_In_ int iMin, _In_ int iMax)
 380     {
 381         r = std::min<int>(iMax, std::max<int>(iMin, r));
 382         g = std::min<int>(iMax, std::max<int>(iMin, g));
 383         b = std::min<int>(iMax, std::max<int>(iMin, b));
 384         return *this;
 385     }
 386
 387     INTColor& SignExtend(_In_ const LDRColorA& Prec)
 388     {
 389         r = SIGN_EXTEND(r, Prec.r);
 390         g = SIGN_EXTEND(g, Prec.g);
 391         b = SIGN_EXTEND(b, Prec.b);
 392         return *this;
 393     }
 394
 395     void ToF16(_Out_cap_c_(3) HALF aF16[3], _In_ bool bSigned) const
 396     {
 397         aF16[0] = INT2F16(r, bSigned);
 398         aF16[1] = INT2F16(g, bSigned);
 399         aF16[2] = INT2F16(b, bSigned);
 400     }
 401
 402 private:
 403     static int F16ToINT(_In_ const HALF& f, _In_ bool bSigned)
 404     {
 405         uint16_t input = *((const uint16_t*) &f);
 406         int out, s;
 407         if(bSigned)
 408         {
 409             s = input & F16S_MASK;
 410             input &= F16EM_MASK;
 411             if(input > F16MAX) out = F16MAX;
 412             else out = input;
 413             out = s ? -out : out;
 414         }
 415         else
 416         {
 417             if(input & F16S_MASK) out = 0;
 418             else out = input;
 419         }
 420         return out;
 421     }
 422
 423     static HALF INT2F16(_In_ int input, _In_ bool bSigned)
 424     {
 425         HALF h;
 426         uint16_t out;
 427         if(bSigned)
 428         {
 429             int s = 0;
 430             if(input < 0)
 431             {
 432                 s = F16S_MASK;
 433                 input = -input;
 434             }
 435             out = uint16_t(s | input);
 436         }
 437         else
 438         {
 439             assert(input >= 0 && input <= F16MAX);
 440             out = (uint16_t) input;
 441         }
 442
 443         *((uint16_t*) &h) = out;
 444         return h;
 445     }
 446 };
 447
 448 struct INTEndPntPair
 449 {
 450     INTColor A;
 451     INTColor B;
 452 };
 453
 454 template< size_t SizeInBytes >
 455 class CBits
 456 {
 457 public:
 458     uint8_t GetBit(_Inout_ size_t& uStartBit) const
 459     {
 460         assert(uStartBit < 128);
 461         __analysis_assume(uStartBit < 128);
 462         size_t uIndex = uStartBit >> 3;
 463         uint8_t ret = (m_uBits[uIndex] >> (uStartBit - (uIndex << 3))) & 0x01;
 464         uStartBit++;
 465         return ret;
 466     }
 467
 468     uint8_t GetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits) const
 469     {
 470         if(uNumBits == 0) return 0;
 471         assert(uStartBit + uNumBits <= 128 && uNumBits <= 8);
 472         __analysis_assume(uStartBit + uNumBits <= 128 && uNumBits <= 8);
 473         uint8_t ret;
 474         size_t uIndex = uStartBit >> 3;
 475         size_t uBase = uStartBit - (uIndex << 3);
 476         if(uBase + uNumBits > 8)
 477         {
 478             size_t uFirstIndexBits = 8 - uBase;
 479             size_t uNextIndexBits = uNumBits - uFirstIndexBits;
 480             ret = (m_uBits[uIndex] >> uBase) | ((m_uBits[uIndex+1] & ((1 << uNextIndexBits) - 1)) << uFirstIndexBits);
 481         }
 482         else
 483         {
 484             ret = (m_uBits[uIndex] >> uBase) & ((1 << uNumBits) - 1);
 485         }
 486         assert(ret < (1 << uNumBits));
 487         uStartBit += uNumBits;
 488         return ret;
 489     }
 490
 491     void SetBit(_Inout_ size_t& uStartBit, _In_ uint8_t uValue)
 492     {
 493         assert(uStartBit < 128 && uValue < 2);
 494         __analysis_assume(uStartBit < 128 && uValue < 2);
 495         size_t uIndex = uStartBit >> 3;
 496         size_t uBase = uStartBit - (uIndex << 3);
 497         m_uBits[uIndex] &= ~(1 << uBase);
 498         m_uBits[uIndex] |= uValue << uBase;
 499         uStartBit++;
 500     }
 501
 502     void SetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits, _In_ uint8_t uValue)
 503     {
 504         if(uNumBits == 0)
 505             return;
 506         assert(uStartBit + uNumBits <= 128 && uNumBits <= 8);
 507         __analysis_assume(uStartBit + uNumBits <= 128 && uNumBits <= 8);
 508         assert(uValue < (1 << uNumBits));
 509         size_t uIndex = uStartBit >> 3;
 510         size_t uBase = uStartBit - (uIndex << 3);
 511         if(uBase + uNumBits > 8)
 512         {
 513             size_t uFirstIndexBits = 8 - uBase;
 514             size_t uNextIndexBits = uNumBits - uFirstIndexBits;
 515             m_uBits[uIndex] &= ~(((1 << uFirstIndexBits) - 1) << uBase);
 516             m_uBits[uIndex] |= uValue << uBase;
 517             m_uBits[uIndex+1] &= ~((1 << uNextIndexBits) - 1);
 518             m_uBits[uIndex+1] |= uValue >> uFirstIndexBits;
 519         }
 520         else
 521         {
 522             m_uBits[uIndex] &= ~(((1 << uNumBits) - 1) << uBase);
 523             m_uBits[uIndex] |= uValue << uBase;
 524         }
 525         uStartBit += uNumBits;
 526     }
 527
 528 private:
 529     uint8_t m_uBits[ SizeInBytes ];
 530 };
 531
 532 #pragma warning(push)
 533 #pragma warning(disable : 4127 4480 4512)
 534
 535 // BC6H compression (16 bits per texel)
 536 class D3DX_BC6H : private CBits< 16 >
 537 {
 538 public:
 539     void Decode(_In_ bool bSigned, _Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
 540     void Encode(_In_ bool bSigned, _In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
 541
 542 private:
 543     enum EField : uint8_t
 544     {
 545         NA, // N/A
 546         M,  // Mode
 547         D,  // Shape
 548         RW,
 549         RX,
 550         RY,
 551         RZ,
 552         GW,
 553         GX,
 554         GY,
 555         GZ,
 556         BW,
 557         BX,
 558         BY,
 559         BZ,
 560     };
 561
 562     struct ModeDescriptor
 563     {
 564         EField m_eField;
 565         uint8_t   m_uBit;
 566     };
 567
 568     struct ModeInfo
 569     {
 570         uint8_t uMode;
 571         uint8_t uPartitions;
 572         bool bTransformed;
 573         uint8_t uIndexPrec;
 574         LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2];
 575     };
 576
 577     struct EncodeParams
 578     {
 579         float fBestErr;
 580         const bool bSigned;
 581         uint8_t uMode;
 582         uint8_t uShape;
 583         const HDRColorA* const aHDRPixels;
 584         INTEndPntPair aUnqEndPts[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS];
 585         INTColor aIPixels[NUM_PIXELS_PER_BLOCK];
 586
 587         EncodeParams(const HDRColorA* const aOriginal, bool bSignedFormat) :
 588             aHDRPixels(aOriginal), fBestErr(FLT_MAX), bSigned(bSignedFormat)
 589         {
 590             for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 591             {
 592                 aIPixels[i].Set(aOriginal[i], bSigned);
 593             }
 594         }
 595     };
 596
 597     static int Quantize(_In_ int iValue, _In_ int prec, _In_ bool bSigned);
 598     static int Unquantize(_In_ int comp, _In_ uint8_t uBitsPerComp, _In_ bool bSigned);
 599     static int FinishUnquantize(_In_ int comp, _In_ bool bSigned);
 600
 601     static bool EndPointsFit(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[]);
 602
 603     void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ const INTEndPntPair& endPts,
 604                                   _Out_cap_c_(BC6H_MAX_INDICES) INTColor aPalette[]) const;
 605     float MapColorsQuantized(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ const INTEndPntPair &endPts) const;
 606     float PerturbOne(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ uint8_t ch,
 607                      _In_ const INTEndPntPair& oldEndPts, _Out_ INTEndPntPair& newEndPts, _In_ float fOldErr, _In_ int do_b) const;
 608     void OptimizeOne(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ float aOrgErr,
 609                      _In_ const INTEndPntPair &aOrgEndPts, _Out_ INTEndPntPair &aOptEndPts) const;
 610     void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const float aOrgErr[],
 611                            _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aOrgEndPts[],
 612                            _Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aOptEndPts[]) const;
 613     static void SwapIndices(_In_ const EncodeParams* pEP, _Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[],
 614                             _In_count_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[]);
 615     void AssignIndices(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
 616                         _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[],
 617                         _Out_cap_c_(BC6H_MAX_REGIONS) float aTotErr[]) const;
 618     void QuantizeEndPts(_In_ const EncodeParams* pEP, _Out_cap_c_(BC6H_MAX_REGIONS) INTEndPntPair* qQntEndPts) const;
 619     void EmitBlock(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
 620                    _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndices[]);
 621     void Refine(_Inout_ EncodeParams* pEP);
 622
 623     static void GeneratePaletteUnquantized(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _Out_cap_c_(BC6H_MAX_INDICES) INTColor aPalette[]);
 624     float MapColors(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _In_ size_t np, _In_count_(np) const size_t* auIndex) const;
 625     float RoughMSE(_Inout_ EncodeParams* pEP) const;
 626
 627 private:
 628     const static ModeDescriptor ms_aDesc[][82];
 629     const static ModeInfo ms_aInfo[];
 630     const static int ms_aModeToInfo[];
 631 };
 632
 633 // BC67 compression (16b bits per texel)
 634 class D3DX_BC7 : private CBits< 16 >
 635 {
 636 public:
 637     void Decode(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
 638     void Encode(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
 639
 640 private:
 641     struct ModeInfo
 642     {
 643         uint8_t uPartitions;
 644         uint8_t uPartitionBits;
 645         uint8_t uPBits;
 646         uint8_t uRotationBits;
 647         uint8_t uIndexModeBits;
 648         uint8_t uIndexPrec;
 649         uint8_t uIndexPrec2;
 650         LDRColorA RGBAPrec;
 651         LDRColorA RGBAPrecWithP;
 652     };
 653
 654     struct EncodeParams
 655     {
 656         uint8_t uMode;
 657         LDREndPntPair aEndPts[BC7_MAX_SHAPES][BC7_MAX_REGIONS];
 658         LDRColorA aLDRPixels[NUM_PIXELS_PER_BLOCK];
 659         const HDRColorA* const aHDRPixels;
 660
 661         EncodeParams(const HDRColorA* const aOriginal) : aHDRPixels(aOriginal) {}
 662     };
 663
 664     static uint8_t Quantize(_In_ uint8_t comp, _In_ uint8_t uPrec)
 665     {
 666         assert(0 < uPrec && uPrec <= 8);
 667         uint8_t rnd = (uint8_t) std::min<uint16_t>(255, uint16_t(comp) + (1 << (7 - uPrec)));
 668         return rnd >> (8 - uPrec);
 669     }
 670
 671     static LDRColorA Quantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
 672     {
 673         LDRColorA q;
 674         q.r = Quantize(c.r, RGBAPrec.r);
 675         q.g = Quantize(c.g, RGBAPrec.g);
 676         q.b = Quantize(c.b, RGBAPrec.b);
 677         if(RGBAPrec.a)
 678             q.a = Quantize(c.a, RGBAPrec.a);
 679         else
 680             q.a = 255;
 681         return q;
 682     }
 683
 684     static uint8_t Unquantize(_In_ uint8_t comp, _In_ size_t uPrec)
 685     {
 686         assert(0 < uPrec && uPrec <= 8);
 687         comp = comp << (8 - uPrec);
 688         return comp | (comp >> uPrec);
 689     }
 690
 691     static LDRColorA Unquantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
 692     {
 693         LDRColorA q;
 694         q.r = Unquantize(c.r, RGBAPrec.r);
 695         q.g = Unquantize(c.g, RGBAPrec.g);
 696         q.b = Unquantize(c.b, RGBAPrec.b);
 697         q.a = RGBAPrec.a > 0 ? Unquantize(c.a, RGBAPrec.a) : 255;
 698         return q;
 699     }
 700
 701     void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ size_t uIndexMode, _In_ const LDREndPntPair& endpts,
 702                                   _Out_cap_c_(BC7_MAX_INDICES) LDRColorA aPalette[]) const;
 703     float PerturbOne(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
 704                      _In_ size_t ch, _In_ const LDREndPntPair &old_endpts,
 705                      _Out_ LDREndPntPair &new_endpts, _In_ float old_err, _In_ uint8_t do_b) const;
 706     void Exhaustive(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
 707                     _In_ size_t ch, _Inout_ float& fOrgErr, _Inout_ LDREndPntPair& optEndPt) const;
 708     void OptimizeOne(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
 709                      _In_ float orig_err, _In_ const LDREndPntPair &orig_endpts, _Out_ LDREndPntPair &opt_endpts) const;
 710     void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
 711                            _In_count_c_(BC7_MAX_REGIONS) const float orig_err[],
 712                            _In_count_c_(BC7_MAX_REGIONS) const LDREndPntPair orig_endpts[],
 713                            _Out_cap_c_(BC7_MAX_REGIONS) LDREndPntPair opt_endpts[]) const;
 714     void AssignIndices(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
 715                        _In_count_c_(BC7_MAX_REGIONS) LDREndPntPair endpts[],
 716                        _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices2[],
 717                        _Out_cap_c_(BC7_MAX_REGIONS) float afTotErr[]) const;
 718     void EmitBlock(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode,
 719                    _In_count_c_(BC7_MAX_REGIONS) const LDREndPntPair aEndPts[],
 720                    _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndex[],
 721                    _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndex2[]);
 722     float Refine(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode);
 723
 724     float MapColors(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
 725                     _In_ const LDREndPntPair& endPts, _In_ float fMinErr) const;
 726     static float RoughMSE(_Inout_ EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode);
 727
 728 private:
 729     const static ModeInfo ms_aInfo[];
 730 };
 731
 732 //-------------------------------------------------------------------------------------
 733 template <bool bRange> void OptimizeAlpha(float *pX, float *pY, const float *pPoints, size_t cSteps)
 734 {
 735     static const float pC6[] = { 5.0f/5.0f, 4.0f/5.0f, 3.0f/5.0f, 2.0f/5.0f, 1.0f/5.0f, 0.0f/5.0f };
 736     static const float pD6[] = { 0.0f/5.0f, 1.0f/5.0f, 2.0f/5.0f, 3.0f/5.0f, 4.0f/5.0f, 5.0f/5.0f };
 737     static const float pC8[] = { 7.0f/7.0f, 6.0f/7.0f, 5.0f/7.0f, 4.0f/7.0f, 3.0f/7.0f, 2.0f/7.0f, 1.0f/7.0f, 0.0f/7.0f };
 738     static const float pD8[] = { 0.0f/7.0f, 1.0f/7.0f, 2.0f/7.0f, 3.0f/7.0f, 4.0f/7.0f, 5.0f/7.0f, 6.0f/7.0f, 7.0f/7.0f };
 739
 740     const float *pC = (6 == cSteps) ? pC6 : pC8;
 741     const float *pD = (6 == cSteps) ? pD6 : pD8;
 742
 743     float MAX_VALUE = 1.0f;
 744     float MIN_VALUE;
 745     if (bRange)
 746     {
 747         MIN_VALUE = -1.0f;
 748     }
 749     else
 750     {
 751         MIN_VALUE = 0.0f;
 752     }
 753
 754     // Find Min and Max points, as starting point
 755     float fX = MAX_VALUE;
 756     float fY = MIN_VALUE;
 757
 758     if(8 == cSteps)
 759     {
 760         for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
 761         {
 762             if(pPoints[iPoint] < fX)
 763                 fX = pPoints[iPoint];
 764
 765             if(pPoints[iPoint] > fY)
 766                 fY = pPoints[iPoint];
 767         }
 768     }
 769     else
 770     {
 771         for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
 772         {
 773             if(pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE)
 774                 fX = pPoints[iPoint];
 775
 776             if(pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE)
 777                 fY = pPoints[iPoint];
 778         }
 779
 780         if (fX == fY)
 781         {
 782             fY = MAX_VALUE;
 783         }
 784     }
 785
 786     // Use Newton's Method to find local minima of sum-of-squares error.
 787     float fSteps = (float) (cSteps - 1);
 788
 789     for(size_t iIteration = 0; iIteration < 8; iIteration++)
 790     {
 791         float fScale;
 792
 793         if((fY - fX) < (1.0f / 256.0f))
 794             break;
 795
 796         fScale = fSteps / (fY - fX);
 797
 798         // Calculate new steps
 799         float pSteps[8];
 800
 801         for(size_t iStep = 0; iStep < cSteps; iStep++)
 802             pSteps[iStep] = pC[iStep] * fX + pD[iStep] * fY;
 803
 804         if(6 == cSteps)
 805         {
 806             pSteps[6] = MIN_VALUE;
 807             pSteps[7] = MAX_VALUE;
 808         }
 809
 810         // Evaluate function, and derivatives
 811         float dX  = 0.0f;
 812         float dY  = 0.0f;
 813         float d2X = 0.0f;
 814         float d2Y = 0.0f;
 815
 816         for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
 817         {
 818             float fDot = (pPoints[iPoint] - fX) * fScale;
 819
 820             size_t iStep;
 821
 822             if(fDot <= 0.0f)
 823                 iStep = ((6 == cSteps) && (pPoints[iPoint] <= fX * 0.5f)) ? 6 : 0;
 824             else if(fDot >= fSteps)
 825                 iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + 1.0f) * 0.5f)) ? 7 : (cSteps - 1);
 826             else
 827                 iStep = static_cast<int32_t>(fDot + 0.5f);
 828
 829
 830             if(iStep < cSteps)
 831             {
 832                 // D3DX had this computation backwards (pPoints[iPoint] - pSteps[iStep])
 833                 // this fix improves RMS of the alpha component
 834                 float fDiff = pSteps[iStep] - pPoints[iPoint];
 835
 836                 dX  += pC[iStep] * fDiff;
 837                 d2X += pC[iStep] * pC[iStep];
 838
 839                 dY  += pD[iStep] * fDiff;
 840                 d2Y += pD[iStep] * pD[iStep];
 841             }
 842         }
 843
 844         // Move endpoints
 845         if(d2X > 0.0f)
 846             fX -= dX / d2X;
 847
 848         if(d2Y > 0.0f)
 849             fY -= dY / d2Y;
 850
 851         if(fX > fY)
 852         {
 853             float f = fX; fX = fY; fY = f;
 854         }
 855
 856         if((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f)))
 857             break;
 858     }
 859
 860     *pX = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX;
 861     *pY = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY;
 862 }
 863 #pragma warning(pop)
 864
 865
 866 //-------------------------------------------------------------------------------------
 867 // Functions
 868 //-------------------------------------------------------------------------------------
 869
 870 typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC);
 871 typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, DWORD flags);
 872
 873 void D3DXDecodeBC1(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC);
 874 void D3DXDecodeBC2(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
 875 void D3DXDecodeBC3(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
 876 void D3DXDecodeBC4U(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC);
 877 void D3DXDecodeBC4S(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC);
 878 void D3DXDecodeBC5U(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
 879 void D3DXDecodeBC5S(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
 880 void D3DXDecodeBC6HU(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
 881 void D3DXDecodeBC6HS(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
 882 void D3DXDecodeBC7(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
 883
 884 void D3DXEncodeBC1(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float alphaRef, _In_ DWORD flags);
 885     // BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above
 886
 887 void D3DXEncodeBC2(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 888 void D3DXEncodeBC3(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 889 void D3DXEncodeBC4U(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 890 void D3DXEncodeBC4S(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 891 void D3DXEncodeBC5U(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 892 void D3DXEncodeBC5S(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 893 void D3DXEncodeBC6HU(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 894 void D3DXEncodeBC6HS(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 895 void D3DXEncodeBC7(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
 896
 897 }; // namespace