1 //-------------------------------------------------------------------------------------
4 // Block-compression (BC) functionality
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
11 // Copyright (c) Microsoft Corporation. All rights reserved.
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
16 #if defined(_MSC_VER) && (_MSC_VER > 1000)
25 #include <directxmath.h>
26 #include <directxpackedvector.h>
32 #pragma warning(disable : 4005)
// Local shorthand for the PackedVector types this header uses for half-precision
// values (HALF, XMHALF4) and a 5:6:5 packed color (XMU565).
40 typedef PackedVector::HALF HALF;
41 typedef PackedVector::XMHALF4 XMHALF4;
42 typedef PackedVector::XMU565 XMU565;
45 //-------------------------------------------------------------------------------------
47 //-------------------------------------------------------------------------------------
// Bit-level constants for half-precision (f16) values handled as raw uint16_t patterns.
// F16S_MASK and F16EM_MASK are complementary: together they cover all 16 bits.
49 const uint16_t F16S_MASK = 0x8000; // f16 sign mask
50 const uint16_t F16EM_MASK = 0x7fff; // f16 exp & mantissa mask
51 const uint16_t F16MAX = 0x7bff; // MAXFLT bit pattern for XMHALF
// Sign-extends the low `nb` bits of the integer `x` to a full int.
// If bit (nb-1) of x is set, every bit at position nb and above is forced to 1;
// otherwise x is returned unchanged.
// The high-bit mask is built as ~0 XOR ((1 << nb) - 1) instead of (~0) << nb,
// because left-shifting a negative value is undefined behavior before C++20.
// Both arguments are evaluated more than once, so pass side-effect-free expressions.
#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)^((1<<(nb))-1)):0)|(x))
55 // Because these are used in SAL annotations, they need to remain macros rather than const values
// NUM_PIXELS_PER_BLOCK is the element count of every per-block pixel/index array
// declared below; the *_MAX_REGIONS / *_MAX_INDICES values bound the per-block
// endpoint and palette arrays of the BC6H and BC7 coders.
56 #define NUM_PIXELS_PER_BLOCK 16
57 #define BC6H_MAX_REGIONS 2
58 #define BC6H_MAX_INDICES 16
59 #define BC7_MAX_REGIONS 3
60 #define BC7_MAX_INDICES 16
// Channel and shape counts for BC6H.
62 const size_t BC6H_NUM_CHANNELS = 3;
63 const size_t BC6H_MAX_SHAPES = 32;
// Channel and shape counts for BC7.
65 const size_t BC7_NUM_CHANNELS = 4;
66 const size_t BC7_MAX_SHAPES = 64;
// Fixed-point blend parameters shared by BC6H/BC7 interpolation:
// result = (a * (WEIGHT_MAX - w) + b * w + WEIGHT_ROUND) >> WEIGHT_SHIFT.
// WEIGHT_MAX equals 1 << WEIGHT_SHIFT, and WEIGHT_ROUND is half of WEIGHT_MAX.
68 const uint32_t BC67_WEIGHT_MAX = 64;
69 const uint32_t BC67_WEIGHT_SHIFT = 6;
70 const uint32_t BC67_WEIGHT_ROUND = 32;
// Weight tables with 4, 8 and 16 entries, defined elsewhere. The LDRColorA
// interpolation helpers select them under case labels 2, 3 and 4 respectively.
72 extern const int g_aWeights2[4];
73 extern const int g_aWeights3[8];
74 extern const int g_aWeights4[16];
79 BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3
80 BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3
81 BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
84 //-------------------------------------------------------------------------------------
86 //-------------------------------------------------------------------------------------
// Builds a color from four 8-bit channel values.
95 LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) : r(_r), g(_g), b(_b), a(_a) {}
// Read-only channel access by index (annotated range 0..3).
// An out-of-range index asserts and falls back to the r channel.
97 const uint8_t& operator [] (_In_range_(0,3) size_t uElement) const
105 default: assert(false); return r;
// Mutable counterpart of the accessor above, with the same out-of-range fallback.
109 uint8_t& operator [] (_In_range_(0,3) size_t uElement)
117 default: assert(false); return r;
// Assignment from a floating-point color. Note that it returns LDRColorA by value,
// not by reference.
121 LDRColorA operator = (_In_ const HDRColorA& c);
// Blends the r, g and b channels of c0 and c1 into `out`.
//   c0, c1 : endpoint colors
//   wc     : index into the selected weight table; must be below the table size (4, 8 or 16)
//   wcprec : presumably the value matched against the case labels 2, 3, 4 that pick
//            g_aWeights2/3/4 — TODO confirm against the switch header
//   out    : receives r, g, b; out.a is not written by the lines shown
// A label other than 2/3/4 asserts, zeroes out.r/g/b and returns.
123 static void InterpolateRGB(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wcprec, _Out_ LDRColorA& out)
125 const int* aWeights = nullptr;
128 case 2: aWeights = g_aWeights2; assert( wc < 4 ); __analysis_assume( wc < 4 ); break;
129 case 3: aWeights = g_aWeights3; assert( wc < 8 ); __analysis_assume( wc < 8 ); break;
130 case 4: aWeights = g_aWeights4; assert( wc < 16 ); __analysis_assume( wc < 16 ); break;
131 default: assert(false); out.r = out.g = out.b = 0; return;
// Per channel: (c0 * (64 - w) + c1 * w + 32) >> 6 with w = aWeights[wc], i.e. a rounded
// fixed-point blend where w == 0 yields c0 and w == 64 yields c1.
133 out.r = uint8_t((uint32_t(c0.r) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.r) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
134 out.g = uint8_t((uint32_t(c0.g) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.g) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
135 out.b = uint8_t((uint32_t(c0.b) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.b) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
// Blends only the alpha channel of c0 and c1 into out.a.
//   wa     : index into the selected weight table; must be below the table size (4, 8 or 16)
//   waprec : presumably the value matched against the case labels 2, 3, 4 that pick
//            g_aWeights2/3/4 — TODO confirm against the switch header
// A label other than 2/3/4 asserts, sets out.a to 0 and returns.
// The r, g and b channels of `out` are not written by the lines shown.
138 static void InterpolateA(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wa, _In_ size_t waprec, _Out_ LDRColorA& out)
140 const int* aWeights = nullptr;
143 case 2: aWeights = g_aWeights2; assert( wa < 4 ); __analysis_assume( wa < 4 ); break;
144 case 3: aWeights = g_aWeights3; assert( wa < 8 ); __analysis_assume( wa < 8 ); break;
145 case 4: aWeights = g_aWeights4; assert( wa < 16 ); __analysis_assume( wa < 16 ); break;
146 default: assert(false); out.a = 0; return;
// Rounded fixed-point blend: (c0.a * (64 - w) + c1.a * w + 32) >> 6 with w = aWeights[wa].
148 out.a = uint8_t((uint32_t(c0.a) * uint32_t(BC67_WEIGHT_MAX - aWeights[wa]) + uint32_t(c1.a) * uint32_t(aWeights[wa]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT);
// Full RGBA blend: color channels use (wc, wcprec) and alpha uses (wa, waprec),
// so color and alpha can be interpolated with independent indices and precisions.
151 static void Interpolate(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wa, _In_ size_t wcprec, _In_ size_t waprec, _Out_ LDRColorA& out)
153 InterpolateRGB(c0, c1, wc, wcprec, out);
154 InterpolateA(c0, c1, wa, waprec, out);
// Builds a color from four float channel values.
165 HDRColorA(float _r, float _g, float _b, float _a) : r(_r), g(_g), b(_b), a(_a) {}
// Member-wise copy.
166 HDRColorA(const HDRColorA& c) : r(c.r), g(c.g), b(c.b), a(c.a) {}
// Widens an 8-bit color to float, scaling each channel by 1/255 so 0..255 maps onto 0..1.
167 HDRColorA(const LDRColorA& c)
169 r = float(c.r) * (1.0f/255.0f);
170 g = float(c.g) * (1.0f/255.0f);
171 b = float(c.b) * (1.0f/255.0f);
172 a = float(c.a) * (1.0f/255.0f);
// Channel-wise sum of two colors.
176 HDRColorA operator + ( _In_ const HDRColorA& c ) const
178 return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a);
// Channel-wise difference of two colors.
181 HDRColorA operator - ( _In_ const HDRColorA& c ) const
183 return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a);
// Scales every channel by f.
186 HDRColorA operator * ( _In_ float f ) const
188 return HDRColorA(r * f, g * f, b * f, a * f);
// Divides every channel by f, implemented as one reciprocal followed by multiplies.
// No guard against f == 0 appears in the lines shown.
191 HDRColorA operator / ( _In_ float f ) const
193 float fInv = 1.0f / f;
194 return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv);
// Dot product over all four channels (color * color yields a scalar, not a color).
197 float operator * ( _In_ const HDRColorA& c ) const
199 return r * c.r + g * c.g + b * c.b + a * c.a;
202 // assignment operators
// Compound-assignment forms; each returns HDRColorA&. Only the signatures and the
// reciprocal used by operator /= are visible here — presumably they mirror the
// binary operators of the same kind; TODO confirm against the full bodies.
203 HDRColorA& operator += ( _In_ const HDRColorA& c )
212 HDRColorA& operator -= ( _In_ const HDRColorA& c )
221 HDRColorA& operator *= ( _In_ float f )
230 HDRColorA& operator /= ( _In_ float f )
232 float fInv = 1.0f / f;
// Assignment from an 8-bit color (body not visible here).
240 HDRColorA& operator = (_In_ const LDRColorA& c)
// Clamps every channel in place to the closed range [fMin, fMax]
// (max against fMin first, then min against fMax). Returns HDRColorA&.
249 HDRColorA& Clamp(_In_ float fMin, _In_ float fMax)
251 r = std::min<float>(fMax, std::max<float>(fMin, r));
252 g = std::min<float>(fMax, std::max<float>(fMin, g));
253 b = std::min<float>(fMax, std::max<float>(fMin, b));
254 a = std::min<float>(fMax, std::max<float>(fMin, a));
// Converts to 8-bit channels by adding a 0.01 bias and truncating with a uint8_t cast.
// No scaling or clamping happens here, so the channels are presumably already in the
// 0..255 range when this is called — TODO confirm with callers.
258 LDRColorA ToLDRColorA() const
260 return LDRColorA((uint8_t) (r + 0.01f), (uint8_t) (g + 0.01f), (uint8_t) (b + 0.01f), (uint8_t) (a + 0.01f));
// Out-of-class definition of LDRColorA's assignment from a floating-point color.
// The visible lines clamp a temporary to [0, 1], scale it by 255 and truncate each
// channel (after a 0.001 bias) into `ret`.
// NOTE(review): the stores shown here target `ret` rather than this object's own
// r/g/b/a, and the operator returns by value — confirm whether the assignment is
// actually meant to update *this.
264 inline LDRColorA LDRColorA::operator = (_In_ const HDRColorA& c)
268 tmp = tmp.Clamp(0.0f, 1.0f) * 255.0f;
269 ret.r = uint8_t(tmp.r + 0.001f);
270 ret.g = uint8_t(tmp.g + 0.001f);
271 ret.b = uint8_t(tmp.b + 0.001f);
272 ret.a = uint8_t(tmp.a + 0.001f);
// Linear interpolation between two colors, channel by channel:
//   *pOut = *pC1 + s * (*pC2 - *pC1)
// so s == 0 reproduces *pC1 and s == 1 reproduces *pC2; s is not clamped.
// The return statement is not among the lines shown — presumably pOut; TODO confirm.
288 inline HDRColorA* HDRColorALerp(_Out_ HDRColorA *pOut, _In_ const HDRColorA *pC1, _In_ const HDRColorA *pC2, _In_ float s)
290 pOut->r = pC1->r + s * (pC2->r - pC1->r);
291 pOut->g = pC1->g + s * (pC2->g - pC1->g);
292 pOut->b = pC1->b + s * (pC2->b - pC1->b);
293 pOut->a = pC1->a + s * (pC2->a - pC1->a);
298 // BC1/DXT1 compression (4 bits per texel)
// The two fields shown total 8 bytes (2 x 16-bit colors + one 32-bit bitmap).
301 uint16_t rgb[2]; // 565 colors
302 uint32_t bitmap; // 2bpp rgb bitmap
305 // BC2/DXT2/3 compression (8 bits per texel)
// 8 bytes of explicit alpha followed by an embedded BC1 color block.
308 uint32_t bitmap[2]; // 4bpp alpha bitmap
309 D3DX_BC1 bc1; // BC1 rgb data
312 // BC3/DXT4/5 compression (8 bits per texel)
// 2 alpha endpoint bytes + 6 bytes of alpha indices, followed by an embedded BC1 color block.
315 uint8_t alpha[2]; // alpha values
316 uint8_t bitmap[6]; // 3bpp alpha bitmap
317 D3DX_BC1 bc1; // BC1 rgb data
// Builds an integer RGB triple from three ints.
328 INTColor(int nr, int ng, int nb) {r = nr; g = ng; b = nb;}
// Member-wise copy.
329 INTColor(const INTColor& c) {r = c.r; g = c.g; b = c.b;}
// Channel-wise difference.
331 INTColor operator - ( _In_ const INTColor& c ) const
333 return INTColor(r - c.r, g - c.g, b - c.b);
// Compound-assignment operators; only the signatures are visible here.
336 INTColor& operator += ( _In_ const INTColor& c )
344 INTColor& operator -= ( _In_ const INTColor& c )
352 INTColor& operator &= ( _In_ const INTColor& c )
// Indexed channel access that reinterprets the object itself as an array of int.
// The valid index range is sizeof(INTColor) / sizeof(int), so this depends on the
// class consisting solely of int-sized channel members.
360 int& operator [] ( _In_ uint8_t i )
362 assert(i < sizeof(INTColor) / sizeof(int));
363 __analysis_assume(i < sizeof(INTColor) / sizeof(int));
364 return ((int*) this)[i];
// Loads r, g, b from a floating-point color: c is converted to four half-precision
// values (XMLoadFloat4 then XMStoreHalf4) and the x, y, z halves are mapped to
// integers with F16ToINT. The fourth (alpha) half is not used by the lines shown.
// The cast of &c to XMFLOAT4* assumes HDRColorA is laid out as four consecutive
// floats — TODO confirm.
367 void Set(_In_ const HDRColorA& c, _In_ bool bSigned)
371 XMVECTOR v = XMLoadFloat4( (const XMFLOAT4*)& c );
372 XMStoreHalf4( &aF16, v );
374 r = F16ToINT(aF16.x, bSigned);
375 g = F16ToINT(aF16.y, bSigned);
376 b = F16ToINT(aF16.z, bSigned);
// Clamps r, g and b in place to the closed range [iMin, iMax]. Returns INTColor&.
379 INTColor& Clamp(_In_ int iMin, _In_ int iMax)
381 r = std::min<int>(iMax, std::max<int>(iMin, r));
382 g = std::min<int>(iMax, std::max<int>(iMin, g));
383 b = std::min<int>(iMax, std::max<int>(iMin, b));
// Sign-extends each channel in place, using the matching channel of Prec as the bit
// width passed to SIGN_EXTEND (Prec.r for r, Prec.g for g, Prec.b for b).
387 INTColor& SignExtend(_In_ const LDRColorA& Prec)
389 r = SIGN_EXTEND(r, Prec.r);
390 g = SIGN_EXTEND(g, Prec.g);
391 b = SIGN_EXTEND(b, Prec.b);
// Writes r, g, b into aF16[0..2] as half-precision values via INT2F16.
// bSigned is forwarded unchanged to each conversion.
395 void ToF16(_Out_cap_c_(3) HALF aF16[3], _In_ bool bSigned) const
397 aF16[0] = INT2F16(r, bSigned);
398 aF16[1] = INT2F16(g, bSigned);
399 aF16[2] = INT2F16(b, bSigned);
// Converts the raw bit pattern of a half to an int; works on the bits, not the
// numeric value. Only part of the body is visible. From the lines shown:
//  - the sign bit is isolated with F16S_MASK,
//  - a pattern above F16MAX is saturated to F16MAX,
//  - the result is negated when the sign bit was set,
//  - another path maps any input with the sign bit set to 0.
// NOTE(review): which of these belong to the bSigned path and which to the unsigned
// path is not visible here — confirm against the full body.
403 static int F16ToINT(_In_ const HALF& f, _In_ bool bSigned)
405 uint16_t input = *((const uint16_t*) &f);
409 s = input & F16S_MASK;
411 if(input > F16MAX) out = F16MAX;
413 out = s ? -out : out;
417 if(input & F16S_MASK) out = 0;
// Packs an int back into a half bit pattern. Only part of the body is visible.
// One path ORs a sign value `s` into the input bits; another asserts that the input
// already lies in [0, F16MAX] and uses it directly. The resulting 16 bits are
// stored into the HALF through a uint16_t pointer.
// NOTE(review): presumably the first path is the bSigned one — confirm.
423 static HALF INT2F16(_In_ int input, _In_ bool bSigned)
435 out = uint16_t(s | input);
439 assert(input >= 0 && input <= F16MAX);
440 out = (uint16_t) input;
443 *((uint16_t*) &h) = out;
// Fixed-size bit buffer of SizeInBytes bytes, accessed through a caller-owned bit
// cursor. Bit n lives in byte n / 8 at bit position n % 8 (least-significant bit
// first), and a multi-bit field may straddle two adjacent bytes. Every accessor
// advances the cursor past the bits it touched.
//
// All bounds checks derive from SizeInBytes, so the class is usable for any buffer
// size (for the 16-byte blocks used by BC6H/BC7 the limit is 128 bits).
template< size_t SizeInBytes >
class CBits
{
public:
    // Reads the single bit at uStartBit and advances the cursor by one.
    uint8_t GetBit(_Inout_ size_t& uStartBit) const
    {
        assert(uStartBit < c_uTotalBits);
        __analysis_assume(uStartBit < c_uTotalBits);
        const size_t uIndex = uStartBit >> 3;
        const size_t uBase = uStartBit - (uIndex << 3);
        const uint8_t ret = static_cast<uint8_t>((m_uBits[uIndex] >> uBase) & 0x01);
        ++uStartBit;
        return ret;
    }

    // Reads uNumBits (0..8) bits starting at uStartBit and advances the cursor.
    // A zero-width read returns 0 and leaves the cursor untouched.
    uint8_t GetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits) const
    {
        if(uNumBits == 0) return 0;
        assert(uStartBit + uNumBits <= c_uTotalBits && uNumBits <= 8);
        __analysis_assume(uStartBit + uNumBits <= c_uTotalBits && uNumBits <= 8);

        const size_t uIndex = uStartBit >> 3;
        const size_t uBase = uStartBit - (uIndex << 3);
        uint8_t ret;
        if(uBase + uNumBits > 8)
        {
            // Field straddles a byte boundary: low part comes from the top of this
            // byte, high part from the bottom of the next one.
            const size_t uFirstIndexBits = 8 - uBase;
            const size_t uNextIndexBits = uNumBits - uFirstIndexBits;
            ret = static_cast<uint8_t>((m_uBits[uIndex] >> uBase) |
                                       ((m_uBits[uIndex + 1] & ((1u << uNextIndexBits) - 1u)) << uFirstIndexBits));
        }
        else
        {
            ret = static_cast<uint8_t>((m_uBits[uIndex] >> uBase) & ((1u << uNumBits) - 1u));
        }
        assert(ret < (1u << uNumBits));
        uStartBit += uNumBits;
        return ret;
    }

    // Overwrites the single bit at uStartBit with uValue (0 or 1) and advances the cursor.
    void SetBit(_Inout_ size_t& uStartBit, _In_ uint8_t uValue)
    {
        assert(uStartBit < c_uTotalBits && uValue < 2);
        __analysis_assume(uStartBit < c_uTotalBits && uValue < 2);
        const size_t uIndex = uStartBit >> 3;
        const size_t uBase = uStartBit - (uIndex << 3);
        m_uBits[uIndex] = static_cast<uint8_t>((m_uBits[uIndex] & ~(1u << uBase)) | (unsigned(uValue) << uBase));
        ++uStartBit;
    }

    // Overwrites uNumBits (0..8) bits starting at uStartBit with the low bits of
    // uValue and advances the cursor. A zero-width write is a no-op.
    void SetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits, _In_ uint8_t uValue)
    {
        if(uNumBits == 0)
            return;
        assert(uStartBit + uNumBits <= c_uTotalBits && uNumBits <= 8);
        __analysis_assume(uStartBit + uNumBits <= c_uTotalBits && uNumBits <= 8);
        assert(uValue < (1u << uNumBits));
        const size_t uIndex = uStartBit >> 3;
        const size_t uBase = uStartBit - (uIndex << 3);
        if(uBase + uNumBits > 8)
        {
            // Split the value across two bytes; the cast to uint8_t deliberately drops
            // the bits of (uValue << uBase) that belong to the next byte.
            const size_t uFirstIndexBits = 8 - uBase;
            const size_t uNextIndexBits = uNumBits - uFirstIndexBits;
            m_uBits[uIndex] = static_cast<uint8_t>((m_uBits[uIndex] & ~(((1u << uFirstIndexBits) - 1u) << uBase)) |
                                                   (unsigned(uValue) << uBase));
            m_uBits[uIndex + 1] = static_cast<uint8_t>((m_uBits[uIndex + 1] & ~((1u << uNextIndexBits) - 1u)) |
                                                       (unsigned(uValue) >> uFirstIndexBits));
        }
        else
        {
            m_uBits[uIndex] = static_cast<uint8_t>((m_uBits[uIndex] & ~(((1u << uNumBits) - 1u) << uBase)) |
                                                   (unsigned(uValue) << uBase));
        }
        uStartBit += uNumBits;
    }

private:
    // Total number of addressable bits in the buffer.
    static const size_t c_uTotalBits = SizeInBytes * 8;

    uint8_t m_uBits[ SizeInBytes ];
};
532 #pragma warning(push)
533 #pragma warning(disable : 4127 4480 4512)
535 // BC6H compression (one 16-byte block per 16 texels, i.e. 8 bits per texel)
// The block's storage is the privately inherited 16-byte CBits buffer.
536 class D3DX_BC6H : private CBits< 16 >
// Decode writes NUM_PIXELS_PER_BLOCK colors to pOut; Encode reads NUM_PIXELS_PER_BLOCK
// colors from pIn. bSigned presumably selects the signed variant of the format
// (the free-function prototypes come in HU/HS pairs) — TODO confirm.
539 void Decode(_In_ bool bSigned, _Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
540 void Encode(_In_ bool bSigned, _In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
// Byte-sized enumeration; its enumerators are not visible here.
543 enum EField : uint8_t
562 struct ModeDescriptor
// Per-region, per-endpoint precision values stored as LDRColorA (indexed [region][endpoint]).
574 LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2];
// Encoder working state: the caller's source pixels (not owned), per-shape/per-region
// endpoint candidates, and the source pixels converted to integer form.
583 const HDRColorA* const aHDRPixels;
584 INTEndPntPair aUnqEndPts[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS];
585 INTColor aIPixels[NUM_PIXELS_PER_BLOCK];
// Stores the source pointer, starts the best error at FLT_MAX, and converts every
// source pixel to an INTColor via INTColor::Set. aOriginal must point to at least
// NUM_PIXELS_PER_BLOCK colors.
587 EncodeParams(const HDRColorA* const aOriginal, bool bSignedFormat) :
588 aHDRPixels(aOriginal), fBestErr(FLT_MAX), bSigned(bSignedFormat)
590 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
592 aIPixels[i].Set(aOriginal[i], bSigned);
// Scalar quantization helpers (declarations only; definitions are elsewhere).
597 static int Quantize(_In_ int iValue, _In_ int prec, _In_ bool bSigned);
598 static int Unquantize(_In_ int comp, _In_ uint8_t uBitsPerComp, _In_ bool bSigned);
599 static int FinishUnquantize(_In_ int comp, _In_ bool bSigned);
601 static bool EndPointsFit(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[]);
// Encoder pipeline steps (declarations only). Array parameters carry their expected
// element counts in the SAL annotations: per-region arrays hold BC6H_MAX_REGIONS
// entries, palettes BC6H_MAX_INDICES, and index arrays NUM_PIXELS_PER_BLOCK.
603 void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ const INTEndPntPair& endPts,
604 _Out_cap_c_(BC6H_MAX_INDICES) INTColor aPalette[]) const;
605 float MapColorsQuantized(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ const INTEndPntPair &endPts) const;
606 float PerturbOne(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ uint8_t ch,
607 _In_ const INTEndPntPair& oldEndPts, _Out_ INTEndPntPair& newEndPts, _In_ float fOldErr, _In_ int do_b) const;
608 void OptimizeOne(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ float aOrgErr,
609 _In_ const INTEndPntPair &aOrgEndPts, _Out_ INTEndPntPair &aOptEndPts) const;
610 void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const float aOrgErr[],
611 _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aOrgEndPts[],
612 _Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aOptEndPts[]) const;
613 static void SwapIndices(_In_ const EncodeParams* pEP, _Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[],
614 _In_count_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[]);
615 void AssignIndices(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
616 _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[],
617 _Out_cap_c_(BC6H_MAX_REGIONS) float aTotErr[]) const;
618 void QuantizeEndPts(_In_ const EncodeParams* pEP, _Out_cap_c_(BC6H_MAX_REGIONS) INTEndPntPair* qQntEndPts) const;
619 void EmitBlock(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[],
620 _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndices[]);
621 void Refine(_Inout_ EncodeParams* pEP);
623 static void GeneratePaletteUnquantized(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _Out_cap_c_(BC6H_MAX_INDICES) INTColor aPalette[]);
624 float MapColors(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _In_ size_t np, _In_count_(np) const size_t* auIndex) const;
625 float RoughMSE(_Inout_ EncodeParams* pEP) const;
// Static lookup tables, defined elsewhere. ms_aDesc has 82 ModeDescriptor entries per row.
628 const static ModeDescriptor ms_aDesc[][82];
629 const static ModeInfo ms_aInfo[];
630 const static int ms_aModeToInfo[];
633 // BC7 compression (one 16-byte block per 16 texels, i.e. 8 bits per texel)
// The block's storage is the privately inherited 16-byte CBits buffer.
634 class D3DX_BC7 : private CBits< 16 >
// Decode writes NUM_PIXELS_PER_BLOCK colors to pOut; Encode reads NUM_PIXELS_PER_BLOCK
// colors from pIn.
637 void Decode(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
638 void Encode(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
// Per-mode layout fields (bit widths and precisions), each stored in one byte
// or as an LDRColorA of per-channel precisions.
644 uint8_t uPartitionBits;
646 uint8_t uRotationBits;
647 uint8_t uIndexModeBits;
651 LDRColorA RGBAPrecWithP;
// Encoder working state: per-shape/per-region endpoint candidates, the source pixels
// in 8-bit form, and the caller's original float pixels (not owned).
657 LDREndPntPair aEndPts[BC7_MAX_SHAPES][BC7_MAX_REGIONS];
658 LDRColorA aLDRPixels[NUM_PIXELS_PER_BLOCK];
659 const HDRColorA* const aHDRPixels;
// Stores only the source pointer; the arrays above are not initialized here.
661 EncodeParams(const HDRColorA* const aOriginal) : aHDRPixels(aOriginal) {}
// Quantizes an 8-bit component down to uPrec bits (1..8) with round-to-nearest:
// half of the dropped range is added first, saturating at 255, then the low
// (8 - uPrec) bits are discarded.
//   comp  : 8-bit input component
//   uPrec : number of bits to keep; must satisfy 0 < uPrec <= 8
// Returns the quantized value in the range [0, 2^uPrec - 1].
static uint8_t Quantize(_In_ uint8_t comp, _In_ uint8_t uPrec)
{
    assert(0 < uPrec && uPrec <= 8);

    // At full precision nothing is dropped. Returning early also keeps the rounding
    // term well-defined: 1 << (7 - uPrec) would be a shift by -1 for uPrec == 8.
    if(uPrec >= 8)
        return comp;

    const uint32_t uRounded = std::min<uint32_t>(255u, uint32_t(comp) + (1u << (7 - uPrec)));
    return static_cast<uint8_t>(uRounded >> (8 - uPrec));
}
// Quantizes a whole color, using the matching channel of RGBAPrec as the bit count
// for each channel.
// NOTE(review): a line between the b and a assignments is not visible here —
// presumably a guard on the alpha precision; confirm before relying on alpha.
671 static LDRColorA Quantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
674 q.r = Quantize(c.r, RGBAPrec.r);
675 q.g = Quantize(c.g, RGBAPrec.g);
676 q.b = Quantize(c.b, RGBAPrec.b);
678 q.a = Quantize(c.a, RGBAPrec.a);
// Expands a uPrec-bit value (0 < uPrec <= 8) back to 8 bits: the value is shifted to
// the top of the byte, then its high bits are copied into the vacated low bits.
// For example, 31 at 5 bits becomes 255 and 0 stays 0.
684 static uint8_t Unquantize(_In_ uint8_t comp, _In_ size_t uPrec)
686 assert(0 < uPrec && uPrec <= 8);
// Assigning back to the uint8_t parameter truncates anything shifted past bit 7.
687 comp = comp << (8 - uPrec);
688 return comp | (comp >> uPrec);
// Expands a whole color back to 8 bits per channel, using the matching channel of
// RGBAPrec as each channel's stored bit count. An alpha precision of 0 yields
// an alpha of 255.
691 static LDRColorA Unquantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec)
694 q.r = Unquantize(c.r, RGBAPrec.r);
695 q.g = Unquantize(c.g, RGBAPrec.g);
696 q.b = Unquantize(c.b, RGBAPrec.b);
697 q.a = RGBAPrec.a > 0 ? Unquantize(c.a, RGBAPrec.a) : 255;
// Encoder pipeline steps (declarations only; definitions are elsewhere). Array
// parameters carry their expected element counts in the SAL annotations: per-region
// arrays hold BC7_MAX_REGIONS entries, palettes BC7_MAX_INDICES, and index arrays
// NUM_PIXELS_PER_BLOCK. Color arrays sized by `np` hold np entries.
701 void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ size_t uIndexMode, _In_ const LDREndPntPair& endpts,
702 _Out_cap_c_(BC7_MAX_INDICES) LDRColorA aPalette[]) const;
703 float PerturbOne(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
704 _In_ size_t ch, _In_ const LDREndPntPair &old_endpts,
705 _Out_ LDREndPntPair &new_endpts, _In_ float old_err, _In_ uint8_t do_b) const;
706 void Exhaustive(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
707 _In_ size_t ch, _Inout_ float& fOrgErr, _Inout_ LDREndPntPair& optEndPt) const;
708 void OptimizeOne(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode,
709 _In_ float orig_err, _In_ const LDREndPntPair &orig_endpts, _Out_ LDREndPntPair &opt_endpts) const;
710 void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
711 _In_count_c_(BC7_MAX_REGIONS) const float orig_err[],
712 _In_count_c_(BC7_MAX_REGIONS) const LDREndPntPair orig_endpts[],
713 _Out_cap_c_(BC7_MAX_REGIONS) LDREndPntPair opt_endpts[]) const;
// AssignIndices fills two index arrays (aIndices and aIndices2) in one call.
714 void AssignIndices(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode,
715 _In_count_c_(BC7_MAX_REGIONS) LDREndPntPair endpts[],
716 _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices2[],
717 _Out_cap_c_(BC7_MAX_REGIONS) float afTotErr[]) const;
718 void EmitBlock(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode,
719 _In_count_c_(BC7_MAX_REGIONS) const LDREndPntPair aEndPts[],
720 _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndex[],
721 _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndex2[]);
722 float Refine(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode);
724 float MapColors(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode,
725 _In_ const LDREndPntPair& endPts, _In_ float fMinErr) const;
726 static float RoughMSE(_Inout_ EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode);
// Static table of ModeInfo entries, defined elsewhere.
729 const static ModeInfo ms_aInfo[];
732 //-------------------------------------------------------------------------------------
// Searches for a pair of endpoint values (*pX, *pY) that fit the NUM_PIXELS_PER_BLOCK
// scalar samples in pPoints when interpolated in cSteps steps.
//   pX, pY  : receive the two endpoints, clamped to [MIN_VALUE, MAX_VALUE]
//   pPoints : NUM_PIXELS_PER_BLOCK input samples
//   cSteps  : 6 selects the 6-entry coefficient tables; any other value selects the
//             8-entry tables
// The template flag bRange is not referenced in the lines shown — presumably it
// selects the lower range limit MIN_VALUE; TODO confirm.
733 template <bool bRange> void OptimizeAlpha(float *pX, float *pY, const float *pPoints, size_t cSteps)
// Blend coefficients per step: step i = pC[i] * X + pD[i] * Y, with pC[i] + pD[i] == 1.
735 static const float pC6[] = { 5.0f/5.0f, 4.0f/5.0f, 3.0f/5.0f, 2.0f/5.0f, 1.0f/5.0f, 0.0f/5.0f };
736 static const float pD6[] = { 0.0f/5.0f, 1.0f/5.0f, 2.0f/5.0f, 3.0f/5.0f, 4.0f/5.0f, 5.0f/5.0f };
737 static const float pC8[] = { 7.0f/7.0f, 6.0f/7.0f, 5.0f/7.0f, 4.0f/7.0f, 3.0f/7.0f, 2.0f/7.0f, 1.0f/7.0f, 0.0f/7.0f };
738 static const float pD8[] = { 0.0f/7.0f, 1.0f/7.0f, 2.0f/7.0f, 3.0f/7.0f, 4.0f/7.0f, 5.0f/7.0f, 6.0f/7.0f, 7.0f/7.0f };
740 const float *pC = (6 == cSteps) ? pC6 : pC8;
741 const float *pD = (6 == cSteps) ? pD6 : pD8;
743 float MAX_VALUE = 1.0f;
754 // Find Min and Max points, as starting point
// fX starts at the upper limit and fY at the lower limit so any sample can replace them.
755 float fX = MAX_VALUE;
756 float fY = MIN_VALUE;
// First variant: plain minimum / maximum over all samples.
760 for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
762 if(pPoints[iPoint] < fX)
763 fX = pPoints[iPoint];
765 if(pPoints[iPoint] > fY)
766 fY = pPoints[iPoint];
// Second variant: minimum / maximum that ignores samples sitting exactly on the range
// limits. The condition choosing between the two variants is not visible here.
771 for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
773 if(pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE)
774 fX = pPoints[iPoint];
776 if(pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE)
777 fY = pPoints[iPoint];
786 // Use Newton's Method to find local minima of sum-of-squares error.
787 float fSteps = (float) (cSteps - 1);
// At most 8 refinement iterations.
789 for(size_t iIteration = 0; iIteration < 8; iIteration++)
// Endpoints closer than 1/256 are treated specially (the action taken is not visible here).
793 if((fY - fX) < (1.0f / 256.0f))
// fScale maps a sample's offset from fX onto the step-index axis [0, fSteps].
796 fScale = fSteps / (fY - fX);
798 // Calculate new steps
801 for(size_t iStep = 0; iStep < cSteps; iStep++)
802 pSteps[iStep] = pC[iStep] * fX + pD[iStep] * fY;
// Slots 6 and 7 hold the range limits themselves.
806 pSteps[6] = MIN_VALUE;
807 pSteps[7] = MAX_VALUE;
810 // Evaluate function, and derivatives
816 for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++)
818 float fDot = (pPoints[iPoint] - fX) * fScale;
// Choose the step for this sample. In 6-step mode, samples at or below fX / 2 use slot 6
// and samples at or above (fY + 1) / 2 use slot 7; otherwise out-of-span samples snap
// to the first or last step and in-span samples round to the nearest step.
823 iStep = ((6 == cSteps) && (pPoints[iPoint] <= fX * 0.5f)) ? 6 : 0;
824 else if(fDot >= fSteps)
825 iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + 1.0f) * 0.5f)) ? 7 : (cSteps - 1);
827 iStep = static_cast<int32_t>(fDot + 0.5f);
832 // D3DX had this computation backwards (pPoints[iPoint] - pSteps[iStep])
833 // this fix improves RMS of the alpha component
834 float fDiff = pSteps[iStep] - pPoints[iPoint];
// Accumulate first-derivative (dX, dY) and second-derivative (d2X, d2Y) terms.
836 dX += pC[iStep] * fDiff;
837 d2X += pC[iStep] * pC[iStep];
839 dY += pD[iStep] * fDiff;
840 d2Y += pD[iStep] * pD[iStep];
// Swap the endpoints (the condition guarding the swap is not visible here).
853 float f = fX; fX = fY; fY = f;
// Convergence test: both squared first-derivative terms are below 1/64.
856 if((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f)))
// Clamp the results into [MIN_VALUE, MAX_VALUE] before returning them.
860 *pX = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX;
861 *pY = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY;
866 //-------------------------------------------------------------------------------------
868 //-------------------------------------------------------------------------------------
// Common function-pointer shapes for per-block decoders and encoders.
870 typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC);
871 typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, DWORD flags);
// Decoders: each reads one compressed block from pBC and writes NUM_PIXELS_PER_BLOCK
// colors to pColor. Per the SAL annotations the compressed block is 8 bytes for
// BC1 and BC4, and 16 bytes for BC2, BC3, BC5, BC6H and BC7.
873 void D3DXDecodeBC1(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC);
874 void D3DXDecodeBC2(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
875 void D3DXDecodeBC3(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
876 void D3DXDecodeBC4U(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC);
877 void D3DXDecodeBC4S(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC);
878 void D3DXDecodeBC5U(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
879 void D3DXDecodeBC5S(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
880 void D3DXDecodeBC6HU(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
881 void D3DXDecodeBC6HS(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
882 void D3DXDecodeBC7(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC);
// Encoders: each reads NUM_PIXELS_PER_BLOCK colors from pColor and writes one
// compressed block (8 or 16 bytes, as annotated) to pBC.
884 void D3DXEncodeBC1(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float alphaRef, _In_ DWORD flags);
885 // BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above
887 void D3DXEncodeBC2(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
888 void D3DXEncodeBC3(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
889 void D3DXEncodeBC4U(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
890 void D3DXEncodeBC4S(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
891 void D3DXEncodeBC5U(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
892 void D3DXEncodeBC5S(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
893 void D3DXEncodeBC6HU(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
894 void D3DXEncodeBC6HS(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);
895 void D3DXEncodeBC7(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags);