X-Git-Url: https://git.cworth.org/git?a=blobdiff_plain;f=thirdparty%2Fdirectxtex%2FDirectXTex%2FBC.h;fp=thirdparty%2Fdirectxtex%2FDirectXTex%2FBC.h;h=638058eabf9eb6c6e8d4cc0d47d4283a390e4109;hb=f6a9034a9d6d58ab27b574a0c146a36782762d55;hp=0000000000000000000000000000000000000000;hpb=a4bcf6ae9c4988600a7c4b5b8f9ee37528f342d4;p=apitrace diff --git a/thirdparty/directxtex/DirectXTex/BC.h b/thirdparty/directxtex/DirectXTex/BC.h new file mode 100644 index 0000000..638058e --- /dev/null +++ b/thirdparty/directxtex/DirectXTex/BC.h @@ -0,0 +1,897 @@ +//------------------------------------------------------------------------------------- +// BC.h +// +// Block-compression (BC) functionality +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// http://go.microsoft.com/fwlink/?LinkId=248926 +//------------------------------------------------------------------------------------- + +#if defined(_MSC_VER) && (_MSC_VER > 1000) +#pragma once +#endif + +#include + +#ifdef USE_XNAMATH +#include +#else +#include +#include +#endif + +#include + +#pragma warning(push) +#pragma warning(disable : 4005) +#include +#pragma warning(pop) + +namespace DirectX +{ + +#ifndef USE_XNAMATH +typedef PackedVector::HALF HALF; +typedef PackedVector::XMHALF4 XMHALF4; +typedef PackedVector::XMU565 XMU565; +#endif + +//------------------------------------------------------------------------------------- +// Constants +//------------------------------------------------------------------------------------- + +const uint16_t F16S_MASK = 0x8000; // f16 sign mask +const uint16_t F16EM_MASK = 0x7fff; // f16 exp & mantissa mask +const uint16_t F16MAX = 0x7bff; // MAXFLT bit pattern for XMHALF + +#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)) + +// Because these are used in SAL annotations, they need to remain macros rather than const values +#define NUM_PIXELS_PER_BLOCK 16 +#define BC6H_MAX_REGIONS 2 +#define BC6H_MAX_INDICES 16 +#define BC7_MAX_REGIONS 3 +#define BC7_MAX_INDICES 16 + +const size_t BC6H_NUM_CHANNELS = 3; +const size_t BC6H_MAX_SHAPES = 32; + +const size_t BC7_NUM_CHANNELS = 4; +const size_t BC7_MAX_SHAPES = 64; + +const uint32_t BC67_WEIGHT_MAX = 64; +const uint32_t BC67_WEIGHT_SHIFT = 6; +const uint32_t BC67_WEIGHT_ROUND = 32; + +extern const int g_aWeights2[4]; +extern const int g_aWeights3[8]; +extern const int g_aWeights4[16]; + +enum BC_FLAGS +{ + BC_FLAGS_NONE = 0x0, + BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3 + BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3 + BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting +}; + +//------------------------------------------------------------------------------------- +// Structures +//------------------------------------------------------------------------------------- +class HDRColorA; + +class LDRColorA +{ +public: + uint8_t r, g, b, a; + + LDRColorA() {} + LDRColorA(uint8_t _r, uint8_t _g, uint8_t _b, uint8_t _a) : r(_r), g(_g), b(_b), a(_a) {} + + const uint8_t& operator [] (_In_range_(0,3) size_t uElement) const + { + switch(uElement) + { + case 0: return r; + case 1: return g; + case 2: return b; + case 3: return a; + default: assert(false); return r; + } + } + + uint8_t& operator [] (_In_range_(0,3) size_t uElement) + { + switch(uElement) + { + case 0: return r; + case 1: return g; + case 2: return b; + case 3: return a; + default: assert(false); return r; + } + } + + LDRColorA operator = (_In_ const HDRColorA& c); + + static void InterpolateRGB(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wcprec, _Out_ LDRColorA& out) + { + const int* aWeights = nullptr; + switch(wcprec) + { + case 2: aWeights = g_aWeights2; assert( wc < 4 ); __analysis_assume( wc < 4 ); break; + case 3: aWeights = g_aWeights3; assert( wc < 8 ); __analysis_assume( wc < 8 ); break; + case 4: aWeights = g_aWeights4; assert( wc < 16 ); __analysis_assume( wc < 16 ); break; + default: assert(false); out.r = out.g = out.b = 0; return; + } + out.r = uint8_t((uint32_t(c0.r) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.r) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT); + out.g = uint8_t((uint32_t(c0.g) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.g) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT); + out.b = uint8_t((uint32_t(c0.b) * uint32_t(BC67_WEIGHT_MAX - aWeights[wc]) + uint32_t(c1.b) * uint32_t(aWeights[wc]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT); + } + + static void InterpolateA(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wa, _In_ size_t waprec, _Out_ LDRColorA& out) + { + const int* aWeights = nullptr; + switch(waprec) + { + case 2: aWeights = g_aWeights2; assert( wa < 4 ); __analysis_assume( wa < 4 ); break; + case 3: aWeights = g_aWeights3; assert( wa < 8 ); __analysis_assume( wa < 8 ); break; + case 4: aWeights = g_aWeights4; assert( wa < 16 ); __analysis_assume( wa < 16 ); break; + default: assert(false); out.a = 0; return; + } + out.a = uint8_t((uint32_t(c0.a) * uint32_t(BC67_WEIGHT_MAX - aWeights[wa]) + uint32_t(c1.a) * uint32_t(aWeights[wa]) + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT); + } + + static void Interpolate(_In_ const LDRColorA& c0, _In_ const LDRColorA& c1, _In_ size_t wc, _In_ size_t wa, _In_ size_t wcprec, _In_ size_t waprec, _Out_ LDRColorA& out) + { + InterpolateRGB(c0, c1, wc, wcprec, out); + InterpolateA(c0, c1, wa, waprec, out); + } +}; + +class HDRColorA +{ +public: + float r, g, b, a; + +public: + HDRColorA() {} + HDRColorA(float _r, float _g, float _b, float _a) : r(_r), g(_g), b(_b), a(_a) {} + HDRColorA(const HDRColorA& c) : r(c.r), g(c.g), b(c.b), a(c.a) {} + HDRColorA(const LDRColorA& c) + { + r = float(c.r) * (1.0f/255.0f); + g = float(c.g) * (1.0f/255.0f); + b = float(c.b) * (1.0f/255.0f); + a = float(c.a) * (1.0f/255.0f); + } + + // binary operators + HDRColorA operator + ( _In_ const HDRColorA& c ) const + { + return HDRColorA(r + c.r, g + c.g, b + c.b, a + c.a); + } + + HDRColorA operator - ( _In_ const HDRColorA& c ) const + { + return HDRColorA(r - c.r, g - c.g, b - c.b, a - c.a); + } + + HDRColorA operator * ( _In_ float f ) const + { + return HDRColorA(r * f, g * f, b * f, a * f); + } + + HDRColorA operator / ( _In_ float f ) const + { + float fInv = 1.0f / f; + return HDRColorA(r * fInv, g * fInv, b * fInv, a * fInv); + } + + float operator * ( _In_ const HDRColorA& c ) const + { + return r * c.r + g * c.g + b * c.b + a * c.a; + } + + // assignment operators + HDRColorA& operator += ( _In_ const HDRColorA& c ) + { + r += c.r; + g += c.g; + b += c.b; + a += c.a; + return *this; + } + + HDRColorA& operator -= ( _In_ const HDRColorA& c ) + { + r -= c.r; + g -= c.g; + b -= c.b; + a -= c.a; + return *this; + } + + HDRColorA& operator *= ( _In_ float f ) + { + r *= f; + g *= f; + b *= f; + a *= f; + return *this; + } + + HDRColorA& operator /= ( _In_ float f ) + { + float fInv = 1.0f / f; + r *= fInv; + g *= fInv; + b *= fInv; + a *= fInv; + return *this; + } + + HDRColorA& operator = (_In_ const LDRColorA& c) + { + r = (float) c.r; + g = (float) c.g; + b = (float) c.b; + a = (float) c.a; + return *this; + } + + HDRColorA& Clamp(_In_ float fMin, _In_ float fMax) + { + r = std::min(fMax, std::max(fMin, r)); + g = std::min(fMax, std::max(fMin, g)); + b = std::min(fMax, std::max(fMin, b)); + a = std::min(fMax, std::max(fMin, a)); + return *this; + } + + LDRColorA ToLDRColorA() const + { + return LDRColorA((uint8_t) (r + 0.01f), (uint8_t) (g + 0.01f), (uint8_t) (b + 0.01f), (uint8_t) (a + 0.01f)); + } +}; + +inline LDRColorA LDRColorA::operator = (_In_ const HDRColorA& c) +{ + LDRColorA ret; + HDRColorA tmp(c); + tmp = tmp.Clamp(0.0f, 1.0f) * 255.0f; + ret.r = uint8_t(tmp.r + 0.001f); + ret.g = uint8_t(tmp.g + 0.001f); + ret.b = uint8_t(tmp.b + 0.001f); + ret.a = uint8_t(tmp.a + 0.001f); + return ret; +} + +struct LDREndPntPair +{ + LDRColorA A; + LDRColorA B; +}; + +struct HDREndPntPair +{ + HDRColorA A; + HDRColorA B; +}; + +inline HDRColorA* HDRColorALerp(_Out_ HDRColorA *pOut, _In_ const HDRColorA *pC1, _In_ const HDRColorA *pC2, _In_ float s) +{ + pOut->r = pC1->r + s * (pC2->r - pC1->r); + pOut->g = pC1->g + s * (pC2->g - pC1->g); + pOut->b = pC1->b + s * (pC2->b - pC1->b); + pOut->a = pC1->a + s * (pC2->a - pC1->a); + return pOut; +} + +#pragma pack(push,1) +// BC1/DXT1 compression (4 bits per texel) +struct D3DX_BC1 +{ + uint16_t rgb[2]; // 565 colors + uint32_t bitmap; // 2bpp rgb bitmap +}; + +// BC2/DXT2/3 compression (8 bits per texel) +struct D3DX_BC2 +{ + uint32_t bitmap[2]; // 4bpp alpha bitmap + D3DX_BC1 bc1; // BC1 rgb data +}; + +// BC3/DXT4/5 compression (8 bits per texel) +struct D3DX_BC3 +{ + uint8_t alpha[2]; // alpha values + uint8_t bitmap[6]; // 3bpp alpha bitmap + D3DX_BC1 bc1; // BC1 rgb data +}; +#pragma pack(pop) + +class INTColor +{ +public: + int r, g, b; + +public: + INTColor() {} + INTColor(int nr, int ng, int nb) {r = nr; g = ng; b = nb;} + INTColor(const INTColor& c) {r = c.r; g = c.g; b = c.b;} + + INTColor operator - ( _In_ const INTColor& c ) const + { + return INTColor(r - c.r, g - c.g, b - c.b); + } + + INTColor& operator += ( _In_ const INTColor& c ) + { + r += c.r; + g += c.g; + b += c.b; + return *this; + } + + INTColor& operator -= ( _In_ const INTColor& c ) + { + r -= c.r; + g -= c.g; + b -= c.b; + return *this; + } + + INTColor& operator &= ( _In_ const INTColor& c ) + { + r &= c.r; + g &= c.g; + b &= c.b; + return *this; + } + + int& operator [] ( _In_ uint8_t i ) + { + assert(i < sizeof(INTColor) / sizeof(int)); + __analysis_assume(i < sizeof(INTColor) / sizeof(int)); + return ((int*) this)[i]; + } + + void Set(_In_ const HDRColorA& c, _In_ bool bSigned) + { + XMHALF4 aF16; + + XMVECTOR v = XMLoadFloat4( (const XMFLOAT4*)& c ); + XMStoreHalf4( &aF16, v ); + + r = F16ToINT(aF16.x, bSigned); + g = F16ToINT(aF16.y, bSigned); + b = F16ToINT(aF16.z, bSigned); + } + + INTColor& Clamp(_In_ int iMin, _In_ int iMax) + { + r = std::min(iMax, std::max(iMin, r)); + g = std::min(iMax, std::max(iMin, g)); + b = std::min(iMax, std::max(iMin, b)); + return *this; + } + + INTColor& SignExtend(_In_ const LDRColorA& Prec) + { + r = SIGN_EXTEND(r, Prec.r); + g = SIGN_EXTEND(g, Prec.g); + b = SIGN_EXTEND(b, Prec.b); + return *this; + } + + void ToF16(_Out_cap_c_(3) HALF aF16[3], _In_ bool bSigned) const + { + aF16[0] = INT2F16(r, bSigned); + aF16[1] = INT2F16(g, bSigned); + aF16[2] = INT2F16(b, bSigned); + } + +private: + static int F16ToINT(_In_ const HALF& f, _In_ bool bSigned) + { + uint16_t input = *((const uint16_t*) &f); + int out, s; + if(bSigned) + { + s = input & F16S_MASK; + input &= F16EM_MASK; + if(input > F16MAX) out = F16MAX; + else out = input; + out = s ? -out : out; + } + else + { + if(input & F16S_MASK) out = 0; + else out = input; + } + return out; + } + + static HALF INT2F16(_In_ int input, _In_ bool bSigned) + { + HALF h; + uint16_t out; + if(bSigned) + { + int s = 0; + if(input < 0) + { + s = F16S_MASK; + input = -input; + } + out = uint16_t(s | input); + } + else + { + assert(input >= 0 && input <= F16MAX); + out = (uint16_t) input; + } + + *((uint16_t*) &h) = out; + return h; + } +}; + +struct INTEndPntPair +{ + INTColor A; + INTColor B; +}; + +template< size_t SizeInBytes > +class CBits +{ +public: + uint8_t GetBit(_Inout_ size_t& uStartBit) const + { + assert(uStartBit < 128); + __analysis_assume(uStartBit < 128); + size_t uIndex = uStartBit >> 3; + uint8_t ret = (m_uBits[uIndex] >> (uStartBit - (uIndex << 3))) & 0x01; + uStartBit++; + return ret; + } + + uint8_t GetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits) const + { + if(uNumBits == 0) return 0; + assert(uStartBit + uNumBits <= 128 && uNumBits <= 8); + __analysis_assume(uStartBit + uNumBits <= 128 && uNumBits <= 8); + uint8_t ret; + size_t uIndex = uStartBit >> 3; + size_t uBase = uStartBit - (uIndex << 3); + if(uBase + uNumBits > 8) + { + size_t uFirstIndexBits = 8 - uBase; + size_t uNextIndexBits = uNumBits - uFirstIndexBits; + ret = (m_uBits[uIndex] >> uBase) | ((m_uBits[uIndex+1] & ((1 << uNextIndexBits) - 1)) << uFirstIndexBits); + } + else + { + ret = (m_uBits[uIndex] >> uBase) & ((1 << uNumBits) - 1); + } + assert(ret < (1 << uNumBits)); + uStartBit += uNumBits; + return ret; + } + + void SetBit(_Inout_ size_t& uStartBit, _In_ uint8_t uValue) + { + assert(uStartBit < 128 && uValue < 2); + __analysis_assume(uStartBit < 128 && uValue < 2); + size_t uIndex = uStartBit >> 3; + size_t uBase = uStartBit - (uIndex << 3); + m_uBits[uIndex] &= ~(1 << uBase); + m_uBits[uIndex] |= uValue << uBase; + uStartBit++; + } + + void SetBits(_Inout_ size_t& uStartBit, _In_ size_t uNumBits, _In_ uint8_t uValue) + { + if(uNumBits == 0) + return; + assert(uStartBit + uNumBits <= 128 && uNumBits <= 8); + __analysis_assume(uStartBit + uNumBits <= 128 && uNumBits <= 8); + assert(uValue < (1 << uNumBits)); + size_t uIndex = uStartBit >> 3; + size_t uBase = uStartBit - (uIndex << 3); + if(uBase + uNumBits > 8) + { + size_t uFirstIndexBits = 8 - uBase; + size_t uNextIndexBits = uNumBits - uFirstIndexBits; + m_uBits[uIndex] &= ~(((1 << uFirstIndexBits) - 1) << uBase); + m_uBits[uIndex] |= uValue << uBase; + m_uBits[uIndex+1] &= ~((1 << uNextIndexBits) - 1); + m_uBits[uIndex+1] |= uValue >> uFirstIndexBits; + } + else + { + m_uBits[uIndex] &= ~(((1 << uNumBits) - 1) << uBase); + m_uBits[uIndex] |= uValue << uBase; + } + uStartBit += uNumBits; + } + +private: + uint8_t m_uBits[ SizeInBytes ]; +}; + +#pragma warning(push) +#pragma warning(disable : 4127 4480 4512) + +// BC6H compression (16 bits per texel) +class D3DX_BC6H : private CBits< 16 > +{ +public: + void Decode(_In_ bool bSigned, _Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const; + void Encode(_In_ bool bSigned, _In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn); + +private: + enum EField : uint8_t + { + NA, // N/A + M, // Mode + D, // Shape + RW, + RX, + RY, + RZ, + GW, + GX, + GY, + GZ, + BW, + BX, + BY, + BZ, + }; + + struct ModeDescriptor + { + EField m_eField; + uint8_t m_uBit; + }; + + struct ModeInfo + { + uint8_t uMode; + uint8_t uPartitions; + bool bTransformed; + uint8_t uIndexPrec; + LDRColorA RGBAPrec[BC6H_MAX_REGIONS][2]; + }; + + struct EncodeParams + { + float fBestErr; + const bool bSigned; + uint8_t uMode; + uint8_t uShape; + const HDRColorA* const aHDRPixels; + INTEndPntPair aUnqEndPts[BC6H_MAX_SHAPES][BC6H_MAX_REGIONS]; + INTColor aIPixels[NUM_PIXELS_PER_BLOCK]; + + EncodeParams(const HDRColorA* const aOriginal, bool bSignedFormat) : + aHDRPixels(aOriginal), fBestErr(FLT_MAX), bSigned(bSignedFormat) + { + for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i) + { + aIPixels[i].Set(aOriginal[i], bSigned); + } + } + }; + + static int Quantize(_In_ int iValue, _In_ int prec, _In_ bool bSigned); + static int Unquantize(_In_ int comp, _In_ uint8_t uBitsPerComp, _In_ bool bSigned); + static int FinishUnquantize(_In_ int comp, _In_ bool bSigned); + + static bool EndPointsFit(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[]); + + void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ const INTEndPntPair& endPts, + _Out_cap_c_(BC6H_MAX_INDICES) INTColor aPalette[]) const; + float MapColorsQuantized(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ const INTEndPntPair &endPts) const; + float PerturbOne(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ uint8_t ch, + _In_ const INTEndPntPair& oldEndPts, _Out_ INTEndPntPair& newEndPts, _In_ float fOldErr, _In_ int do_b) const; + void OptimizeOne(_In_ const EncodeParams* pEP, _In_count_(np) const INTColor aColors[], _In_ size_t np, _In_ float aOrgErr, + _In_ const INTEndPntPair &aOrgEndPts, _Out_ INTEndPntPair &aOptEndPts) const; + void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const float aOrgErr[], + _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aOrgEndPts[], + _Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aOptEndPts[]) const; + static void SwapIndices(_In_ const EncodeParams* pEP, _Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], + _In_count_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[]); + void AssignIndices(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[], + _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], + _Out_cap_c_(BC6H_MAX_REGIONS) float aTotErr[]) const; + void QuantizeEndPts(_In_ const EncodeParams* pEP, _Out_cap_c_(BC6H_MAX_REGIONS) INTEndPntPair* qQntEndPts) const; + void EmitBlock(_In_ const EncodeParams* pEP, _In_count_c_(BC6H_MAX_REGIONS) const INTEndPntPair aEndPts[], + _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndices[]); + void Refine(_Inout_ EncodeParams* pEP); + + static void GeneratePaletteUnquantized(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _Out_cap_c_(BC6H_MAX_INDICES) INTColor aPalette[]); + float MapColors(_In_ const EncodeParams* pEP, _In_ size_t uRegion, _In_ size_t np, _In_count_(np) const size_t* auIndex) const; + float RoughMSE(_Inout_ EncodeParams* pEP) const; + +private: + const static ModeDescriptor ms_aDesc[][82]; + const static ModeInfo ms_aInfo[]; + const static int ms_aModeToInfo[]; +}; + +// BC67 compression (16b bits per texel) +class D3DX_BC7 : private CBits< 16 > +{ +public: + void Decode(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const; + void Encode(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn); + +private: + struct ModeInfo + { + uint8_t uPartitions; + uint8_t uPartitionBits; + uint8_t uPBits; + uint8_t uRotationBits; + uint8_t uIndexModeBits; + uint8_t uIndexPrec; + uint8_t uIndexPrec2; + LDRColorA RGBAPrec; + LDRColorA RGBAPrecWithP; + }; + + struct EncodeParams + { + uint8_t uMode; + LDREndPntPair aEndPts[BC7_MAX_SHAPES][BC7_MAX_REGIONS]; + LDRColorA aLDRPixels[NUM_PIXELS_PER_BLOCK]; + const HDRColorA* const aHDRPixels; + + EncodeParams(const HDRColorA* const aOriginal) : aHDRPixels(aOriginal) {} + }; + + static uint8_t Quantize(_In_ uint8_t comp, _In_ uint8_t uPrec) + { + assert(0 < uPrec && uPrec <= 8); + uint8_t rnd = (uint8_t) std::min(255, uint16_t(comp) + (1 << (7 - uPrec))); + return rnd >> (8 - uPrec); + } + + static LDRColorA Quantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec) + { + LDRColorA q; + q.r = Quantize(c.r, RGBAPrec.r); + q.g = Quantize(c.g, RGBAPrec.g); + q.b = Quantize(c.b, RGBAPrec.b); + if(RGBAPrec.a) + q.a = Quantize(c.a, RGBAPrec.a); + else + q.a = 255; + return q; + } + + static uint8_t Unquantize(_In_ uint8_t comp, _In_ size_t uPrec) + { + assert(0 < uPrec && uPrec <= 8); + comp = comp << (8 - uPrec); + return comp | (comp >> uPrec); + } + + static LDRColorA Unquantize(_In_ const LDRColorA& c, _In_ const LDRColorA& RGBAPrec) + { + LDRColorA q; + q.r = Unquantize(c.r, RGBAPrec.r); + q.g = Unquantize(c.g, RGBAPrec.g); + q.b = Unquantize(c.b, RGBAPrec.b); + q.a = RGBAPrec.a > 0 ? Unquantize(c.a, RGBAPrec.a) : 255; + return q; + } + + void GeneratePaletteQuantized(_In_ const EncodeParams* pEP, _In_ size_t uIndexMode, _In_ const LDREndPntPair& endpts, + _Out_cap_c_(BC7_MAX_INDICES) LDRColorA aPalette[]) const; + float PerturbOne(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode, + _In_ size_t ch, _In_ const LDREndPntPair &old_endpts, + _Out_ LDREndPntPair &new_endpts, _In_ float old_err, _In_ uint8_t do_b) const; + void Exhaustive(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode, + _In_ size_t ch, _Inout_ float& fOrgErr, _Inout_ LDREndPntPair& optEndPt) const; + void OptimizeOne(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA colors[], _In_ size_t np, _In_ size_t uIndexMode, + _In_ float orig_err, _In_ const LDREndPntPair &orig_endpts, _Out_ LDREndPntPair &opt_endpts) const; + void OptimizeEndPoints(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode, + _In_count_c_(BC7_MAX_REGIONS) const float orig_err[], + _In_count_c_(BC7_MAX_REGIONS) const LDREndPntPair orig_endpts[], + _Out_cap_c_(BC7_MAX_REGIONS) LDREndPntPair opt_endpts[]) const; + void AssignIndices(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode, + _In_count_c_(BC7_MAX_REGIONS) LDREndPntPair endpts[], + _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices[], _Out_cap_c_(NUM_PIXELS_PER_BLOCK) size_t aIndices2[], + _Out_cap_c_(BC7_MAX_REGIONS) float afTotErr[]) const; + void EmitBlock(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode, + _In_count_c_(BC7_MAX_REGIONS) const LDREndPntPair aEndPts[], + _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndex[], + _In_count_c_(NUM_PIXELS_PER_BLOCK) const size_t aIndex2[]); + float Refine(_In_ const EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uRotation, _In_ size_t uIndexMode); + + float MapColors(_In_ const EncodeParams* pEP, _In_count_(np) const LDRColorA aColors[], _In_ size_t np, _In_ size_t uIndexMode, + _In_ const LDREndPntPair& endPts, _In_ float fMinErr) const; + static float RoughMSE(_Inout_ EncodeParams* pEP, _In_ size_t uShape, _In_ size_t uIndexMode); + +private: + const static ModeInfo ms_aInfo[]; +}; + +//------------------------------------------------------------------------------------- +template void OptimizeAlpha(float *pX, float *pY, const float *pPoints, size_t cSteps) +{ + static const float pC6[] = { 5.0f/5.0f, 4.0f/5.0f, 3.0f/5.0f, 2.0f/5.0f, 1.0f/5.0f, 0.0f/5.0f }; + static const float pD6[] = { 0.0f/5.0f, 1.0f/5.0f, 2.0f/5.0f, 3.0f/5.0f, 4.0f/5.0f, 5.0f/5.0f }; + static const float pC8[] = { 7.0f/7.0f, 6.0f/7.0f, 5.0f/7.0f, 4.0f/7.0f, 3.0f/7.0f, 2.0f/7.0f, 1.0f/7.0f, 0.0f/7.0f }; + static const float pD8[] = { 0.0f/7.0f, 1.0f/7.0f, 2.0f/7.0f, 3.0f/7.0f, 4.0f/7.0f, 5.0f/7.0f, 6.0f/7.0f, 7.0f/7.0f }; + + const float *pC = (6 == cSteps) ? pC6 : pC8; + const float *pD = (6 == cSteps) ? pD6 : pD8; + + float MAX_VALUE = 1.0f; + float MIN_VALUE; + if (bRange) + { + MIN_VALUE = -1.0f; + } + else + { + MIN_VALUE = 0.0f; + } + + // Find Min and Max points, as starting point + float fX = MAX_VALUE; + float fY = MIN_VALUE; + + if(8 == cSteps) + { + for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++) + { + if(pPoints[iPoint] < fX) + fX = pPoints[iPoint]; + + if(pPoints[iPoint] > fY) + fY = pPoints[iPoint]; + } + } + else + { + for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++) + { + if(pPoints[iPoint] < fX && pPoints[iPoint] > MIN_VALUE) + fX = pPoints[iPoint]; + + if(pPoints[iPoint] > fY && pPoints[iPoint] < MAX_VALUE) + fY = pPoints[iPoint]; + } + + if (fX == fY) + { + fY = MAX_VALUE; + } + } + + // Use Newton's Method to find local minima of sum-of-squares error. + float fSteps = (float) (cSteps - 1); + + for(size_t iIteration = 0; iIteration < 8; iIteration++) + { + float fScale; + + if((fY - fX) < (1.0f / 256.0f)) + break; + + fScale = fSteps / (fY - fX); + + // Calculate new steps + float pSteps[8]; + + for(size_t iStep = 0; iStep < cSteps; iStep++) + pSteps[iStep] = pC[iStep] * fX + pD[iStep] * fY; + + if(6 == cSteps) + { + pSteps[6] = MIN_VALUE; + pSteps[7] = MAX_VALUE; + } + + // Evaluate function, and derivatives + float dX = 0.0f; + float dY = 0.0f; + float d2X = 0.0f; + float d2Y = 0.0f; + + for(size_t iPoint = 0; iPoint < NUM_PIXELS_PER_BLOCK; iPoint++) + { + float fDot = (pPoints[iPoint] - fX) * fScale; + + size_t iStep; + + if(fDot <= 0.0f) + iStep = ((6 == cSteps) && (pPoints[iPoint] <= fX * 0.5f)) ? 6 : 0; + else if(fDot >= fSteps) + iStep = ((6 == cSteps) && (pPoints[iPoint] >= (fY + 1.0f) * 0.5f)) ? 7 : (cSteps - 1); + else + iStep = static_cast(fDot + 0.5f); + + + if(iStep < cSteps) + { + // D3DX had this computation backwards (pPoints[iPoint] - pSteps[iStep]) + // this fix improves RMS of the alpha component + float fDiff = pSteps[iStep] - pPoints[iPoint]; + + dX += pC[iStep] * fDiff; + d2X += pC[iStep] * pC[iStep]; + + dY += pD[iStep] * fDiff; + d2Y += pD[iStep] * pD[iStep]; + } + } + + // Move endpoints + if(d2X > 0.0f) + fX -= dX / d2X; + + if(d2Y > 0.0f) + fY -= dY / d2Y; + + if(fX > fY) + { + float f = fX; fX = fY; fY = f; + } + + if((dX * dX < (1.0f / 64.0f)) && (dY * dY < (1.0f / 64.0f))) + break; + } + + *pX = (fX < MIN_VALUE) ? MIN_VALUE : (fX > MAX_VALUE) ? MAX_VALUE : fX; + *pY = (fY < MIN_VALUE) ? MIN_VALUE : (fY > MAX_VALUE) ? MAX_VALUE : fY; +} +#pragma warning(pop) + + +//------------------------------------------------------------------------------------- +// Functions +//------------------------------------------------------------------------------------- + +typedef void (*BC_DECODE)(XMVECTOR *pColor, const uint8_t *pBC); +typedef void (*BC_ENCODE)(uint8_t *pDXT, const XMVECTOR *pColor, DWORD flags); + +void D3DXDecodeBC1(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC); +void D3DXDecodeBC2(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); +void D3DXDecodeBC3(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); +void D3DXDecodeBC4U(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC); +void D3DXDecodeBC4S(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(8) const uint8_t *pBC); +void D3DXDecodeBC5U(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); +void D3DXDecodeBC5S(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); +void D3DXDecodeBC6HU(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); +void D3DXDecodeBC6HS(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); +void D3DXDecodeBC7(_Out_cap_c_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_count_c_(16) const uint8_t *pBC); + +void D3DXEncodeBC1(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ float alphaRef, _In_ DWORD flags); + // BC1 requires one additional parameter, so it doesn't match signature of BC_ENCODE above + +void D3DXEncodeBC2(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC3(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC4U(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC4S(_Out_cap_c_(8) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC5U(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC5S(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC6HU(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC6HS(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); +void D3DXEncodeBC7(_Out_cap_c_(16) uint8_t *pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags); + +}; // namespace