1 //-------------------------------------------------------------------------------------
4 // Block-compression (BC) functionality for BC4 and BC5 (DirectX 10 texture compression)
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
11 // Copyright (c) Microsoft Corporation. All rights reserved.
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
16 #include "DirectXTexP.h"
20 #pragma warning(disable : 4201)
25 //------------------------------------------------------------------------------------
27 //------------------------------------------------------------------------------------
29 // Because these are used in SAL annotations, they need to remain macros rather than const values.
31 // Length of one block edge, in texels.
33 #define BLOCK_SIZE (BLOCK_LEN * BLOCK_LEN)
34 // Total number of texels in a 4x4 block.
36 //------------------------------------------------------------------------------------
38 //-------------------------------------------------------------------------------------
// Returns the decoded value of the texel at position uOffset in this block:
// fetches the texel's selector with GetIndex and converts it to a float with
// DecodeFromIndex.
43 float R(size_t uOffset) const
45 size_t uIndex = GetIndex(uOffset);
46 return DecodeFromIndex(uIndex);
// Maps a selector value to the float it represents for this block.
// Visible results: either endpoint scaled by 1/255, or a weighted blend of the
// two scaled endpoints over 7 or over 5 steps.
// NOTE(review): the branch conditions (and any adjustment of uIndex before the
// blends) are on lines missing from this excerpt -- presumably the endpoint
// ordering selects the 7-step vs. 5-step ramp; confirm against the full file.
49 float DecodeFromIndex(size_t uIndex) const
// Endpoint 0 on its own, scaled from its byte value.
52 return red_0 / 255.0f;
// Endpoint 1 on its own, same scaling.
54 return red_1 / 255.0f;
// Both endpoints as floats, used by the blends below.
55 float fred_0 = red_0 / 255.0f;
56 float fred_1 = red_1 / 255.0f;
// Blend in sevenths: weight (7-uIndex) on endpoint 0 and uIndex on endpoint 1.
60 return (fred_0 * (7-uIndex) + fred_1 * uIndex) / 7.0f;
// Blend in fifths: weight (5-uIndex) on endpoint 0 and uIndex on endpoint 1.
69 return (fred_0 * (5-uIndex) + fred_1 * uIndex) / 5.0f;
// Returns the 3-bit selector stored for the texel at position uOffset.
// The field for texel uOffset starts at bit 3*uOffset + 16 of `data`.
// NOTE(review): the 16-bit offset presumably skips the two endpoint bytes;
// the declaration of `data` is not visible here -- confirm.
73 size_t GetIndex(size_t uOffset) const
// Shift the field down to bit 0 and keep only its three bits.
75 return (size_t) ((data >> (3*uOffset + 16)) & 0x07);
// Stores uIndex as the selector for the texel at position uOffset, using the
// same bit position as GetIndex (bit 3*uOffset + 16 of `data`).
// uIndex is OR-ed in without masking, so a value above 7 would also set bits
// belonging to the following fields.
78 void SetIndex(size_t uOffset, size_t uIndex)
// Clear the texel's existing 3-bit field.
80 data &= ~((uint64_t) 0x07 << (3*uOffset + 16));
// Write the new selector into the cleared field.
81 data |= ((uint64_t) uIndex << (3*uOffset + 16));
// Returns the decoded value of the texel at position uOffset in this block:
// fetches the texel's selector with GetIndex and converts it to a float with
// DecodeFromIndex.
99 float R(size_t uOffset) const
101 size_t uIndex = GetIndex(uOffset);
102 return DecodeFromIndex(uIndex);
// Maps a selector value to the float it represents for this signed block.
// Visible results: either endpoint scaled by 1/127, or a weighted blend of the
// two scaled endpoints over 7 or over 5 steps.
// NOTE(review): the branch conditions (and any adjustment of uIndex before the
// blends) are on lines missing from this excerpt -- presumably the endpoint
// ordering selects the 7-step vs. 5-step ramp; confirm against the full file.
105 float DecodeFromIndex(size_t uIndex) const
// Treat an endpoint of -128 as -127 so that dividing by 127 never goes below -1.
107 int8_t sred_0 = (red_0 == -128)? -127 : red_0;
108 int8_t sred_1 = (red_1 == -128)? -127 : red_1;
// Endpoint 0 on its own.
111 return sred_0 / 127.0f;
// Endpoint 1 on its own.
113 return sred_1 / 127.0f;
// Both endpoints as floats, used by the blends below.
114 float fred_0 = sred_0 / 127.0f;
115 float fred_1 = sred_1 / 127.0f;
// Blend in sevenths: weight (7-uIndex) on endpoint 0 and uIndex on endpoint 1.
119 return (fred_0 * (7-uIndex) + fred_1 * uIndex) / 7.0f;
// Blend in fifths: weight (5-uIndex) on endpoint 0 and uIndex on endpoint 1.
128 return (fred_0 * (5-uIndex) + fred_1 * uIndex) / 5.0f;
// Returns the 3-bit selector stored for the texel at position uOffset.
// The field for texel uOffset starts at bit 3*uOffset + 16 of `data`.
// NOTE(review): the 16-bit offset presumably skips the two endpoint bytes;
// the declaration of `data` is not visible here -- confirm.
132 size_t GetIndex(size_t uOffset) const
// Shift the field down to bit 0 and keep only its three bits.
134 return (size_t) ((data >> (3*uOffset + 16)) & 0x07);
// Stores uIndex as the selector for the texel at position uOffset, using the
// same bit position as GetIndex (bit 3*uOffset + 16 of `data`).
// uIndex is OR-ed in without masking, so a value above 7 would also set bits
// belonging to the following fields.
137 void SetIndex(size_t uOffset, size_t uIndex)
// Clear the texel's existing 3-bit field.
139 data &= ~((uint64_t) 0x07 << (3*uOffset + 16));
// Write the new selector into the cleared field.
140 data |= ((uint64_t) uIndex << (3*uOffset + 16));
156 //-------------------------------------------------------------------------------------
157 // Convert a floating point value to an 8-bit SNORM
158 //-------------------------------------------------------------------------------------
// Converts fVal to a signed 8-bit normalized value and stores it in *piSNorm.
// The value is limited to [-1, 1], scaled by 127 and then cast to int8_t.
// NOTE(review): the conditions guarding the two clamps, and any rounding or
// NaN handling between the scale and the final cast, are on lines missing
// from this excerpt -- confirm against the full file.
159 static void inline FloatToSNorm( _In_ float fVal, _Out_ int8_t *piSNorm )
// 1 << 7 == 128: the magnitude of the most negative int8_t value.
161 const uint32_t dwMostNeg = ( 1 << ( 8 * sizeof( int8_t ) - 1 ) );
167 fVal = 1; // Clamp to 1
170 fVal = -1; // Clamp to -1
// Scale by 127 (dwMostNeg - 1), the largest positive int8_t value.
172 fVal = fVal * (int8_t) ( dwMostNeg - 1 );
// The cast truncates whatever fractional part is left at this point.
179 *piSNorm = (int8_t) (fVal);
183 //------------------------------------------------------------------------------
// Chooses the two 8-bit endpoints for an unsigned block from BLOCK_SIZE texels.
//   theTexelsU  - input texel values for one block.
//   endpointU_0 - receives the first endpoint.
//   endpointU_1 - receives the second endpoint.
// The block's min and max are compared with MIN_NORM / MAX_NORM to pick one of
// two OptimizeAlpha calls (last argument 8 or 6); the resulting floats are
// scaled by 255 and truncated to uint8_t.
// NOTE(review): the declarations of i, iStart, iEnd, fStart, fEnd, the value
// given to MIN_NORM, and one endpoint assignment per branch are on lines
// missing from this excerpt. OptimizeAlpha is defined elsewhere; its last
// argument is presumably the number of interpolation steps -- confirm.
184 static void FindEndPointsBC4U( _In_count_c_(BLOCK_SIZE) const float theTexelsU[], _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1)
186 // Range limits of the codec for the signed/unsigned format
188 float MAX_NORM = 1.0f;
194 // Find max/min of input texels
// Both trackers are seeded from texel 0; the loop then revisits every texel.
195 float fBlockMax = theTexelsU[0];
196 float fBlockMin = theTexelsU[0];
197 for (i = 0; i < BLOCK_SIZE; ++i)
199 if (theTexelsU[i]<fBlockMin)
201 fBlockMin = theTexelsU[i];
203 else if (theTexelsU[i]>fBlockMax)
205 fBlockMax = theTexelsU[i];
209 // If the input texels contain a boundary value, the 4 block-codec should be used to
210 // guarantee that the boundary values are encoded exactly.
// Exact float equality is intended here: only texels sitting precisely on a limit count.
211 bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
// No boundary value present: optimise with 8 as the last argument.
216 if (!bUsing4BlockCodec)
218 OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 8);
// Quantise to bytes; the cast truncates rather than rounds.
220 iStart = (uint8_t) (fStart * 255.0f);
221 iEnd = (uint8_t) (fEnd * 255.0f);
// In this branch the start value goes to endpoint 1.
224 endpointU_1 = iStart;
// Boundary value present: optimise with 6 as the last argument.
228 OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 6);
230 iStart = (uint8_t) (fStart * 255.0f);
231 iEnd = (uint8_t) (fEnd * 255.0f);
// In this branch the start value goes to endpoint 0.
234 endpointU_0 = iStart;
// Chooses the two signed 8-bit endpoints for a signed block from BLOCK_SIZE texels.
//   theTexelsU  - input texel values for one block.
//   endpointU_0 - receives the first endpoint.
//   endpointU_1 - receives the second endpoint.
// The block's min and max are compared with MIN_NORM / MAX_NORM to pick one of
// two OptimizeAlpha calls (last argument 8 or 6); the resulting floats are
// converted with FloatToSNorm.
// NOTE(review): the declarations of i, iStart, iEnd, fStart, fEnd, the value
// given to MIN_NORM, and one endpoint assignment per branch are on lines
// missing from this excerpt. OptimizeAlpha is defined elsewhere; its last
// argument is presumably the number of interpolation steps -- confirm.
238 static void FindEndPointsBC4S(_In_count_c_(BLOCK_SIZE) const float theTexelsU[], _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1)
240 // Range limits of the codec for the signed/unsigned format
242 float MAX_NORM = 1.0f;
248 // Find max/min of input texels
// Both trackers are seeded from texel 0; the loop then revisits every texel.
249 float fBlockMax = theTexelsU[0];
250 float fBlockMin = theTexelsU[0];
251 for (i = 0; i < BLOCK_SIZE; ++i)
253 if (theTexelsU[i]<fBlockMin)
255 fBlockMin = theTexelsU[i];
257 else if (theTexelsU[i]>fBlockMax)
259 fBlockMax = theTexelsU[i];
263 // If the input texels contain a boundary value, the 4 block-codec should be used to
264 // guarantee that the boundary values are encoded exactly.
// Exact float equality is intended here: only texels sitting precisely on a limit count.
265 bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
// No boundary value present: optimise with 8 as the last argument.
270 if (!bUsing4BlockCodec)
272 OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 8);
// Quantise both ends to signed 8-bit values.
274 FloatToSNorm(fStart, &iStart);
275 FloatToSNorm(fEnd, &iEnd);
// In this branch the start value goes to endpoint 1.
278 endpointU_1 = iStart;
// Boundary value present: optimise with 6 as the last argument.
282 OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 6);
284 FloatToSNorm(fStart, &iStart);
285 FloatToSNorm(fEnd, &iEnd);
// In this branch the start value goes to endpoint 0.
288 endpointU_0 = iStart;
293 //------------------------------------------------------------------------------
// Chooses unsigned endpoints for both channels of a two-channel block.
// Each channel is handled independently by FindEndPointsBC4U:
//   theTexelsU -> endpointU_0 / endpointU_1
//   theTexelsV -> endpointV_0 / endpointV_1
294 static inline void FindEndPointsBC5U( _In_count_c_(BLOCK_SIZE) const float theTexelsU[], _In_count_c_(BLOCK_SIZE) const float theTexelsV[],
295 _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1, _Out_ uint8_t &endpointV_0, _Out_ uint8_t &endpointV_1)
297 // Encode the U and V channels separately with the BC4 codec.
298 FindEndPointsBC4U( theTexelsU, endpointU_0, endpointU_1);
299 FindEndPointsBC4U( theTexelsV, endpointV_0, endpointV_1);
// Chooses signed endpoints for both channels of a two-channel block.
// Each channel is handled independently by FindEndPointsBC4S:
//   theTexelsU -> endpointU_0 / endpointU_1
//   theTexelsV -> endpointV_0 / endpointV_1
302 static inline void FindEndPointsBC5S( _In_count_c_(BLOCK_SIZE) const float theTexelsU[], _In_count_c_(BLOCK_SIZE) const float theTexelsV[],
303 _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1, _Out_ int8_t &endpointV_0, _Out_ int8_t &endpointV_1)
305 // Encode the U and V channels separately with the BC4 codec.
306 FindEndPointsBC4S( theTexelsU, endpointU_0, endpointU_1);
307 FindEndPointsBC4S( theTexelsV, endpointV_0, endpointV_1);
311 //------------------------------------------------------------------------------
// Assigns each texel of the block the selector whose decoded value is nearest.
//   pBC        - block whose endpoints are already set; its selectors are written.
//   theTexelsU - NUM_PIXELS_PER_BLOCK input texel values.
// NOTE(review): the declarations of i and rGradient, and the statement that
// records the winning selector in uBestIndex, are on lines missing from this
// excerpt -- confirm against the full file.
312 static void FindClosestUNORM(_Inout_ BC4_UNORM* pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
// Decode all 8 selector values once, up front.
316 for (i = 0; i < 8; ++i)
318 rGradient[i] = pBC->DecodeFromIndex(i);
// For every texel, scan the 8 decoded values for the smallest absolute error.
320 for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
322 size_t uBestIndex = 0;
// Starting error; large enough that the first comparison of in-range values wins.
323 float fBestDelta = 100000;
324 for (size_t uIndex = 0; uIndex < 8; uIndex++)
326 float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
// Strict '<' keeps the earliest selector when two are equally close.
327 if (fCurrentDelta < fBestDelta)
330 fBestDelta = fCurrentDelta;
// Store the chosen selector for this texel.
333 pBC->SetIndex(i, uBestIndex);
// Assigns each texel of the signed block the selector whose decoded value is nearest.
//   pBC        - block whose endpoints are already set; its selectors are written.
//   theTexelsU - NUM_PIXELS_PER_BLOCK input texel values.
// NOTE(review): the declarations of i and rGradient, and the statement that
// records the winning selector in uBestIndex, are on lines missing from this
// excerpt -- confirm against the full file.
337 static void FindClosestSNORM(_Inout_ BC4_SNORM* pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
// Decode all 8 selector values once, up front.
341 for (i = 0; i < 8; ++i)
343 rGradient[i] = pBC->DecodeFromIndex(i);
// For every texel, scan the 8 decoded values for the smallest absolute error.
345 for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
347 size_t uBestIndex = 0;
// Starting error; large enough that the first comparison of in-range values wins.
348 float fBestDelta = 100000;
349 for (size_t uIndex = 0; uIndex < 8; uIndex++)
351 float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
// Strict '<' keeps the earliest selector when two are equally close.
352 if (fCurrentDelta < fBestDelta)
355 fBestDelta = fCurrentDelta;
// Store the chosen selector for this texel.
358 pBC->SetIndex(i, uBestIndex);
363 //=====================================================================================
365 //=====================================================================================
367 //-------------------------------------------------------------------------------------
369 //-------------------------------------------------------------------------------------
// Decodes one unsigned single-channel block.
//   pColor - receives NUM_PIXELS_PER_BLOCK vectors, each (value, 0, 0, 1).
//   pBC    - the 8-byte encoded block, read as a BC4_UNORM.
// Both pointers must be non-null (checked by assert only).
370 void D3DXDecodeBC4U( XMVECTOR *pColor, const uint8_t *pBC )
372 assert( pColor && pBC );
373 static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
// View the raw bytes as a block structure.
375 const BC4_UNORM * pBC4 = reinterpret_cast<const BC4_UNORM*>(pBC);
377 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
// Decoded value goes in X; Y and Z are 0 and W is 1.
379 pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
// Decodes one signed single-channel block.
//   pColor - receives NUM_PIXELS_PER_BLOCK vectors, each (value, 0, 0, 1).
//   pBC    - the 8-byte encoded block, read as a BC4_SNORM.
// Both pointers must be non-null (checked by assert only).
383 void D3DXDecodeBC4S(XMVECTOR *pColor, const uint8_t *pBC)
385 assert( pColor && pBC );
386 static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
// View the raw bytes as a block structure.
388 const BC4_SNORM * pBC4 = reinterpret_cast<const BC4_SNORM*>(pBC);
390 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
// Decoded value goes in X; Y and Z are 0 and W is 1.
392 pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
// Encodes one block of texels as an unsigned single-channel block.
//   pBC    - receives the 8-byte encoded block.
//   pColor - NUM_PIXELS_PER_BLOCK input vectors; only the X component is used.
//   flags  - accepted for interface compatibility; not used.
// Both pointers must be non-null (checked by assert only).
396 void D3DXEncodeBC4U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
398 UNREFERENCED_PARAMETER( flags );
400 assert( pBC && pColor );
401 static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
// Start from an all-zero block so the selector writes below only need to OR bits in.
403 memset(pBC, 0, sizeof(BC4_UNORM));
404 BC4_UNORM * pBC4 = reinterpret_cast<BC4_UNORM*>(pBC);
405 float theTexelsU[NUM_PIXELS_PER_BLOCK];
// Collect the X component of every input texel.
407 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
409 theTexelsU[i] = XMVectorGetX( pColor[i] );
// Pick the endpoints first, then the per-texel selectors that depend on them.
412 FindEndPointsBC4U(theTexelsU, pBC4->red_0, pBC4->red_1);
413 FindClosestUNORM(pBC4, theTexelsU);
// Encodes one block of texels as a signed single-channel block.
//   pBC    - receives the 8-byte encoded block.
//   pColor - NUM_PIXELS_PER_BLOCK input vectors; only the X component is used.
//   flags  - accepted for interface compatibility; not used.
// Both pointers must be non-null (checked by assert only).
416 void D3DXEncodeBC4S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
418 UNREFERENCED_PARAMETER( flags );
420 assert( pBC && pColor );
421 static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
// Clear the output using the type that is actually written here, so the
// static_assert above guards the size passed to memset.
423 memset(pBC, 0, sizeof(BC4_SNORM));
424 BC4_SNORM * pBC4 = reinterpret_cast<BC4_SNORM*>(pBC);
425 float theTexelsU[NUM_PIXELS_PER_BLOCK];
// Collect the X component of every input texel.
427 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
429 theTexelsU[i] = XMVectorGetX( pColor[i] );
// Pick the endpoints first, then the per-texel selectors that depend on them.
432 FindEndPointsBC4S(theTexelsU, pBC4->red_0, pBC4->red_1);
433 FindClosestSNORM(pBC4, theTexelsU);
437 //-------------------------------------------------------------------------------------
439 //-------------------------------------------------------------------------------------
// Decodes one unsigned two-channel block.
//   pColor - receives NUM_PIXELS_PER_BLOCK vectors, each (first, second, 0, 1).
//   pBC    - two consecutive 8-byte BC4_UNORM blocks: the first supplies X,
//            the second supplies Y.
// Both pointers must be non-null (checked by assert only).
440 void D3DXDecodeBC5U(XMVECTOR *pColor, const uint8_t *pBC)
442 assert( pColor && pBC );
443 static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
// The second channel's block immediately follows the first in memory.
445 const BC4_UNORM * pBCR = reinterpret_cast<const BC4_UNORM*>(pBC);
446 const BC4_UNORM * pBCG = reinterpret_cast<const BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
448 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
// X and Y come from the two blocks; Z is 0 and W is 1.
450 pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
// Decodes one signed two-channel block.
//   pColor - receives NUM_PIXELS_PER_BLOCK vectors, each (first, second, 0, 1).
//   pBC    - two consecutive 8-byte BC4_SNORM blocks: the first supplies X,
//            the second supplies Y.
// Both pointers must be non-null (checked by assert only).
454 void D3DXDecodeBC5S(XMVECTOR *pColor, const uint8_t *pBC)
456 assert( pColor && pBC );
457 static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
// The second channel's block immediately follows the first in memory.
459 const BC4_SNORM * pBCR = reinterpret_cast<const BC4_SNORM*>(pBC);
460 const BC4_SNORM * pBCG = reinterpret_cast<const BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
462 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
// X and Y come from the two blocks; Z is 0 and W is 1.
464 pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
// Encodes one block of texels as an unsigned two-channel block.
//   pBC    - receives 16 bytes: two consecutive BC4_UNORM blocks.
//   pColor - NUM_PIXELS_PER_BLOCK input vectors; X feeds the first block and
//            Y feeds the second.
//   flags  - accepted for interface compatibility; not used.
// Both pointers must be non-null (checked by assert only).
// NOTE(review): the declaration of clr and the endpoint-selection step between
// gathering the texels and the FindClosestUNORM calls are on lines missing
// from this excerpt -- confirm against the full file.
468 void D3DXEncodeBC5U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
470 UNREFERENCED_PARAMETER( flags );
472 assert( pBC && pColor );
473 static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
// Zero both output blocks before any selector bits are OR-ed in.
475 memset(pBC, 0, sizeof(BC4_UNORM)*2);
// The second channel's block immediately follows the first in memory.
476 BC4_UNORM * pBCR = reinterpret_cast<BC4_UNORM*>(pBC);
477 BC4_UNORM * pBCG = reinterpret_cast<BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
478 float theTexelsU[NUM_PIXELS_PER_BLOCK];
479 float theTexelsV[NUM_PIXELS_PER_BLOCK];
// Split every input texel into its X and Y components.
481 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
484 XMStoreFloat4A( &clr, pColor[i] );
485 theTexelsU[i] = clr.x;
486 theTexelsV[i] = clr.y;
// Choose per-texel selectors for each channel independently.
497 FindClosestUNORM(pBCR, theTexelsU);
498 FindClosestUNORM(pBCG, theTexelsV);
// Encodes one block of texels as a signed two-channel block.
//   pBC    - receives 16 bytes: two consecutive BC4_SNORM blocks.
//   pColor - NUM_PIXELS_PER_BLOCK input vectors; X feeds the first block and
//            Y feeds the second.
//   flags  - accepted for interface compatibility; not used.
// Both pointers must be non-null (checked by assert only).
// NOTE(review): the declaration of clr and the endpoint-selection step between
// gathering the texels and the FindClosestSNORM calls are on lines missing
// from this excerpt -- confirm against the full file.
501 void D3DXEncodeBC5S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
503 UNREFERENCED_PARAMETER( flags );
505 assert( pBC && pColor );
506 static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
// Zero both output blocks, sized by the type that is actually written here so
// the static_assert above guards the size passed to memset.
508 memset(pBC, 0, sizeof(BC4_SNORM)*2);
// The second channel's block immediately follows the first in memory.
509 BC4_SNORM * pBCR = reinterpret_cast<BC4_SNORM*>(pBC);
510 BC4_SNORM * pBCG = reinterpret_cast<BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
511 float theTexelsU[NUM_PIXELS_PER_BLOCK];
512 float theTexelsV[NUM_PIXELS_PER_BLOCK];
// Split every input texel into its X and Y components.
514 for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
517 XMStoreFloat4A( &clr, pColor[i] );
518 theTexelsU[i] = clr.x;
519 theTexelsV[i] = clr.y;
// Choose per-texel selectors for each channel independently.
530 FindClosestSNORM(pBCR, theTexelsU);
531 FindClosestSNORM(pBCG, theTexelsV);