git.cworth.org Git - apitrace/blob - thirdparty/directxtex/DirectXTex/BC4BC5.cpp

   1 //-------------------------------------------------------------------------------------
   2 // BC4BC5.cpp
   3 //
   4 // Block-compression (BC) functionality for BC4 and BC5 (DirectX 10 texture compression)
   5 //
   6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
   7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
   8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
   9 // PARTICULAR PURPOSE.
  10 //
  11 // Copyright (c) Microsoft Corporation. All rights reserved.
  12 //
  13 // http://go.microsoft.com/fwlink/?LinkId=248926
  14 //-------------------------------------------------------------------------------------
  15
  16 #include "DirectXTexP.h"
  17
  18 #include "BC.h"
  19
  20 #pragma warning(disable : 4201)
  21
  22 namespace DirectX
  23 {
  24
  25 //------------------------------------------------------------------------------------
  26 // Constants
  27 //------------------------------------------------------------------------------------
  28
  29 // Because these are used in SAL annotations, they need to remain macros rather than const values
  30 #define BLOCK_LEN 4
    // length of each block edge, in texels
  32
  33 #define BLOCK_SIZE (BLOCK_LEN * BLOCK_LEN)
  34     // total texels in a 4x4 block.
  35
  36 //------------------------------------------------------------------------------------
  37 // Structures
  38 //-------------------------------------------------------------------------------------
  39
  40 // BC4U/BC5U
  41 struct BC4_UNORM
  42 {
  43     float R(size_t uOffset) const
  44     {
  45         size_t uIndex = GetIndex(uOffset);
  46         return DecodeFromIndex(uIndex);
  47     }
  48
  49     float DecodeFromIndex(size_t uIndex) const
  50     {
  51         if (uIndex == 0)
  52             return red_0 / 255.0f;
  53         if (uIndex == 1)
  54             return red_1 / 255.0f;
  55         float fred_0 = red_0 / 255.0f;
  56         float fred_1 = red_1 / 255.0f;
  57         if (red_0 > red_1)
  58         {
  59             uIndex -= 1;
  60             return (fred_0 * (7-uIndex) + fred_1 * uIndex) / 7.0f;
  61         }
  62         else
  63         {
  64             if (uIndex == 6)
  65                 return 0.0f;
  66             if (uIndex == 7)
  67                 return 1.0f;
  68             uIndex -= 1;
  69             return (fred_0 * (5-uIndex) + fred_1 * uIndex) / 5.0f;
  70         }
  71     }
  72
  73     size_t GetIndex(size_t uOffset) const
  74     {
  75         return (size_t) ((data >> (3*uOffset + 16)) & 0x07);
  76     }
  77
  78     void SetIndex(size_t uOffset, size_t uIndex)
  79     {
  80         data &= ~((uint64_t) 0x07 << (3*uOffset + 16));
  81         data |= ((uint64_t) uIndex << (3*uOffset + 16));
  82     }
  83
  84     union
  85     {
  86         struct
  87         {
  88             uint8_t red_0;
  89             uint8_t red_1;
  90             uint8_t indices[6];
  91         };
  92         uint64_t data;
  93     };
  94 };
  95
  96 // BC4S/BC5S
  97 struct BC4_SNORM
  98 {
  99     float R(size_t uOffset) const
 100     {
 101         size_t uIndex = GetIndex(uOffset);
 102         return DecodeFromIndex(uIndex);
 103     }
 104
 105     float DecodeFromIndex(size_t uIndex) const
 106     {
 107         int8_t sred_0 = (red_0 == -128)? -127 : red_0;
 108         int8_t sred_1 = (red_1 == -128)? -127 : red_1;
 109
 110         if (uIndex == 0)
 111             return sred_0 / 127.0f;
 112         if (uIndex == 1)
 113             return sred_1 / 127.0f;
 114         float fred_0 = sred_0 / 127.0f;
 115         float fred_1 = sred_1 / 127.0f;
 116         if (red_0 > red_1)
 117         {
 118             uIndex -= 1;
 119             return (fred_0 * (7-uIndex) + fred_1 * uIndex) / 7.0f;
 120         }
 121         else
 122         {
 123             if (uIndex == 6)
 124                 return -1.0f;
 125             if (uIndex == 7)
 126                 return 1.0f;
 127             uIndex -= 1;
 128             return (fred_0 * (5-uIndex) + fred_1 * uIndex) / 5.0f;
 129         }
 130     }
 131
 132     size_t GetIndex(size_t uOffset) const
 133     {
 134         return (size_t) ((data >> (3*uOffset + 16)) & 0x07);
 135     }
 136
 137     void SetIndex(size_t uOffset, size_t uIndex)
 138     {
 139         data &= ~((uint64_t) 0x07 << (3*uOffset + 16));
 140         data |= ((uint64_t) uIndex << (3*uOffset + 16));
 141     }
 142
 143     union
 144     {
 145         struct
 146         {
 147             int8_t red_0;
 148             int8_t red_1;
 149             uint8_t indices[6];
 150         };
 151         uint64_t data;
 152     };
 153 };
 154
 155
 156 //-------------------------------------------------------------------------------------
 157 // Convert a floating point value to an 8-bit SNORM
 158 //-------------------------------------------------------------------------------------
 159 static void inline FloatToSNorm( _In_ float fVal, _Out_ int8_t *piSNorm )
 160 {
 161     const uint32_t dwMostNeg = ( 1 << ( 8 * sizeof( int8_t ) - 1 ) );
 162
 163     if( _isnan( fVal ) )
 164         fVal = 0;
 165     else
 166         if( fVal > 1 )
 167             fVal = 1;   // Clamp to 1
 168         else
 169             if( fVal < -1 )
 170                 fVal = -1;      // Clamp to -1
 171
 172     fVal = fVal * (int8_t) ( dwMostNeg - 1 );
 173
 174     if( fVal >= 0 )
 175         fVal += .5f;
 176     else
 177         fVal -= .5f;
 178
 179     *piSNorm = (int8_t) (fVal);
 180 }
 181
 182
 183 //------------------------------------------------------------------------------
 184 static void FindEndPointsBC4U( _In_count_c_(BLOCK_SIZE) const float theTexelsU[], _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1)
 185 {
 186     // The boundary of codec for signed/unsigned format
 187     float MIN_NORM;
 188     float MAX_NORM = 1.0f;
 189     int8_t iStart, iEnd;
 190     size_t i;
 191
 192     MIN_NORM = 0.0f;
 193
 194     // Find max/min of input texels
 195     float fBlockMax = theTexelsU[0];
 196     float fBlockMin = theTexelsU[0];
 197     for (i = 0; i < BLOCK_SIZE; ++i)
 198     {
 199         if (theTexelsU[i]<fBlockMin)
 200         {
 201             fBlockMin = theTexelsU[i];
 202         }
 203         else if (theTexelsU[i]>fBlockMax)
 204         {
 205             fBlockMax = theTexelsU[i];
 206         }
 207     }
 208
 209     //  If there are boundary values in input texels, Should use 4 block-codec to guarantee
 210     //  the exact code of the boundary values.
 211     bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
 212
 213     // Using Optimize
 214     float fStart, fEnd;
 215
 216     if (!bUsing4BlockCodec)
 217     {
 218         OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 8);
 219
 220         iStart = (uint8_t) (fStart * 255.0f);
 221         iEnd   = (uint8_t) (fEnd   * 255.0f);
 222
 223         endpointU_0 = iEnd;
 224         endpointU_1 = iStart;
 225     }
 226     else
 227     {
 228         OptimizeAlpha<false>(&fStart, &fEnd, theTexelsU, 6);
 229
 230         iStart = (uint8_t) (fStart * 255.0f);
 231         iEnd   = (uint8_t) (fEnd   * 255.0f);
 232
 233         endpointU_1 = iEnd;
 234         endpointU_0 = iStart;
 235     }
 236 }
 237
 238 static void FindEndPointsBC4S(_In_count_c_(BLOCK_SIZE) const float theTexelsU[], _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1)
 239 {
 240     //  The boundary of codec for signed/unsigned format
 241     float MIN_NORM;
 242     float MAX_NORM = 1.0f;
 243     int8_t iStart, iEnd;
 244     size_t i;
 245
 246     MIN_NORM = -1.0f;
 247
 248     // Find max/min of input texels
 249     float fBlockMax = theTexelsU[0];
 250     float fBlockMin = theTexelsU[0];
 251     for (i = 0; i < BLOCK_SIZE; ++i)
 252     {
 253         if (theTexelsU[i]<fBlockMin)
 254         {
 255             fBlockMin = theTexelsU[i];
 256         }
 257         else if (theTexelsU[i]>fBlockMax)
 258         {
 259             fBlockMax = theTexelsU[i];
 260         }
 261     }
 262
 263     //  If there are boundary values in input texels, Should use 4 block-codec to guarantee
 264     //  the exact code of the boundary values.
 265     bool bUsing4BlockCodec = ( MIN_NORM == fBlockMin || MAX_NORM == fBlockMax );
 266
 267     // Using Optimize
 268     float fStart, fEnd;
 269
 270     if (!bUsing4BlockCodec)
 271     {
 272         OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 8);
 273
 274         FloatToSNorm(fStart, &iStart);
 275         FloatToSNorm(fEnd, &iEnd);
 276
 277         endpointU_0 = iEnd;
 278         endpointU_1 = iStart;
 279     }
 280     else
 281     {
 282         OptimizeAlpha<true>(&fStart, &fEnd, theTexelsU, 6);
 283
 284         FloatToSNorm(fStart, &iStart);
 285         FloatToSNorm(fEnd, &iEnd);
 286
 287         endpointU_1 = iEnd;
 288         endpointU_0 = iStart;
 289     }
 290 }
 291
 292
 293 //------------------------------------------------------------------------------
 294 static inline void FindEndPointsBC5U( _In_count_c_(BLOCK_SIZE) const float theTexelsU[], _In_count_c_(BLOCK_SIZE) const float theTexelsV[],
 295                                       _Out_ uint8_t &endpointU_0, _Out_ uint8_t &endpointU_1, _Out_ uint8_t &endpointV_0, _Out_ uint8_t &endpointV_1)
 296 {
 297     //Encoding the U and V channel by BC4 codec separately.
 298     FindEndPointsBC4U( theTexelsU, endpointU_0, endpointU_1);
 299     FindEndPointsBC4U( theTexelsV, endpointV_0, endpointV_1);
 300 }
 301
 302 static inline void FindEndPointsBC5S( _In_count_c_(BLOCK_SIZE) const float theTexelsU[], _In_count_c_(BLOCK_SIZE) const float theTexelsV[],
 303                                       _Out_ int8_t &endpointU_0, _Out_ int8_t &endpointU_1, _Out_ int8_t &endpointV_0, _Out_ int8_t &endpointV_1)
 304 {
 305     //Encoding the U and V channel by BC4 codec separately.
 306     FindEndPointsBC4S( theTexelsU, endpointU_0, endpointU_1);
 307     FindEndPointsBC4S( theTexelsV, endpointV_0, endpointV_1);
 308 }
 309
 310
 311 //------------------------------------------------------------------------------
 312 static void FindClosestUNORM(_Inout_ BC4_UNORM* pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
 313 {
 314     float rGradient[8];
 315     int i;
 316     for (i = 0; i < 8; ++i)
 317     {
 318         rGradient[i] = pBC->DecodeFromIndex(i);
 319     }
 320     for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 321     {
 322         size_t uBestIndex = 0;
 323         float fBestDelta = 100000;
 324         for (size_t uIndex = 0; uIndex < 8; uIndex++)
 325         {
 326             float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
 327             if (fCurrentDelta < fBestDelta)
 328             {
 329                 uBestIndex = uIndex;
 330                 fBestDelta = fCurrentDelta;
 331             }
 332         }
 333         pBC->SetIndex(i, uBestIndex);
 334     }
 335 }
 336
 337 static void FindClosestSNORM(_Inout_ BC4_SNORM* pBC, _In_count_c_(NUM_PIXELS_PER_BLOCK) const float theTexelsU[])
 338 {
 339     float rGradient[8];
 340     int i;
 341     for (i = 0; i < 8; ++i)
 342     {
 343         rGradient[i] = pBC->DecodeFromIndex(i);
 344     }
 345     for (i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 346     {
 347         size_t uBestIndex = 0;
 348         float fBestDelta = 100000;
 349         for (size_t uIndex = 0; uIndex < 8; uIndex++)
 350         {
 351             float fCurrentDelta = fabsf(rGradient[uIndex]-theTexelsU[i]);
 352             if (fCurrentDelta < fBestDelta)
 353             {
 354                 uBestIndex = uIndex;
 355                 fBestDelta = fCurrentDelta;
 356             }
 357         }
 358         pBC->SetIndex(i, uBestIndex);
 359     }
 360 }
 361
 362
 363 //=====================================================================================
 364 // Entry points
 365 //=====================================================================================
 366
 367 //-------------------------------------------------------------------------------------
 368 // BC4 Compression
 369 //-------------------------------------------------------------------------------------
 370 void D3DXDecodeBC4U( XMVECTOR *pColor, const uint8_t *pBC )
 371 {
 372     assert( pColor && pBC );
 373     static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
 374
 375     const BC4_UNORM * pBC4 = reinterpret_cast<const BC4_UNORM*>(pBC);
 376
 377     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 378     {
 379         pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
 380     }
 381 }
 382
 383 void D3DXDecodeBC4S(XMVECTOR *pColor, const uint8_t *pBC)
 384 {
 385     assert( pColor && pBC );
 386     static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
 387
 388     const BC4_SNORM * pBC4 = reinterpret_cast<const BC4_SNORM*>(pBC);
 389
 390     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 391     {
 392         pColor[i] = XMVectorSet( pBC4->R(i), 0, 0, 1.0f);
 393     }
 394 }
 395
 396 void D3DXEncodeBC4U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
 397 {
 398     UNREFERENCED_PARAMETER( flags );
 399
 400     assert( pBC && pColor );
 401     static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
 402
 403     memset(pBC, 0, sizeof(BC4_UNORM));
 404     BC4_UNORM * pBC4 = reinterpret_cast<BC4_UNORM*>(pBC);
 405     float theTexelsU[NUM_PIXELS_PER_BLOCK];
 406
 407     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 408     {
 409         theTexelsU[i] = XMVectorGetX( pColor[i] );
 410     }
 411
 412     FindEndPointsBC4U(theTexelsU, pBC4->red_0, pBC4->red_1);
 413     FindClosestUNORM(pBC4, theTexelsU);
 414 }
 415
 416 void D3DXEncodeBC4S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
 417 {
 418     UNREFERENCED_PARAMETER( flags );
 419
 420     assert( pBC && pColor );
 421     static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
 422
 423     memset(pBC, 0, sizeof(BC4_UNORM));
 424     BC4_SNORM * pBC4 = reinterpret_cast<BC4_SNORM*>(pBC);
 425     float theTexelsU[NUM_PIXELS_PER_BLOCK];
 426
 427     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 428     {
 429         theTexelsU[i] = XMVectorGetX( pColor[i] );
 430     }
 431
 432     FindEndPointsBC4S(theTexelsU, pBC4->red_0, pBC4->red_1);
 433     FindClosestSNORM(pBC4, theTexelsU);
 434 }
 435
 436
 437 //-------------------------------------------------------------------------------------
 438 // BC5 Compression
 439 //-------------------------------------------------------------------------------------
 440 void D3DXDecodeBC5U(XMVECTOR *pColor, const uint8_t *pBC)
 441 {
 442     assert( pColor && pBC );
 443     static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
 444
 445     const BC4_UNORM * pBCR = reinterpret_cast<const BC4_UNORM*>(pBC);
 446     const BC4_UNORM * pBCG = reinterpret_cast<const BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
 447
 448     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 449     {
 450         pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
 451     }
 452 }
 453
 454 void D3DXDecodeBC5S(XMVECTOR *pColor, const uint8_t *pBC)
 455 {
 456     assert( pColor && pBC );
 457     static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
 458
 459     const BC4_SNORM * pBCR = reinterpret_cast<const BC4_SNORM*>(pBC);
 460     const BC4_SNORM * pBCG = reinterpret_cast<const BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
 461
 462     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 463     {
 464         pColor[i] = XMVectorSet(pBCR->R(i), pBCG->R(i), 0, 1.0f);
 465     }
 466 }
 467
 468 void D3DXEncodeBC5U( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
 469 {
 470     UNREFERENCED_PARAMETER( flags );
 471
 472     assert( pBC && pColor );
 473     static_assert( sizeof(BC4_UNORM) == 8, "BC4_UNORM should be 8 bytes" );
 474
 475     memset(pBC, 0, sizeof(BC4_UNORM)*2);
 476     BC4_UNORM * pBCR = reinterpret_cast<BC4_UNORM*>(pBC);
 477     BC4_UNORM * pBCG = reinterpret_cast<BC4_UNORM*>(pBC+sizeof(BC4_UNORM));
 478     float theTexelsU[NUM_PIXELS_PER_BLOCK];
 479     float theTexelsV[NUM_PIXELS_PER_BLOCK];
 480
 481     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 482     {
 483         XMFLOAT4A clr;
 484         XMStoreFloat4A( &clr, pColor[i] );
 485         theTexelsU[i] = clr.x;
 486         theTexelsV[i] = clr.y;
 487     }
 488
 489     FindEndPointsBC5U(
 490         theTexelsU,
 491         theTexelsV,
 492         pBCR->red_0,
 493         pBCR->red_1,
 494         pBCG->red_0,
 495         pBCG->red_1);
 496
 497     FindClosestUNORM(pBCR, theTexelsU);
 498     FindClosestUNORM(pBCG, theTexelsV);
 499 }
 500
 501 void D3DXEncodeBC5S( uint8_t *pBC, const XMVECTOR *pColor, DWORD flags )
 502 {
 503     UNREFERENCED_PARAMETER( flags );
 504
 505     assert( pBC && pColor );
 506     static_assert( sizeof(BC4_SNORM) == 8, "BC4_SNORM should be 8 bytes" );
 507
 508     memset(pBC, 0, sizeof(BC4_UNORM)*2);
 509     BC4_SNORM * pBCR = reinterpret_cast<BC4_SNORM*>(pBC);
 510     BC4_SNORM * pBCG = reinterpret_cast<BC4_SNORM*>(pBC+sizeof(BC4_SNORM));
 511     float theTexelsU[NUM_PIXELS_PER_BLOCK];
 512     float theTexelsV[NUM_PIXELS_PER_BLOCK];
 513
 514     for (size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
 515     {
 516         XMFLOAT4A clr;
 517         XMStoreFloat4A( &clr, pColor[i] );
 518         theTexelsU[i] = clr.x;
 519         theTexelsV[i] = clr.y;
 520     }
 521
 522     FindEndPointsBC5S(
 523         theTexelsU,
 524         theTexelsV,
 525         pBCR->red_0,
 526         pBCR->red_1,
 527         pBCG->red_0,
 528         pBCG->red_1);
 529
 530     FindClosestSNORM(pBCR, theTexelsU);
 531     FindClosestSNORM(pBCG, theTexelsV);
 532 }
 533
 534 } // namespace