1 //-------------------------------------------------------------------------------------
4 // Block-compression (BC) functionality for BC6H and BC7 (DirectX 11 texture compression)
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
11 // Copyright (c) Microsoft Corporation. All rights reserved.
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
16 #include "directxtexp.h"
21 using namespace DirectX::PackedVector;
27 //-------------------------------------------------------------------------------------
29 //-------------------------------------------------------------------------------------
// Small threshold equal to (0.25 / 64)^2.
// NOTE(review): its consumers are not visible in this excerpt; presumably compared against squared lengths.
31 static const float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f);
// Complementary interpolation weights: pC* runs from 1 down to 0, pD* from 0 up to 1,
// so pC[i] + pD[i] == 1 for every step i. OptimizeRGB picks the 3-entry pair when
// cSteps == 3 and the 4-entry pair otherwise.
32 static const float pC3[] = { 2.0f/2.0f, 1.0f/2.0f, 0.0f/2.0f };
33 static const float pD3[] = { 0.0f/2.0f, 1.0f/2.0f, 2.0f/2.0f };
34 static const float pC4[] = { 3.0f/3.0f, 2.0f/3.0f, 1.0f/3.0f, 0.0f/3.0f };
35 static const float pD4[] = { 0.0f/3.0f, 1.0f/3.0f, 2.0f/3.0f, 3.0f/3.0f };
// Integer weight ramps from 0 to 64 with 4, 8 and 16 entries.
// NOTE(review): presumably the interpolation weights for 2-, 3- and 4-bit indices
// (per the name suffix) -- confirm against the code that reads them.
37 const int g_aWeights2[] = {0, 21, 43, 64};
38 const int g_aWeights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
39 const int g_aWeights4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
// Subset assignment for every pixel of a 4x4 block.
// Indexed as [subset count - 1][shape][pixel]; each entry is the subset (0..2) that pixel belongs to.
// The first slice (one region) is all zeros, the second holds the 2-subset shapes,
// and the third holds the 3-subset shapes.
41 // Partition, Shape, Pixel (index into 4x4 block)
42 static const uint8_t g_aPartitionTable[3][64][16] =
44 { // 1 Region case has no subsets (all 0)
45 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
46 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
47 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
48 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
49 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
50 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
51 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
52 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
53 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
54 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
55 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
56 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
57 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
58 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
59 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
60 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
61 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
62 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
63 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
64 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
65 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
66 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
67 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
68 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
69 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
70 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
71 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
72 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
73 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
74 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
75 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
76 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
77 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
78 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
79 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
80 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
81 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
82 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
83 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
84 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
85 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
86 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
87 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
88 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
89 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
90 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
91 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
92 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
93 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
94 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
95 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
96 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
97 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
98 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
99 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
100 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
101 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
102 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
103 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
104 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
105 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
106 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
107 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
108 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
111 { // BC6H/BC7 Partition Set for 2 Subsets
112 { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 0
113 { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 }, // Shape 1
114 { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, // Shape 2
115 { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
116 { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 4
117 { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 5
118 { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
119 { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 7
120 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 8
121 { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 9
122 { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 10
123 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }, // Shape 11
124 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 12
125 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 13
126 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 14
127 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 15
128 { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, // Shape 16
129 { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 17
130 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 18
131 { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 19
132 { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 20
133 { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 21
134 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 22
135 { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 }, // Shape 23
136 { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 24
137 { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 25
138 { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 }, // Shape 26
139 { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 }, // Shape 27
140 { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }, // Shape 28
141 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 29
142 { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 30
143 { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 31
145 // BC7 Partition Set for 2 Subsets (second-half)
146 { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, // Shape 32
147 { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 33
148 { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 }, // Shape 34
149 { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 }, // Shape 35
150 { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 }, // Shape 36
151 { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 }, // Shape 37
152 { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, // Shape 38
153 { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 }, // Shape 39
154 { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 40
155 { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, // Shape 41
156 { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }, // Shape 42
157 { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }, // Shape 43
158 { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }, // Shape 44
159 { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 }, // Shape 45
160 { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 }, // Shape 46
161 { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, // Shape 47
162 { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, // Shape 48
163 { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, // Shape 49
164 { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 }, // Shape 50
165 { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 }, // Shape 51
166 { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 }, // Shape 52
167 { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 53
168 { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 54
169 { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 }, // Shape 55
170 { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 56
171 { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 }, // Shape 57
172 { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, // Shape 58
173 { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 }, // Shape 59
174 { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 60
175 { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 61
176 { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 }, // Shape 62
177 { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 } // Shape 63
180 { // BC7 Partition Set for 3 Subsets
181 { 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 }, // Shape 0
182 { 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 1
183 { 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 2
184 { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
185 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 4
186 { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 }, // Shape 5
187 { 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
188 { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 7
189 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 8
190 { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 9
191 { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 10
192 { 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 }, // Shape 11
193 { 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 }, // Shape 12
194 { 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 13
195 { 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 14
196 { 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 }, // Shape 15
197 { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 }, // Shape 16
198 { 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 }, // Shape 17
199 { 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 18
200 { 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 }, // Shape 19
201 { 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 }, // Shape 20
202 { 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, // Shape 21
203 { 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 22
204 { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 }, // Shape 23
205 { 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 }, // Shape 24
206 { 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 }, // Shape 25
207 { 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 }, // Shape 26
208 { 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 }, // Shape 27
209 { 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 }, // Shape 28
210 { 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 }, // Shape 29
211 { 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 }, // Shape 30
212 { 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 31
213 { 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 32
214 { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 }, // Shape 33
215 { 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 }, // Shape 34
216 { 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 }, // Shape 35
217 { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 }, // Shape 36
218 { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }, // Shape 37
219 { 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 }, // Shape 38
220 { 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 }, // Shape 39
221 { 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 }, // Shape 40
222 { 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 41
223 { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 42
224 { 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 }, // Shape 43
225 { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 }, // Shape 44
226 { 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 }, // Shape 45
227 { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 }, // Shape 46
228 { 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 47
229 { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 }, // Shape 48
230 { 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 }, // Shape 49
231 { 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 }, // Shape 50
232 { 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 51
233 { 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 }, // Shape 52
234 { 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 }, // Shape 53
235 { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 }, // Shape 54
236 { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 55
237 { 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 56
238 { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 }, // Shape 57
239 { 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 }, // Shape 58
240 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 }, // Shape 59
241 { 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 }, // Shape 60
242 { 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 }, // Shape 61
243 { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 62
244 { 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 } // Shape 63
// Fix-up pixel offsets for each shape.
// Indexed as [subset count - 1][shape][subset]; each entry is a pixel offset (0..15) within the 4x4 block.
// IsFixUpOffset() compares a pixel offset against entries [0..uPartitions] of the selected row.
// NOTE(review): presumably these are the anchor pixels whose index is stored with one
// bit less in the BC6H/BC7 encodings -- confirm against the format specification.
248 // Partition, Shape, Fixup
249 static const uint8_t g_aFixUp[3][64][3] =
251 { // No fix-ups for 1st subset for BC6H or BC7
252 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
253 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
254 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
255 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
256 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
257 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
258 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
259 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
260 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
261 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
262 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
263 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
264 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
265 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
266 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
267 { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}
270 { // BC6H/BC7 Partition Set Fixups for 2 Subsets
271 { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
272 { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
273 { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
274 { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
275 { 0,15, 0}, { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0},
276 { 0, 2, 0}, { 0, 8, 0}, { 0, 8, 0}, { 0,15, 0},
277 { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
278 { 0, 8, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
280 // BC7 Partition Set Fixups for 2 Subsets (second-half)
281 { 0,15, 0}, { 0,15, 0}, { 0, 6, 0}, { 0, 8, 0},
282 { 0, 2, 0}, { 0, 8, 0}, { 0,15, 0}, { 0,15, 0},
283 { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
284 { 0, 2, 0}, { 0,15, 0}, { 0,15, 0}, { 0, 6, 0},
285 { 0, 6, 0}, { 0, 2, 0}, { 0, 6, 0}, { 0, 8, 0},
286 { 0,15, 0}, { 0,15, 0}, { 0, 2, 0}, { 0, 2, 0},
287 { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
288 { 0,15, 0}, { 0, 2, 0}, { 0, 2, 0}, { 0,15, 0}
291 { // BC7 Partition Set Fixups for 3 Subsets
292 { 0, 3,15}, { 0, 3, 8}, { 0,15, 8}, { 0,15, 3},
293 { 0, 8,15}, { 0, 3,15}, { 0,15, 3}, { 0,15, 8},
294 { 0, 8,15}, { 0, 8,15}, { 0, 6,15}, { 0, 6,15},
295 { 0, 6,15}, { 0, 5,15}, { 0, 3,15}, { 0, 3, 8},
296 { 0, 3,15}, { 0, 3, 8}, { 0, 8,15}, { 0,15, 3},
297 { 0, 3,15}, { 0, 3, 8}, { 0, 6,15}, { 0,10, 8},
298 { 0, 5, 3}, { 0, 8,15}, { 0, 8, 6}, { 0, 6,10},
299 { 0, 8,15}, { 0, 5,15}, { 0,15,10}, { 0,15, 8},
300 { 0, 8,15}, { 0,15, 3}, { 0, 3,15}, { 0, 5,10},
301 { 0, 6,10}, { 0,10, 8}, { 0, 8, 9}, { 0,15,10},
302 { 0,15, 6}, { 0, 3,15}, { 0,15, 8}, { 0, 5,15},
303 { 0,15, 3}, { 0,15, 6}, { 0,15, 6}, { 0,15, 8},
304 { 0, 3,15}, { 0,15, 3}, { 0, 5,15}, { 0, 5,15},
305 { 0, 5,15}, { 0, 8,15}, { 0, 5,15}, { 0,10,15},
306 { 0, 5,15}, { 0,10,15}, { 0, 8,15}, { 0,13,15},
307 { 0,15, 3}, { 0,12,15}, { 0, 3,15}, { 0, 3, 8}
// Per-mode descriptor table for BC6H: 14 modes, 82 {field tag, bit number} entries per mode.
// NOTE(review): the field tags (M, D, NA, and RW..BZ) are declared outside this excerpt.
// Presumably M = mode bits, D = shape/partition bits, NA = unused, and R/G/B combined
// with W/X/Y/Z name one channel of one of four endpoints -- confirm against ModeDescriptor.
312 const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] =
315 { M, 0}, { M, 1}, {GY, 4}, {BY, 4}, {BZ, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
316 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
317 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
318 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
319 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
320 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
321 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
322 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
327 { M, 0}, { M, 1}, {GY, 5}, {GZ, 4}, {GZ, 5}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
328 {RW, 5}, {RW, 6}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
329 {GW, 5}, {GW, 6}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
330 {BW, 5}, {BW, 6}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
331 {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
332 {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
333 {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
334 {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
339 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
340 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
341 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
342 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
343 {RW,10}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
344 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
345 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
346 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
351 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
352 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
353 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
354 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
355 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
356 {GW,10}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
357 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 0},
358 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {GY, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
363 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
364 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
365 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
366 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
367 {BY, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
368 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
369 {BW,10}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 1},
370 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {BZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
375 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
376 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
377 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
378 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
379 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
380 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
381 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
382 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
387 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
388 {RW, 5}, {RW, 6}, {RW, 7}, {GZ, 4}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
389 {GW, 5}, {GW, 6}, {GW, 7}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
390 {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 3}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
391 {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
392 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
393 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
394 {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
399 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
400 {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 0}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
401 {GW, 5}, {GW, 6}, {GW, 7}, {GY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
402 {BW, 5}, {BW, 6}, {BW, 7}, {GZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
403 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
404 {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
405 {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
406 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
411 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
412 {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
413 {GW, 5}, {GW, 6}, {GW, 7}, {BY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
414 {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
415 {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
416 {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
417 {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
418 {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
423 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
424 {RW, 5}, {GZ, 4}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
425 {GW, 5}, {GY, 5}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
426 {BW, 5}, {GZ, 5}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
427 {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
428 {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
429 {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
430 {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
435 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
436 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
437 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
438 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
439 {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RX, 9}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
440 {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GX, 9}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
441 {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BX, 9}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
442 {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
447 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
448 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
449 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
450 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
451 {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
452 {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
453 {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
454 {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
459 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
460 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
461 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
462 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
463 {RX, 5}, {RX, 6}, {RX, 7}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
464 {GX, 5}, {GX, 6}, {GX, 7}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
465 {BX, 5}, {BX, 6}, {BX, 7}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
466 {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
471 { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
472 {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
473 {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
474 {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,15},
475 {RW,14}, {RW,13}, {RW,12}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,15},
476 {GW,14}, {GW,13}, {GW,12}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,15},
477 {BW,14}, {BW,13}, {BW,12}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
478 {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
// Per-mode parameters for BC6H, one row per mode (14 rows, labelled Mode 0..13 by row position).
// The first field is the mode's identifying value (0x00, 0x01, 0x02, 0x06, ...).
// Rows with Partitions == 1 use IndexPrec 3 and fill all four LDRColorA precisions;
// rows with Partitions == 0 use IndexPrec 4 and leave the last two precisions at zero.
// NOTE(review): presumably Partitions counts the extra regions (0 = one region,
// 1 = two regions) and the four LDRColorA values are the bit precisions of the
// four endpoints -- confirm against the ModeInfo declaration.
483 // Mode, Partitions, Transformed, IndexPrec, RGBAPrec
484 const D3DX_BC6H::ModeInfo D3DX_BC6H::ms_aInfo[] =
486 {0x00, 1, true, 3, LDRColorA(10,10,10,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 0
487 {0x01, 1, true, 3, LDRColorA( 7, 7, 7,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 1
488 {0x02, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 5, 4, 4,0), LDRColorA(5,4,4,0), LDRColorA(5,4,4,0)}, // Mode 2
489 {0x06, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 5, 4,0), LDRColorA(4,5,4,0), LDRColorA(4,5,4,0)}, // Mode 3
490 {0x0a, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 4, 5,0), LDRColorA(4,4,5,0), LDRColorA(4,4,5,0)}, // Mode 4
491 {0x0e, 1, true, 3, LDRColorA( 9, 9, 9,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 5
492 {0x12, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 6, 5, 5,0), LDRColorA(6,5,5,0), LDRColorA(6,5,5,0)}, // Mode 6
493 {0x16, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 6, 5,0), LDRColorA(5,6,5,0), LDRColorA(5,6,5,0)}, // Mode 7
494 {0x1a, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 5, 6,0), LDRColorA(5,5,6,0), LDRColorA(5,5,6,0)}, // Mode 8
495 {0x1e, 1, false, 3, LDRColorA( 6, 6, 6,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 9
496 {0x03, 0, false, 4, LDRColorA(10,10,10,0), LDRColorA(10,10,10,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 10
497 {0x07, 0, true, 4, LDRColorA(11,11,11,0), LDRColorA( 9, 9, 9,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 11
498 {0x0b, 0, true, 4, LDRColorA(12,12,12,0), LDRColorA( 8, 8, 8,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 12
499 {0x0f, 0, true, 4, LDRColorA(16,16,16,0), LDRColorA( 4, 4, 4,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 13
// NOTE(review): the initializer rows of this table are not visible in this listing.
// Presumably maps a raw BC6H mode value to the matching row of ms_aInfo -- confirm.
502 const int D3DX_BC6H::ms_aModeToInfo[] =
// Per-mode parameters for BC7, one row per mode (modes 0..7).
// Each "Mode N" comment describes the entry on the line directly above it.
// uPartitions is the subset count minus one (e.g. 2 for the 3-subset modes, 0 for the 1-subset modes).
538 // BC7 compression: uPartitions, uPartitionBits, uPBits, uRotationBits, uIndexModeBits, uIndexPrec, uIndexPrec2, RGBAPrec, RGBAPrecWithP
539 const D3DX_BC7::ModeInfo D3DX_BC7::ms_aInfo[] =
541 {2, 4, 6, 0, 0, 3, 0, LDRColorA(4,4,4,0), LDRColorA(5,5,5,0)},
542 // Mode 0: Color only, 3 Subsets, RGBP 4441 (unique P-bit), 3-bit indices, 16 partitions
543 {1, 6, 2, 0, 0, 3, 0, LDRColorA(6,6,6,0), LDRColorA(7,7,7,0)},
544 // Mode 1: Color only, 2 Subsets, RGBP 6661 (shared P-bit), 3-bit indices, 64 partitions
545 {2, 6, 0, 0, 0, 2, 0, LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)},
546 // Mode 2: Color only, 3 Subsets, RGB 555, 2-bit indices, 64 partitions
547 {1, 6, 4, 0, 0, 2, 0, LDRColorA(7,7,7,0), LDRColorA(8,8,8,0)},
548 // Mode 3: Color only, 2 Subsets, RGBP 7771 (unique P-bit), 2-bit indices, 64 partitions
549 {0, 0, 0, 2, 1, 2, 3, LDRColorA(5,5,5,6), LDRColorA(5,5,5,6)},
550 // Mode 4: Color w/ Separate Alpha, 1 Subset, RGB 555, A6, 16x2/16x3-bit indices, 2-bit rotation, 1-bit index selector
551 {0, 0, 0, 2, 0, 2, 2, LDRColorA(7,7,7,8), LDRColorA(7,7,7,8)},
552 // Mode 5: Color w/ Separate Alpha, 1 Subset, RGB 777, A8, 16x2/16x2-bit indices, 2-bit rotation
553 {0, 0, 2, 0, 0, 4, 0, LDRColorA(7,7,7,7), LDRColorA(8,8,8,8)},
554 // Mode 6: Color+Alpha, 1 Subset, RGBAP 77771 (unique P-bit), 16x4-bit indices
555 {1, 6, 4, 0, 0, 2, 0, LDRColorA(5,5,5,5), LDRColorA(6,6,6,6)}
556 // Mode 7: Color+Alpha, 2 Subsets, RGBAP 55551 (unique P-bit), 2-bit indices, 64 partitions
560 //-------------------------------------------------------------------------------------
562 //-------------------------------------------------------------------------------------
// NOTE(review): the template header and body of this helper are not visible in this
// listing; presumably it exchanges the values of a and b -- confirm.
564 inline static void Swap( T& a, T& b )
// Checks a pixel offset against the fix-up entries of a shape.
//   uPartitions - first index into g_aFixUp (0..2); also the last subset slot examined
//   uShape      - shape index (0..63)
//   uOffset     - pixel offset within the 4x4 block (0..15)
// NOTE(review): the return statements are not visible in this listing; presumably
// returns true when uOffset equals one of the examined entries and false otherwise.
571 inline static bool IsFixUpOffset(_In_range_(0,2) size_t uPartitions, _In_range_(0,63) size_t uShape, _In_range_(0,15) size_t uOffset)
// Range checks: a debug assert plus a hint for static analysis.
573 assert(uPartitions < 3 && uShape < 64 && uOffset < 16);
574 __analysis_assume(uPartitions < 3 && uShape < 64 && uOffset < 16);
// Inclusive bound: slots 0..uPartitions are compared.
575 for(size_t p = 0; p <= uPartitions; p++)
577 if(uOffset == g_aFixUp[uPartitions][uShape][p])
// Returns the squared Euclidean distance between the r, g and b channels of a and b.
// The alpha channel is not read.
585 inline static float ErrorMetricRGB(_In_ const LDRColorA& a, _In_ const LDRColorA& b)
587 float er = float(a.r) - float(b.r);
588 float eg = float(a.g) - float(b.g);
589 float eb = float(a.b) - float(b.b);
590 // weigh the components nonuniformly -- NOTE(review): no per-channel weights are applied in the visible code; all three channels contribute equally
594 return er*er + eg*eg + eb*eb;
// Error contribution of the alpha channel only.
// NOTE(review): the return statement is not visible in this listing; presumably
// returns ea*ea, mirroring ErrorMetricRGB -- confirm.
597 inline static float ErrorMetricAlpha(_In_ const LDRColorA& a, _In_ const LDRColorA& b)
// Alpha difference, computed in float.
599 float ea = float(a.a) - float(b.a);
// Total error between two colors: the sum of the RGB metric and the alpha metric.
603 inline static float ErrorMetric(_In_ const LDRColorA& a, _In_ const LDRColorA& b)
605 return ErrorMetricRGB(a, b) + ErrorMetricAlpha(a, b);
// Rewrites the endpoints in place as deltas relative to aEndPts[0].A:
// aEndPts[0].A is left unchanged and is subtracted from the other three endpoints.
608 inline static void TransformForward(_Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[])
610 aEndPts[0].B -= aEndPts[0].A;
611 aEndPts[1].A -= aEndPts[0].A;
612 aEndPts[1].B -= aEndPts[0].A;
// Reverses TransformForward in place: adds aEndPts[0].A back to the other three
// endpoints and masks each channel to the bit width given by Prec.
//   aEndPts - endpoint pairs; aEndPts[0].A is the base and is not modified
//   Prec    - per-channel bit counts used for the wrap mask and for SignExtend
//   bSigned - NOTE(review): its use is not visible in this listing; presumably it
//             guards the SignExtend calls below -- confirm.
615 inline static void TransformInverse(_Inout_count_c_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], _In_ const LDRColorA& Prec, _In_ bool bSigned)
// Per-channel mask with the low Prec.r / Prec.g / Prec.b bits set.
617 INTColor WrapMask((1 << Prec.r) - 1, (1 << Prec.g) - 1, (1 << Prec.b) - 1);
618 aEndPts[0].B += aEndPts[0].A; aEndPts[0].B &= WrapMask;
619 aEndPts[1].A += aEndPts[0].A; aEndPts[1].A &= WrapMask;
620 aEndPts[1].B += aEndPts[0].A; aEndPts[1].B &= WrapMask;
// Sign-extend the three reconstructed endpoints from Prec bits.
623 aEndPts[0].B.SignExtend(Prec);
624 aEndPts[1].A.SignExtend(Prec);
625 aEndPts[1].B.SignExtend(Prec);
// Norm: squared distance between two INTColor values over the r, g and b channels.
// Channel differences are converted to float before squaring.
629 inline static float Norm(_In_ const INTColor& a, _In_ const INTColor& b)
631 float dr = float(a.r) - float(b.r);
632 float dg = float(a.g) - float(b.g);
633 float db = float(a.b) - float(b.b);
634 return dr * dr + dg * dg + db * db;
637 // return # of bits needed to store n. handle signed or unsigned cases properly
//   n         - value to measure
//   bIsSigned - when true, one extra bit is added for the sign in the loop-counted case below
// NOTE(review): the conditions selecting between the three cases are not visible here;
// presumably n == 0, n > 0 and n < 0 respectively - confirm.
638 inline static int NBits(_In_ int n, _In_ bool bIsSigned)
643 return 0; // no bits needed for 0, signed or not
// Count how many right shifts it takes for n to reach zero.
647 for(nb = 0; n; ++nb, n >>= 1);
648 return nb + (bIsSigned ? 1 : 0);
// Count right shifts while n is still below -1.
653 for(nb = 0; n < -1; ++nb, n >>= 1) ;
659 //-------------------------------------------------------------------------------------
// OptimizeRGB: fits two endpoint colours to a set of points, using the r, g and b
// channels only, and writes them to *pX and *pY.
//   pPoints - source colours for the block
//   pX, pY  - receive the r/g/b of the two fitted endpoints
//   cSteps  - number of interpolation steps; 3 selects the pC3/pD3 weight tables,
//             any other value selects pC4/pD4
//   cPixels - number of entries in pIndex
//   pIndex  - indices into pPoints of the pixels to fit
// NOTE(review): the return statements are not visible here; presumably an error
// value derived from fError is returned - confirm.
660 static float OptimizeRGB(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
661 _Out_ HDRColorA* pX, _Out_ HDRColorA* pY,
662 _In_ size_t cSteps, _In_ size_t cPixels, _In_count_(cPixels) const size_t* pIndex)
664 float fError = FLT_MAX;
665 const float *pC = (3 == cSteps) ? pC3 : pC4;
666 const float *pD = (3 == cSteps) ? pD3 : pD4;
668 // Find Min and Max points, as starting point
// NOTE(review): the minimum is seeded with 1.0 and the maximum with 0.0, so the
// per-channel minimum can never exceed 1.0 and the maximum can never fall below 0.0.
// For inputs outside [0,1] (e.g. HDR values) this bounds the result - confirm intent.
669 HDRColorA X(1.0f, 1.0f, 1.0f, 0.0f);
670 HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
672 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
674 if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
675 if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
676 if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
677 if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
678 if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
679 if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
// Squared length of the AB vector over r, g, b.
688 float fAB = AB.r * AB.r + AB.g * AB.g + AB.b * AB.b;
690 // Single color block.. no need to root-find
693 pX->r = X.r; pX->g = X.g; pX->b = X.b;
694 pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
698 // Try all four axis directions, to determine which diagonal best fits data
699 float fABInv = 1.0f / fAB;
702 Dir.r = AB.r * fABInv;
703 Dir.g = AB.g * fABInv;
704 Dir.b = AB.b * fABInv;
// Midpoint of the bounding box spanned by X and Y.
707 Mid.r = (X.r + Y.r) * 0.5f;
708 Mid.g = (X.g + Y.g) * 0.5f;
709 Mid.b = (X.b + Y.b) * 0.5f;
712 fDir[0] = fDir[1] = fDir[2] = fDir[3] = 0.0f;
714 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
717 Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
718 Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
719 Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
// Accumulate the squared sum for each of the four sign combinations of g and b
// (r is always taken with a positive sign).
722 f = Pt.r + Pt.g + Pt.b; fDir[0] += f * f;
723 f = Pt.r + Pt.g - Pt.b; fDir[1] += f * f;
724 f = Pt.r - Pt.g + Pt.b; fDir[2] += f * f;
725 f = Pt.r - Pt.g - Pt.b; fDir[3] += f * f;
728 float fDirMax = fDir[0];
731 for(size_t iDir = 1; iDir < 4; iDir++)
733 if(fDir[iDir] > fDirMax)
735 fDirMax = fDir[iDir];
// Bit 1 of the winning direction corresponds to a negated g term, bit 0 to a negated
// b term (see the fDir accumulation above); swap those channels between X and Y.
740 if(iDirMax & 2) Swap( X.g, Y.g );
741 if(iDirMax & 1) Swap( X.b, Y.b );
743 // Two color block.. no need to root-find
744 if(fAB < 1.0f / 4096.0f)
746 pX->r = X.r; pX->g = X.g; pX->b = X.b;
747 pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
751 // Use Newton's Method to find local minima of sum-of-squares error.
752 float fSteps = (float) (cSteps - 1);
// At most 8 refinement iterations.
754 for(size_t iIteration = 0; iIteration < 8; iIteration++)
756 // Calculate new steps
759 for(size_t iStep = 0; iStep < cSteps; iStep++)
// Each palette step is a weighted blend of X and Y using the pC/pD tables.
761 pSteps[iStep].r = X.r * pC[iStep] + Y.r * pD[iStep];
762 pSteps[iStep].g = X.g * pC[iStep] + Y.g * pD[iStep];
763 pSteps[iStep].b = X.b * pC[iStep] + Y.b * pD[iStep];
766 // Calculate color direction
// Squared length of the current direction vector.
771 float fLen = (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b);
773 if(fLen < (1.0f / 4096.0f))
776 float fScale = fSteps / fLen;
782 // Evaluate function, and derivatives
783 float d2X = 0.0f, d2Y = 0.0f;
784 HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
786 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
// Projection of (point - X) onto Dir.
788 float fDot = (pPoints[pIndex[iPoint]].r - X.r) * Dir.r +
789 (pPoints[pIndex[iPoint]].g - X.g) * Dir.g +
790 (pPoints[pIndex[iPoint]].b - X.b) * Dir.b;
// Round the projection to the nearest step index.
798 iStep = size_t(fDot + 0.5f);
// Difference between the chosen palette step and the actual point.
801 Diff.r = pSteps[iStep].r - pPoints[pIndex[iPoint]].r;
802 Diff.g = pSteps[iStep].g - pPoints[pIndex[iPoint]].g;
803 Diff.b = pSteps[iStep].b - pPoints[pIndex[iPoint]].b;
805 float fC = pC[iStep] * (1.0f / 8.0f);
806 float fD = pD[iStep] * (1.0f / 8.0f);
808 d2X += fC * pC[iStep];
813 d2Y += fD * pD[iStep];
// Step scale: negative reciprocal of the accumulated second-derivative term.
822 float f = -1.0f / d2X;
831 float f = -1.0f / d2Y;
// Convergence test: every squared component of dX and dY is below fEpsilon.
838 if((dX.r * dX.r < fEpsilon) && (dX.g * dX.g < fEpsilon) && (dX.b * dX.b < fEpsilon) &&
839 (dY.r * dY.r < fEpsilon) && (dY.g * dY.g < fEpsilon) && (dY.b * dY.b < fEpsilon))
845 pX->r = X.r; pX->g = X.g; pX->b = X.b;
846 pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
851 //-------------------------------------------------------------------------------------
// OptimizeRGBA: four-channel (r, g, b, a) endpoint fit over the pixels selected from
// pPoints through pIndex. Parameters mirror the signature below:
//   pPoints - source colours for the block
//   pX, pY  - output endpoints (NOTE(review): the writes to them are not visible here)
//   cSteps  - number of interpolation steps; 3 selects pC3/pD3, otherwise pC4/pD4
//   cPixels - number of entries in pIndex
//   pIndex  - indices into pPoints of the pixels to fit
// NOTE(review): the return statements are not visible here; presumably an error value
// derived from fError is returned - confirm.
852 static float OptimizeRGBA(_In_count_c_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
853 _Out_ HDRColorA* pX, _Out_ HDRColorA* pY,
854 _In_ size_t cSteps, _In_ size_t cPixels, _In_count_(cPixels) const size_t* pIndex)
856 float fError = FLT_MAX;
857 const float *pC = (3 == cSteps) ? pC3 : pC4;
858 const float *pD = (3 == cSteps) ? pD3 : pD4;
860 // Find Min and Max points, as starting point
// The minimum is seeded with 1.0 and the maximum with 0.0 on every channel, so values
// outside [0,1] are bounded by these seeds.
861 HDRColorA X(1.0f, 1.0f, 1.0f, 1.0f);
862 HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
864 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
866 if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
867 if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
868 if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
869 if(pPoints[pIndex[iPoint]].a < X.a) X.a = pPoints[pIndex[iPoint]].a;
870 if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
871 if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
872 if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
873 if(pPoints[pIndex[iPoint]].a > Y.a) Y.a = pPoints[pIndex[iPoint]].a;
// Vector from the minimum corner to the maximum corner.
877 HDRColorA AB = Y - X;
880 // Single color block.. no need to root-find
888 // Try all four axis directions, to determine which diagonal best fits data
889 float fABInv = 1.0f / fAB;
890 HDRColorA Dir = AB * fABInv;
891 HDRColorA Mid = (X + Y) * 0.5f;
// Eight accumulators: one per sign combination of g, b and a (r is always positive).
894 fDir[0] = fDir[1] = fDir[2] = fDir[3] = fDir[4] = fDir[5] = fDir[6] = fDir[7] = 0.0f;
896 for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
899 Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
900 Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
901 Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
902 Pt.a = (pPoints[pIndex[iPoint]].a - Mid.a) * Dir.a;
905 f = Pt.r + Pt.g + Pt.b + Pt.a; fDir[0] += f * f;
906 f = Pt.r + Pt.g + Pt.b - Pt.a; fDir[1] += f * f;
907 f = Pt.r + Pt.g - Pt.b + Pt.a; fDir[2] += f * f;
908 f = Pt.r + Pt.g - Pt.b - Pt.a; fDir[3] += f * f;
909 f = Pt.r - Pt.g + Pt.b + Pt.a; fDir[4] += f * f;
910 f = Pt.r - Pt.g + Pt.b - Pt.a; fDir[5] += f * f;
911 f = Pt.r - Pt.g - Pt.b + Pt.a; fDir[6] += f * f;
912 f = Pt.r - Pt.g - Pt.b - Pt.a; fDir[7] += f * f;
915 float fDirMax = fDir[0];
918 for(size_t iDir = 1; iDir < 8; iDir++)
920 if(fDir[iDir] > fDirMax)
922 fDirMax = fDir[iDir];
// Bits 2, 1 and 0 of the winning direction correspond to negated g, b and a terms
// respectively (see the fDir accumulation above); swap those channels between X and Y.
927 if(iDirMax & 4) Swap(X.g, Y.g);
928 if(iDirMax & 2) Swap(X.b, Y.b);
929 if(iDirMax & 1) Swap(X.a, Y.a);
931 // Two color block.. no need to root-find
932 if(fAB < 1.0f / 4096.0f)
939 // Use Newton's Method to find local minima of sum-of-squares error.
940 float fSteps = (float) (cSteps - 1);
// At most 8 iterations; unlike OptimizeRGB this loop also stops once fError is no longer positive.
942 for(size_t iIteration = 0; iIteration < 8 && fError > 0.0f; iIteration++)
944 // Calculate new steps
945 HDRColorA pSteps[BC7_MAX_INDICES];
// NOTE(review): aSteps, lX and lY appear to feed only the commented-out Interpolate
// call below - confirm whether they are still needed.
947 LDRColorA aSteps[BC7_MAX_INDICES];
949 lX = (X * 255.0f).ToLDRColorA();
950 lY = (Y * 255.0f).ToLDRColorA();
952 for(size_t iStep = 0; iStep < cSteps; iStep++)
954 pSteps[iStep] = X * pC[iStep] + Y * pD[iStep];
955 //LDRColorA::Interpolate(lX, lY, i, i, wcprec, waprec, aSteps[i]);
958 // Calculate color direction
960 float fLen = Dir * Dir;
961 if(fLen < (1.0f / 4096.0f))
964 float fScale = fSteps / fLen;
967 // Evaluate function, and derivatives
968 float d2X = 0.0f, d2Y = 0.0f;
969 HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
971 for(size_t iPoint = 0; iPoint < cPixels; ++iPoint)
// Product of (point - X) with Dir, stored as a scalar.
973 float fDot = (pPoints[pIndex[iPoint]] - X) * Dir;
// Round to the nearest step index.
980 iStep = size_t(fDot + 0.5f);
982 HDRColorA Diff = pSteps[iStep] - pPoints[pIndex[iPoint]];
983 float fC = pC[iStep] * (1.0f / 8.0f);
984 float fD = pD[iStep] * (1.0f / 8.0f);
986 d2X += fC * pC[iStep];
989 d2Y += fD * pD[iStep];
// Step scale: negative reciprocal of the accumulated second-derivative term.
996 float f = -1.0f / d2X;
1002 float f = -1.0f / d2Y;
// Convergence test on dX and dY against fEpsilon.
1006 if((dX * dX < fEpsilon) && (dY * dY < fEpsilon))
1016 //-------------------------------------------------------------------------------------
1017 #pragma warning(disable: 4616 6001 6297)
// ComputeError: accumulates the error of the best palette match for `pixel`.
//   pixel       - colour to match
//   aPalette    - candidate palette entries
//   uIndexPrec  - bit width of the primary index; 1 << uIndexPrec entries are searched
//   uIndexPrec2 - bit width of the secondary index; 0 selects the single-index path
//   pBestIndex, pBestIndex2 - optional outputs, default nullptr
//                 (NOTE(review): the writes to them are not visible here)
// When uIndexPrec2 is 0 the combined ErrorMetric is searched over the palette.
// Otherwise the RGB metric is searched over 1 << uIndexPrec entries and the alpha
// metric over 1 << uIndexPrec2 entries, and both best errors are added to the total.
// NOTE(review): `pixel` is annotated _Inout_ although it is a const reference.
// NOTE(review): presumably returns fTotalErr; the return statement is not visible here.
1019 static float ComputeError(_Inout_ const LDRColorA& pixel, _In_count_x_(1 << uIndexPrec) const LDRColorA aPalette[],
1020 _In_ uint8_t uIndexPrec, _In_ uint8_t uIndexPrec2, _Out_opt_ size_t* pBestIndex = nullptr, _Out_opt_ size_t* pBestIndex2 = nullptr)
1022 const size_t uNumIndices = 1 << uIndexPrec;
1023 const size_t uNumIndices2 = 1 << uIndexPrec2;
1024 float fTotalErr = 0;
1025 float fBestErr = FLT_MAX;
1032 if(uIndexPrec2 == 0)
// Each search loop also stops as soon as an exact match (error 0) has been found.
1034 for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
1036 float fErr = ErrorMetric(pixel, aPalette[i]);
// Early-out assumes the error does not decrease again once it has started to rise
// along the palette - TODO confirm for all palettes passed in.
1037 if(fErr > fBestErr) // error increased, so we're done searching
1046 fTotalErr += fBestErr;
1050 for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
1052 float fErr = ErrorMetricRGB(pixel, aPalette[i]);
1053 if(fErr > fBestErr) // error increased, so we're done searching
1062 fTotalErr += fBestErr;
1064 for(register size_t i = 0; i < uNumIndices2 && fBestErr > 0; i++)
1066 float fErr = ErrorMetricAlpha(pixel, aPalette[i]);
1067 if(fErr > fBestErr) // error increased, so we're done searching
1076 fTotalErr += fBestErr;
// FillWithErrorColors: writes a fixed placeholder colour to every one of the
// NUM_PIXELS_PER_BLOCK entries of pOut.
// NOTE(review): two assignments are visible (magenta and opaque black); the
// preprocessor condition choosing between them is not visible here. Per the comments
// below, magenta is for debug builds and black for production.
1083 inline static void FillWithErrorColors( _Out_cap_c_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut )
1085 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1088 // Use Magenta in debug as a highly-visible error color
1089 pOut[i] = HDRColorA(1.0f, 0.0f, 1.0f, 1.0f);
1091 // In production use, default to black
1092 pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
1098 //-------------------------------------------------------------------------------------
1100 //-------------------------------------------------------------------------------------
// Decode: decodes this BC6H block into NUM_PIXELS_PER_BLOCK colours in pOut.
//   bSigned - selects signed handling in Unquantize / FinishUnquantize / ToF16
//   pOut    - receives the decoded r, g, b values (converted from half to float)
// Invalid header bits, index reads past bit 128, out-of-range indices and unknown
// modes all result in FillWithErrorColors(pOut) plus a debug string.
1101 void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const
1105 size_t uStartBit = 0;
// The mode is 2 bits when those bits are 00 or 01; otherwise 3 more bits are read and
// placed above the first two, giving a 5-bit mode.
1106 uint8_t uMode = GetBits(uStartBit, 2);
1107 if(uMode != 0x00 && uMode != 0x01)
1109 uMode = (GetBits(uStartBit, 3) << 2) | uMode;
1112 assert( uMode < 32 );
1113 __analysis_assume( uMode < 32 );
// A negative ms_aModeToInfo entry marks a mode that has no descriptor (handled at the end).
1115 if ( ms_aModeToInfo[uMode] >= 0 )
// NOTE(review): the two assert/assume pairs below look swapped relative to the arrays
// they precede (ms_aInfo is checked before indexing ms_aDesc and vice versa);
// harmless if both arrays have the same length - confirm.
1117 assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
1118 __analysis_assume(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
1119 const ModeDescriptor* desc = ms_aDesc[ms_aModeToInfo[uMode]];
1121 assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
1122 __analysis_assume(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
1123 const ModeInfo& info = ms_aInfo[ms_aModeToInfo[uMode]];
1125 INTEndPntPair aEndPts[BC6H_MAX_REGIONS];
// NOTE(review): the byte count assumes INTEndPntPair is exactly two INTColor values - confirm.
1126 memset(aEndPts, 0, BC6H_MAX_REGIONS * 2 * sizeof(INTColor));
1127 uint32_t uShape = 0;
// Header length: 82 bits for modes with partitions, 65 bits otherwise.
1130 const size_t uHeaderBits = info.uPartitions > 0 ? 82 : 65;
1131 while(uStartBit < uHeaderBits)
// Remember the position of the bit being read; it indexes the descriptor table.
// NOTE(review): GetBit presumably advances uStartBit - confirm.
1133 size_t uCurBit = uStartBit;
1134 if(GetBit(uStartBit))
// For each set header bit, OR the corresponding bit (m_uBit) into the field named by
// the descriptor: D is the shape, R/G/B + W/X/Y/Z are the channels of the four endpoints.
1136 switch(desc[uCurBit].m_eField)
1138 case D: uShape |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1139 case RW: aEndPts[0].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1140 case RX: aEndPts[0].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1141 case RY: aEndPts[1].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1142 case RZ: aEndPts[1].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1143 case GW: aEndPts[0].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1144 case GX: aEndPts[0].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1145 case GY: aEndPts[1].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1146 case GZ: aEndPts[1].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1147 case BW: aEndPts[0].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1148 case BX: aEndPts[0].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1149 case BY: aEndPts[1].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1150 case BZ: aEndPts[1].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1154 OutputDebugStringA( "BC6H: Invalid header bits encountered during decoding\n" );
1156 FillWithErrorColors( pOut );
1163 assert( uShape < 64 );
1164 __analysis_assume( uShape < 64 );
1166 // Sign extend necessary end points
// NOTE(review): the conditions guarding the individual SignExtend calls are only
// partly visible here - confirm against the full source.
1169 aEndPts[0].A.SignExtend(info.RGBAPrec[0][0]);
1171 if(bSigned || info.bTransformed)
1173 assert( info.uPartitions < BC6H_MAX_REGIONS );
1174 __analysis_assume( info.uPartitions < BC6H_MAX_REGIONS );
1175 for(size_t p = 0; p <= info.uPartitions; ++p)
1179 aEndPts[p].A.SignExtend(info.RGBAPrec[p][0]);
1181 aEndPts[p].B.SignExtend(info.RGBAPrec[p][1]);
1185 // Inverse transform the end points
1186 if(info.bTransformed)
1188 TransformInverse(aEndPts, info.RGBAPrec[0][0], bSigned);
// Decode one index per pixel and interpolate its colour.
1192 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
// Fix-up pixels are stored with one bit fewer than the other indices.
1194 size_t uNumBits = IsFixUpOffset(info.uPartitions, uShape, i) ? info.uIndexPrec-1 : info.uIndexPrec;
// Guard against reading past the end of the 128-bit block.
1195 if ( uStartBit + uNumBits > 128 )
1198 OutputDebugStringA( "BC6H: Invalid block encountered during decoding\n" );
1200 FillWithErrorColors( pOut );
1203 uint8_t uIndex = GetBits(uStartBit, uNumBits);
// The index must fit the weight table used below (8 entries with partitions, 16 without).
1205 if ( uIndex >= ((info.uPartitions > 0) ? 8 : 16) )
1208 OutputDebugStringA( "BC6H: Invalid index encountered during decoding\n" );
1210 FillWithErrorColors( pOut );
1214 size_t uRegion = g_aPartitionTable[info.uPartitions][uShape][i];
1215 assert( uRegion < BC6H_MAX_REGIONS );
1216 __analysis_assume( uRegion < BC6H_MAX_REGIONS );
1218 // Unquantize endpoints and interpolate
// All endpoints are unquantized with the precision of RGBAPrec[0][0].
1219 int r1 = Unquantize(aEndPts[uRegion].A.r, info.RGBAPrec[0][0].r, bSigned);
1220 int g1 = Unquantize(aEndPts[uRegion].A.g, info.RGBAPrec[0][0].g, bSigned);
1221 int b1 = Unquantize(aEndPts[uRegion].A.b, info.RGBAPrec[0][0].b, bSigned);
1222 int r2 = Unquantize(aEndPts[uRegion].B.r, info.RGBAPrec[0][0].r, bSigned);
1223 int g2 = Unquantize(aEndPts[uRegion].B.g, info.RGBAPrec[0][0].g, bSigned);
1224 int b2 = Unquantize(aEndPts[uRegion].B.b, info.RGBAPrec[0][0].b, bSigned);
1225 const int* aWeights = info.uPartitions > 0 ? g_aWeights3 : g_aWeights4;
// Weighted blend of the two endpoints, rounded and shifted, then passed to FinishUnquantize.
1227 fc.r = FinishUnquantize((r1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + r2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1228 fc.g = FinishUnquantize((g1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + g2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1229 fc.b = FinishUnquantize((b1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + b2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
// Convert the integer colour to half floats, then widen each channel to float.
1232 fc.ToF16(rgb, bSigned);
1234 pOut[i].r = XMConvertHalfToFloat( rgb[0] );
1235 pOut[i].g = XMConvertHalfToFloat( rgb[1] );
1236 pOut[i].b = XMConvertHalfToFloat( rgb[2] );
1243 OutputDebugStringA( "BC6H: Invalid mode encountered during decoding\n" );
1245 FillWithErrorColors( pOut );
// Encode: encodes the pixels in pIn into this block.
//   bSigned - passed to EncodeParams together with pIn
//   pIn     - source pixels for the block
// Every entry of ms_aInfo is tried in order until EP.fBestErr reaches 0. For each mode
// a rough MSE is computed for every candidate shape, the best uItems shapes are moved
// to the front, and those shapes are then processed one by one.
// NOTE(review): the call made for each selected shape is not visible here; presumably
// Refine(&EP) - confirm.
1249 void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn)
1253 EncodeParams EP(pIn, bSigned);
1255 for(EP.uMode = 0; EP.uMode < ARRAYSIZE(ms_aInfo) && EP.fBestErr > 0; ++EP.uMode)
// 32 candidate shapes for modes with partitions, a single one otherwise.
1257 const uint8_t uShapes = ms_aInfo[EP.uMode].uPartitions ? 32 : 1;
1258 // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
1259 // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
1260 const size_t uItems = std::max<size_t>(1, uShapes >> 2);
1261 float afRoughMSE[BC6H_MAX_SHAPES];
1262 uint8_t auShape[BC6H_MAX_SHAPES];
1264 // pick the best uItems shapes and refine these.
1265 for(EP.uShape = 0; EP.uShape < uShapes; ++EP.uShape)
1267 size_t uShape = EP.uShape;
1268 afRoughMSE[uShape] = RoughMSE(&EP);
1269 auShape[uShape] = static_cast<uint8_t>(uShape);
1272 // Bubble up the first uItems items
// Partial selection sort: only the first uItems slots end up ordered by rough MSE.
1273 for(register size_t i = 0; i < uItems; i++)
1275 for(register size_t j = i + 1; j < uShapes; j++)
1277 if(afRoughMSE[i] > afRoughMSE[j])
// Keep the shape ids paired with their errors.
1279 Swap(afRoughMSE[i], afRoughMSE[j]);
1280 Swap(auShape[i], auShape[j]);
// Process the selected shapes; stop early once an exact match (error 0) has been found.
1285 for(size_t i = 0; i < uItems && EP.fBestErr > 0; i++)
1287 EP.uShape = auShape[i];
1294 //-------------------------------------------------------------------------------------
// Quantize: reduces iValue to a value of `prec` bits.
//   iValue  - value to quantize; asserted to lie within [-F16MAX, F16MAX] on one path
//             and within [0, F16MAX] on the other
//   prec    - target precision in bits, must be > 1
//   bSigned - NOTE(review): presumably selects between the two paths below; the
//             branch condition, sign handling and return are not visible here - confirm.
1295 int D3DX_BC6H::Quantize(int iValue, int prec, bool bSigned)
1297 assert(prec > 1); // didn't bother to make it work for 1
1301 assert(iValue >= -F16MAX && iValue <= F16MAX);
// Path allowing negative input: values pass through unchanged when prec >= 16,
// otherwise they are scaled by 2^(prec-1) / (F16MAX+1).
1307 q = (prec >= 16) ? iValue : (iValue << (prec-1)) / (F16MAX+1);
1310 assert (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
// Non-negative path: values pass through unchanged when prec >= 15, otherwise they
// are scaled by 2^prec / (F16MAX+1).
1314 assert(iValue >= 0 && iValue <= F16MAX);
1315 q = (prec >= 15) ? iValue : (iValue << prec) / (F16MAX+1);
1316 assert (q >= 0 && q < (1 << prec));
// Unquantize: expands a quantized component of uBitsPerComp bits.
//   comp         - quantized component value
//   uBitsPerComp - number of bits the component was stored with
//   bSigned      - NOTE(review): presumably selects between the two groups of cases
//                  below; the branch, sign handling and return are not visible - confirm.
1322 int D3DX_BC6H::Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned)
1327 if(uBitsPerComp >= 16)
// First group: 0 maps to 0, values at or above the largest (uBitsPerComp-1)-bit value
// map to 0x7FFF, everything else is scaled with rounding.
1339 if(comp == 0) unq = 0;
1340 else if(comp >= ((1 << (uBitsPerComp - 1)) - 1)) unq = 0x7FFF;
1341 else unq = ((comp << 15) + 0x4000) >> (uBitsPerComp-1);
// Second group: pass-through at 15 bits or more, 0 maps to 0, the all-ones value maps
// to 0xFFFF, everything else is scaled with rounding.
1348 if(uBitsPerComp >= 15) unq = comp;
1349 else if(comp == 0) unq = 0;
1350 else if(comp == ((1 << uBitsPerComp) - 1)) unq = 0xFFFF;
1351 else unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
// FinishUnquantize: final scaling step applied to an interpolated component.
//   comp    - interpolated component value
//   bSigned - NOTE(review): presumably selects the 31/32 path (true) versus the 31/64
//             path (false); the branch itself is not visible here - confirm.
1357 int D3DX_BC6H::FinishUnquantize(int comp, bool bSigned)
// Negative values are scaled on their magnitude and then negated again.
1361 return (comp < 0) ? -(((-comp) * 31) >> 5) : (comp * 31) >> 5; // scale the magnitude by 31/32
1365 return (comp * 31) >> 6; // scale the magnitude by 31/64
1370 //-------------------------------------------------------------------------------------
// EndPointsFit: compares the number of bits needed by each endpoint channel with the
// precision the current mode (pEP->uMode) provides for it.
//   pEP     - supplies the mode and the signed flag
//   aEndPts - endpoints to check; aEndPts[1] is only examined for modes with partitions
// NOTE(review): the return statements are not visible here; presumably false is
// returned as soon as a channel needs more bits than available, true otherwise - confirm.
1371 bool D3DX_BC6H::EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPts[])
1374 const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
1375 const bool bIsSigned = pEP->bSigned;
// Available precision for endpoints 0.A, 0.B, 1.A and 1.B respectively.
1376 const LDRColorA& Prec0 = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1377 const LDRColorA& Prec1 = ms_aInfo[pEP->uMode].RGBAPrec[0][1];
1378 const LDRColorA& Prec2 = ms_aInfo[pEP->uMode].RGBAPrec[1][0];
1379 const LDRColorA& Prec3 = ms_aInfo[pEP->uMode].RGBAPrec[1][1];
// Endpoint 0.A is measured as signed only when the format is signed; the other
// endpoints are also measured as signed when the mode is transformed.
1382 aBits[0].r = NBits(aEndPts[0].A.r, bIsSigned);
1383 aBits[0].g = NBits(aEndPts[0].A.g, bIsSigned);
1384 aBits[0].b = NBits(aEndPts[0].A.b, bIsSigned);
1385 aBits[1].r = NBits(aEndPts[0].B.r, bTransformed || bIsSigned);
1386 aBits[1].g = NBits(aEndPts[0].B.g, bTransformed || bIsSigned);
1387 aBits[1].b = NBits(aEndPts[0].B.b, bTransformed || bIsSigned);
1388 if(aBits[0].r > Prec0.r || aBits[1].r > Prec1.r ||
1389 aBits[0].g > Prec0.g || aBits[1].g > Prec1.g ||
1390 aBits[0].b > Prec0.b || aBits[1].b > Prec1.b)
// The second endpoint pair only exists for modes with partitions.
1393 if(ms_aInfo[pEP->uMode].uPartitions)
1395 aBits[2].r = NBits(aEndPts[1].A.r, bTransformed || bIsSigned);
1396 aBits[2].g = NBits(aEndPts[1].A.g, bTransformed || bIsSigned);
1397 aBits[2].b = NBits(aEndPts[1].A.b, bTransformed || bIsSigned);
1398 aBits[3].r = NBits(aEndPts[1].B.r, bTransformed || bIsSigned);
1399 aBits[3].g = NBits(aEndPts[1].B.g, bTransformed || bIsSigned);
1400 aBits[3].b = NBits(aEndPts[1].B.b, bTransformed || bIsSigned);
1402 if(aBits[2].r > Prec2.r || aBits[3].r > Prec3.r ||
1403 aBits[2].g > Prec2.g || aBits[3].g > Prec3.g ||
1404 aBits[2].b > Prec2.b || aBits[3].b > Prec3.b)
// GeneratePaletteQuantized: builds the palette produced by a pair of quantized endpoints.
//   pEP      - supplies the mode (index precision, endpoint precision) and signed flag
//   endPts   - quantized endpoint pair
//   aPalette - receives 1 << uIndexPrec entries
// The endpoints are unquantized with the precision of RGBAPrec[0][0], blended with the
// 3-bit or 4-bit weight table and passed through FinishUnquantize.
1411 void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPntPair& endPts, INTColor aPalette[]) const
1414 const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1415 const size_t uNumIndices = 1 << uIndexPrec;
1416 assert( uNumIndices > 0 );
1417 __analysis_assume( uNumIndices > 0 );
1418 const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1421 INTEndPntPair unqEndPts;
1422 unqEndPts.A.r = Unquantize(endPts.A.r, Prec.r, pEP->bSigned);
1423 unqEndPts.A.g = Unquantize(endPts.A.g, Prec.g, pEP->bSigned);
1424 unqEndPts.A.b = Unquantize(endPts.A.b, Prec.b, pEP->bSigned);
1425 unqEndPts.B.r = Unquantize(endPts.B.r, Prec.r, pEP->bSigned);
1426 unqEndPts.B.g = Unquantize(endPts.B.g, Prec.g, pEP->bSigned);
1427 unqEndPts.B.b = Unquantize(endPts.B.b, Prec.b, pEP->bSigned);
1430 const int* aWeights = nullptr;
// NOTE(review): the switch expression is not visible here; presumably uIndexPrec - confirm.
// Any value other than 3 or 4 asserts, zero-fills the palette and returns.
1433 case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); __analysis_assume(uNumIndices <= 8); break;
1434 case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); __analysis_assume(uNumIndices <= 16); break;
1435 default: assert(false); for(size_t i=0; i < uNumIndices; ++i) aPalette[i] = INTColor(0,0,0); return;
1438 for (size_t i = 0; i < uNumIndices; ++i)
// Weighted blend of A and B, rounded and shifted.
1440 aPalette[i].r = FinishUnquantize(
1441 (unqEndPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1443 aPalette[i].g = FinishUnquantize(
1444 (unqEndPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1446 aPalette[i].b = FinishUnquantize(
1447 (unqEndPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1452 // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
//   pEP     - supplies the mode (index precision) used to size the palette
//   aColors - colours to map
//   np      - number of entries in aColors
//   endPts  - quantized endpoint pair the palette is generated from
// NOTE(review): the declaration and return of fTotErr are not visible here; presumably
// it starts at zero and is the return value - confirm.
1453 float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aColors[], size_t np, const INTEndPntPair &endPts) const
1457 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1458 const uint8_t uNumIndices = 1 << uIndexPrec;
1459 INTColor aPalette[BC6H_MAX_INDICES];
1460 GeneratePaletteQuantized(pEP, endPts, aPalette);
1463 for(size_t i = 0; i < np; ++i)
// Start from palette entry 0 and walk forward while the error keeps improving.
1465 float fBestErr = Norm(aColors[i], aPalette[0]);
1466 for(int j = 1; j < uNumIndices && fBestErr > 0; ++j)
1468 float fErr = Norm(aColors[i], aPalette[j]);
1469 if(fErr > fBestErr) break; // error increased, so we're done searching
1470 if(fErr < fBestErr) fBestErr = fErr;
1472 fTotErr += fBestErr;
// PerturbOne: searches for a better value of one channel of one endpoint.
//   pEP       - supplies the mode (channel precision)
//   aColors   - colours the endpoints are evaluated against
//   np        - number of entries in aColors
//   ch        - channel to perturb: 0 = r, 1 = g, 2 = b; any other value asserts,
//               copies oldEndPts to newEndPts and returns FLT_MAX
//   oldEndPts - starting endpoints
//   newEndPts - receives the (possibly improved) endpoints
//   fOldErr   - error of oldEndPts, used as the initial best error
//   do_b      - NOTE(review): presumably non-zero perturbs endpoint B and zero perturbs
//               endpoint A; the branch is not visible here - confirm.
// NOTE(review): the return statement is not visible here; presumably fMinErr - confirm.
1477 float D3DX_BC6H::PerturbOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, uint8_t ch,
1478 const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts, float fOldErr, int do_b) const
// Precision of the selected channel, always taken from RGBAPrec[0][0].
1484 case 0: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].r; break;
1485 case 1: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].g; break;
1486 case 2: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].b; break;
1487 default: assert(false); newEndPts = oldEndPts; return FLT_MAX;
1489 INTEndPntPair tmpEndPts;
1490 float fMinErr = fOldErr;
1493 // copy real endpoints so we can perturb them
1494 tmpEndPts = newEndPts = oldEndPts;
1496 // do a logarithmic search for the best error for this endpoint (which)
// Step sizes halve from 1 << (uPrec-1) down to 1.
1497 for(int step = 1 << (uPrec-1); step; step >>= 1)
1499 bool bImproved = false;
// Try the step in both directions (-1 and +1).
1500 for(int sign = -1; sign <= 1; sign += 2)
// Candidates outside [0, 1 << uPrec) are tested for below.
// NOTE(review): what happens to them is not visible here; presumably they are skipped.
1504 tmpEndPts.A[ch] = newEndPts.A[ch] + sign * step;
1505 if(tmpEndPts.A[ch] < 0 || tmpEndPts.A[ch] >= (1 << uPrec))
1510 tmpEndPts.B[ch] = newEndPts.B[ch] + sign * step;
1511 if(tmpEndPts.B[ch] < 0 || tmpEndPts.B[ch] >= (1 << uPrec))
1515 float fErr = MapColorsQuantized(pEP, aColors, np, tmpEndPts);
1521 beststep = sign * step;
1524 // if this was an improvement, move the endpoint and continue search from there
1528 newEndPts.A[ch] += beststep;
1530 newEndPts.B[ch] += beststep;
// OptimizeOne: improves one endpoint pair by perturbing each of the three channels
// in turn with PerturbOne.
//   pEP        - encoder state passed through to PerturbOne
//   aColors    - colours belonging to the region being optimized
//   np         - number of entries in aColors
//   aOrgErr    - error of the original endpoints
//   aOrgEndPts - original endpoints
//   aOptEndPts - receives the optimized endpoints (initialised from aOrgEndPts)
1536 void D3DX_BC6H::OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, float aOrgErr,
1537 const INTEndPntPair &aOrgEndPts, INTEndPntPair &aOptEndPts) const
1540 float aOptErr = aOrgErr;
1541 aOptEndPts.A = aOrgEndPts.A;
1542 aOptEndPts.B = aOrgEndPts.B;
1544 INTEndPntPair new_a, new_b;
1545 INTEndPntPair newEndPts;
1548 // now optimize each channel separately
1549 for(uint8_t ch = 0; ch < 3; ++ch)
1551 // figure out which endpoint when perturbed gives the most improvement and start there
1552 // if we just alternate, we can easily end up in a local minima
1553 float fErr0 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_a, aOptErr, 0); // perturb endpt A
1554 float fErr1 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_b, aOptErr, 1); // perturb endpt B
// NOTE(review): the comparison choosing between the two candidates is not visible
// here; presumably the smaller of fErr0 / fErr1 wins - confirm.
// A channel is left untouched when the chosen perturbation brings no improvement.
1558 if(fErr0 >= aOptErr) continue;
1559 aOptEndPts.A[ch] = new_a.A[ch];
1561 do_b = 1; // do B next
1565 if(fErr1 >= aOptErr) continue;
1566 aOptEndPts.B[ch] = new_b.B[ch];
1568 do_b = 0; // do A next
1571 // now alternate endpoints and keep trying until there is no improvement
1574 float fErr = PerturbOne(pEP, aColors, np, ch, aOptEndPts, newEndPts, aOptErr, do_b);
1578 aOptEndPts.A[ch] = newEndPts.A[ch];
1580 aOptEndPts.B[ch] = newEndPts.B[ch];
1582 do_b = 1 - do_b; // now move the other endpoint
1587 void D3DX_BC6H::OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[], const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[]) const
1590 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1591 assert( uPartitions < BC6H_MAX_REGIONS );
1592 __analysis_assume( uPartitions < BC6H_MAX_REGIONS );
1593 INTColor aPixels[NUM_PIXELS_PER_BLOCK];
1595 for(size_t p = 0; p <= uPartitions; ++p)
1597 // collect the pixels in the region
1599 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1601 if(g_aPartitionTable[p][pEP->uShape][i] == p)
1603 aPixels[np++] = pEP->aIPixels[i];
1607 OptimizeOne(pEP, aPixels, np, aOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
1611 // Swap endpoints as needed to ensure that the indices at fix up have a 0 high-order bit
//   pEP      - supplies the mode (partition count, index precision) and the shape
//   aEndPts  - per-region endpoints; A and B are exchanged for regions that need it
//   aIndices - per-pixel indices; inverted for every pixel of a swapped region
1612 void D3DX_BC6H::SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], size_t aIndices[])
1615 const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1616 const size_t uNumIndices = 1 << ms_aInfo[pEP->uMode].uIndexPrec;
// Value of the most significant index bit.
1617 const size_t uHighIndexBit = uNumIndices >> 1;
1619 assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1620 __analysis_assume( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1622 for(size_t p = 0; p <= uPartitions; ++p)
// i is the fix-up pixel of region p; it must itself belong to region p.
1624 size_t i = g_aFixUp[uPartitions][pEP->uShape][p];
1625 assert(g_aPartitionTable[uPartitions][pEP->uShape][i] == p);
1626 if(aIndices[i] & uHighIndexBit)
1628 // high bit is set, swap the aEndPts and indices for this region
1629 Swap(aEndPts[p].A, aEndPts[p].B);
// With the endpoints exchanged, index k becomes (uNumIndices - 1 - k) for every
// pixel of the region.
1631 for(size_t j = 0; j < NUM_PIXELS_PER_BLOCK; ++j)
1632 if(g_aPartitionTable[uPartitions][pEP->uShape][j] == p)
1633 aIndices[j] = uNumIndices - 1 - aIndices[j];
1638 // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
//   pEP      - supplies the mode, shape and integer pixels of the block
//   aEndPts  - per-region quantized endpoints
//   aIndices - per-pixel index output
//   aTotErr  - per-region accumulated best error
// NOTE(review): the writes to aIndices and the initialisation of aTotErr are not
// visible here - confirm against the full source.
1639 void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndPts[], size_t aIndices[], float aTotErr[]) const
1642 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1643 const uint8_t uNumIndices = 1 << ms_aInfo[pEP->uMode].uIndexPrec;
1645 assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1646 __analysis_assume( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1648 // build list of possibles
1649 INTColor aPalette[BC6H_MAX_REGIONS][BC6H_MAX_INDICES];
// One palette per region, generated from that region's endpoints.
1651 for(size_t p = 0; p <= uPartitions; ++p)
1653 GeneratePaletteQuantized(pEP, aEndPts[p], aPalette[p]);
1657 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
// Region of pixel i for the current partition count and shape.
1659 const uint8_t uRegion = g_aPartitionTable[uPartitions][pEP->uShape][i];
1660 assert( uRegion < BC6H_MAX_REGIONS );
1661 __analysis_assume( uRegion < BC6H_MAX_REGIONS );
// Start from palette entry 0 and walk forward while the error keeps improving.
1662 float fBestErr = Norm(pEP->aIPixels[i], aPalette[uRegion][0]);
1665 for(uint8_t j = 1; j < uNumIndices && fBestErr > 0; ++j)
1667 float fErr = Norm(pEP->aIPixels[i], aPalette[uRegion][j]);
1668 if(fErr > fBestErr) break; // error increased, so we're done searching
1675 aTotErr[uRegion] += fBestErr;
// QuantizeEndPts: quantizes the unquantized endpoints stored for the current shape.
//   pEP        - supplies the mode, shape, signed flag and pEP->aUnqEndPts[pEP->uShape]
//   aQntEndPts - receives one quantized endpoint pair per region (0..uPartitions)
// Every channel of every endpoint is quantized with the precision of RGBAPrec[0][0].
1679 void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPts) const
1681 assert( pEP && aQntEndPts );
1682 const INTEndPntPair* aUnqEndPts = pEP->aUnqEndPts[pEP->uShape];
1683 const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1684 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1685 assert( uPartitions < BC6H_MAX_REGIONS );
1686 __analysis_assume( uPartitions < BC6H_MAX_REGIONS );
1688 for(size_t p = 0; p <= uPartitions; ++p)
1690 aQntEndPts[p].A.r = Quantize(aUnqEndPts[p].A.r, Prec.r, pEP->bSigned);
1691 aQntEndPts[p].A.g = Quantize(aUnqEndPts[p].A.g, Prec.g, pEP->bSigned);
1692 aQntEndPts[p].A.b = Quantize(aUnqEndPts[p].A.b, Prec.b, pEP->bSigned);
1693 aQntEndPts[p].B.r = Quantize(aUnqEndPts[p].B.r, Prec.r, pEP->bSigned);
1694 aQntEndPts[p].B.g = Quantize(aUnqEndPts[p].B.g, Prec.g, pEP->bSigned);
1695 aQntEndPts[p].B.b = Quantize(aUnqEndPts[p].B.b, Prec.b, pEP->bSigned);
// EmitBlock: writes the encoded block: header bits first, then one index per pixel.
//   pEP      - supplies the mode and shape
//   aEndPts  - endpoints to store (aEndPts[0] and aEndPts[1])
//   aIndices - per-pixel indices
// The header is written one bit at a time, driven by the descriptor table of the
// current mode: each entry names the field and the bit of that field to store.
// NOTE(review): SetBit / SetBits are expected to advance uStartBit; the header loop
// and the final assert rely on it - confirm.
1699 void D3DX_BC6H::EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[], const size_t aIndices[])
1702 const uint8_t uRealMode = ms_aInfo[pEP->uMode].uMode;
1703 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1704 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
// Header length: 82 bits for modes with partitions, 65 bits otherwise.
1705 const size_t uHeaderBits = uPartitions > 0 ? 82 : 65;
1706 const ModeDescriptor* desc = ms_aDesc[pEP->uMode];
1707 size_t uStartBit = 0;
1709 while(uStartBit < uHeaderBits)
// M = mode bits, D = shape bits, R/G/B + W/X/Y/Z = channels of the four endpoints.
1711 switch(desc[uStartBit].m_eField)
1713 case M: SetBit(uStartBit, uint8_t(uRealMode >> desc[uStartBit].m_uBit) & 0x01); break;
1714 case D: SetBit(uStartBit, uint8_t(pEP->uShape >> desc[uStartBit].m_uBit) & 0x01); break;
1715 case RW: SetBit(uStartBit, uint8_t(aEndPts[0].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
1716 case RX: SetBit(uStartBit, uint8_t(aEndPts[0].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
1717 case RY: SetBit(uStartBit, uint8_t(aEndPts[1].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
1718 case RZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
1719 case GW: SetBit(uStartBit, uint8_t(aEndPts[0].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
1720 case GX: SetBit(uStartBit, uint8_t(aEndPts[0].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
1721 case GY: SetBit(uStartBit, uint8_t(aEndPts[1].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
1722 case GZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
1723 case BW: SetBit(uStartBit, uint8_t(aEndPts[0].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
1724 case BX: SetBit(uStartBit, uint8_t(aEndPts[0].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
1725 case BY: SetBit(uStartBit, uint8_t(aEndPts[1].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
1726 case BZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
1727 default: assert(false);
1731 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
// Fix-up pixels are stored with one bit fewer than the other indices.
1733 if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, pEP->uShape, i))
1734 SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aIndices[i] ));
1736 SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aIndices[i] ));
// Header plus indices must fill the 128-bit block exactly.
1738 assert(uStartBit == 128);
// Refine: evaluates the current mode/shape in pEP and emits a block when it improves
// on pEP->fBestErr.
// Steps: quantize the endpoints, assign indices, apply the fix-up swap, and check that
// the (optionally transformed) endpoints fit the mode. If they fit, optimize the
// endpoints and repeat assignment/swap, then emit whichever of the optimized or
// original endpoints is usable and better than the best error so far.
1741 void D3DX_BC6H::Refine(EncodeParams* pEP)
1744 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1745 assert( uPartitions < BC6H_MAX_REGIONS );
1746 __analysis_assume( uPartitions < BC6H_MAX_REGIONS );
1748 const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
1749 float aOrgErr[BC6H_MAX_REGIONS], aOptErr[BC6H_MAX_REGIONS];
1750 INTEndPntPair aOrgEndPts[BC6H_MAX_REGIONS], aOptEndPts[BC6H_MAX_REGIONS];
1751 size_t aOrgIdx[NUM_PIXELS_PER_BLOCK], aOptIdx[NUM_PIXELS_PER_BLOCK];
1753 QuantizeEndPts(pEP, aOrgEndPts);
1754 AssignIndices(pEP, aOrgEndPts, aOrgIdx, aOrgErr);
1755 SwapIndices(pEP, aOrgEndPts, aOrgIdx);
// The fit check runs on the transformed (delta) form for transformed modes.
1757 if(bTransformed) TransformForward(aOrgEndPts);
1758 if(EndPointsFit(pEP, aOrgEndPts))
// Back to absolute endpoints before optimizing.
1760 if(bTransformed) TransformInverse(aOrgEndPts, ms_aInfo[pEP->uMode].RGBAPrec[0][0], pEP->bSigned);
1761 OptimizeEndPoints(pEP, aOrgErr, aOrgEndPts, aOptEndPts);
1762 AssignIndices(pEP, aOptEndPts, aOptIdx, aOptErr);
1763 SwapIndices(pEP, aOptEndPts, aOptIdx);
// Total error over all regions for both candidates.
1765 float fOrgTotErr = 0.0f, fOptTotErr = 0.0f;
1766 for(size_t p = 0; p <= uPartitions; ++p)
1768 fOrgTotErr += aOrgErr[p];
1769 fOptTotErr += aOptErr[p];
// The optimized endpoints are used only when they still fit, beat the original
// endpoints, and beat the best error found so far.
1772 if(bTransformed) TransformForward(aOptEndPts);
1773 if(EndPointsFit(pEP, aOptEndPts) && fOptTotErr < fOrgTotErr && fOptTotErr < pEP->fBestErr)
1775 pEP->fBestErr = fOptTotErr;
1776 EmitBlock(pEP, aOptEndPts, aOptIdx);
1778 else if(fOrgTotErr < pEP->fBestErr)
1780 // either it stopped fitting when we optimized it, or there was no improvement
1781 // so go back to the unoptimized endpoints which we know will fit
1782 if(bTransformed) TransformForward(aOrgEndPts);
1783 pEP->fBestErr = fOrgTotErr;
1784 EmitBlock(pEP, aOrgEndPts, aOrgIdx);
// Builds the interpolated palette for one region from the unquantized endpoints stored in
// pEP->aUnqEndPts[pEP->uShape][uRegion].
//
// pEP:      encoder state (uMode selects the index precision, uShape selects the endpoint set).
// uRegion:  region index; asserted to be below BC6H_MAX_REGIONS.
// aPalette: output; entries 0 .. (1 << uIndexPrec) - 1 are written.
1789 void D3DX_BC6H::GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegion, INTColor aPalette[])
1792 assert( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1793 __analysis_assume( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1794 const INTEndPntPair& endPts = pEP->aUnqEndPts[pEP->uShape][uRegion];
1795 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1796 const uint8_t uNumIndices = 1 << uIndexPrec;
1797 assert( uNumIndices > 0 );
1798 __analysis_assume( uNumIndices > 0 );
// weight table selection: case 3 uses the 8-entry table, case 4 the 16-entry table
// (NOTE(review): the switch selector is presumably uIndexPrec - confirm);
// any other value asserts, fills the palette with INTColor(0,0,0) and returns early
1800 const int* aWeights = nullptr;
1803 case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); __analysis_assume(uNumIndices <= 8); break;
1804 case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); __analysis_assume(uNumIndices <= 16); break;
1805 default: assert(false); for(size_t i = 0; i < uNumIndices; ++i) aPalette[i] = INTColor(0,0,0); return;
// per channel: (A * (BC67_WEIGHT_MAX - w) + B * w + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT
1808 for(register size_t i = 0; i < uNumIndices; ++i)
1810 aPalette[i].r = (endPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1811 aPalette[i].g = (endPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1812 aPalette[i].b = (endPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
// Accumulates the error of mapping a set of pixels onto the unquantized palette of one region.
//
// pEP:     encoder state (source pixels in aIPixels, mode in uMode).
// uRegion: region whose palette is generated with GeneratePaletteUnquantized.
// np:      number of entries in auIndex.
// auIndex: indices into pEP->aIPixels of the pixels to map.
// For each pixel the smallest Norm() against the palette is added to fTotalErr.
1816 float D3DX_BC6H::MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, const size_t* auIndex) const
1819 const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1820 const uint8_t uNumIndices = 1 << uIndexPrec;
1821 INTColor aPalette[BC6H_MAX_INDICES];
1822 GeneratePaletteUnquantized(pEP, uRegion, aPalette);
1824 float fTotalErr = 0.0f;
1825 for(size_t i = 0; i < np; ++i)
// start from palette entry 0 and walk forward; the walk stops at an exact match (error 0)
// or at the first entry whose error is larger than the best seen so far
1827 float fBestErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[0]);
1828 for(uint8_t j = 1; j < uNumIndices && fBestErr > 0.0f; ++j)
1830 float fErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[j]);
1831 if(fErr > fBestErr) break; // error increased, so we're done searching
1832 if(fErr < fBestErr) fBestErr = fErr;
1834 fTotalErr += fBestErr;
// Produces a quick error estimate for the shape in pEP->uShape and stores the unquantized
// endpoints it derives in pEP->aUnqEndPts[pEP->uShape].
//
// For every region 0..uPartitions the pixels belonging to it (per g_aPartitionTable) are
// considered, endpoints are chosen for the region, and the region's MapColors() error is
// added to fError.
1840 float D3DX_BC6H::RoughMSE(EncodeParams* pEP) const
1843 assert( pEP->uShape < BC6H_MAX_SHAPES);
1844 __analysis_assume( pEP->uShape < BC6H_MAX_SHAPES);
1846 INTEndPntPair* aEndPts = pEP->aUnqEndPts[pEP->uShape];
1848 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1849 assert( uPartitions < BC6H_MAX_REGIONS );
1850 __analysis_assume( uPartitions < BC6H_MAX_REGIONS );
1852 size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
1854 float fError = 0.0f;
1855 for(size_t p = 0; p <= uPartitions; ++p)
// select the pixels whose partition-table entry equals the current region
1858 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1860 if(g_aPartitionTable[uPartitions][pEP->uShape][i] == p)
1866 // handle simple cases
// first simple case: both endpoints are set to the same (first) pixel of the region
1870 aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
1871 aEndPts[p].B = pEP->aIPixels[auPixIdx[0]];
// second simple case: the first and second pixel of the region become the two endpoints
1876 aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
1877 aEndPts[p].B = pEP->aIPixels[auPixIdx[1]];
// general case: endpoints are fitted to the region's HDR pixels with OptimizeRGB and converted with Set()
1882 OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
1883 aEndPts[p].A.Set(epA, pEP->bSigned);
1884 aEndPts[p].B.Set(epB, pEP->bSigned);
// NOTE(review): the two clamp ranges below presumably correspond to the signed and
// unsigned formats (pEP->bSigned) - confirm against the selecting condition
1887 aEndPts[p].A.Clamp(-F16MAX, F16MAX);
1888 aEndPts[p].B.Clamp(-F16MAX, F16MAX);
1892 aEndPts[p].A.Clamp(0, F16MAX);
1893 aEndPts[p].B.Clamp(0, F16MAX);
1896 fError += MapColors(pEP, p, np, auPixIdx);
1904 //-------------------------------------------------------------------------------------
1906 //-------------------------------------------------------------------------------------
// Decodes this BC7 block into NUM_PIXELS_PER_BLOCK HDRColorA pixels written to pOut.
//
// Layout as parsed below: mode prefix, partition shape, rotation, index-mode selector,
// endpoint channels (all R values, then all G, B, A), P-bits, color indices, alpha indices.
// Whenever a read would go past bit 128, or the mode is invalid, a debug message is
// emitted with OutputDebugStringA and pOut is filled via FillWithErrorColors.
1907 void D3DX_BC7::Decode(HDRColorA* pOut) const
// scan for the first set bit; the mode number is derived from where the scan stops
// (GetBit is expected to advance uFirst, since the loop body is empty)
1912 while(uFirst < 128 && !GetBit(uFirst)) {}
1913 uint8_t uMode = uint8_t(uFirst - 1);
1917 const uint8_t uPartitions = ms_aInfo[uMode].uPartitions;
1918 assert( uPartitions < BC7_MAX_REGIONS );
1919 __analysis_assume( uPartitions < BC7_MAX_REGIONS );
// two endpoints per region
1921 const uint8_t uNumEndPts = (uPartitions + 1) << 1;
1922 const uint8_t uIndexPrec = ms_aInfo[uMode].uIndexPrec;
1923 const uint8_t uIndexPrec2 = ms_aInfo[uMode].uIndexPrec2;
// parsing resumes right after the mode prefix
1925 size_t uStartBit = uMode + 1;
1927 uint8_t uShape = GetBits(uStartBit, ms_aInfo[uMode].uPartitionBits);
1928 assert( uShape < BC7_MAX_SHAPES );
1929 __analysis_assume( uShape < BC7_MAX_SHAPES );
1931 uint8_t uRotation = GetBits(uStartBit, ms_aInfo[uMode].uRotationBits);
1932 assert( uRotation < 4 );
1934 uint8_t uIndexMode = GetBits(uStartBit, ms_aInfo[uMode].uIndexModeBits);
1935 assert( uIndexMode < 2 );
1937 LDRColorA c[BC7_MAX_REGIONS << 1];
1938 const LDRColorA RGBAPrec = ms_aInfo[uMode].RGBAPrec;
1939 const LDRColorA RGBAPrecWithP = ms_aInfo[uMode].RGBAPrecWithP;
1941 assert( uNumEndPts <= (BC7_MAX_REGIONS << 1) );
// red channel of every endpoint
1944 for(i = 0; i < uNumEndPts; i++)
1946 if ( uStartBit + RGBAPrec.r > 128 )
1949 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1951 FillWithErrorColors( pOut );
1955 c[i].r = GetBits(uStartBit, RGBAPrec.r);
// green channel of every endpoint
1959 for(i = 0; i < uNumEndPts; i++)
1961 if ( uStartBit + RGBAPrec.g > 128 )
1964 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1966 FillWithErrorColors( pOut );
1970 c[i].g = GetBits(uStartBit, RGBAPrec.g);
// blue channel of every endpoint
1974 for(i = 0; i < uNumEndPts; i++)
1976 if ( uStartBit + RGBAPrec.b > 128 )
1979 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1981 FillWithErrorColors( pOut );
1985 c[i].b = GetBits(uStartBit, RGBAPrec.b);
// alpha channel of every endpoint; when the mode stores no alpha bits, alpha is set to 255
1989 for(i = 0; i < uNumEndPts; i++)
1991 if ( uStartBit + RGBAPrec.a > 128 )
1994 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1996 FillWithErrorColors( pOut );
2000 c[i].a = RGBAPrec.a ? GetBits(uStartBit, RGBAPrec.a) : 255;
// P-bits, one bit each
2004 assert( ms_aInfo[uMode].uPBits <= 6 );
2005 __analysis_assume( ms_aInfo[uMode].uPBits <= 6 );
2006 for(i = 0; i < ms_aInfo[uMode].uPBits; i++)
2008 if ( uStartBit > 127 )
2011 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2013 FillWithErrorColors( pOut );
2017 P[i] = GetBit(uStartBit);
// append the P-bit as the new low bit of every channel whose precision differs with/without P;
// pi maps endpoint i to its P-bit (uPBits P-bits spread evenly over uNumEndPts endpoints)
2020 if(ms_aInfo[uMode].uPBits)
2022 for(i = 0; i < uNumEndPts; i++)
2024 size_t pi = i * ms_aInfo[uMode].uPBits / uNumEndPts;
2025 for(register uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2027 if(RGBAPrec[ch] != RGBAPrecWithP[ch])
2029 c[i][ch] = (c[i][ch] << 1) | P[pi];
// unquantize every endpoint using the precision that includes the P-bit
2035 for(i = 0; i < uNumEndPts; i++)
2037 c[i] = Unquantize(c[i], RGBAPrecWithP);
2040 uint8_t w1[NUM_PIXELS_PER_BLOCK], w2[NUM_PIXELS_PER_BLOCK];
2042 // read color indices
// fix-up positions are stored with one bit less than the other pixels
2043 for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2045 size_t uNumBits = IsFixUpOffset(ms_aInfo[uMode].uPartitions, uShape, i) ? uIndexPrec - 1 : uIndexPrec;
2046 if ( uStartBit + uNumBits > 128 )
2049 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2051 FillWithErrorColors( pOut );
2054 w1[i] = GetBits(uStartBit, uNumBits);
2057 // read alpha indices
// for the second index set only pixel 0 is stored with one bit less
2060 for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2062 size_t uNumBits = i ? uIndexPrec2 : uIndexPrec2 - 1;
2063 if ( uStartBit + uNumBits > 128 )
2066 OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2068 FillWithErrorColors( pOut );
2071 w2[i] = GetBits(uStartBit, uNumBits );
// reconstruct each pixel from the endpoint pair of its region
2075 for(i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2077 uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
// single index set: the same index drives both weights passed to Interpolate
2079 if(uIndexPrec2 == 0)
2081 LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w1[i], uIndexPrec, uIndexPrec, outPixel);
// two index sets: the two calls differ only in which set/precision is passed first
// (NOTE(review): presumably selected by uIndexMode - confirm)
2087 LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w2[i], uIndexPrec, uIndexPrec2, outPixel);
2091 LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w2[i], w1[i], uIndexPrec2, uIndexPrec, outPixel);
// swap alpha with r, g or b
// (NOTE(review): the case labels are presumably values of uRotation - confirm)
2097 case 1: Swap(outPixel.r, outPixel.a); break;
2098 case 2: Swap(outPixel.g, outPixel.a); break;
2099 case 3: Swap(outPixel.b, outPixel.a); break;
2102 pOut[i] = HDRColorA(outPixel);
// invalid mode: report it and output the error colors
2108 OutputDebugStringA( "BC7: Invalid mode encountered during decoding\n" );
2110 FillWithErrorColors( pOut );
// Encodes NUM_PIXELS_PER_BLOCK input pixels (pIn) into this BC7 block.
//
// The input is converted to 8-bit LDR, then modes 0..7 are tried. For each mode the loops
// run over rotations and index modes; within those, every shape gets a rough error
// estimate (RoughMSE), the uItems best shapes are selected, and each is passed to Refine.
// All search loops stop early once fMSEBest reaches 0.
2114 void D3DX_BC7::Encode(const HDRColorA* const pIn)
2118 D3DX_BC7 final = *this;
2119 EncodeParams EP(pIn);
2120 float fMSEBest = FLT_MAX;
// convert each channel to 8 bits: value * 255 + 0.01, clamped to [0, 255], truncated
2122 for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2124 EP.aLDRPixels[i].r = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].r * 255.0f + 0.01f ) ) );
2125 EP.aLDRPixels[i].g = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].g * 255.0f + 0.01f ) ) );
2126 EP.aLDRPixels[i].b = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].b * 255.0f + 0.01f ) ) );
2127 EP.aLDRPixels[i].a = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].a * 255.0f + 0.01f ) ) );
2130 for(EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode)
2132 const size_t uShapes = 1 << ms_aInfo[EP.uMode].uPartitionBits;
2133 assert( uShapes <= BC7_MAX_SHAPES );
2134 __analysis_assume( uShapes <= BC7_MAX_SHAPES );
2136 const size_t uNumRots = 1 << ms_aInfo[EP.uMode].uRotationBits;
2137 const size_t uNumIdxMode = 1 << ms_aInfo[EP.uMode].uIndexModeBits;
2138 // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
2139 // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
2140 const size_t uItems = std::max<size_t>(1, uShapes >> 2);
2141 float afRoughMSE[BC7_MAX_SHAPES];
2142 size_t auShape[BC7_MAX_SHAPES];
2144 for(size_t r = 0; r < uNumRots && fMSEBest > 0; ++r)
// swap alpha with r, g or b in the working pixels
// (NOTE(review): the case labels are presumably values of the rotation r - confirm)
2148 case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
2149 case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
2150 case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
2153 for(size_t im = 0; im < uNumIdxMode && fMSEBest > 0; ++im)
2155 // pick the best uItems shapes and refine these.
2156 for(size_t s = 0; s < uShapes; s++)
2158 afRoughMSE[s] = RoughMSE(&EP, s, im);
2162 // Bubble up the first uItems items
// partial selection sort: only the first uItems slots end up ordered by rough error;
// auShape is permuted in lock-step with afRoughMSE
2163 for(size_t i = 0; i < uItems; i++)
2165 for(size_t j = i + 1; j < uShapes; j++)
2167 if(afRoughMSE[i] > afRoughMSE[j])
2169 Swap(afRoughMSE[i], afRoughMSE[j]);
2170 Swap(auShape[i], auShape[j]);
2175 for(size_t i = 0; i < uItems && fMSEBest > 0; i++)
2177 float fMSE = Refine(&EP, auShape[i], r, im);
// the same swaps as above are applied a second time, which restores the original channel order
2188 case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
2189 case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
2190 case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) Swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
2199 //-------------------------------------------------------------------------------------
// Builds the palette that a quantized endpoint pair produces for the current mode.
//
// pEP:        encoder state (uMode selects precisions).
// uIndexMode: when non-zero, the roles of uIndexPrec and uIndexPrec2 are exchanged.
// endPts:     quantized endpoints; they are unquantized with RGBAPrecWithP before interpolation.
// aPalette:   output palette, written in place.
2200 void D3DX_BC7::GeneratePaletteQuantized(const EncodeParams* pEP, size_t uIndexMode, const LDREndPntPair& endPts, LDRColorA aPalette[]) const
2203 const size_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2204 const size_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2205 const size_t uNumIndices = 1 << uIndexPrec;
2206 const size_t uNumIndices2 = 1 << uIndexPrec2;
2207 assert( uNumIndices > 0 && uNumIndices2 > 0 );
2208 __analysis_assume( uNumIndices > 0 && uNumIndices2 > 0 );
2209 assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2210 __analysis_assume( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2212 LDRColorA a = Unquantize(endPts.A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2213 LDRColorA b = Unquantize(endPts.B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
// no second index set: one index interpolates all channels of each palette entry
2214 if(uIndexPrec2 == 0)
2216 for(register size_t i = 0; i < uNumIndices; i++)
2217 LDRColorA::Interpolate(a, b, i, i, uIndexPrec, uIndexPrec, aPalette[i]);
// two index sets: RGB and alpha are interpolated by separate calls, each with its own precision and entry count
2221 for(register size_t i = 0; i < uNumIndices; i++)
2222 LDRColorA::InterpolateRGB(a, b, i, uIndexPrec, aPalette[i]);
2223 for(register size_t i = 0; i < uNumIndices2; i++)
2224 LDRColorA::InterpolateA(a, b, i, uIndexPrec2, aPalette[i]);
// Searches for a better value of one channel of one endpoint by trying +/- steps that
// halve on every iteration.
//
// pEP, aColors, np, uIndexMode: forwarded to MapColors to evaluate a candidate.
// ch:         channel to perturb.
// oldEndPts:  starting endpoints; copied into newEndPts before the search.
// newEndPts:  output; only channel ch of endpoint A (do_b == 0) or B (do_b != 0) is moved.
// fOldErr:    error of oldEndPts, used as the initial best error.
// do_b:       selects which endpoint is perturbed.
2228 float D3DX_BC7::PerturbOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
2229 const LDREndPntPair &oldEndPts, LDREndPntPair &newEndPts, float fOldErr, uint8_t do_b) const
2232 const int prec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
2233 LDREndPntPair tmp_endPts = newEndPts = oldEndPts;
2234 float fMinErr = fOldErr;
2235 uint8_t* pnew_c = (do_b ? &newEndPts.B[ch] : &newEndPts.A[ch]);
2236 uint8_t* ptmp_c = (do_b ? &tmp_endPts.B[ch] : &tmp_endPts.A[ch]);
2238 // do a logarithmic search for the best error for this endpoint (which)
// step starts at half the channel range and is halved until it reaches zero
2239 for(int step = 1 << (prec-1); step; step >>= 1)
2241 bool bImproved = false;
// try the current value minus step, then plus step
2243 for(int sign = -1; sign <= 1; sign += 2)
2245 int tmp = int(*pnew_c) + sign * step;
// candidates outside [0, 2^prec) are tested for here
2246 if(tmp < 0 || tmp >= (1 << prec))
2249 *ptmp_c = (uint8_t) tmp;
// fMinErr is passed as the early-exit threshold of MapColors
2251 float fTotalErr = MapColors(pEP, aColors, np, uIndexMode, tmp_endPts, fMinErr);
2252 if(fTotalErr < fMinErr)
2255 fMinErr = fTotalErr;
2256 beststep = sign * step;
2260 // if this was an improvement, move the endpoint and continue search from there
2262 *pnew_c = uint8_t(int(*pnew_c) + beststep);
2267 // perturb the endpoints at least -3 to 3.
2268 // always ensure endpoint ordering is preserved (no need to overlap the scan)
// Tries the combinations of (A[ch], B[ch]) within +/- delta of the current endpoint values
// and keeps the pair with the lowest MapColors() error.
//
// pEP, aColors, np, uIndexMode: forwarded to MapColors to evaluate a candidate.
// ch:       channel being searched.
// fOrgErr:  error of optEndPt on entry (taken by reference).
// optEndPt: in/out; channel ch of A and B is replaced when a better pair is found.
// NOTE(review): both b loops stop at bhigh - 1 while the a loops include ahigh - confirm the
// asymmetry is intentional.
2269 void D3DX_BC7::Exhaustive(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
2270 float& fOrgErr, LDREndPntPair& optEndPt) const
2273 const uint8_t uPrec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
2274 LDREndPntPair tmpEndPt;
2280 // ok figure out the range of A and B
// the search windows are clamped to the representable range [0, 2^uPrec - 1]
2281 tmpEndPt = optEndPt;
2282 int alow = std::max<int>(0, int(optEndPt.A[ch]) - delta);
2283 int ahigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.A[ch]) + delta);
2284 int blow = std::max<int>(0, int(optEndPt.B[ch]) - delta);
2285 int bhigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.B[ch]) + delta);
2289 float fBestErr = fOrgErr;
// A <= B on entry: only candidate pairs with a <= b are visited
2290 if(optEndPt.A[ch] <= optEndPt.B[ch])
2293 for(int a = alow; a <= ahigh; ++a)
2295 for(int b = std::max<int>(a, blow); b < bhigh; ++b)
2297 tmpEndPt.A[ch] = (uint8_t) a;
2298 tmpEndPt.B[ch] = (uint8_t) b;
2300 float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
// otherwise only candidate pairs with b <= a are visited
2313 for(int b = blow; b < bhigh; ++b)
2315 for(int a = std::max<int>(b, alow); a <= ahigh; ++a)
2317 tmpEndPt.A[ch] = (uint8_t) a;
2318 tmpEndPt.B[ch] = (uint8_t) b;
2320 float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
// commit the best pair only when it improves on the incoming error
2331 if(fBestErr < fOrgErr)
2333 optEndPt.A[ch] = (uint8_t) amin;
2334 optEndPt.B[ch] = (uint8_t) bmin;
2339 void D3DX_BC7::OptimizeOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode,
2340 float fOrgErr, const LDREndPntPair& org, LDREndPntPair& opt) const
2344 float fOptErr = fOrgErr;
2347 LDREndPntPair new_a, new_b;
2348 LDREndPntPair newEndPts;
2351 // now optimize each channel separately
2352 for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ++ch)
2354 if(ms_aInfo[pEP->uMode].RGBAPrecWithP[ch] == 0)
2357 // figure out which endpoint when perturbed gives the most improvement and start there
2358 // if we just alternate, we can easily end up in a local minima
2359 float fErr0 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_a, fOptErr, 0); // perturb endpt A
2360 float fErr1 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_b, fOptErr, 1); // perturb endpt B
2362 uint8_t& copt_a = opt.A[ch];
2363 uint8_t& copt_b = opt.B[ch];
2364 uint8_t& cnew_a = new_a.A[ch];
2365 uint8_t& cnew_b = new_a.B[ch];
2369 if(fErr0 >= fOptErr)
2373 do_b = 1; // do B next
2377 if(fErr1 >= fOptErr)
2381 do_b = 0; // do A next
2384 // now alternate endpoints and keep trying until there is no improvement
2387 float fErr = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, newEndPts, fOptErr, do_b);
2395 do_b = 1 - do_b; // now move the other endpoint
2399 // finally, do a small exhaustive search around what we think is the global minima to be sure
2400 for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2401 Exhaustive(pEP, aColors, np, uIndexMode, ch, fOptErr, opt);
// Optimizes the endpoints of every region of a shape independently.
//
// pEP:        encoder state (source pixels in aLDRPixels, mode in uMode).
// uShape:     partition shape; asserted to be below BC7_MAX_SHAPES.
// uIndexMode: index-mode selector forwarded to OptimizeOne.
// afOrgErr:   per-region error of the unoptimized endpoints.
// aOrgEndPts: per-region starting endpoints.
// aOptEndPts: output; per-region optimized endpoints.
2404 void D3DX_BC7::OptimizeEndPoints(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, const float afOrgErr[],
2405 const LDREndPntPair aOrgEndPts[], LDREndPntPair aOptEndPts[]) const
2408 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2409 assert( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
2410 __analysis_assume( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
2412 LDRColorA aPixels[NUM_PIXELS_PER_BLOCK];
2414 for(size_t p = 0; p <= uPartitions; ++p)
2416 // collect the pixels in the region
// aPixels is packed: entries 0..np-1 hold the pixels whose partition-table entry equals p
2418 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2419 if(g_aPartitionTable[uPartitions][uShape][i] == p)
2420 aPixels[np++] = pEP->aLDRPixels[i];
2422 OptimizeOne(pEP, aPixels, np, uIndexMode, afOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
// Chooses the best palette index (or pair of indices) for every pixel and accumulates the
// error per region; then reorders endpoints so that the anchor indices have a 0 high bit.
//
// pEP:        encoder state (source pixels in aLDRPixels, mode in uMode).
// uShape:     partition shape; asserted to be below BC7_MAX_SHAPES.
// uIndexMode: when non-zero, the roles of uIndexPrec and uIndexPrec2 are exchanged.
// endPts:     in/out; per-region endpoints, A and B may be swapped here.
// aIndices:   output; first index per pixel.
// aIndices2:  output; second index per pixel.
// afTotErr:   per-region error; ComputeError() results are added to it.
2426 void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, LDREndPntPair endPts[], size_t aIndices[], size_t aIndices2[],
2427 float afTotErr[]) const
2430 assert( uShape < BC7_MAX_SHAPES );
2431 __analysis_assume( uShape < BC7_MAX_SHAPES );
2433 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2434 assert( uPartitions < BC7_MAX_REGIONS );
2435 __analysis_assume( uPartitions < BC7_MAX_REGIONS );
2437 const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2438 const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2439 const uint8_t uNumIndices = 1 << uIndexPrec;
2440 const uint8_t uNumIndices2 = 1 << uIndexPrec2;
2442 assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2443 __analysis_assume( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
// mask of the most significant bit of an index of each precision
2445 const uint8_t uHighestIndexBit = uNumIndices >> 1;
2446 const uint8_t uHighestIndexBit2 = uNumIndices2 >> 1;
2447 LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
2449 // build list of possibles
2450 LDREndPntPair adjusted_endPts;
2451 for(size_t p = 0; p <= uPartitions; p++)
2453 GeneratePaletteQuantized(pEP, uIndexMode, endPts[p], aPalette[p]);
// pick indices per pixel against its region's palette and add the error to that region's total
2457 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2459 uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2460 assert( uRegion < BC7_MAX_REGIONS );
2461 __analysis_assume( uRegion < BC7_MAX_REGIONS );
2462 afTotErr[uRegion] += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2, &(aIndices[i]), &(aIndices2[i]));
2465 // swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
// single index set: swap whole endpoints and mirror the region's indices (idx -> uNumIndices - 1 - idx)
2466 if(uIndexPrec2 == 0)
2468 for(register size_t p = 0; p <= uPartitions; p++)
2470 if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
2472 Swap(endPts[p].A, endPts[p].B);
2473 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2474 if(g_aPartitionTable[uPartitions][uShape][i] == p)
2475 aIndices[i] = uNumIndices - 1 - aIndices[i];
2477 assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
// two index sets: RGB and alpha are swapped independently of each other
2482 for(register size_t p = 0; p <= uPartitions; p++)
2484 if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
2486 Swap(endPts[p].A.r, endPts[p].B.r);
2487 Swap(endPts[p].A.g, endPts[p].B.g);
2488 Swap(endPts[p].A.b, endPts[p].B.b);
2489 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2490 if(g_aPartitionTable[uPartitions][uShape][i] == p)
2491 aIndices[i] = uNumIndices - 1 - aIndices[i];
2493 assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
// the second index set is anchored at pixel 0; all of its indices are mirrored on a swap
2495 if(aIndices2[0] & uHighestIndexBit2)
2497 Swap(endPts[p].A.a, endPts[p].B.a);
2498 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2499 aIndices2[i] = uNumIndices2 - 1 - aIndices2[i];
2501 assert((aIndices2[0] & uHighestIndexBit2) == 0);
// Packs the chosen mode, shape, rotation, index mode, endpoints and indices into this block.
//
// pEP:        encoder state (uMode selects the bit layout).
// uShape:     partition shape to store.
// uRotation:  rotation value to store.
// uIndexMode: index-mode selector to store; it also decides which index array is written first.
// aEndPts:    per-region quantized endpoints.
// aIndex:     first index per pixel.
// aIndex2:    second index per pixel.
// The final assert checks that exactly 128 bits were written.
2506 void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode, const LDREndPntPair aEndPts[], const size_t aIndex[], const size_t aIndex2[])
2509 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2510 assert( uPartitions < BC7_MAX_REGIONS );
2511 __analysis_assume( uPartitions < BC7_MAX_REGIONS );
2513 const size_t uPBits = ms_aInfo[pEP->uMode].uPBits;
2514 const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
2515 const size_t uIndexPrec2 = ms_aInfo[pEP->uMode].uIndexPrec2;
2516 const LDRColorA RGBAPrec = ms_aInfo[pEP->uMode].RGBAPrec;
2517 const LDRColorA RGBAPrecWithP = ms_aInfo[pEP->uMode].RGBAPrecWithP;
// mode prefix: uMode zero bits followed by a single 1 bit
2519 size_t uStartBit = 0;
2520 SetBits(uStartBit, pEP->uMode, 0);
2521 SetBits(uStartBit, 1, 1);
2522 SetBits(uStartBit, ms_aInfo[pEP->uMode].uRotationBits, static_cast<uint8_t>( uRotation ));
2523 SetBits(uStartBit, ms_aInfo[pEP->uMode].uIndexModeBits, static_cast<uint8_t>( uIndexMode ));
2524 SetBits(uStartBit, ms_aInfo[pEP->uMode].uPartitionBits, static_cast<uint8_t>( uShape ));
// endpoint layout with P-bits: channels that include a P-bit are stored without their low
// bit, and the low bits are tallied per P-bit slot in aPVote
// (NOTE(review): this path is presumably taken only when uPBits != 0 - confirm)
2528 const size_t uNumEP = (1 + uPartitions) << 1;
2529 uint8_t aPVote[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
2530 uint8_t aCount[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
2531 for(uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2534 for(i = 0; i <= uPartitions; i++)
2536 if(RGBAPrec[ch] == RGBAPrecWithP[ch])
2538 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch]);
2539 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch]);
2543 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] >> 1);
2544 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] >> 1);
2545 size_t idx = ep++ * uPBits / uNumEP;
2546 assert(idx < (BC7_MAX_REGIONS << 1));
2547 __analysis_assume(idx < (BC7_MAX_REGIONS << 1));
2548 aPVote[idx] += aEndPts[i].A[ch] & 0x01;
2550 idx = ep++ * uPBits / uNumEP;
2551 assert(idx < (BC7_MAX_REGIONS << 1));
2552 __analysis_assume(idx < (BC7_MAX_REGIONS << 1));
2553 aPVote[idx] += aEndPts[i].B[ch] & 0x01;
// each P-bit is written as 1 when its vote total exceeds half of aCount for that slot
2559 for(i = 0; i < uPBits; i++)
2561 SetBits(uStartBit, 1, aPVote[i] > (aCount[i] >> 1) ? 1 : 0);
// endpoint layout without P-bits: every channel is stored at its full RGBAPrec width
2566 for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2568 for(i = 0; i <= uPartitions; i++)
2570 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] );
2571 SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] );
// with uIndexMode set, the two index arrays exchange places in the output
2576 const size_t* aI1 = uIndexMode ? aIndex2 : aIndex;
2577 const size_t* aI2 = uIndexMode ? aIndex : aIndex2;
// first index set: fix-up positions are written with one bit less
2578 for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2580 if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, uShape, i))
2581 SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aI1[i] ));
2583 SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aI1[i] ));
// second index set: only pixel 0 is written with one bit less
2586 for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2587 SetBits(uStartBit, i ? uIndexPrec2 : uIndexPrec2 - 1, static_cast<uint8_t>( aI2[i] ));
2589 assert(uStartBit == 128);
// Refines the rough endpoints of one shape and emits the better of the unoptimized and
// optimized candidates into this block.
//
// pEP:        encoder state; the rough endpoints are read from pEP->aEndPts[uShape].
// uShape:     partition shape; asserted to be below BC7_MAX_SHAPES.
// uRotation:  rotation value forwarded to EmitBlock.
// uIndexMode: index-mode selector forwarded to the helpers and to EmitBlock.
2592 float D3DX_BC7::Refine(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode)
2595 assert( uShape < BC7_MAX_SHAPES );
2596 __analysis_assume( uShape < BC7_MAX_SHAPES );
2597 const LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
2599 const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2600 assert( uPartitions < BC7_MAX_REGIONS );
2601 __analysis_assume( uPartitions < BC7_MAX_REGIONS );
2603 LDREndPntPair aOrgEndPts[BC7_MAX_REGIONS];
2604 LDREndPntPair aOptEndPts[BC7_MAX_REGIONS];
2605 size_t aOrgIdx[NUM_PIXELS_PER_BLOCK];
2606 size_t aOrgIdx2[NUM_PIXELS_PER_BLOCK];
2607 size_t aOptIdx[NUM_PIXELS_PER_BLOCK];
2608 size_t aOptIdx2[NUM_PIXELS_PER_BLOCK];
2609 float aOrgErr[BC7_MAX_REGIONS];
2610 float aOptErr[BC7_MAX_REGIONS];
// quantize the rough endpoints of every region to the mode's precision (including the P-bit)
2612 for(register size_t p = 0; p <= uPartitions; p++)
2614 aOrgEndPts[p].A = Quantize(aEndPts[p].A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2615 aOrgEndPts[p].B = Quantize(aEndPts[p].B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
// baseline indices/errors, then optimized endpoints with their own indices/errors
2618 AssignIndices(pEP, uShape, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2, aOrgErr);
2619 OptimizeEndPoints(pEP, uShape, uIndexMode, aOrgErr, aOrgEndPts, aOptEndPts);
2620 AssignIndices(pEP, uShape, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2, aOptErr);
// compare the candidates by their error summed over regions 0..uPartitions
2622 float fOrgTotErr = 0, fOptTotErr = 0;
2623 for(register size_t p = 0; p <= uPartitions; p++)
2625 fOrgTotErr += aOrgErr[p];
2626 fOptTotErr += aOptErr[p];
// the optimized candidate is emitted only when its total error is strictly lower
2628 if(fOptTotErr < fOrgTotErr)
2630 EmitBlock(pEP, uShape, uRotation, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2);
2635 EmitBlock(pEP, uShape, uRotation, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2);
// Accumulates the error of mapping np colors onto the palette generated from a quantized
// endpoint pair, giving up early once the running total exceeds fMinErr.
//
// pEP:        encoder state (uMode selects precisions).
// aColors:    the colors to map.
// np:         number of entries in aColors.
// uIndexMode: when non-zero, the roles of uIndexPrec and uIndexPrec2 are exchanged.
// endPts:     quantized endpoints used to build the palette.
// fMinErr:    early-exit threshold; on exceeding it fTotalErr is set to FLT_MAX.
2640 float D3DX_BC7::MapColors(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, const LDREndPntPair& endPts, float fMinErr) const
2643 const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2644 const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2645 LDRColorA aPalette[BC7_MAX_INDICES];
2646 float fTotalErr = 0;
2648 GeneratePaletteQuantized(pEP, uIndexMode, endPts, aPalette);
2649 for(register size_t i = 0; i < np; ++i)
2651 fTotalErr += ComputeError(aColors[i], aPalette, uIndexPrec, uIndexPrec2);
2652 if(fTotalErr > fMinErr) // check for early exit
2654 fTotalErr = FLT_MAX;
// Produces a quick error estimate for one shape and stores the unquantized endpoints it
// derives in pEP->aEndPts[uShape].
//
// pEP:        encoder state (source pixels in aLDRPixels / aHDRPixels, mode in uMode).
// uShape:     partition shape; asserted to be below BC7_MAX_SHAPES.
// uIndexMode: when non-zero, the roles of uIndexPrec and uIndexPrec2 are exchanged.
// Per region, endpoints are chosen, a palette is interpolated directly from them, and
// every pixel's ComputeError() against its region's palette is added to fTotalErr.
2662 float D3DX_BC7::RoughMSE(EncodeParams* pEP, size_t uShape, size_t uIndexMode)
2665 assert( uShape < BC7_MAX_SHAPES );
2666 __analysis_assume( uShape < BC7_MAX_SHAPES );
2667 LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
2669 const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2670 assert( uPartitions < BC7_MAX_REGIONS );
2671 __analysis_assume( uPartitions < BC7_MAX_REGIONS );
2673 const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2674 const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2675 const uint8_t uNumIndices = 1 << uIndexPrec;
2676 const uint8_t uNumIndices2 = 1 << uIndexPrec2;
2677 size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
2678 LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
2680 for(size_t p = 0; p <= uPartitions; p++)
// select the pixels whose partition-table entry equals the current region
2683 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2685 if (g_aPartitionTable[uPartitions][uShape][i] == p)
2691 // handle simple cases
// first simple case: both endpoints are set to the same (first) pixel of the region
2695 aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
2696 aEndPts[p].B = pEP->aLDRPixels[auPixIdx[0]];
// second simple case: the first and second pixel of the region become the two endpoints
2701 aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
2702 aEndPts[p].B = pEP->aLDRPixels[auPixIdx[1]];
// single index set: fit all four channels together with OptimizeRGBA, clamped to [0, 1]
2706 if(uIndexPrec2 == 0)
2709 OptimizeRGBA(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
2710 epA.Clamp(0.0f, 1.0f);
2711 epB.Clamp(0.0f, 1.0f);
2714 aEndPts[p].A = epA.ToLDRColorA();
2715 aEndPts[p].B = epB.ToLDRColorA();
// two index sets: alpha endpoints are the min/max alpha found by the scan below, RGB is fitted with OptimizeRGB
// NOTE(review): the scan reads auPixIdx[0..NUM_PIXELS_PER_BLOCK-1] while OptimizeRGB is
// given np entries - confirm every region reaching this path contains all pixels
2719 uint8_t uMinAlpha = 255, uMaxAlpha = 0;
2720 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2722 uMinAlpha = std::min<uint8_t>(uMinAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
2723 uMaxAlpha = std::max<uint8_t>(uMaxAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
2727 OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
2728 epA.Clamp(0.0f, 1.0f);
2729 epB.Clamp(0.0f, 1.0f);
2732 aEndPts[p].A = epA.ToLDRColorA();
2733 aEndPts[p].B = epB.ToLDRColorA();
2734 aEndPts[p].A.a = uMinAlpha;
2735 aEndPts[p].B.a = uMaxAlpha;
// build the per-region palettes directly from the endpoints stored above
2739 if(uIndexPrec2 == 0)
2741 for(size_t p = 0; p <= uPartitions; p++)
2742 for(register size_t i = 0; i < uNumIndices; i++)
2743 LDRColorA::Interpolate(aEndPts[p].A, aEndPts[p].B, i, i, uIndexPrec, uIndexPrec, aPalette[p][i]);
2747 for(size_t p = 0; p <= uPartitions; p++)
2749 for(register size_t i = 0; i < uNumIndices; i++)
2750 LDRColorA::InterpolateRGB(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec, aPalette[p][i]);
2751 for(register size_t i = 0; i < uNumIndices2; i++)
2752 LDRColorA::InterpolateA(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec2, aPalette[p][i]);
// total error = sum over all pixels of the error against the pixel's region palette
2756 float fTotalErr = 0;
2757 for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2759 uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2760 fTotalErr += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2);
2766 //=====================================================================================
2768 //=====================================================================================
2770 //-------------------------------------------------------------------------------------
2772 //-------------------------------------------------------------------------------------
2773 void D3DXDecodeBC6HU(XMVECTOR *pColor, const uint8_t *pBC)
2775 assert( pColor && pBC );
2776 static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2777 reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(false, reinterpret_cast<HDRColorA*>(pColor));
2780 void D3DXDecodeBC6HS(XMVECTOR *pColor, const uint8_t *pBC)
2782 assert( pColor && pBC );
2783 static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2784 reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(true, reinterpret_cast<HDRColorA*>(pColor));
2787 void D3DXEncodeBC6HU(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2789 UNREFERENCED_PARAMETER(flags);
2790 assert( pBC && pColor );
2791 static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2792 reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(false, reinterpret_cast<const HDRColorA*>(pColor));
2795 void D3DXEncodeBC6HS(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2797 UNREFERENCED_PARAMETER(flags);
2798 assert( pBC && pColor );
2799 static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2800 reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(true, reinterpret_cast<const HDRColorA*>(pColor));
2804 //-------------------------------------------------------------------------------------
2806 //-------------------------------------------------------------------------------------
2807 void D3DXDecodeBC7(XMVECTOR *pColor, const uint8_t *pBC)
2809 assert( pColor && pBC );
2810 static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
2811 reinterpret_cast< const D3DX_BC7* >( pBC )->Decode(reinterpret_cast<HDRColorA*>(pColor));
2814 void D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2816 UNREFERENCED_PARAMETER(flags);
2817 assert( pBC && pColor );
2818 static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
2819 reinterpret_cast< D3DX_BC7* >( pBC )->Encode(reinterpret_cast<const HDRColorA*>(pColor));