1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 // File: vogl_dxt1.cpp
30 // This class is not optimized for performance on small blocks, unlike typical DXT1 compressors. It's optimized for scalability and quality:
31 // - Very high quality in terms of avg. RMSE or Luma RMSE. Goal is to always match or beat every other known offline DXTc compressor: ATI_Compress, squish, NVidia texture tools, nvdxt.exe, etc.
32 // - Reasonable scalability and stability with hundreds to many thousands of input colors (including inputs with many thousands of equal/nearly equal colors).
33 // - Any quality optimization which results in even a tiny improvement is worth it -- as long as it's either a constant or linear slowdown.
34 // Tiny quality improvements can be extremely valuable in large clusters.
35 // - Quality should scale well vs. CPU time cost, i.e. the more time you spend the higher the quality.
36 #include "vogl_core.h"
37 #include "vogl_dxt1.h"
38 #include "vogl_ryg_dxt.hpp"
39 #include "vogl_dxt_fast.h"
40 #include "vogl_intersect.h"
41 #include "vogl_vec_interval.h"
45 //-----------------------------------------------------------------------------------------------------------------------------------------
// Probe offset tables, one per quality level. optimize_endpoints() picks one table and its
// element count, then multiplies each entry by a per-quality step distance to place
// candidate endpoints along the principal axis (in both directions).
47 static const int16 g_fast_probe_table[] = { 0, 1, 2, 3 };
48 static const uint cFastProbeTableSize = sizeof(g_fast_probe_table) / sizeof(g_fast_probe_table[0]);
50 static const int16 g_normal_probe_table[] = { 0, 1, 3, 5, 7 };
51 static const uint cNormalProbeTableSize = sizeof(g_normal_probe_table) / sizeof(g_normal_probe_table[0]);
53 static const int16 g_better_probe_table[] = { 0, 1, 2, 3, 5, 9, 15, 19, 27, 43 };
54 static const uint cBetterProbeTableSize = sizeof(g_better_probe_table) / sizeof(g_better_probe_table[0]);
// Largest table. cUberProbeTableSize also dimensions the probe_low/probe_high scratch arrays
// in optimize_endpoints(), so those arrays can hold the output of any of the tables above.
56 static const int16 g_uber_probe_table[] = { 0, 1, 2, 3, 5, 7, 9, 10, 13, 15, 19, 27, 43, 59, 91 };
57 static const uint cUberProbeTableSize = sizeof(g_uber_probe_table) / sizeof(g_uber_probe_table[0]);
59 //-----------------------------------------------------------------------------------------------------------------------------------------
// Constructs the optimizer with the color-weighting and grayscale flags cleared, and
// pre-reserves capacity in the working arrays (512 entries for the coordinate/color arrays,
// 128 for the cell arrays).
61 dxt1_endpoint_optimizer::dxt1_endpoint_optimizer()
66 m_has_color_weighting(false),
67 m_all_pixels_grayscale(false)
// Candidate endpoint coordinate arrays.
69 m_low_coords.reserve(512);
70 m_high_coords.reserve(512);
// Unique color bookkeeping.
72 m_unique_colors.reserve(512);
73 m_temp_unique_colors.reserve(512);
74 m_unique_packed_colors.reserve(512);
// Normalized (and perceptually weighted) copies of the unique colors; filled by compute_vectors().
76 m_norm_unique_colors.reserve(512);
77 m_norm_unique_colors_weighted.reserve(512);
79 m_lo_cells.reserve(128);
80 m_hi_cells.reserve(128);
// Resets the optimizer's per-block working state: the unique color list, the normalized
// color arrays and their means, the principal axis, the accumulated weight and the
// transparency/grayscale/weighting/perceptual flags.
83 void dxt1_endpoint_optimizer::clear()
// The hash maps are clear()ed once their table size exceeds 8192 entries; reset() is used as well.
// NOTE(review): the branch structure joining clear() and reset() is not visible in this
// extract -- presumably clear() releases an oversized table while reset() keeps it; confirm.
89 if (m_unique_color_hash_map.get_table_size() > 8192)
90 m_unique_color_hash_map.clear();
92 m_unique_color_hash_map.reset();
94 if (m_solutions_tried.get_table_size() > 8192)
95 m_solutions_tried.clear();
// resize(0) rather than clear() on the arrays below.
// NOTE(review): presumably to keep the reserved capacity -- confirm against the vector class.
97 m_unique_colors.resize(0);
99 m_has_transparent_pixels = false;
100 m_total_unique_color_weight = 0;
102 m_norm_unique_colors.resize(0);
103 m_mean_norm_color.clear();
105 m_norm_unique_colors_weighted.resize(0);
106 m_mean_norm_color_weighted.clear();
108 m_principle_axis.clear();
111 m_all_pixels_grayscale = false;
112 m_has_color_weighting = false;
113 m_perceptual = false;
// Writes the result for the all-transparent case directly into m_pResults: both endpoints
// are set to 0, the block is flagged as an alpha block, and every one of the
// m_pParams->m_num_pixels selectors is set to 3.
116 bool dxt1_endpoint_optimizer::handle_all_transparent_block()
118 m_pResults->m_low_color = 0;
119 m_pResults->m_high_color = 0;
120 m_pResults->m_alpha_block = true;
// One selector byte per pixel.
122 memset(m_pResults->m_pSelectors, 3, m_pParams->m_num_pixels);
127 // All selectors are equal. Try compressing as if it was solid, using the block's average color, using ryg's optimal single color compression tables.
// Every candidate is passed to evaluate_solution() against m_best_solution; the results are
// OR-ed into `improved`.
// NOTE(review): the averages below divide by total_weight, so this assumes at least one
// unique color with nonzero weight -- confirm callers guarantee that.
128 bool dxt1_endpoint_optimizer::try_average_block_as_solid()
// Accumulate the weight-scaled channel sums and the total weight over all unique colors.
134 uint total_weight = 0;
135 for (uint i = 0; i < m_unique_colors.size(); i++)
137 uint weight = m_unique_colors[i].m_weight;
138 total_weight += weight;
140 tot_r += m_unique_colors[i].m_color.r * weight;
141 tot_g += m_unique_colors[i].m_color.g * weight;
142 tot_b += m_unique_colors[i].m_color.b * weight;
// Weighted average per channel, rounded to nearest (half the divisor is added before dividing).
145 const uint half_total_weight = total_weight >> 1;
146 uint ave_r = static_cast<uint>((tot_r + half_total_weight) / total_weight);
147 uint ave_g = static_cast<uint>((tot_g + half_total_weight) / total_weight);
148 uint ave_b = static_cast<uint>((tot_b + half_total_weight) / total_weight);
// Build the packed 5:6:5 endpoint pair for the average color from the OMatch5/OMatch6 tables
// (index [0] gives the low endpoint component, [1] the high one).
150 uint low_color = (ryg_dxt::OMatch5[ave_r][0] << 11) | (ryg_dxt::OMatch6[ave_g][0] << 5) | ryg_dxt::OMatch5[ave_b][0];
151 uint high_color = (ryg_dxt::OMatch5[ave_r][1] << 11) | (ryg_dxt::OMatch6[ave_g][1] << 5) | ryg_dxt::OMatch5[ave_b][1];
152 bool improved = evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution);
// If alpha blocks are allowed and the best error is still nonzero, also try the
// OMatch5_3/OMatch6_3 tables (presumably the 3-color-mode variants -- confirm in vogl_ryg_dxt).
154 if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error))
156 low_color = (ryg_dxt::OMatch5_3[ave_r][0] << 11) | (ryg_dxt::OMatch6_3[ave_g][0] << 5) | ryg_dxt::OMatch5_3[ave_b][0];
157 high_color = (ryg_dxt::OMatch5_3[ave_r][1] << 11) | (ryg_dxt::OMatch6_3[ave_g][1] << 5) | ryg_dxt::OMatch5_3[ave_b][1];
158 improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution);
161 if (m_pParams->m_quality == cCRNDXTQualityUber)
163 // Try compressing as all-solid using the other (non-average) colors in the block in uber.
164 for (uint i = 0; i < m_unique_colors.size(); i++)
166 uint r = m_unique_colors[i].m_color[0];
167 uint g = m_unique_colors[i].m_color[1];
168 uint b = m_unique_colors[i].m_color[2];
// A color equal to the average was already tried above (the statement taken on a match is
// not visible in this extract -- presumably it skips to the next color).
169 if ((r == ave_r) && (g == ave_g) && (b == ave_b))
172 uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0];
173 uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1];
174 improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution);
176 if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error))
178 low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0];
179 high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1];
180 improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution);
188 // Block is solid; try using ryg's optimal single color tables.
// Uses only m_unique_colors[0], evaluates the table-derived endpoint pair(s) against
// m_best_solution, then writes the winner to m_pResults via return_solution().
189 bool dxt1_endpoint_optimizer::handle_solid_block()
191 int r = m_unique_colors[0].m_color.r;
192 int g = m_unique_colors[0].m_color.g;
193 int b = m_unique_colors[0].m_color.b;
195 //uint packed_color = dxt1_block::pack_color(r, g, b, true);
196 //evaluate_solution(dxt1_solution_coordinates((uint16)packed_color, (uint16)packed_color), false, &m_best_solution);
// Packed 5:6:5 endpoints from the OMatch5/OMatch6 tables ([0] = low component, [1] = high component).
198 uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0];
199 uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1];
200 evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), false, &m_best_solution);
// If alpha blocks are allowed and the error is still nonzero, also try the
// OMatch5_3/OMatch6_3 tables (presumably the 3-color-mode variants -- confirm in vogl_ryg_dxt).
202 if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error))
204 low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0];
205 high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1];
206 evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution);
209 return_solution(*m_pResults, m_best_solution);
// Rebuilds m_norm_unique_colors and m_norm_unique_colors_weighted from m_unique_colors:
// each color is scaled by 1/255 per channel, a second copy is multiplied component-wise by
// perceptual_weights, the weight-averaged mean of each set is computed, and the means are
// then subtracted so both arrays end up centered on their mean.
214 void dxt1_endpoint_optimizer::compute_vectors(const vec3F &perceptual_weights)
216 m_norm_unique_colors.resize(0);
217 m_norm_unique_colors_weighted.resize(0);
219 m_mean_norm_color.clear();
220 m_mean_norm_color_weighted.clear();
222 for (uint i = 0; i < m_unique_colors.size(); i++)
224 const color_quad_u8 &color = m_unique_colors[i].m_color;
225 const uint weight = m_unique_colors[i].m_weight;
227 vec3F norm_color(color.r * 1.0f / 255.0f, color.g * 1.0f / 255.0f, color.b * 1.0f / 255.0f);
228 vec3F norm_color_weighted(vec3F::mul_components(perceptual_weights, norm_color));
230 m_norm_unique_colors.push_back(norm_color);
231 m_norm_unique_colors_weighted.push_back(norm_color_weighted);
// Accumulate weight-scaled sums; they are divided by the total weight below.
233 m_mean_norm_color += norm_color * (float)weight;
234 m_mean_norm_color_weighted += norm_color_weighted * (float)weight;
// Skip the division when the total weight is zero.
237 if (m_total_unique_color_weight)
239 m_mean_norm_color *= (1.0f / m_total_unique_color_weight);
240 m_mean_norm_color_weighted *= (1.0f / m_total_unique_color_weight);
// Center both arrays on their respective means.
243 for (uint i = 0; i < m_unique_colors.size(); i++)
245 m_norm_unique_colors[i] -= m_mean_norm_color;
246 m_norm_unique_colors_weighted[i] -= m_mean_norm_color_weighted;
250 // Compute PCA (principal axis, i.e. direction of largest variance) of input vectors.
// axis receives the result; norm_colors must have one entry per unique color (asserted
// below); def is passed to axis.normalize() in the first code path.
// NOTE(review): this extract shows two approaches back to back (a per-sample accumulation
// followed by a covariance/power-iteration version). The preprocessor or control lines
// that select between them are not visible here -- confirm which one is compiled.
251 void dxt1_endpoint_optimizer::compute_pca(vec3F &axis, const vec3F_array &norm_colors, const vec3F &def)
256 VOGL_ASSERT(m_unique_colors.size() == norm_colors.size());
260 for (uint i = 0; i < norm_colors.size(); i++)
262 const uint weight = m_unique_colors[i].m_weight;
// Each color is processed `weight` times in this path.
264 for (uint j = 0; j < weight; j++)
266 vec3F x(norm_colors[i] * norm_colors[i][0]);
267 vec3F y(norm_colors[i] * norm_colors[i][1]);
268 vec3F z(norm_colors[i] * norm_colors[i][2]);
270 vec3F v(first ? norm_colors[0] : axis);
281 axis.normalize(&def);
// The six unique entries of a symmetric 3x3 matrix of weighted products, in the order
// rr, rg, rb, gg, gb, bb (see the accumulation below).
283 double cov[6] = { 0, 0, 0, 0, 0, 0 };
285 //vec3F lo(math::cNearlyInfinite);
286 //vec3F hi(-math::cNearlyInfinite);
288 for (uint i = 0; i < norm_colors.size(); i++)
290 const vec3F &v = norm_colors[i];
292 //if (v[0] < lo[0]) lo[0] = v[0];
293 //if (v[1] < lo[1]) lo[1] = v[1];
294 //if (v[2] < lo[2]) lo[2] = v[2];
295 //if (v[0] > hi[0]) hi[0] = v[0];
296 //if (v[1] > hi[1]) hi[1] = v[1];
297 //if (v[2] > hi[2]) hi[2] = v[2];
// Colors with weight > 1 contribute their products scaled by the weight.
303 if (m_unique_colors[i].m_weight > 1)
305 const double weight = m_unique_colors[i].m_weight;
307 cov[0] += r * r * weight;
308 cov[1] += r * g * weight;
309 cov[2] += r * b * weight;
310 cov[3] += g * g * weight;
311 cov[4] += g * b * weight;
312 cov[5] += b * b * weight;
// (vfr, vfg, vfb) is the iterated direction estimate; its initialization is not visible in this extract.
325 double vfr, vfg, vfb;
326 //vfr = hi[0] - lo[0];
327 //vfg = hi[1] - lo[1];
328 //vfb = hi[2] - lo[2];
329 // This is more stable.
// At most 8 iterations of multiplying the current estimate by the matrix.
334 const uint cNumIters = 8;
336 for (uint iter = 0; iter < cNumIters; iter++)
// Matrix * vector, using the symmetric layout of cov[].
338 double r = vfr * cov[0] + vfg * cov[1] + vfb * cov[2];
339 double g = vfr * cov[1] + vfg * cov[3] + vfb * cov[4];
340 double b = vfr * cov[2] + vfg * cov[4] + vfb * cov[5];
// Largest absolute component of the product (presumably used to rescale it -- the lines using m are not visible here).
342 double m = math::maximum(fabs(r), fabs(g), fabs(b));
// Squared change between the previous estimate and the new one.
351 double delta = math::square(vfr - r) + math::square(vfg - g) + math::square(vfb - b);
// Convergence test, only honored after the third iteration.
357 if ((iter > 2) && (delta < 1e-8))
// Squared length of the final estimate, turned into a reciprocal length below.
361 double len = vfr * vfr + vfg * vfg + vfb * vfb;
369 len = 1.0f / sqrt(len);
374 axis.set(static_cast<float>(vfr), static_cast<float>(vfg), static_cast<float>(vfb));
// Selector remap tables indexed by selector value, used by return_solution() when it swaps
// the two endpoints:
//   g_invTableNull  - identity (no swap).
//   g_invTableAlpha - exchanges selectors 0 and 1, leaves 2 and 3 unchanged (alpha blocks).
//   g_invTableColor - exchanges 0 with 1 and 2 with 3 (non-alpha blocks).
379 static const uint8 g_invTableNull[4] = { 0, 1, 2, 3 };
380 static const uint8 g_invTableAlpha[4] = { 1, 0, 2, 3 };
381 static const uint8 g_invTableColor[4] = { 1, 0, 3, 2 };
383 // Computes a valid (encodable) DXT1 solution (low/high colors, swizzled selectors) from input.
// res receives the endpoints (swapped if needed), one selector byte per source pixel, the
// alpha-block flag and the error; solution supplies the endpoints and per-unique-color selectors.
384 void dxt1_endpoint_optimizer::return_solution(results &res, const potential_solution &solution)
386 bool invert_selectors;
// Alpha blocks are swapped when low > high; the other path swaps when low < high and
// asserts that the two endpoints differ.
388 if (solution.m_alpha_block)
389 invert_selectors = (solution.m_coords.m_low_color > solution.m_coords.m_high_color);
392 VOGL_ASSERT(solution.m_coords.m_low_color != solution.m_coords.m_high_color);
394 invert_selectors = (solution.m_coords.m_low_color < solution.m_coords.m_high_color);
397 if (invert_selectors)
399 res.m_low_color = solution.m_coords.m_high_color;
400 res.m_high_color = solution.m_coords.m_low_color;
404 res.m_low_color = solution.m_coords.m_low_color;
405 res.m_high_color = solution.m_coords.m_high_color;
// Swapping the endpoints requires remapping the selectors to match.
408 const uint8 *pInvert_table = g_invTableNull;
409 if (invert_selectors)
410 pInvert_table = solution.m_alpha_block ? g_invTableAlpha : g_invTableColor;
// Threshold placed in the top byte so it can be compared directly against a pixel's 32-bit
// little-endian value; 0 when the pixels carry no alpha.
412 const uint alpha_thresh = m_pParams->m_pixels_have_alpha ? (m_pParams->m_dxt1a_alpha_threshold << 24U) : 0;
414 const uint32 *pSrc_pixels = reinterpret_cast<const uint32 *>(m_pParams->m_pPixels);
415 uint8 *pDst_selectors = res.m_pSelectors;
// Fast path: a single unique color and no alpha means every pixel gets the same selector.
417 if ((m_unique_colors.size() == 1) && (!m_pParams->m_pixels_have_alpha))
419 uint32 c = utils::read_le32(pSrc_pixels);
421 VOGL_ASSERT(c >= alpha_thresh);
// Map the pixel value to its unique color index, then to that color's selector.
425 unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c));
426 VOGL_ASSERT(it != m_unique_color_hash_map.end());
428 uint unique_color_index = it->second;
430 uint selector = pInvert_table[solution.m_selectors[unique_color_index]];
432 memset(pDst_selectors, selector, m_pParams->m_num_pixels);
// General path: one selector per pixel.
436 uint8 *pDst_selectors_end = pDst_selectors + m_pParams->m_num_pixels;
// prev_color/prev_selector remember the last lookup (presumably so runs of identical pixels
// skip the hash lookup -- the comparison line is not visible in this extract).
438 uint8 prev_selector = 0;
439 uint32 prev_color = 0;
443 uint32 c = utils::read_le32(pSrc_pixels);
448 if (c >= alpha_thresh)
453 selector = prev_selector;
456 unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c));
458 VOGL_ASSERT(it != m_unique_color_hash_map.end());
460 uint unique_color_index = it->second;
462 selector = pInvert_table[solution.m_selectors[unique_color_index]];
465 prev_selector = selector;
469 *pDst_selectors++ = selector;
471 } while (pDst_selectors != pDst_selectors_end);
474 res.m_alpha_block = solution.m_alpha_block;
475 res.m_error = solution.m_error;
// Unpacks a packed endpoint color (unpack_color called with its second argument false) and
// returns it as a vec3F with r and b divided by 31 and g divided by 63.
478 inline vec3F dxt1_endpoint_optimizer::unpack_to_vec3F(uint16 packed_color)
480 color_quad_u8 c(dxt1_block::unpack_color(packed_color, false));
482 return vec3F(c.r * 1.0f / 31.0f, c.g * 1.0f / 63.0f, c.b * 1.0f / 31.0f);
// Unpacks a packed endpoint color (unpack_color called with its second argument false) and
// returns its r, g, b components as a vec3F without any rescaling.
485 inline vec3F dxt1_endpoint_optimizer::unpack_to_vec3F_raw(uint16 packed_color)
487 color_quad_u8 c(dxt1_block::unpack_color(packed_color, false));
489 return vec3F(c.r, c.g, c.b);
492 // Per-component 1D endpoint optimization.
// For each of the three color components, with the current selectors held fixed, this scans
// low endpoint values o and high endpoint values p for that component. Whenever the
// component's 1D error beats the current one, the full candidate is passed to
// evaluate_solution() against m_best_solution.
493 void dxt1_endpoint_optimizer::optimize_endpoint_comps()
// Guard on alpha-block or already zero-error solutions (presumably an early return -- the
// statement itself is not visible in this extract).
495 if ((m_best_solution.m_alpha_block) || (!m_best_solution.m_error))
498 //color_quad_u8 orig_l(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false));
499 //color_quad_u8 orig_h(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false));
500 //uint orig_error = m_best_solution.m_error;
502 color_quad_u8 orig_l_scaled(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true));
503 color_quad_u8 orig_h_scaled(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true));
// Per-component minimum and maximum over all unique colors.
505 color_quad_u8 min_color(0xFF, 0xFF, 0xFF, 0xFF);
506 color_quad_u8 max_color(0, 0, 0, 0);
507 for (uint i = 0; i < m_unique_colors.size(); i++)
509 min_color = color_quad_u8::component_min(min_color, m_unique_colors[i].m_color);
510 max_color = color_quad_u8::component_max(max_color, m_unique_colors[i].m_color);
513 // Try to separately optimize each component. This is a 1D problem so it's easy to compute accurate per-component error bounds.
514 for (uint comp_index = 0; comp_index < 3; comp_index++)
// ll[] holds the four values selectable for this component with the current endpoints:
// the two endpoints plus the 1/3 and 2/3 interpolants.
517 ll[0] = orig_l_scaled[comp_index];
518 ll[1] = orig_h_scaled[comp_index];
519 ll[2] = (ll[0] * 2 + ll[1]) / 3;
520 ll[3] = (ll[0] + ll[1] * 2) / 3;
// Current weighted squared error for this component, plus the total weight of the colors
// sitting at the component's minimum and maximum.
522 uint error_to_beat = 0;
523 uint min_color_weight = 0;
524 uint max_color_weight = 0;
525 for (uint i = 0; i < m_unique_colors.size(); i++)
527 uint c = m_unique_colors[i].m_color[comp_index];
528 uint w = m_unique_colors[i].m_weight;
530 int delta = ll[m_best_solution.m_selectors[i]] - c;
531 error_to_beat += (int)w * (delta * delta);
533 if (c == min_color[comp_index])
534 min_color_weight += w;
535 if (c == max_color[comp_index])
536 max_color_weight += w;
// error_to_beat divided by each extreme's weight, rounded up; compared below against the
// squared distance to that extreme to prune groups of p values.
542 VOGL_ASSERT((min_color_weight > 0) && (max_color_weight > 0));
543 const uint error_to_beat_div_min_color_weight = min_color_weight ? ((error_to_beat + min_color_weight - 1) / min_color_weight) : 0;
544 const uint error_to_beat_div_max_color_weight = max_color_weight ? ((error_to_beat + max_color_weight - 1) / max_color_weight) : 0;
// m is the largest endpoint value for this component (63 for component 1, 31 otherwise).
// The p range is split into 8 groups of (1 << m_shift) consecutive values.
546 const uint m = (comp_index == 1) ? 63 : 31;
547 const uint m_shift = (comp_index == 1) ? 3 : 2;
549 for (uint o = 0; o <= m; o++)
// Expand the 5- or 6-bit endpoint value to 8 bits by replicating its top bits.
553 tl[0] = (comp_index == 1) ? ((o << 2) | (o >> 4)) : ((o << 3) | (o >> 2));
555 for (uint h = 0; h < 8; h++)
// [pl, ph] is the range of p values in group h.
557 const uint pl = h << m_shift;
558 const uint ph = ((h + 1) << m_shift) - 1;
// Lowest and highest 8-bit value reachable with endpoint o and any p in this group.
560 uint tl_l = (comp_index == 1) ? ((pl << 2) | (pl >> 4)) : ((pl << 3) | (pl >> 2));
561 uint tl_h = (comp_index == 1) ? ((ph << 2) | (ph >> 4)) : ((ph << 3) | (ph >> 2));
563 tl_l = math::minimum(tl_l, tl[0]);
564 tl_h = math::maximum(tl_h, tl[0]);
566 uint c_l = min_color[comp_index];
567 uint c_h = max_color[comp_index];
// Bound checks against the two extremes (the conditions guarding these two tests and the
// statements taken when a bound is exceeded are not visible in this extract).
571 uint min_possible_error = math::square<int>(tl_l - c_l);
572 if (min_possible_error > error_to_beat_div_min_color_weight)
577 uint min_possible_error = math::square<int>(c_h - tl_h);
578 if (min_possible_error > error_to_beat_div_max_color_weight)
582 for (uint p = pl; p <= ph; p++)
584 tl[1] = (comp_index == 1) ? ((p << 2) | (p >> 4)) : ((p << 3) | (p >> 2));
586 tl[2] = (tl[0] * 2 + tl[1]) / 3;
587 tl[3] = (tl[0] + tl[1] * 2) / 3;
// 1D error of the trial endpoints for this component with the current selectors.
589 uint trial_error = 0;
590 for (uint i = 0; i < m_unique_colors.size(); i++)
592 int delta = tl[m_best_solution.m_selectors[i]] - m_unique_colors[i].m_color[comp_index];
593 trial_error += m_unique_colors[i].m_weight * (delta * delta);
594 if (trial_error >= error_to_beat)
598 //VOGL_ASSERT(trial_error >= min_possible_error);
// Promising in 1D: substitute o/p into the current endpoints and evaluate the full solution.
600 if (trial_error < error_to_beat)
602 color_quad_u8 l(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false));
603 color_quad_u8 h(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false));
604 l[comp_index] = static_cast<uint8>(o);
605 h[comp_index] = static_cast<uint8>(p);
607 bool better = evaluate_solution(
608 dxt1_solution_coordinates(dxt1_block::pack_color(l, false), dxt1_block::pack_color(h, false)),
609 true, &m_best_solution);
610 VOGL_NOTE_UNUSED(better);
// NOTE(review): orig_l, orig_h and orig_error are only declared in commented-out lines above,
// so this printf is presumably inside a disabled preprocessor block not visible here -- confirm.
615 printf("comp: %u, orig: %u %u, new: %u %u, orig_error: %u, new_error: %u\n", comp_index,
616 orig_l[comp_index], orig_h[comp_index],
617 l[comp_index], h[comp_index],
618 orig_error, m_best_solution.m_error);
620 if (!m_best_solution.m_error)
// Re-accumulate the component error using the (possibly updated) best solution's selectors.
624 for (uint i = 0; i < m_unique_colors.size(); i++)
626 int delta = tl[m_best_solution.m_selectors[i]] - m_unique_colors[i].m_color[comp_index];
627 error_to_beat += m_unique_colors[i].m_weight * (delta * delta);
633 } // if (trial_error < error_to_beat)
635 } // for (uint p = 0; p <= m; p++)
638 } // for (uint o = 0; o <= m; o++)
643 // Voxel adjacency delta coordinates.
644 static const struct adjacent_coords
677 // Attempt to refine current solution's endpoints given the current selectors using least squares.
// refinement_level selects how much is explored around the least squares result:
//   0      - evaluate the least squares endpoints only.
//   1      - evaluate every +/-1 lattice neighbor of one endpoint at a time.
//   others - evaluate every +/-1 neighbor of the first endpoint combined with +/-1 steps of the second.
// Results of evaluate_solution() are OR-ed into `improved`.
678 bool dxt1_endpoint_optimizer::refine_solution(int refinement_level)
680 VOGL_ASSERT(m_best_solution.m_valid);
// Per-selector lookup tables, indexed by (selector ^ 1).
682 static const int w1Tab[4] = { 3, 0, 2, 1 };
684 static const int prods_0[4] = { 0x00, 0x00, 0x02, 0x02 };
685 static const int prods_1[4] = { 0x00, 0x09, 0x01, 0x04 };
686 static const int prods_2[4] = { 0x09, 0x00, 0x04, 0x01 };
691 double At1_r, At1_g, At1_b;
692 double At2_r, At2_g, At2_b;
694 At1_r = At1_g = At1_b = 0;
695 At2_r = At2_g = At2_b = 0;
// Accumulate the weighted sums over all unique colors.
696 for (uint i = 0; i < m_unique_colors.size(); i++)
698 const color_quad_u8 &c = m_unique_colors[i].m_color;
699 const double weight = m_unique_colors[i].m_weight;
701 double r = c.r * weight;
702 double g = c.g * weight;
703 double b = c.b * weight;
704 int step = m_best_solution.m_selectors[i] ^ 1;
706 int w1 = w1Tab[step];
708 akku_0 += prods_0[step] * weight;
709 akku_1 += prods_1[step] * weight;
710 akku_2 += prods_2[step] * weight;
719 At2_r = 3 * At2_r - At1_r;
720 At2_g = 3 * At2_g - At1_g;
721 At2_b = 3 * At2_b - At1_b;
// t is the determinant of the 2x2 system; a zero diagonal term or a near-zero determinant is
// tested for here (the statement taken in that case is not visible in this extract).
727 double t = xx * yy - xy * xy;
728 if (!yy || !xx || (fabs(t) < .0000125f))
// Scale factors that map the solved values into the 5-bit (r, b) and 6-bit (g) endpoint ranges.
731 double frb = (3.0f * 31.0f / 255.0f) / t;
732 double fg = frb * (63.0f / 31.0f);
734 bool improved = false;
736 if (refinement_level == 0)
// Solve, round, clamp and pack both endpoints directly into 5:6:5.
739 max16 = math::clamp<int>(static_cast<int>((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31) << 11;
740 max16 |= math::clamp<int>(static_cast<int>((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63) << 5;
741 max16 |= math::clamp<int>(static_cast<int>((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31) << 0;
744 min16 = math::clamp<int>(static_cast<int>((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31) << 11;
745 min16 |= math::clamp<int>(static_cast<int>((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63) << 5;
746 min16 |= math::clamp<int>(static_cast<int>((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31) << 0;
748 dxt1_solution_coordinates nc((uint16)min16, (uint16)max16);
750 improved |= evaluate_solution(nc, true, &m_best_solution, false);
752 else if (refinement_level == 1)
754 // Try exploring the local lattice neighbors of the least squares optimized result.
// e[0]/e[1] hold the two solved endpoints as separate clamped components.
758 e[0][0] = (uint8)math::clamp<int>(static_cast<int>((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31);
759 e[0][1] = (uint8)math::clamp<int>(static_cast<int>((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63);
760 e[0][2] = (uint8)math::clamp<int>(static_cast<int>((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31);
763 e[1][0] = (uint8)math::clamp<int>(static_cast<int>((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31);
764 e[1][1] = (uint8)math::clamp<int>(static_cast<int>((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63);
765 e[1][2] = (uint8)math::clamp<int>(static_cast<int>((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31);
// i selects which endpoint is perturbed; (rr, gr, br) is the per-component offset in [-1, 1].
767 for (uint i = 0; i < 2; i++)
769 for (int rr = -1; rr <= 1; rr++)
771 for (int gr = -1; gr <= 1; gr++)
773 for (int br = -1; br <= 1; br++)
775 dxt1_solution_coordinates nc;
781 c[i][0] = (uint8)math::clamp<int>(c[i][0] + rr, 0, 31);
782 c[i][1] = (uint8)math::clamp<int>(c[i][1] + gr, 0, 63);
783 c[i][2] = (uint8)math::clamp<int>(c[i][2] + br, 0, 31);
785 nc.m_low_color = dxt1_block::pack_color(c[0], false);
786 nc.m_high_color = dxt1_block::pack_color(c[1], false);
// Only evaluate candidates that differ from the current best endpoints.
790 if ((nc.m_low_color != m_best_solution.m_coords.m_low_color) || (nc.m_high_color != m_best_solution.m_coords.m_high_color))
792 improved |= evaluate_solution(nc, true, &m_best_solution, false);
801 // Try even harder to explore the local lattice neighbors of the least squares optimized result.
804 e[0][0] = (uint8)math::clamp<int>(static_cast<int>((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31);
805 e[0][1] = (uint8)math::clamp<int>(static_cast<int>((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63);
806 e[0][2] = (uint8)math::clamp<int>(static_cast<int>((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31);
809 e[1][0] = (uint8)math::clamp<int>(static_cast<int>((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31);
810 e[1][1] = (uint8)math::clamp<int>(static_cast<int>((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63);
811 e[1][2] = (uint8)math::clamp<int>(static_cast<int>((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31);
// Outer three loops offset the first endpoint; inner three loops offset the second.
813 for (int orr = -1; orr <= 1; orr++)
815 for (int ogr = -1; ogr <= 1; ogr++)
817 for (int obr = -1; obr <= 1; obr++)
819 dxt1_solution_coordinates nc;
825 c[0][0] = (uint8)math::clamp<int>(c[0][0] + orr, 0, 31);
826 c[0][1] = (uint8)math::clamp<int>(c[0][1] + ogr, 0, 63);
827 c[0][2] = (uint8)math::clamp<int>(c[0][2] + obr, 0, 31);
829 for (int rr = -1; rr <= 1; rr++)
831 for (int gr = -1; gr <= 1; gr++)
833 for (int br = -1; br <= 1; br++)
// NOTE(review): c[1] is adjusted in place on every inner iteration. Unless it is reset to
// e[1] on a line not visible in this extract, the offsets accumulate across iterations
// instead of each being applied to e[1] -- confirm whether that is intended.
835 c[1][0] = (uint8)math::clamp<int>(c[1][0] + rr, 0, 31);
836 c[1][1] = (uint8)math::clamp<int>(c[1][1] + gr, 0, 63);
837 c[1][2] = (uint8)math::clamp<int>(c[1][2] + br, 0, 31);
839 nc.m_low_color = dxt1_block::pack_color(c[0], false);
840 nc.m_high_color = dxt1_block::pack_color(c[1], false);
843 improved |= evaluate_solution(nc, true, &m_best_solution, false);
855 //-----------------------------------------------------------------------------------------------------------------------------------------
857 // Primary endpoint optimization entrypoint.
858 bool dxt1_endpoint_optimizer::optimize_endpoints(vec3F &low_color, vec3F &high_color)
860 vec3F orig_low_color(low_color);
861 vec3F orig_high_color(high_color);
863 m_trial_solution.clear();
866 const int16 *pProbe_table = g_uber_probe_table;
868 float dist_per_trial = .015625f;
870 // How many probes, and the distance between each probe depends on the quality level.
871 switch (m_pParams->m_quality)
873 case cCRNDXTQualitySuperFast:
874 pProbe_table = g_fast_probe_table;
875 probe_range = cFastProbeTableSize;
876 dist_per_trial = .027063293f;
879 case cCRNDXTQualityFast:
880 pProbe_table = g_fast_probe_table;
881 probe_range = cFastProbeTableSize;
882 dist_per_trial = .027063293f;
885 case cCRNDXTQualityNormal:
886 pProbe_table = g_normal_probe_table;
887 probe_range = cNormalProbeTableSize;
888 dist_per_trial = .027063293f;
891 case cCRNDXTQualityBetter:
892 pProbe_table = g_better_probe_table;
893 probe_range = cBetterProbeTableSize;
897 pProbe_table = g_uber_probe_table;
898 probe_range = cUberProbeTableSize;
903 m_solutions_tried.reset();
905 if (m_pParams->m_endpoint_caching)
907 // Try the previous X winning endpoints. This may not give us optimal results, but it may increase the probability of early outs while evaluating potential solutions.
908 const uint num_prev_results = math::minimum<uint>(cMaxPrevResults, m_num_prev_results);
909 for (uint i = 0; i < num_prev_results; i++)
911 const dxt1_solution_coordinates &coords = m_prev_results[i];
913 solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U)));
914 if (!solution_res.second)
917 evaluate_solution(coords, true, &m_best_solution);
920 if (!m_best_solution.m_error)
922 // Got lucky - one of the previous endpoints is optimal.
923 return_solution(*m_pResults, m_best_solution);
928 if (m_pParams->m_quality >= cCRNDXTQualityBetter)
930 //evaluate_solution(dxt1_solution_coordinates(low_color, high_color), true, &m_best_solution);
933 try_median4(orig_low_color, orig_high_color);
936 uint probe_low[cUberProbeTableSize * 2 + 1];
937 uint probe_high[cUberProbeTableSize * 2 + 1];
939 vec3F scaled_principle_axis[2];
941 scaled_principle_axis[1] = m_principle_axis * dist_per_trial;
942 scaled_principle_axis[1][0] *= 31.0f;
943 scaled_principle_axis[1][1] *= 63.0f;
944 scaled_principle_axis[1][2] *= 31.0f;
946 scaled_principle_axis[0] = -scaled_principle_axis[1];
948 //vec3F initial_ofs(scaled_principle_axis * (float)-probe_range);
949 //initial_ofs[0] += .5f;
950 //initial_ofs[1] += .5f;
951 //initial_ofs[2] += .5f;
953 low_color[0] = math::clamp(low_color[0] * 31.0f, 0.0f, 31.0f);
954 low_color[1] = math::clamp(low_color[1] * 63.0f, 0.0f, 63.0f);
955 low_color[2] = math::clamp(low_color[2] * 31.0f, 0.0f, 31.0f);
957 high_color[0] = math::clamp(high_color[0] * 31.0f, 0.0f, 31.0f);
958 high_color[1] = math::clamp(high_color[1] * 63.0f, 0.0f, 63.0f);
959 high_color[2] = math::clamp(high_color[2] * 31.0f, 0.0f, 31.0f);
961 for (uint pass = 0; pass < num_passes; pass++)
963 // Now separately sweep or probe the low and high colors along the principal axis, both positively and negatively.
964 // This results in two arrays of candidate low/high endpoints. Every unique combination of candidate endpoints is tried as a potential solution.
965 // In higher quality modes, the various nearby lattice neighbors of each candidate endpoint are also explored, which allows the current solution to "wobble" or "migrate"
966 // to areas with lower error.
967 // This entire process can be repeated up to X times (depending on the quality level) until a local minimum is established.
968 // This method is very stable and scalable. It could be implemented more elegantly, but I'm now very cautious of touching this code.
971 low_color = unpack_to_vec3F_raw(m_best_solution.m_coords.m_low_color);
972 high_color = unpack_to_vec3F_raw(m_best_solution.m_coords.m_high_color);
975 const uint64_t prev_best_error = m_best_solution.m_error;
976 if (!prev_best_error)
979 // Sweep low endpoint along principal axis, record positions
980 int prev_packed_color[2] = { -1, -1 };
981 uint num_low_trials = 0;
982 vec3F initial_probe_low_color(low_color + vec3F(.5f));
983 for (uint i = 0; i < probe_range; i++)
985 const int ls = i ? 0 : 1;
986 int x = pProbe_table[i];
988 for (int s = ls; s < 2; s++)
990 vec3F probe_low_color(initial_probe_low_color + scaled_principle_axis[s] * (float)x);
992 int r = math::clamp((int)floor(probe_low_color[0]), 0, 31);
993 int g = math::clamp((int)floor(probe_low_color[1]), 0, 63);
994 int b = math::clamp((int)floor(probe_low_color[2]), 0, 31);
996 int packed_color = b | (g << 5U) | (r << 11U);
997 if (packed_color != prev_packed_color[s])
999 probe_low[num_low_trials++] = packed_color;
1000 prev_packed_color[s] = packed_color;
1005 prev_packed_color[0] = -1;
1006 prev_packed_color[1] = -1;
1008 // Sweep high endpoint along principal axis, record positions
1009 uint num_high_trials = 0;
1010 vec3F initial_probe_high_color(high_color + vec3F(.5f));
1011 for (uint i = 0; i < probe_range; i++)
1013 const int ls = i ? 0 : 1;
1014 int x = pProbe_table[i];
1016 for (int s = ls; s < 2; s++)
1018 vec3F probe_high_color(initial_probe_high_color + scaled_principle_axis[s] * (float)x);
1020 int r = math::clamp((int)floor(probe_high_color[0]), 0, 31);
1021 int g = math::clamp((int)floor(probe_high_color[1]), 0, 63);
1022 int b = math::clamp((int)floor(probe_high_color[2]), 0, 31);
1024 int packed_color = b | (g << 5U) | (r << 11U);
1025 if (packed_color != prev_packed_color[s])
1027 probe_high[num_high_trials++] = packed_color;
1028 prev_packed_color[s] = packed_color;
1033 // Now try all unique combinations.
1034 for (uint i = 0; i < num_low_trials; i++)
1036 for (uint j = 0; j < num_high_trials; j++)
1038 dxt1_solution_coordinates coords((uint16)probe_low[i], (uint16)probe_high[j]);
1039 coords.canonicalize();
1041 solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U)));
1042 if (!solution_res.second)
1045 evaluate_solution(coords, true, &m_best_solution);
1049 if (m_pParams->m_quality >= cCRNDXTQualityNormal)
1051 // Generate new candidates by exploring the low color's direct lattice neighbors
1052 color_quad_u8 lc(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false));
1054 for (int i = 0; i < 26; i++)
1056 int r = lc.r + g_adjacency[i].x;
1057 if ((r < 0) || (r > 31))
1060 int g = lc.g + g_adjacency[i].y;
1061 if ((g < 0) || (g > 63))
1064 int b = lc.b + g_adjacency[i].z;
1065 if ((b < 0) || (b > 31))
1068 dxt1_solution_coordinates coords(dxt1_block::pack_color(r, g, b, false), m_best_solution.m_coords.m_high_color);
1069 coords.canonicalize();
1071 solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U)));
1072 if (solution_res.second)
1073 evaluate_solution(coords, true, &m_best_solution);
1076 if (m_pParams->m_quality == cCRNDXTQualityUber)
1078 // Generate new candidates by exploring the low color's direct lattice neighbors - this time, explore much further separately on each axis.
1079 lc = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false);
1081 for (int a = 0; a < 3; a++)
1083 int limit = (a == 1) ? 63 : 31;
1085 for (int s = -2; s <= 2; s += 4)
1087 color_quad_u8 c(lc);
1089 if ((q < 0) || (q > limit))
1094 dxt1_solution_coordinates coords(dxt1_block::pack_color(c, false), m_best_solution.m_coords.m_high_color);
1095 coords.canonicalize();
1097 solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U)));
1098 if (solution_res.second)
1099 evaluate_solution(coords, true, &m_best_solution);
1104 // Generate new candidates by exploring the high color's direct lattice neighbors
1105 color_quad_u8 hc(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false));
1107 for (int i = 0; i < 26; i++)
1109 int r = hc.r + g_adjacency[i].x;
1110 if ((r < 0) || (r > 31))
1113 int g = hc.g + g_adjacency[i].y;
1114 if ((g < 0) || (g > 63))
1117 int b = hc.b + g_adjacency[i].z;
1118 if ((b < 0) || (b > 31))
1121 dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(r, g, b, false));
1122 coords.canonicalize();
1124 solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U)));
1125 if (solution_res.second)
1126 evaluate_solution(coords, true, &m_best_solution);
1129 if (m_pParams->m_quality == cCRNDXTQualityUber)
1131 // Generate new candidates by exploring the high color's direct lattice neighbors - this time, explore much further separately on each axis.
1132 hc = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false);
1134 for (int a = 0; a < 3; a++)
1136 int limit = (a == 1) ? 63 : 31;
1138 for (int s = -2; s <= 2; s += 4)
1140 color_quad_u8 c(hc);
1142 if ((q < 0) || (q > limit))
1147 dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(c, false));
1148 coords.canonicalize();
1150 solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U)));
1151 if (solution_res.second)
1152 evaluate_solution(coords, true, &m_best_solution);
1158 if ((!m_best_solution.m_error) || ((pass) && (m_best_solution.m_error == prev_best_error)))
1161 if (m_pParams->m_quality >= cCRNDXTQualityUber)
1163 // Attempt to refine current solution's endpoints given the current selectors using least squares.
1168 if (m_pParams->m_quality >= cCRNDXTQualityNormal)
1170 if ((m_best_solution.m_error) && (!m_pParams->m_pixels_have_alpha))
1172 bool choose_solid_block = false;
1173 if (m_best_solution.are_selectors_all_equal())
1175 // All selectors equal - try various solid-block optimizations
1176 choose_solid_block = try_average_block_as_solid();
1179 if ((!choose_solid_block) && (m_pParams->m_quality == cCRNDXTQualityUber))
1181 // Per-component 1D endpoint optimization.
1182 optimize_endpoint_comps();
1186 if (m_pParams->m_quality == cCRNDXTQualityUber)
1188 if (m_best_solution.m_error)
1190 // The pixels may have already been DXTc compressed by another compressor.
1191 // It's usually possible to recover the endpoints used to previously pack the block.
1192 try_combinatorial_encoding();
1197 return_solution(*m_pResults, m_best_solution);
1199 if (m_pParams->m_endpoint_caching)
1201 // Remember result for later reuse.
1202 m_prev_results[m_num_prev_results & (cMaxPrevResults - 1)] = m_best_solution.m_coords;
1203 m_num_prev_results++;
// Fixed-point multiply helper: forms t = a * b + 128 and returns (t + (t >> 8)) >> 8.
// This is the common shift-only approximation of (a * b) / 255 rounded to nearest;
// handle_multicolor_block() passes b = 0x55 to obtain roughly a / 3.
1209 static inline int mul_8bit(int a, int b)
// The +128 bias makes the final >> 8 round instead of truncate.
1211 int t = a * b + 128;
1212 return (t + (t >> 8)) >> 8;
// Handles a block containing more than one distinct color: derives an initial low/high
// endpoint pair by projecting the block's normalized colors onto a principle axis, pulls
// endpoints that fall outside the unit cube back in, and hands the pair to optimize_endpoints().
// NOTE(review): several short lines (braces, guards, and the declarations of ave_l, l, h,
// coord and t) are not visible in this listing, so the exact conditions guarding each section
// below must be confirmed against the full source.
1215 bool dxt1_endpoint_optimizer::handle_multicolor_block()
1217 uint num_passes = 1;
// Component weights start out uniform (1, 1, 1).
1218 vec3F perceptual_weights(1.0f);
1222 // Compute RGB weighting for use in perceptual mode.
1223 // The more saturated the block, the more the weights deviate from (1,1,1).
1224 float ave_redness = 0;
1225 float ave_blueness = 0;
// Accumulate weighted red/blue ratios over every unique color.
1228 for (uint i = 0; i < m_unique_colors.size(); i++)
1230 const color_quad_u8 &c = m_unique_colors[i].m_color;
1231 const float weight = (float)m_unique_colors[i].m_weight;
// l approximates the mean of the three components (see mul_8bit).
1233 int l = mul_8bit(c.r + c.g + c.b, 0x55); // /3
// Clamp to at least 1 so the division below cannot divide by zero.
1235 l = math::maximum(1, l);
1237 float scale = weight / static_cast<float>(l);
1239 ave_redness += scale * c.r;
1240 ave_blueness += scale * c.b;
// Turn the accumulated sums into weighted averages.
1243 ave_redness /= m_total_unique_color_weight;
1244 ave_blueness /= m_total_unique_color_weight;
1245 ave_l /= m_total_unique_color_weight;
// Rescale the average brightness by 16/255 and cap it at 1.
1247 ave_l = math::minimum(1.0f, ave_l * 16.0f / 255.0f);
1249 //float r = ave_l * powf(math::saturate(ave_redness / 3.0f), 5.0f);
1250 //float b = ave_l * powf(math::saturate(ave_blueness / 3.0f), 5.0f);
// p combines brightness with the larger of the red/blue ratios (divided by 3, saturated, raised to 2.75).
1252 float p = ave_l * powf(math::saturate(math::maximum(ave_redness, ave_blueness) * 1.0f / 3.0f), 2.75f);
// Blend between the fixed weights (.212, .72, .072) and the current weights using p as the factor.
1259 perceptual_weights = vec3F::lerp(vec3F(.212f, .72f, .072f), perceptual_weights, p);
// Each pass recomputes the working vectors and the principle axis with the current weights.
1263 for (uint pass_index = 0; pass_index < num_passes; pass_index++)
1265 compute_vectors(perceptual_weights);
// NOTE(review): the third argument is presumably a fallback/initial axis for compute_pca - confirm.
1267 compute_pca(m_principle_axis, m_norm_unique_colors_weighted, vec3F(.2837149f, 0.9540631f, 0.096277453f));
// Matrix form: multiply the axis by the inverse-transpose of the weight scale matrix.
// NOTE(review): the lines between this and the per-component division below are not visible;
// confirm whether both forms execute or one is conditionally compiled out.
1270 matrix44F m(matrix44F::make_scale_matrix(perceptual_weights[0], perceptual_weights[1], perceptual_weights[2]));
1271 matrix44F im(m.get_inverse());
1272 im.transpose_in_place();
1273 m_principle_axis = m_principle_axis * im;
1275 // Purposely scale the components of the principle axis by the perceptual weighting.
1276 // There's probably a cleaner way to go about this, but it works (more competitive in perceptual mode against nvdxt.exe or ATI_Compress).
1277 m_principle_axis[0] /= perceptual_weights[0];
1278 m_principle_axis[1] /= perceptual_weights[1];
1279 m_principle_axis[2] /= perceptual_weights[2];
1281 m_principle_axis.normalize_in_place();
1285 // Check for obviously wild principle axes and try to compensate by backing off the component weightings.
// An axis dominated by red (index 0) or blue (index 2) swaps in a flatter set of weights.
1286 if (fabs(m_principle_axis[0]) >= .795f)
1287 perceptual_weights.set(.424f, .6f, .072f);
1288 else if (fabs(m_principle_axis[2]) >= .795f)
1289 perceptual_weights.set(.212f, .6f, .212f);
1295 // Find bounds of projection onto (potentially skewed) principle axis.
// l and h track the minimum and maximum projection (their initial values are not visible here).
1299 for (uint i = 0; i < m_norm_unique_colors.size(); i++)
1301 float d = m_norm_unique_colors[i].dot(m_principle_axis);
1302 l = math::minimum(l, d);
1303 h = math::maximum(h, d);
// Convert the projection extremes back into absolute normalized colors.
1306 vec3F low_color(m_mean_norm_color + l * m_principle_axis);
1307 vec3F high_color(m_mean_norm_color + h * m_principle_axis);
1309 if (!low_color.is_within_bounds(0.0f, 1.0f))
1311 // Low color is outside the lattice, so bring it back in by casting a ray.
1314 aabb3F bounds(vec3F(0.0f), vec3F(1.0f));
// The ray starts at the out-of-range low color and travels along +axis, towards the high end.
1315 intersection::result res = intersection::ray_aabb(coord, t, ray3F(low_color, m_principle_axis), bounds);
// NOTE(review): the statement run on success is not visible; presumably it stores the hit point.
1316 if (res == intersection::cSuccess)
1320 if (!high_color.is_within_bounds(0.0f, 1.0f))
1322 // High color is outside the lattice, so bring it back in by casting a ray.
1325 aabb3F bounds(vec3F(0.0f), vec3F(1.0f));
// Same idea for the high color, but the ray travels along -axis, towards the low end.
1326 intersection::result res = intersection::ray_aabb(coord, t, ray3F(high_color, -m_principle_axis), bounds);
1327 if (res == intersection::cSuccess)
1331 // Now optimize the endpoints using the projection bounds on the (potentially skewed) principle axis as a starting point.
// NOTE(review): the action taken when optimize_endpoints() fails is not visible here.
1332 if (!optimize_endpoints(low_color, high_color))
// Grayscale-specific pass. compute_internal() calls this only when m_all_pixels_grayscale is
// set and the best solution found so far still has nonzero error.
// NOTE(review): the body is not visible in this listing - confirm what it actually does.
1338 bool dxt1_endpoint_optimizer::handle_grayscale_block()
// Clusters the block's unique colors into four cells, then evaluates every pair of cell
// centers as a candidate endpoint pair.
//   low_color / high_color: initial endpoints, used to seed the four centers when the block
//                           has more than four unique colors.
// The visible code accumulates an `improved` flag from evaluate_solution()/refine_solution();
// NOTE(review): the return statement is not visible - presumably that flag is returned.
1344 // Tries quantizing the block to 4 colors using vanilla LBG. It tries all combinations of the quantized results as potential endpoints.
1345 bool dxt1_endpoint_optimizer::try_median4(const vec3F &low_color, const vec3F &high_color)
// With four or fewer unique colors the centers are the colors themselves (the last one repeats).
1349 if (m_unique_colors.size() <= 4)
1351 for (uint i = 0; i < 4; i++)
1352 means[i] = m_norm_unique_colors[math::minimum<int>(m_norm_unique_colors.size() - 1, i)];
// Otherwise spread the centers evenly along the low->high segment, expressed relative to
// m_mean_norm_color (which is added back when candidates are built below).
1356 means[0] = low_color - m_mean_norm_color;
1357 means[3] = high_color - m_mean_norm_color;
1358 means[1] = vec3F::lerp(means[0], means[3], 1.0f / 3.0f);
1359 means[2] = vec3F::lerp(means[0], means[3], 2.0f / 3.0f);
// Iterative refinement, bounded by cMaxIters.
1363 const uint cMaxIters = 8;
1364 uint reassign_rover = 0;
1365 float prev_total_dist = math::cNearlyInfinite;
1366 for (uint iter = 0; iter < cMaxIters; iter++)
1369 float new_weights[4];
1370 utils::zero_object(new_means);
1371 utils::zero_object(new_weights);
1373 float total_dist = 0;
// Assignment step: each unique color goes to its nearest center (squared distance).
1375 for (uint i = 0; i < m_unique_colors.size(); i++)
1377 const vec3F &v = m_norm_unique_colors[i];
1379 float best_dist = means[0].squared_distance(v);
1382 for (uint j = 1; j < 4; j++)
1384 float dist = means[j].squared_distance(v);
1385 if (dist < best_dist)
1392 total_dist += best_dist;
// Accumulate the weighted sum and total weight for the chosen cell.
1394 new_means[best_index] += v * (float)m_unique_colors[i].m_weight;
1395 new_weights[best_index] += (float)m_unique_colors[i].m_weight;
// Update step: recompute each non-empty cell's center and remember the heaviest cell.
1398 uint highest_index = 0;
1399 float highest_weight = 0;
1400 bool empty_cell = false;
1401 for (uint j = 0; j < 4; j++)
1403 if (new_weights[j] > 0.0f)
1405 means[j] = new_means[j] / new_weights[j];
1406 if (new_weights[j] > highest_weight)
1408 highest_weight = new_weights[j];
// Convergence test on the change in total distance.
// NOTE(review): the guards around the next three statements are not visible; presumably the
// test applies only when no cell is empty and the distance is reset to "infinite" otherwise.
1418 if (fabs(total_dist - prev_total_dist) < .00001f)
1421 prev_total_dist = total_dist;
1424 prev_total_dist = math::cNearlyInfinite;
// Re-seed empty cells (except on the final iteration) from a populated cell plus small random jitter.
1426 if ((empty_cell) && (iter != (cMaxIters - 1)))
1428 const uint ri = (highest_index + reassign_rover) & 3;
1431 for (uint j = 0; j < 4; j++)
1433 if (new_weights[j] == 0.0f)
1435 means[j] = means[ri];
1436 means[j] += vec3F::make_random(rm, -.00196f, .00196f);
1443 bool improved = false;
// Evaluate every unordered pair (i < j) of centers as endpoints.
1445 for (uint i = 0; i < 3; i++)
1447 for (uint j = i + 1; j < 4; j++)
1449 const vec3F v0(means[i] + m_mean_norm_color);
1450 const vec3F v1(means[j] + m_mean_norm_color);
// Round each normalized component to the 5/6/5 lattice (31/63/31 steps).
1452 dxt1_solution_coordinates sc(
1453 color_quad_u8((int)floor(.5f + v0[0] * 31.0f), (int)floor(.5f + v0[1] * 63.0f), (int)floor(.5f + v0[2] * 31.0f), 255),
1454 color_quad_u8((int)floor(.5f + v1[0] * 31.0f), (int)floor(.5f + v1[1] * 63.0f), (int)floor(.5f + v1[2] * 31.0f), 255), false);
1458 improved |= evaluate_solution(sc, true, &m_best_solution, false);
// Final refinement; the argument is 1 at uber quality and 0 otherwise.
1462 improved |= refine_solution((m_pParams->m_quality == cCRNDXTQualityUber) ? 1 : 0);
// Front end for candidate evaluation.
//   coords:             packed low/high endpoint candidate.
//   early_out:          forwarded to the evaluators (its parameter line is not visible in this
//                       listing, but the body passes it through).
//   pBest_solution:     optional; overwritten when the candidate has lower error.
//   alternate_rounding: forwarded to the evaluators, which add it to the interpolation sums.
// When no solution list was requested (m_pSolutions is null) or alternate rounding is in use,
// the call is dispatched directly to the uber or fast evaluator depending on quality. Otherwise
// the candidate is always evaluated fully and appended to *m_pSolutions.
1467 // Given candidate low/high endpoints, find the optimal selectors for 3 and 4 color blocks, compute the resulting error,
1468 // and use the candidate if it results in less error than the best found result so far.
1469 bool dxt1_endpoint_optimizer::evaluate_solution(
1470 const dxt1_solution_coordinates &coords,
1472 potential_solution *pBest_solution,
1473 bool alternate_rounding)
1477 if ((!m_pSolutions) || (alternate_rounding))
1479 if (m_pParams->m_quality >= cCRNDXTQualityBetter)
1480 return evaluate_solution_uber(m_trial_solution, coords, early_out, pBest_solution, alternate_rounding);
1482 return evaluate_solution_fast(m_trial_solution, coords, early_out, pBest_solution, alternate_rounding);
// Recording path: no early out and no best-solution update inside the evaluator, so the
// recorded entry always holds the candidate's full error.
1485 evaluate_solution_uber(m_trial_solution, coords, false, NULL, alternate_rounding);
1487 VOGL_ASSERT(m_trial_solution.m_valid);
1489 // Caller has requested all considered candidate solutions for later analysis.
1490 m_pSolutions->resize(m_pSolutions->size() + 1);
1491 solution &new_solution = m_pSolutions->back();
// One selector per input pixel; the results struct points into the solution's own storage.
1492 new_solution.m_selectors.resize(m_pParams->m_num_pixels);
1493 new_solution.m_results.m_pSelectors = &new_solution.m_selectors[0];
1495 return_solution(new_solution.m_results, m_trial_solution);
// NOTE(review): the comparison uses m_best_solution.m_error rather than pBest_solution->m_error;
// the two agree only when the caller passes &m_best_solution - confirm this is intended.
1497 if ((pBest_solution) && (m_trial_solution.m_error < m_best_solution.m_error))
1499 *pBest_solution = m_trial_solution;
// Returns the error between two colors under the optimizer's current error metric.
// Visible branches, in order:
//   - color::color_distance(true, ...)   (NOTE(review): guard not visible; presumably `if (perceptual)`)
//   - grayscale sampling: squared difference of the RGB_to_Y values
//   - user color weights: per-component squared differences scaled by m_color_weights
//   - fallback: color::color_distance(false, ...)
// The `+ da * da` / `+ da` returns include an alpha term; the guards choosing between the
// alpha and non-alpha returns are not visible here (presumably `if (alpha)`).
1506 inline uint dxt1_endpoint_optimizer::color_distance(bool perceptual, const color_quad_u8 &e1, const color_quad_u8 &e2, bool alpha)
1510 return color::color_distance(true, e1, e2, alpha);
1512 else if (m_pParams->m_grayscale_sampling)
1514 // Computes error assuming shader will be converting the result to grayscale.
1515 int y0 = color::RGB_to_Y(e1);
1516 int y1 = color::RGB_to_Y(e2);
1520 int da = (int)e1[3] - (int)e2[3];
1521 return yd * yd + da * da;
1528 else if (m_has_color_weighting)
1530 // Compute error using user provided color component weights.
1531 int dr = (int)e1[0] - (int)e2[0];
1532 int dg = (int)e1[1] - (int)e2[1];
1533 int db = (int)e1[2] - (int)e2[2];
1535 dr = (dr * dr) * m_pParams->m_color_weights[0];
1536 dg = (dg * dg) * m_pParams->m_color_weights[1];
1537 db = (db * db) * m_pParams->m_color_weights[2];
1541 int da = (int)e1[3] - (int)e2[3];
// The alpha difference is weighted by the sum of the three color weights.
1542 da = (da * da) * (m_pParams->m_color_weights[0] + m_pParams->m_color_weights[1] + m_pParams->m_color_weights[2]);
1543 return dr + dg + db + da;
1547 return dr + dg + db;
1552 return color::color_distance(false, e1, e2, alpha);
// Exhaustive evaluator: for each permitted block type it tests every palette entry against
// every unique color, keeps the nearest one as the selector, and sums the weighted error.
//   solution:           scratch solution that receives coords, selectors, error and flags.
//   coords:             packed low/high endpoints to evaluate.
//   early_out:          when set (and pBest_solution is given) the running error is compared
//                       against the best error so far instead of cUINT64_MAX. (Its parameter
//                       line is not visible in this listing.)
//   pBest_solution:     optional; replaced when `solution` ends up with lower error.
//   alternate_rounding: added to each interpolation sum before the divide/shift.
// NOTE(review): the guards that choose between the color_distance(true, ...) and
// color_distance(false, ...) loops, and between the two block types, are not visible here,
// nor are the statements executed when the running error reaches solution.m_error.
1556 bool dxt1_endpoint_optimizer::evaluate_solution_uber(
1557 potential_solution &solution,
1558 const dxt1_solution_coordinates &coords,
1560 potential_solution *pBest_solution,
1561 bool alternate_rounding)
1563 solution.m_coords = coords;
1564 solution.m_selectors.resize(m_unique_colors.size());
// Error bound to beat: the current best when early out is requested, otherwise "infinite".
1566 if ((pBest_solution) && (early_out))
1567 solution.m_error = pBest_solution->m_error;
1569 solution.m_error = cUINT64_MAX;
1571 solution.m_alpha_block = false;
1572 solution.m_valid = false;
// Block type 0 marks a non-alpha block and type 1 an alpha block (see m_alpha_block below).
1574 uint first_block_type = 0;
1575 uint last_block_type = 1;
// Alpha input or forced alpha blocks: only type 1. Alpha blocks disallowed: only type 0.
1577 if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks))
1578 first_block_type = 1;
1579 else if (!m_pParams->m_use_alpha_blocks)
1580 last_block_type = 0;
1582 m_trial_selectors.resize(m_unique_colors.size());
1584 color_quad_u8 colors[cDXT1SelectorValues];
1586 colors[0] = dxt1_block::unpack_color(coords.m_low_color, true);
1587 colors[1] = dxt1_block::unpack_color(coords.m_high_color, true);
1589 for (uint block_type = first_block_type; block_type <= last_block_type; block_type++)
1591 uint64_t trial_error = 0;
// Four-entry palette: colors[2] and colors[3] sit at 1/3 and 2/3 between the endpoints.
// The alpha written here is 0 (evaluate_solution_fast writes 255U); every distance call
// below passes alpha = false.
1595 colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 0);
1596 colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 0);
// Four-entry palette, color_distance(true, ...) variant.
1600 for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--)
1602 const color_quad_u8 &c = m_unique_colors[unique_color_index].m_color;
1604 uint best_error = color_distance(true, c, colors[0], false);
1605 uint best_color_index = 0;
1607 uint err = color_distance(true, c, colors[1], false);
1608 if (err < best_error)
1611 best_color_index = 1;
1614 err = color_distance(true, c, colors[2], false);
1615 if (err < best_error)
1618 best_color_index = 2;
1621 err = color_distance(true, c, colors[3], false);
1622 if (err < best_error)
1625 best_color_index = 3;
// Weight the per-color error by how many pixels share this color.
1628 trial_error += best_error * m_unique_colors[unique_color_index].m_weight;
1629 if (trial_error >= solution.m_error)
1632 m_trial_selectors[unique_color_index] = static_cast<uint8>(best_color_index);
// Four-entry palette, color_distance(false, ...) variant.
1637 for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--)
1639 const color_quad_u8 &c = m_unique_colors[unique_color_index].m_color;
1641 uint best_error = color_distance(false, c, colors[0], false);
1642 uint best_color_index = 0;
1644 uint err = color_distance(false, c, colors[1], false);
1645 if (err < best_error)
1648 best_color_index = 1;
1651 err = color_distance(false, c, colors[2], false);
1652 if (err < best_error)
1655 best_color_index = 2;
1658 err = color_distance(false, c, colors[3], false);
1659 if (err < best_error)
1662 best_color_index = 3;
1665 trial_error += best_error * m_unique_colors[unique_color_index].m_weight;
1666 if (trial_error >= solution.m_error)
1669 m_trial_selectors[unique_color_index] = static_cast<uint8>(best_color_index);
// Three-entry palette: colors[2] is the midpoint of the endpoints; only indices 0..2 are tested.
1675 colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U);
// Three-entry palette, color_distance(true, ...) variant.
1679 for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--)
1681 const color_quad_u8 &c = m_unique_colors[unique_color_index].m_color;
1683 uint best_error = color_distance(true, c, colors[0], false);
1684 uint best_color_index = 0;
1686 uint err = color_distance(true, c, colors[1], false);
1687 if (err < best_error)
1690 best_color_index = 1;
1693 err = color_distance(true, c, colors[2], false);
1694 if (err < best_error)
1697 best_color_index = 2;
1700 trial_error += best_error * m_unique_colors[unique_color_index].m_weight;
1701 if (trial_error >= solution.m_error)
1704 m_trial_selectors[unique_color_index] = static_cast<uint8>(best_color_index);
// Three-entry palette, color_distance(false, ...) variant.
1709 for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--)
1711 const color_quad_u8 &c = m_unique_colors[unique_color_index].m_color;
1713 uint best_error = color_distance(false, c, colors[0], false);
1714 uint best_color_index = 0;
1716 uint err = color_distance(false, c, colors[1], false);
1717 if (err < best_error)
1720 best_color_index = 1;
1723 err = color_distance(false, c, colors[2], false);
1724 if (err < best_error)
1727 best_color_index = 2;
1730 trial_error += best_error * m_unique_colors[unique_color_index].m_weight;
1731 if (trial_error >= solution.m_error)
1734 m_trial_selectors[unique_color_index] = static_cast<uint8>(best_color_index);
// Keep this block type's result if it beat the bound.
1739 if (trial_error < solution.m_error)
1741 solution.m_error = trial_error;
1742 solution.m_alpha_block = (block_type != 0);
1743 solution.m_selectors = m_trial_selectors;
1744 solution.m_valid = true;
// Non-alpha result with identical packed endpoints: nudge one endpoint so they differ and set
// every selector to a single value s (assigned on lines not visible here).
// NOTE(review): presumably needed because a DXT1 decoder picks 4- vs 3-color mode by
// comparing the two packed endpoints - confirm against the format spec.
1748 if ((!solution.m_alpha_block) && (solution.m_coords.m_low_color == solution.m_coords.m_high_color))
// Low 5 bits not saturated: bump the low color; otherwise decrement the high color.
1751 if ((solution.m_coords.m_low_color & 31) != 31)
1753 solution.m_coords.m_low_color++;
1758 solution.m_coords.m_high_color--;
1762 for (uint i = 0; i < m_unique_colors.size(); i++)
1763 solution.m_selectors[i] = static_cast<uint8>(s);
// Publish to the caller's best solution when strictly better.
1766 if ((pBest_solution) && (solution.m_error < pBest_solution->m_error))
1768 *pBest_solution = solution;
// Faster evaluator: instead of testing every palette entry per color, it projects each unique
// color onto the endpoint direction and picks the selector by comparing that projection with
// thresholds derived from the palette entries' own projections ("stops").
// Parameters have the same meaning as in evaluate_solution_uber (the early_out parameter line
// is not visible in this listing).
// NOTE(review): the declarations of stops[] and dirr/dirg/dirb are not visible. Each threshold
// is the sum of two stops (twice their midpoint), so dir* is presumably 2 * v* - confirm.
1775 bool dxt1_endpoint_optimizer::evaluate_solution_fast(
1776 potential_solution &solution,
1777 const dxt1_solution_coordinates &coords,
1779 potential_solution *pBest_solution,
1780 bool alternate_rounding)
1782 solution.m_coords = coords;
1783 solution.m_selectors.resize(m_unique_colors.size());
// Error bound to beat: the current best when early out is requested, otherwise "infinite".
1785 if ((pBest_solution) && (early_out))
1786 solution.m_error = pBest_solution->m_error;
1788 solution.m_error = cUINT64_MAX;
1790 solution.m_alpha_block = false;
1791 solution.m_valid = false;
// Same block-type range selection as evaluate_solution_uber.
1793 uint first_block_type = 0;
1794 uint last_block_type = 1;
1796 if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks))
1797 first_block_type = 1;
1798 else if (!m_pParams->m_use_alpha_blocks)
1799 last_block_type = 0;
1801 m_trial_selectors.resize(m_unique_colors.size());
1803 color_quad_u8 colors[cDXT1SelectorValues];
1804 colors[0] = dxt1_block::unpack_color(coords.m_low_color, true);
1805 colors[1] = dxt1_block::unpack_color(coords.m_high_color, true);
// Direction vector from the low endpoint to the high endpoint.
1807 int vr = colors[1].r - colors[0].r;
1808 int vg = colors[1].g - colors[0].g;
1809 int vb = colors[1].b - colors[0].b;
// Projection of each endpoint onto that direction.
1817 stops[0] = colors[0].r * vr + colors[0].g * vg + colors[0].b * vb;
1818 stops[1] = colors[1].r * vr + colors[1].g * vg + colors[1].b * vb;
1824 for (uint block_type = first_block_type; block_type <= last_block_type; block_type++)
1826 uint64_t trial_error = 0;
// Four-entry palette: colors[2] is nearer the low endpoint and colors[3] nearer the high one.
1830 colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 255U);
1831 colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 255U);
1833 stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb;
1834 stops[3] = colors[3].r * vr + colors[3].g * vg + colors[3].b * vb;
// Thresholds between neighbours along the axis, which are ordered 0, 2, 3, 1:
// c3Point separates 0|2, halfPoint separates 2|3, c0Point separates 3|1.
1837 int c0Point = stops[1] + stops[3];
1838 int halfPoint = stops[3] + stops[2];
1839 int c3Point = stops[2] + stops[0];
1841 for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--)
1843 const color_quad_u8 &c = m_unique_colors[unique_color_index].m_color;
1845 int dot = c.r * dirr + c.g * dirg + c.b * dirb;
// Two comparisons locate the color among the four palette entries.
1847 uint8 best_color_index;
1848 if (dot < halfPoint)
1849 best_color_index = (dot < c3Point) ? 0 : 2;
1851 best_color_index = (dot < c0Point) ? 3 : 1;
// Only the chosen entry's error is computed, using the optimizer's configured metric.
1853 uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false);
1855 trial_error += best_error * m_unique_colors[unique_color_index].m_weight;
1856 if (trial_error >= solution.m_error)
1859 m_trial_selectors[unique_color_index] = static_cast<uint8>(best_color_index);
// Three-entry palette: colors[2] is the midpoint, so the axis order is 0, 2, 1.
1864 colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U);
1866 stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb;
1869 int c02Point = stops[0] + stops[2];
1870 int c21Point = stops[2] + stops[1];
1872 for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--)
1874 const color_quad_u8 &c = m_unique_colors[unique_color_index].m_color;
1876 int dot = c.r * dirr + c.g * dirg + c.b * dirb;
// NOTE(review): the first comparison is not visible here (presumably `dot < c02Point`).
1878 uint8 best_color_index;
1880 best_color_index = 0;
1881 else if (dot < c21Point)
1882 best_color_index = 2;
1884 best_color_index = 1;
1886 uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false);
1888 trial_error += best_error * m_unique_colors[unique_color_index].m_weight;
1889 if (trial_error >= solution.m_error)
1892 m_trial_selectors[unique_color_index] = static_cast<uint8>(best_color_index);
// Keep this block type's result if it beat the bound.
1896 if (trial_error < solution.m_error)
1898 solution.m_error = trial_error;
1899 solution.m_alpha_block = (block_type != 0);
1900 solution.m_selectors = m_trial_selectors;
1901 solution.m_valid = true;
// Same equal-endpoint fix-up as in evaluate_solution_uber: make the packed endpoints differ
// and set every selector to a single value s (assigned on lines not visible here).
1905 if ((!solution.m_alpha_block) && (solution.m_coords.m_low_color == solution.m_coords.m_high_color))
1908 if ((solution.m_coords.m_low_color & 31) != 31)
1910 solution.m_coords.m_low_color++;
1915 solution.m_coords.m_high_color--;
1919 for (uint i = 0; i < m_unique_colors.size(); i++)
1920 solution.m_selectors[i] = static_cast<uint8>(s);
// Publish to the caller's best solution when strictly better.
1923 if ((pBest_solution) && (solution.m_error < pBest_solution->m_error))
1925 *pBest_solution = solution;
// Interpolates (or extrapolates, when f is outside [0, 1]) the first three components of two
// colors and returns the result as a unique_color with weight 1.
//   a, b:     source colors.
//   f:        interpolation factor passed to math::lerp; callers also pass values below 0 and above 1.
//   rounding: when nonzero, 1.0 is added to each component before the float-to-int conversion.
// Each component is clamped to 0..255.
// NOTE(review): the declaration of `res` and any assignment to res[3] are not visible here.
1932 unique_color dxt1_endpoint_optimizer::lerp_color(const color_quad_u8 &a, const color_quad_u8 &b, float f, int rounding)
1936 float r = rounding ? 1.0f : 0.0f;
1937 res[0] = static_cast<uint8>(math::clamp(math::float_to_int(r + math::lerp<float>(a[0], b[0], f)), 0, 255));
1938 res[1] = static_cast<uint8>(math::clamp(math::float_to_int(r + math::lerp<float>(a[1], b[1], f)), 0, 255));
1939 res[2] = static_cast<uint8>(math::clamp(math::float_to_int(r + math::lerp<float>(a[2], b[2], f)), 0, 255));
1942 return unique_color(res, 1);
// Brute-force endpoint recovery for blocks with 2 to 4 unique colors: builds a small candidate
// set (the unique colors plus interpolated/extrapolated colors), packs and de-duplicates the
// candidates, then evaluates every pair as endpoints with both rounding modes.
// NOTE(review): the early returns and the guards between the push_back groups are not visible
// in this listing.
1945 // The block may have been already compressed using another DXTc compressor, such as squish, ATI_Compress, ryg_dxt, etc.
1946 // Attempt to recover the endpoints used by that block compressor.
1947 void dxt1_endpoint_optimizer::try_combinatorial_encoding()
// Only applies to blocks with 2..4 unique colors.
1949 if ((m_unique_colors.size() < 2) || (m_unique_colors.size() > 4))
1952 m_temp_unique_colors = m_unique_colors;
// Two unique colors: for both rounding modes (k) and both orderings (q, with r = the other
// index), add colors that would be the missing palette entries if the two known colors sat at
// various positions of a 3- or 4-entry palette.
1954 if (m_temp_unique_colors.size() == 2)
1959 for (uint k = 0; k < 2; k++)
1961 for (uint q = 0; q < 2; q++)
1963 const uint r = q ^ 1;
1966 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k));
1967 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 3.0f, k));
1970 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k));
1971 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 1.5f, k));
1976 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k));
1977 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k));
1980 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -.5f, k));
1981 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k));
1984 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -2.0f, k));
1985 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k));
// Three unique colors: combine pairs of the original three (indices 0..2).
// lerp_color is called with three arguments here, so `rounding` must have a default in its
// declaration (not visible in this listing).
1989 else if (m_temp_unique_colors.size() == 3)
1994 for (uint i = 0; i <= 2; i++)
1996 for (uint j = 0; j <= 2; j++)
2002 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.5f));
2005 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 2.0f / 3.0f));
2008 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.0f / 3.0f));
2011 m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, -.5f));
// Pack every candidate to 16 bits and keep each packed value once (linear search).
2016 m_unique_packed_colors.resize(0);
2018 for (uint i = 0; i < m_temp_unique_colors.size(); i++)
2020 const color_quad_u8 &unique_color = m_temp_unique_colors[i].m_color;
2021 const uint16 packed_color = dxt1_block::pack_color(unique_color, true);
2023 if (std::find(m_unique_packed_colors.begin(), m_unique_packed_colors.end(), packed_color) != m_unique_packed_colors.end())
2026 m_unique_packed_colors.push_back(packed_color);
// At least two distinct packed colors are needed to form a pair. This check also keeps the
// unsigned `size() - 1` loop bound below from wrapping.
2029 if (m_unique_packed_colors.size() < 2)
// Try every unordered pair with normal rounding first, then with alternate rounding.
2032 for (uint alt_rounding = 0; alt_rounding < 2; alt_rounding++)
2034 for (uint i = 0; i < m_unique_packed_colors.size() - 1; i++)
2036 for (uint j = i + 1; j < m_unique_packed_colors.size(); j++)
// Arguments of an evaluation call whose opening line is not visible here. The best solution is
// only passed for normal rounding.
2039 dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j]),
2041 (alt_rounding == 0) ? &m_best_solution : NULL,
2042 (alt_rounding != 0));
// A zero-error trial is adopted as the best solution.
// NOTE(review): any extra guard or early exit around this assignment is not visible.
2044 if (m_trial_solution.m_error == 0)
2047 m_best_solution = m_trial_solution;
// Re-runs the optimizer on a copy of the pixels in which every near-black pixel (R, G and B all
// <= 4) is marked transparent, and replaces the caller's result when the alpha-block encoding
// has lower total error.
// NOTE(review): the return statements, the dark-color counter increment and the declaration of
// `c` are not visible in this listing.
2058 // The fourth (transparent) color in 3 color "transparent" blocks is black, which can be optionally exploited for small gains in DXT1 mode if the caller
2059 // doesn't actually use alpha. (But not in DXT5 mode, because 3-color blocks aren't permitted by GPU's for DXT5.)
2060 bool dxt1_endpoint_optimizer::try_alpha_as_black_optimization()
// Save the caller's params/results pointers before compute_internal() is re-entered below.
// NOTE(review): presumably compute_internal() overwrites m_pParams/m_pResults - confirm.
2062 const params *pOrig_params = m_pParams;
2063 VOGL_NOTE_UNUSED(pOrig_params);
2064 results *pOrig_results = m_pResults;
2066 uint num_dark_colors = 0;
// Count unique colors whose R, G and B are all <= 4.
2068 for (uint i = 0; i < m_unique_colors.size(); i++)
2069 if ((m_unique_colors[i].m_color[0] <= 4) && (m_unique_colors[i].m_color[1] <= 4) && (m_unique_colors[i].m_color[2] <= 4))
// Nothing to do when no color is dark, or when every color is dark.
2072 if ((!num_dark_colors) || (num_dark_colors == m_unique_colors.size()))
// Build trial parameters that point at a private, modifiable copy of the pixels.
2075 params trial_params(*m_pParams);
2076 vogl::vector<color_quad_u8> trial_colors;
2077 trial_colors.insert(0, m_pParams->m_pPixels, m_pParams->m_num_pixels);
2079 trial_params.m_pPixels = trial_colors.get_ptr();
2080 trial_params.m_pixels_have_alpha = true;
// Mark every near-black pixel as fully transparent in the copy.
2082 for (uint i = 0; i < trial_colors.size(); i++)
2083 if ((trial_colors[i][0] <= 4) && (trial_colors[i][1] <= 4) && (trial_colors[i][2] <= 4))
2084 trial_colors[i][3] = 0;
2086 results trial_results;
2088 vogl::vector<uint8> trial_selectors(m_pParams->m_num_pixels);
2089 trial_results.m_pSelectors = trial_selectors.get_ptr();
2091 if (!compute_internal(trial_params, trial_results, NULL))
2094 VOGL_ASSERT(trial_results.m_alpha_block);
// Decode the trial block's 3-color palette so the error can be recomputed per pixel.
2097 dxt1_block::get_block_colors3(c, trial_results.m_low_color, trial_results.m_high_color);
2099 uint64_t trial_error = 0;
2101 for (uint i = 0; i < trial_colors.size(); i++)
// Pixels marked transparent are expected to use selector 3; all others must not.
2103 if (trial_colors[i][3] == 0)
2105 VOGL_ASSERT(trial_selectors[i] == 3);
2109 VOGL_ASSERT(trial_selectors[i] != 3);
2112 trial_error += color_distance(m_perceptual, trial_colors[i], c[trial_selectors[i]], false);
// Adopt the trial encoding only when it is strictly better than the original result.
2115 if (trial_error < pOrig_results->m_error)
2117 pOrig_results->m_error = trial_error;
2119 pOrig_results->m_low_color = trial_results.m_low_color;
2120 pOrig_results->m_high_color = trial_results.m_high_color;
2122 if (pOrig_results->m_pSelectors)
2123 memcpy(pOrig_results->m_pSelectors, trial_results.m_pSelectors, m_pParams->m_num_pixels);
2125 pOrig_results->m_alpha_block = true;
// Core entry point: sets up per-call state, gathers the unique colors, and dispatches to the
// handler matching the block's content.
//   p:          input parameters (pixels, quality, weights, ...).
//   r:          receives the results.
//   pSolutions: optional list that collects every candidate considered (stored in m_pSolutions).
// NOTE(review): the assignments of m_pParams/m_pResults and the return statements are not
// visible in this listing.
2131 bool dxt1_endpoint_optimizer::compute_internal(const params &p, results &r, solution_vec *pSolutions)
2137 m_pSolutions = pSolutions;
// Color weighting is active when any user weight differs from 1. Perceptual mode is only used
// when neither color weighting nor grayscale sampling is active.
2139 m_has_color_weighting = (m_pParams->m_color_weights[0] != 1) || (m_pParams->m_color_weights[1] != 1) || (m_pParams->m_color_weights[2] != 1);
2140 m_perceptual = m_pParams->m_perceptual && !m_has_color_weighting && !m_pParams->m_grayscale_sampling;
2142 find_unique_colors();
2144 m_best_solution.clear();
// No unique colors at all: treat the block as fully transparent.
2146 if (m_unique_colors.is_empty())
2147 return handle_all_transparent_block();
// Exactly one color and no transparent pixels: solid block.
2148 else if ((m_unique_colors.size() == 1) && (!m_has_transparent_pixels))
2149 return handle_solid_block();
// General case.
2152 if (!handle_multicolor_block())
// Extra pass for all-gray blocks that still have error.
2155 if ((m_all_pixels_grayscale) && (m_best_solution.m_error))
2157 if (!handle_grayscale_block())
// Public entry point: runs compute_internal() and then, when permitted, the
// transparent-index-as-black optimization.
// NOTE(review): the handling of `status` and the return statements are not visible in this listing.
2165 bool dxt1_endpoint_optimizer::compute(const params &p, results &r, solution_vec *pSolutions)
2170 bool status = compute_internal(p, r, pSolutions);
// The optimization is attempted only when alpha blocks are allowed, the caller opted in, the
// input has no real alpha, and no candidate list was requested.
2174 if ((m_pParams->m_use_alpha_blocks) && (m_pParams->m_use_transparent_indices_for_black) && (!m_pParams->m_pixels_have_alpha) && (!pSolutions))
2176 if (!try_alpha_as_black_optimization())
// Scans the input pixels once and fills m_unique_colors with each distinct pixel value and the
// number of times it occurs (m_weight). Also sets m_has_transparent_pixels,
// m_all_pixels_grayscale and m_total_unique_color_weight.
// NOTE(review): the loop header, the pointer increment, the declaration of `r` and any reset of
// m_unique_color_hash_map are not visible in this listing. The trailing `} while (...)` implies
// a do/while, which would read one pixel even when m_num_pixels is 0 - confirm callers never
// pass an empty block.
2183 // Build array of unique colors and their weights.
2184 void dxt1_endpoint_optimizer::find_unique_colors()
2186 m_has_transparent_pixels = false;
2188 uint num_opaque_pixels = 0;
// The threshold is shifted into bits 24..31 so a whole 32-bit pixel can be compared against it.
// It is 0 when the pixels carry no alpha, so no pixel compares below it.
2190 const uint alpha_thresh = m_pParams->m_pixels_have_alpha ? (m_pParams->m_dxt1a_alpha_threshold << 24U) : 0;
// Pixels are walked as 32-bit words.
2192 const uint32 *pSrc_pixels = reinterpret_cast<const uint32 *>(m_pParams->m_pPixels);
2193 const uint32 *pSrc_pixels_end = pSrc_pixels + m_pParams->m_num_pixels;
// Size for the worst case (every pixel unique); shrunk to the real count at the end.
2195 m_unique_colors.resize(m_pParams->m_num_pixels);
2196 uint num_unique_colors = 0;
2198 m_all_pixels_grayscale = true;
2202 uint32 c = utils::read_le32(pSrc_pixels);
// A pixel below the alpha threshold is flagged as transparent.
2205 if (c < alpha_thresh)
2207 m_has_transparent_pixels = true;
// Grayscale tracking: cleared as soon as a pixel's channels differ.
2211 if (m_all_pixels_grayscale)
2214 uint g = (c >> 8) & 0xFF;
2215 uint b = (c >> 16) & 0xFF;
2216 if ((r != g) || (r != b))
2217 m_all_pixels_grayscale = false;
// The hash map takes a pixel value and the index it would get in m_unique_colors.
2222 unique_color_hash_map::insert_result ins_result(m_unique_color_hash_map.insert(c, num_unique_colors));
// New color: append it with weight 1.
2224 if (ins_result.second)
2226 utils::write_le32(&m_unique_colors[num_unique_colors].m_color.m_u32, c);
2227 m_unique_colors[num_unique_colors].m_weight = 1;
2228 num_unique_colors++;
// Color seen before: bump the weight at its stored index.
2231 m_unique_colors[ins_result.first->second].m_weight++;
2233 num_opaque_pixels++;
2235 } while (pSrc_pixels != pSrc_pixels_end);
2237 m_unique_colors.resize(num_unique_colors);
2239 m_total_unique_color_weight = num_opaque_pixels;