1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
30 #include "vogl_core.h"
// 32-bit x86 MSVC builds only: declares the __emulu intrinsic that emulu() calls.
// NOTE(review): the closing #endif of this conditional is not visible in this extract.
32 #if defined(_M_IX86) && defined(_MSC_VER)
34 #pragma intrinsic(__emulu)
35 unsigned __int64 __emulu(unsigned int a, unsigned int b);
// The GCC branch below is commented out in the original.
36 //#elif defined(__GNUC__)
37 //#include <xmmintrin.h>
// Pi and common multiples, in radians; the trailing comment is the angle in degrees.
const float cPi = 3.1415926535f; // 180
const double cPiD = 3.14159265358979323846;
const float cHalfPi = 3.1415926535f * .5f;  // 90
const float cTwoPi = 3.1415926535f * 2.0f;  // 360
const float cFourPi = 3.1415926535f * 4.0f; // 720

// A large finite float (1e37) and a small tolerance value.
const float cNearlyInfinite = 1.0e+37f;
const float cTinyEpsilon = 0.00000762939453125f; // .5*(2^-16)

// Euler's number in single and double precision.
const float cE = static_cast<float>(2.71828182845904523536);
const double cED = 2.71828182845904523536;

// Scale factors between degrees and radians.
const float cDegToRad = 0.01745329252f;
const float cRadToDeg = 57.29577951f;
57 const uint32 cFloatExpBits = 8;
58 const uint32 cFloatExpMask = (1 << cFloatExpBits) - 1;
59 const uint32 cFloatFractBits = 23;
60 const uint32 cFloatFractMask = (1 << cFloatFractBits) - 1;
61 const uint32 cFloatExpShift = 23;
62 const uint32 cFloatExpShiftedMask = cFloatExpMask << cFloatExpShift;
63 const int32 cFloatExpBias = 127;
64 const uint32 cFloatSignMask = 0x80000000U;
66 const uint32 cDoubleExpBits = 11;
67 const uint64_t cDoubleExpMask = (1ULL << cDoubleExpBits) - 1ULL;
68 const uint32 cDoubleFractBits = 52;
69 const uint64_t cDoubleFractMask = (1ULL << cDoubleFractBits) - 1ULL;
70 const uint32 cDoubleExpShift = 52;
71 const uint64_t cDoubleExpShiftedMask = cDoubleExpMask << cDoubleExpShift;
72 const int32 cDoubleExpBias = 1023;
73 const uint64_t cDoubleSignMask = 0x8000000000000000ULL;
75 extern uint g_bitmasks[32];
// Returns true when a lies in the half-open interval [l, h).
template <typename T>
inline bool is_within_open_range(T a, T l, T h)
{
    if (!(a >= l))
        return false;
    return a < h;
}
// Two-argument overload taking a value and an upper bound h.
// NOTE(review): the template header and body of this overload are not visible in this extract.
83 inline T open_range_check(T a, T h)
// Three-argument overload: asserts via VOGL_ASSERT that a lies in [l, h).
// NOTE(review): the template header and the return statement are not visible in this extract.
90 inline T open_range_check(T a, T l, T h)
94 VOGL_ASSERT((a >= l) && (a < h));
// Returns true when a lies in the closed interval [l, h].
template <typename T>
inline bool is_within_closed_range(T a, T l, T h)
{
    if (!(a >= l))
        return false;
    return a <= h;
}
// Two-argument overload taking a value and an upper bound h.
// NOTE(review): the body of this overload is not visible in this extract.
103 template <typename T>
104 inline T closed_range_check(T a, T h)
// Three-argument overload: asserts via VOGL_ASSERT that a lies in [l, h].
// NOTE(review): the return statement is not visible in this extract.
110 template <typename T>
111 inline T closed_range_check(T a, T l, T h)
115 VOGL_ASSERT((a >= l) && (a <= h));
// Yes, these arguably should take their arguments by reference rather than by value:
// http://www.stepanovpapers.com/notes.pdf
// Just don't use them on non-simple (non built-in) types!

// Returns the smaller of a and b (b when neither is less than the other).
template <typename T>
inline T minimum(T a, T b)
{
    if (a < b)
        return a;
    return b;
}

// Returns the smallest of three values.
template <typename T>
inline T minimum(T a, T b, T c)
{
    const T ab = minimum(a, b);
    return minimum(ab, c);
}

// Returns the smallest of four values.
template <typename T>
inline T minimum(T a, T b, T c, T d)
{
    const T abc = minimum(a, b, c);
    return minimum(abc, d);
}
// Returns the larger of a and b (b when neither is greater than the other).
template <typename T>
inline T maximum(T a, T b)
{
    if (a > b)
        return a;
    return b;
}

// Returns the largest of three values.
template <typename T>
inline T maximum(T a, T b, T c)
{
    const T ab = maximum(a, b);
    return maximum(ab, c);
}

// Returns the largest of four values.
template <typename T>
inline T maximum(T a, T b, T c, T d)
{
    const T abc = maximum(a, b, c);
    return maximum(abc, d);
}
// Linear interpolation between a and b: returns a + (b - a) * c.
// c == 0 yields a and c == 1 yields b; c is not clamped.
template <typename T, typename U>
inline T lerp(T a, T b, U c)
{
    T blended = a + ((b - a) * c);
    return blended;
}
// Blends a and b with cubic weights in s:
//   a * (2s^3 - 3s^2 + 1) + b * (3s^2 - 2s^3)
// so s == 0 yields a and s == 1 yields b.
// NOTE(review): the lines between the signature and the return are not visible in this extract
// (s may be adjusted there before use).
159 template <typename T, typename U>
160 T cubic_lerp(const T &a, const T &b, U s)
167 return (a * ((2.0f * (s * s * s)) - (3.0f * (s * s)) + 1.0f)) +
168 (b * ((3.0f * (s * s)) - (2.0f * (s * s * s))));
// Returns value, raised to low if it is below it.
template <typename T>
inline T clamp_low(T value, T low)
{
    if (value < low)
        return low;
    return value;
}
// Returns value, lowered to high if it is above it.
template <typename T>
inline T clamp_high(T value, T high)
{
    if (value > high)
        return high;
    return value;
}
// Restricts value to [low, high]. The lower bound is tested first.
template <typename T>
inline T clamp(T value, T low, T high)
{
    if (value < low)
        return low;
    if (value > high)
        return high;
    return value;
}
// Restricts value to [0, 1].
template <typename T>
inline T saturate(T value)
{
    if (value < 0.0f)
        return 0.0f;
    if (value > 1.0f)
        return 1.0f;
    return value;
}
// Returns the fractional part of |value|, i.e. fabs(value) - floor(fabs(value)).
// The sign of value is discarded, so frac(-1.25) == frac(1.25).
template <typename T>
inline T frac(T value)
{
    const T magnitude = fabs(value);
    const T whole = floor(magnitude);
    return magnitude - whole;
}
// Converts a float to int with static_cast, which truncates toward zero.
inline int float_to_int(float f)
{
    const int truncated = static_cast<int>(f);
    return truncated;
}

// Converts a double to int with static_cast, which truncates toward zero.
inline int float_to_int(double f)
{
    const int truncated = static_cast<int>(f);
    return truncated;
}
210 inline uint float_to_uint(float f)
212 return static_cast<uint>(f);
214 inline uint float_to_uint(double f)
216 return static_cast<uint>(f);
// Rounds f to the nearest int; halfway cases round away from zero.
// The +/-0.5 bias is added in double precision. A float-precision `f + .5f` is itself
// rounded to float, which returned 1 for the largest float below 0.5 and the wrong
// neighbour for odd values at or above 2^23.
// f: value to round; it must fit in an int after rounding.
inline int float_to_int_round(float f)
{
    const double bias = (f < 0.0f) ? -0.5 : 0.5;
    return static_cast<int>(static_cast<double>(f) + bias);
}
// Rounds f to the nearest 64-bit integer; halfway cases round away from zero.
// The +/-0.5 bias is added in double precision so the sum is not rounded a second
// time in float (which mis-rounded odd values at or above 2^23 and values just below 0.5).
// f: value to round; it must fit in an int64_t after rounding.
inline int64_t float_to_int64_round(float f)
{
    const double bias = (f < 0.0f) ? -0.5 : 0.5;
    return static_cast<int64_t>(static_cast<double>(f) + bias);
}
// Rounds f to the nearest int; halfway cases round away from zero.
// Works from the truncated value and its exact fractional remainder instead of
// `f + 0.5`, because that sum is itself rounded (0.49999999999999994 + 0.5 == 1.0).
// f: value to round; it must fit in an int after rounding.
inline int double_to_int_round(double f)
{
    const int whole = static_cast<int>(f); // truncates toward zero
    const double rem = f - whole;          // exact; has the sign of f
    if (rem >= 0.5)
        return whole + 1;
    if (rem <= -0.5)
        return whole - 1;
    return whole;
}
// Rounds f to the nearest 64-bit integer; halfway cases round away from zero.
// Works from the truncated value and its exact fractional remainder instead of
// `f + 0.5`: that sum is rounded to even for odd integers at or above 2^52 and
// yields 1.0 for the largest double below 0.5.
// f: value to round; it must fit in an int64_t after rounding.
inline int64_t double_to_int64_round(double f)
{
    const int64_t whole = static_cast<int64_t>(f);    // truncates toward zero
    const double rem = f - static_cast<double>(whole); // exact; has the sign of f
    if (rem >= 0.5)
        return whole + 1;
    if (rem <= -0.5)
        return whole - 1;
    return whole;
}
237 inline uint float_to_uint_round(float f)
239 return static_cast<uint>((f < 0.0f) ? 0.0f : (f + .5f));
// Rounds f to the nearest 64-bit unsigned integer (halves round up); negative inputs return 0.
// The 0.5 bias is added in double precision so the sum is not rounded a second time
// in float (which mis-rounded odd values at or above 2^23 and values just below 0.5).
// f: value to round; it must fit in a uint64_t after rounding.
inline uint64_t float_to_uint64_round(float f)
{
    if (f < 0.0f)
        return 0;
    return static_cast<uint64_t>(static_cast<double>(f) + 0.5);
}
245 inline uint double_to_uint_round(double f)
247 return static_cast<uint>((f < 0.0f) ? 0.0f : (f + .5f));
// Rounds f to the nearest 64-bit unsigned integer (halves round up); negative inputs return 0.
// Works from the truncated value and its exact fractional remainder instead of
// `f + 0.5`: that sum is rounded to even for odd integers at or above 2^52 and
// yields 1.0 for the largest double below 0.5.
// f: value to round; it must fit in a uint64_t after rounding.
inline uint64_t double_to_uint64_round(double f)
{
    if (f < 0.0)
        return 0;
    const uint64_t whole = static_cast<uint64_t>(f); // truncates toward zero
    return ((f - static_cast<double>(whole)) >= 0.5) ? (whole + 1U) : whole;
}
254 inline int float_to_int_nearest(float f)
256 //return _mm_cvtss_si32(_mm_load_ss(&f));
257 return float_to_int_round(f);
// Returns -1 for negative values, 1 for positive values and 0 otherwise.
template <typename T>
inline int sign(T value)
{
    if (value < 0)
        return -1;
    if (value > 0)
        return 1;
    return 0;
}
// Returns value multiplied by itself.
template <typename T>
inline T square(T value)
{
    T product = value * value;
    return product;
}
272 inline bool is_power_of_2(uint32 x)
274 return x && ((x & (x - 1U)) == 0U);
// 64-bit overload: returns true when x has exactly one bit set; zero is not a power of two.
inline bool is_power_of_2(uint64_t x)
{
    if (!x)
        return false;
    return (x & (x - 1U)) == 0U;
}
281 template <typename T>
282 inline bool is_pointer_aligned(T p, uint alignment)
284 VOGL_ASSERT(is_power_of_2(alignment));
285 return (reinterpret_cast<intptr_t>(p) & (alignment - 1)) == 0;
288 template <typename T>
289 inline T align_up_pointer(T p, uint alignment)
291 VOGL_ASSERT(is_power_of_2(alignment));
292 intptr_t q = reinterpret_cast<intptr_t>(p);
293 q = (q + alignment - 1) & (~(alignment - 1));
294 return reinterpret_cast<T>(q);
297 template <typename T>
298 inline uint get_bytes_to_align_up_pointer(T p, uint alignment)
300 return reinterpret_cast<uint8 *>(align_up_pointer(p, alignment)) - reinterpret_cast<uint8 *>(p);
303 template <typename T>
304 inline T align_up_value(T x, uint alignment)
306 VOGL_ASSERT(is_power_of_2(alignment));
307 uint64_t q = static_cast<uint64_t>(x);
308 q = (q + alignment - 1U) & (~static_cast<uint64_t>(alignment - 1U));
309 return static_cast<T>(q);
312 template <typename T>
313 inline T align_down_value(T x, uint alignment)
315 VOGL_ASSERT(is_power_of_2(alignment));
316 uint64_t q = static_cast<uint64_t>(x);
317 q = q & (~static_cast<uint64_t>(alignment - 1U));
318 return static_cast<T>(q);
321 template <typename T>
322 inline T get_align_up_value_delta(T x, uint alignment)
324 return align_up_value(x, alignment) - x;
// NOTE(review): only the signature is visible in this extract. Presumably this steps
// index i backwards with wrap-around in [0, n) — confirm against the full body.
328 inline T prev_wrap(T i, T n)
// NOTE(review): only the signature is visible in this extract. Presumably this steps
// index i forwards with wrap-around in [0, n) — confirm against the full body.
337 inline T next_wrap(T i, T n)
345 inline float deg_to_rad(float f)
347 return f * cDegToRad;
350 inline float rad_to_deg(float f)
352 return f * cRadToDeg;
355 // (x mod y) with special handling for negative x values.
// NOTE(review): only one return statement is visible in this extract; the negative-x
// handling mentioned above is not shown here.
356 static inline int posmod(int x, int y)
// x is returned unchanged when it is already below y, otherwise it is reduced with %.
359 return (x < y) ? x : (x % y);
// Floating-point counterpart of posmod(); y must be positive (asserted).
// NOTE(review): most of the body is not visible in this extract. The one visible
// computation takes fmod of the negated x.
367 inline float posfmod(float x, float y)
369 VOGL_ASSERT(y > 0.0f);
373 float m = fmod(-x, y);
379 // From "Hackers Delight"
// NOTE(review): the bodies of both overloads are not visible in this extract.
// Presumably each returns the smallest power of two >= val — confirm against the full source.
380 inline uint32 next_pow2(uint32 val)
// 64-bit overload.
391 inline uint64_t next_pow2(uint64_t val)
// NOTE(review): only the signature is visible in this extract. Presumably this returns
// floor(log2(v)) — confirm against the full body, including the result for v == 0.
403 inline uint floor_log2i(uint v)
// Starts from floor_log2i(v) and then tests whether v exceeds 1 << l, i.e. whether v
// is larger than the power of two found by the floor.
// NOTE(review): the statements that follow the condition are not visible in this extract.
414 inline uint ceil_log2i(uint v)
416 uint l = floor_log2i(v);
// The l != cIntBits test keeps the shift below from being evaluated for that value of l.
417 if ((l != cIntBits) && (v > (1U << l)))
422 // Returns the total number of bits needed to encode v.
// NOTE(review): the body is not visible in this extract.
423 inline uint total_bits(uint v)
434 // Actually counts the number of set bits, but hey
// NOTE(review): the body is not visible in this extract.
435 inline uint bitmask_size(uint mask)
// NOTE(review): only the loop header is visible in this extract. Presumably this returns
// the index of the lowest set bit in mask — confirm against the full body.
446 inline uint bitmask_ofs(uint mask)
// Loops while the lowest bit is clear. This would never terminate for mask == 0 unless
// an earlier (not visible) check handles that case — verify.
451 while ((mask & 1U) == 0)
459 // true if n is prime. Umm, not fast.
460 bool is_prime(uint n);
462 // Find the smallest prime >= n.
463 uint get_prime(uint n);
465 // See Bit Twiddling Hacks (public domain)
466 // http://www-graphics.stanford.edu/~seander/bithacks.html
// Counts the zero bits at the low end of v, starting from 32 and refining with the mask table.
// NOTE(review): the loop body and return are not visible in this extract.
467 inline uint count_trailing_zero_bits(uint v)
469 uint c = 32; // c will be the number of zero bits on the right
// B[i] holds alternating runs of S[i] set bits; S[i] is the matching run length.
471 static const unsigned int B[] = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF };
472 static const unsigned int S[] = { 1, 2, 4, 8, 16 }; // Our Magic Binary Numbers
// Walks the tables from the widest run (16) down to the narrowest (1).
474 for (int i = 4; i >= 0; --i) // unroll for more speed
// Counts the zero bits at the high end of a 32-bit value; selects a compiler-specific path.
// NOTE(review): the MSVC branch body and any generic fallback are not visible in this extract.
491 inline uint count_leading_zero_bits(uint v)
493 #if defined(_M_IX86) && defined(_MSC_VER)
495 #elif defined(__GNUC__)
// v == 0 is mapped to 32 explicitly instead of being passed to __builtin_clz.
496 return v ? __builtin_clz(v) : 32;
// Counts the zero bits at the high end of a 64-bit value; selects a compiler-specific path.
// NOTE(review): the MSVC branch body and any generic fallback are not visible in this extract.
539 inline uint count_leading_zero_bits64(uint64_t v)
541 #if defined(_M_IX86) && defined(_MSC_VER)
543 #elif defined(__GNUC__)
// v == 0 is mapped to 64 explicitly instead of being passed to __builtin_clzll.
544 return v ? __builtin_clzll(v) : 64;
593 // Returns 64-bit result of a * b
594 inline uint64_t emulu(uint32 a, uint32 b)
596 #if defined(_M_IX86) && defined(_MSC_VER)
597 return __emulu(a, b);
599 return static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
603 double compute_entropy(const uint8 *p, uint n);
// Declared here, defined elsewhere. Both take width and height by reference.
// NOTE(review): presumably they adjust the dimensions in place to the nearest lower /
// upper powers of two — confirm against the implementation.
void compute_lower_pow2_dim(int &width, int &height);
void compute_upper_pow2_dim(int &width, int &height);
608 inline bool equal_tol(float a, float b, float t)
610 return fabs(a - b) <= ((maximum(fabs(a), fabs(b)) + 1.0f) * t);
613 inline bool equal_tol(double a, double b, double t)
615 return fabs(a - b) <= ((maximum(fabs(a), fabs(b)) + 1.0f) * t);
618 inline uint mul255(uint a, uint b)
620 uint t = a * b + 128;
621 return (t + (t >> 8)) >> 8;
// NOTE(review): only one statement of the body is visible in this extract; any guard
// around it and the return are not shown.
624 inline uint clamp255(uint a)
// Reinterprets a as signed: the arithmetic shift by 31 gives all ones for a "negative"
// value and zero otherwise, so the result is 0 when the top bit is set and 0xFF when it is clear.
627 a = (~(static_cast<int>(a) >> 31)) & 0xFF;
// NOTE(review): only the signature is visible in this extract. Callers in this file mask
// the result with the cFloat* constants, so it presumably returns the raw 32-bit
// representation of f — confirm against the full body.
631 inline uint32 get_float_bits(float f)
// NOTE(review): only the signature is visible in this extract. Callers in this file mask
// the result with the cDouble* constants, so it presumably returns the raw 64-bit
// representation of d — confirm against the full body.
642 inline uint64_t get_double_bits(double d)
653 inline uint32 get_float_mantissa(float f)
655 const uint32 u = get_float_bits(f);
656 return u & cFloatFractMask;
659 inline uint64_t get_double_mantissa(double d)
661 const uint64_t u = get_double_bits(d);
662 return u & cDoubleFractMask;
665 inline int get_float_exponent(float f)
667 const uint32 u = get_float_bits(f);
668 const int exp = (u >> cFloatExpShift) & cFloatExpMask;
669 return exp - cFloatExpBias;
672 inline int get_double_exponent(double d)
674 const uint64_t u = get_double_bits(d);
675 const int exp = (u >> cDoubleExpShift) & cDoubleExpMask;
676 return exp - cDoubleExpBias;
679 inline bool is_denormal(double d)
681 const uint64_t u = get_double_bits(d);
682 const uint64_t exp = (u >> cDoubleExpShift) & cDoubleExpMask;
683 const uint64_t mantissa = u & cDoubleFractMask;
684 return (exp == 0) && (mantissa != 0);
687 inline bool is_nan_or_inf(double d)
689 const uint64_t u = get_double_bits(d);
690 const uint64_t exp = (u >> cDoubleExpShift) & cDoubleExpMask;
691 return exp == cDoubleExpMask;
694 inline bool is_denormal(float f)
696 const uint32 u = get_float_bits(f);
697 const uint exp = (u >> cFloatExpShift) & cFloatExpMask;
698 const uint mantissa = u & cFloatFractMask;
699 return (exp == 0) && (mantissa != 0);
702 inline bool is_nan_or_inf(float f)
704 uint32 u = get_float_bits(f);
705 return ((u >> cFloatExpShift) & cFloatExpMask) == cFloatExpMask;
708 inline bool is_float_signed(float f)
710 return (get_float_bits(f) & cFloatSignMask) != 0;
713 inline bool is_double_signed(double d)
715 return (get_double_bits(d) & cDoubleSignMask) != 0;
718 inline uint64_t combine_two_uint32s(uint32 l, uint32 h)
720 return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U);
// Declared here, defined elsewhere.
// NOTE(review): presumably evaluates a 2D Gaussian at offset (x, y) for the given
// squared sigma — confirm normalization against the implementation.
float gauss(int x, int y, float sigma_sqr);
// Flag values, each a distinct single bit so they can be combined.
// NOTE(review): the enclosing enum header is not visible in this extract. Presumably these
// are passed in the `flags` argument of compute_gaussian_kernel() — verify.
727 cComputeGaussianFlagNormalize = 1,
728 cComputeGaussianFlagPrint = 2,
729 cComputeGaussianFlagNormalizeCenterToOne = 4
732 void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint flags);