git.cworth.org Git - vogl/blob - src/voglcore/vogl_math.h

   1 /**************************************************************************
   2  *
   3  * Copyright 2013-2014 RAD Game Tools and Valve Software
   4  * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
   5  * All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a copy
   8  * of this software and associated documentation files (the "Software"), to deal
   9  * in the Software without restriction, including without limitation the rights
  10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11  * copies of the Software, and to permit persons to whom the Software is
  12  * furnished to do so, subject to the following conditions:
  13  *
  14  * The above copyright notice and this permission notice shall be included in
  15  * all copies or substantial portions of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23  * THE SOFTWARE.
  24  *
  25  **************************************************************************/
  26
  27 // File: vogl_math.h
  28 #pragma once
  29
  30 #include "vogl_core.h"
  31
  32 #if defined(_M_IX86) && defined(_MSC_VER)
  33 #include <intrin.h>
  34 #pragma intrinsic(__emulu)
  35 unsigned __int64 __emulu(unsigned int a, unsigned int b);
  36 //#elif defined(__GNUC__)
  37 //#include <xmmintrin.h>
  38 #endif
  39
  40 namespace vogl
  41 {
  42     namespace math
  43     {
  44         const float cPi = 3.1415926535f; // 180
  45         const double cPiD = 3.14159265358979323846;
  46         const float cHalfPi = 3.1415926535f * .5f;  // 90
  47         const float cTwoPi = 3.1415926535f * 2.0f;  // 360
  48         const float cFourPi = 3.1415926535f * 4.0f; // 720
  49         const float cNearlyInfinite = 1.0e+37f;
  50         const float cTinyEpsilon = 0.00000762939453125f; // .5*(2^-16)
  51         const float cE = (float)2.71828182845904523536;
  52         const double cED = 2.71828182845904523536;
  53
  54         const float cDegToRad = 0.01745329252f;
  55         const float cRadToDeg = 57.29577951f;
  56
  57         const uint32 cFloatExpBits = 8;
  58         const uint32 cFloatExpMask = (1 << cFloatExpBits) - 1;
  59         const uint32 cFloatFractBits = 23;
  60         const uint32 cFloatFractMask = (1 << cFloatFractBits) - 1;
  61         const uint32 cFloatExpShift = 23;
  62         const uint32 cFloatExpShiftedMask = cFloatExpMask << cFloatExpShift;
  63         const int32 cFloatExpBias = 127;
  64         const uint32 cFloatSignMask = 0x80000000U;
  65
  66         const uint32 cDoubleExpBits = 11;
  67         const uint64_t cDoubleExpMask = (1ULL << cDoubleExpBits) - 1ULL;
  68         const uint32 cDoubleFractBits = 52;
  69         const uint64_t cDoubleFractMask = (1ULL << cDoubleFractBits) - 1ULL;
  70         const uint32 cDoubleExpShift = 52;
  71         const uint64_t cDoubleExpShiftedMask = cDoubleExpMask << cDoubleExpShift;
  72         const int32 cDoubleExpBias = 1023;
  73         const uint64_t cDoubleSignMask = 0x8000000000000000ULL;
  74
  75         extern uint g_bitmasks[32];
  76
  77         template <typename T>
  78         inline bool is_within_open_range(T a, T l, T h)
  79         {
  80             return (a >= l) && (a < h);
  81         }
  82         template <typename T>
  83         inline T open_range_check(T a, T h)
  84         {
  85             (void)h;
  86             VOGL_ASSERT(a < h);
  87             return a;
  88         }
  89         template <typename T>
  90         inline T open_range_check(T a, T l, T h)
  91         {
  92             (void)l;
  93             (void)h;
  94             VOGL_ASSERT((a >= l) && (a < h));
  95             return a;
  96         }
  97
  98         template <typename T>
  99         inline bool is_within_closed_range(T a, T l, T h)
 100         {
 101             return (a >= l) && (a <= h);
 102         }
 103         template <typename T>
 104         inline T closed_range_check(T a, T h)
 105         {
 106             (void)h;
 107             VOGL_ASSERT(a <= h);
 108             return a;
 109         }
 110         template <typename T>
 111         inline T closed_range_check(T a, T l, T h)
 112         {
 113             (void)l;
 114             (void)h;
 115             VOGL_ASSERT((a >= l) && (a <= h));
 116             return a;
 117         }
 118
 119         // Yes I know these should probably be pass by ref, not val:
 120         // http://www.stepanovpapers.com/notes.pdf
 121         // Just don't use them on non-simple (non built-in) types!
 122         template <typename T>
 123         inline T minimum(T a, T b)
 124         {
 125             return (a < b) ? a : b;
 126         }
 127         template <typename T>
 128         inline T minimum(T a, T b, T c)
 129         {
 130             return minimum(minimum(a, b), c);
 131         }
 132         template <typename T>
 133         inline T minimum(T a, T b, T c, T d)
 134         {
 135             return minimum(minimum(minimum(a, b), c), d);
 136         }
 137
 138         template <typename T>
 139         inline T maximum(T a, T b)
 140         {
 141             return (a > b) ? a : b;
 142         }
 143         template <typename T>
 144         inline T maximum(T a, T b, T c)
 145         {
 146             return maximum(maximum(a, b), c);
 147         }
 148         template <typename T>
 149         inline T maximum(T a, T b, T c, T d)
 150         {
 151             return maximum(maximum(maximum(a, b), c), d);
 152         }
 153
 154         template <typename T, typename U>
 155         inline T lerp(T a, T b, U c)
 156         {
 157             return a + (b - a) * c;
 158         }
 159         template <typename T, typename U>
 160         T cubic_lerp(const T &a, const T &b, U s)
 161         {
 162             if (s < 0.0f)
 163                 s = 0.0f;
 164             else if (s > 1.0f)
 165                 s = 1.0f;
 166
 167             return (a * ((2.0f * (s * s * s)) - (3.0f * (s * s)) + 1.0f)) +
 168                    (b * ((3.0f * (s * s)) - (2.0f * (s * s * s))));
 169         }
 170
 171         template <typename T>
 172         inline T clamp_low(T value, T low)
 173         {
 174             return (value < low) ? low : value;
 175         }
 176
 177         template <typename T>
 178         inline T clamp_high(T value, T high)
 179         {
 180             return (value > high) ? high : value;
 181         }
 182         template <typename T>
 183         inline T clamp(T value, T low, T high)
 184         {
 185             return (value < low) ? low : ((value > high) ? high : value);
 186         }
 187
 188         template <typename T>
 189         inline T saturate(T value)
 190         {
 191             return (value < 0.0f) ? 0.0f : ((value > 1.0f) ? 1.0f : value);
 192         }
 193
 194         template <typename T>
 195         inline T frac(T value)
 196         {
 197             T abs_value = fabs(value);
 198             return abs_value - floor(abs_value);
 199         }
 200
 201         // truncation
 202         inline int float_to_int(float f)
 203         {
 204             return static_cast<int>(f);
 205         }
 206         inline int float_to_int(double f)
 207         {
 208             return static_cast<int>(f);
 209         }
 210         inline uint float_to_uint(float f)
 211         {
 212             return static_cast<uint>(f);
 213         }
 214         inline uint float_to_uint(double f)
 215         {
 216             return static_cast<uint>(f);
 217         }
 218
 219         // round to nearest
 220         inline int float_to_int_round(float f)
 221         {
 222             return static_cast<int>(f + ((f < 0.0f) ? -.5f : .5f));
 223         }
 224         inline int64_t float_to_int64_round(float f)
 225         {
 226             return static_cast<int64_t>(f + ((f < 0.0f) ? -.5f : .5f));
 227         }
 228         inline int double_to_int_round(double f)
 229         {
 230             return static_cast<int>(f + ((f < 0.0f) ? -.5f : .5f));
 231         }
 232         inline int64_t double_to_int64_round(double f)
 233         {
 234             return static_cast<int64_t>(f + ((f < 0.0f) ? -.5f : .5f));
 235         }
 236
 237         inline uint float_to_uint_round(float f)
 238         {
 239             return static_cast<uint>((f < 0.0f) ? 0.0f : (f + .5f));
 240         }
 241         inline uint64_t float_to_uint64_round(float f)
 242         {
 243             return static_cast<uint64_t>((f < 0.0f) ? 0.0f : (f + .5f));
 244         }
 245         inline uint double_to_uint_round(double f)
 246         {
 247             return static_cast<uint>((f < 0.0f) ? 0.0f : (f + .5f));
 248         }
 249         inline uint64_t double_to_uint64_round(double f)
 250         {
 251             return static_cast<uint64_t>((f < 0.0f) ? 0.0f : (f + .5f));
 252         }
 253
 254         inline int float_to_int_nearest(float f)
 255         {
 256             //return _mm_cvtss_si32(_mm_load_ss(&f));
 257             return float_to_int_round(f);
 258         }
 259
 260         template <typename T>
 261         inline int sign(T value)
 262         {
 263             return (value < 0) ? -1 : ((value > 0) ? 1 : 0);
 264         }
 265
 266         template <typename T>
 267         inline T square(T value)
 268         {
 269             return value * value;
 270         }
 271
 272         inline bool is_power_of_2(uint32 x)
 273         {
 274             return x && ((x & (x - 1U)) == 0U);
 275         }
 276         inline bool is_power_of_2(uint64_t x)
 277         {
 278             return x && ((x & (x - 1U)) == 0U);
 279         }
 280
 281         template <typename T>
 282         inline bool is_pointer_aligned(T p, uint alignment)
 283         {
 284             VOGL_ASSERT(is_power_of_2(alignment));
 285             return (reinterpret_cast<intptr_t>(p) & (alignment - 1)) == 0;
 286         }
 287
 288         template <typename T>
 289         inline T align_up_pointer(T p, uint alignment)
 290         {
 291             VOGL_ASSERT(is_power_of_2(alignment));
 292             intptr_t q = reinterpret_cast<intptr_t>(p);
 293             q = (q + alignment - 1) & (~(alignment - 1));
 294             return reinterpret_cast<T>(q);
 295         }
 296
 297         template <typename T>
 298         inline uint get_bytes_to_align_up_pointer(T p, uint alignment)
 299         {
 300             return reinterpret_cast<uint8 *>(align_up_pointer(p, alignment)) - reinterpret_cast<uint8 *>(p);
 301         }
 302
 303         template <typename T>
 304         inline T align_up_value(T x, uint alignment)
 305         {
 306             VOGL_ASSERT(is_power_of_2(alignment));
 307             uint64_t q = static_cast<uint64_t>(x);
 308             q = (q + alignment - 1U) & (~static_cast<uint64_t>(alignment - 1U));
 309             return static_cast<T>(q);
 310         }
 311
 312         template <typename T>
 313         inline T align_down_value(T x, uint alignment)
 314         {
 315             VOGL_ASSERT(is_power_of_2(alignment));
 316             uint64_t q = static_cast<uint64_t>(x);
 317             q = q & (~static_cast<uint64_t>(alignment - 1U));
 318             return static_cast<T>(q);
 319         }
 320
 321         template <typename T>
 322         inline T get_align_up_value_delta(T x, uint alignment)
 323         {
 324             return align_up_value(x, alignment) - x;
 325         }
 326
 327         template <class T>
 328         inline T prev_wrap(T i, T n)
 329         {
 330             T temp = i - 1;
 331             if (temp < 0)
 332                 temp = n - 1;
 333             return temp;
 334         }
 335
 336         template <class T>
 337         inline T next_wrap(T i, T n)
 338         {
 339             T temp = i + 1;
 340             if (temp >= n)
 341                 temp = 0;
 342             return temp;
 343         }
 344
 345         inline float deg_to_rad(float f)
 346         {
 347             return f * cDegToRad;
 348         };
 349
 350         inline float rad_to_deg(float f)
 351         {
 352             return f * cRadToDeg;
 353         };
 354
 355         // (x mod y) with special handling for negative x values.
 356         static inline int posmod(int x, int y)
 357         {
 358             if (x >= 0)
 359                 return (x < y) ? x : (x % y);
 360
 361             int m = (-x) % y;
 362             if (m != 0)
 363                 m = y - m;
 364             return m;
 365         }
 366
 367         inline float posfmod(float x, float y)
 368         {
 369             VOGL_ASSERT(y > 0.0f);
 370
 371             if (x >= 0.0f)
 372                 return fmod(x, y);
 373             float m = fmod(-x, y);
 374             if (m != 0.0f)
 375                 m = y - m;
 376             return m;
 377         }
 378
 379         // From "Hackers Delight"
 380         inline uint32 next_pow2(uint32 val)
 381         {
 382             val--;
 383             val |= val >> 16;
 384             val |= val >> 8;
 385             val |= val >> 4;
 386             val |= val >> 2;
 387             val |= val >> 1;
 388             return val + 1;
 389         }
 390
 391         inline uint64_t next_pow2(uint64_t val)
 392         {
 393             val--;
 394             val |= val >> 32;
 395             val |= val >> 16;
 396             val |= val >> 8;
 397             val |= val >> 4;
 398             val |= val >> 2;
 399             val |= val >> 1;
 400             return val + 1;
 401         }
 402
 403         inline uint floor_log2i(uint v)
 404         {
 405             uint l = 0;
 406             while (v > 1U)
 407             {
 408                 v >>= 1;
 409                 l++;
 410             }
 411             return l;
 412         }
 413
 414         inline uint ceil_log2i(uint v)
 415         {
 416             uint l = floor_log2i(v);
 417             if ((l != cIntBits) && (v > (1U << l)))
 418                 l++;
 419             return l;
 420         }
 421
 422         // Returns the total number of bits needed to encode v.
 423         inline uint total_bits(uint v)
 424         {
 425             uint l = 0;
 426             while (v > 0U)
 427             {
 428                 v >>= 1;
 429                 l++;
 430             }
 431             return l;
 432         }
 433
 434         // Actually counts the number of set bits, but hey
 435         inline uint bitmask_size(uint mask)
 436         {
 437             uint size = 0;
 438             while (mask)
 439             {
 440                 mask &= (mask - 1U);
 441                 size++;
 442             }
 443             return size;
 444         }
 445
 446         inline uint bitmask_ofs(uint mask)
 447         {
 448             if (!mask)
 449                 return 0;
 450             uint ofs = 0;
 451             while ((mask & 1U) == 0)
 452             {
 453                 mask >>= 1U;
 454                 ofs++;
 455             }
 456             return ofs;
 457         }
 458
 459         // true if n is prime. Umm, not fast.
 460         bool is_prime(uint n);
 461
 462         // Find the smallest prime >= n.
 463         uint get_prime(uint n);
 464
 465         // See Bit Twiddling Hacks (public domain)
 466         // http://www-graphics.stanford.edu/~seander/bithacks.html
 467         inline uint count_trailing_zero_bits(uint v)
 468         {
 469             uint c = 32; // c will be the number of zero bits on the right
 470
 471             static const unsigned int B[] = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF };
 472             static const unsigned int S[] = { 1, 2, 4, 8, 16 }; // Our Magic Binary Numbers
 473
 474             for (int i = 4; i >= 0; --i) // unroll for more speed
 475             {
 476                 if (v & B[i])
 477                 {
 478                     v <<= S[i];
 479                     c -= S[i];
 480                 }
 481             }
 482
 483             if (v)
 484             {
 485                 c--;
 486             }
 487
 488             return c;
 489         }
 490
 491         inline uint count_leading_zero_bits(uint v)
 492         {
 493 #if defined(_M_IX86) && defined(_MSC_VER)
 494             return __lzcnt(v);
 495 #elif defined(__GNUC__)
 496             return v ? __builtin_clz(v) : 32;
 497 #else
 498             uint temp;
 499             uint result = 32U;
 500
 501             temp = (v >> 16U);
 502             if (temp)
 503             {
 504                 result -= 16U;
 505                 v = temp;
 506             }
 507             temp = (v >> 8U);
 508             if (temp)
 509             {
 510                 result -= 8U;
 511                 v = temp;
 512             }
 513             temp = (v >> 4U);
 514             if (temp)
 515             {
 516                 result -= 4U;
 517                 v = temp;
 518             }
 519             temp = (v >> 2U);
 520             if (temp)
 521             {
 522                 result -= 2U;
 523                 v = temp;
 524             }
 525             temp = (v >> 1U);
 526             if (temp)
 527             {
 528                 result -= 1U;
 529                 v = temp;
 530             }
 531
 532             if (v & 1U)
 533                 result--;
 534
 535             return result;
 536 #endif
 537         }
 538
 539         inline uint count_leading_zero_bits64(uint64_t v)
 540         {
 541 #if defined(_M_IX86) && defined(_MSC_VER)
 542             return __lzcnt64(v);
 543 #elif defined(__GNUC__)
 544             return v ? __builtin_clzll(v) : 64;
 545 #else
 546             uint64_t temp;
 547             uint result = 64U;
 548
 549             temp = (v >> 32U);
 550             if (temp)
 551             {
 552                 result -= 32U;
 553                 v = temp;
 554             }
 555             temp = (v >> 16U);
 556             if (temp)
 557             {
 558                 result -= 16U;
 559                 v = temp;
 560             }
 561             temp = (v >> 8U);
 562             if (temp)
 563             {
 564                 result -= 8U;
 565                 v = temp;
 566             }
 567             temp = (v >> 4U);
 568             if (temp)
 569             {
 570                 result -= 4U;
 571                 v = temp;
 572             }
 573             temp = (v >> 2U);
 574             if (temp)
 575             {
 576                 result -= 2U;
 577                 v = temp;
 578             }
 579             temp = (v >> 1U);
 580             if (temp)
 581             {
 582                 result -= 1U;
 583                 v = temp;
 584             }
 585
 586             if (v & 1U)
 587                 result--;
 588
 589             return result;
 590 #endif
 591         }
 592
 593         // Returns 64-bit result of a * b
 594         inline uint64_t emulu(uint32 a, uint32 b)
 595         {
 596 #if defined(_M_IX86) && defined(_MSC_VER)
 597             return __emulu(a, b);
 598 #else
 599             return static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
 600 #endif
 601         }
 602
 603         double compute_entropy(const uint8 *p, uint n);
 604
 605         void compute_lower_pow2_dim(int &width, int &height);
 606         void compute_upper_pow2_dim(int &width, int &height);
 607
 608         inline bool equal_tol(float a, float b, float t)
 609         {
 610             return fabs(a - b) <= ((maximum(fabs(a), fabs(b)) + 1.0f) * t);
 611         }
 612
 613         inline bool equal_tol(double a, double b, double t)
 614         {
 615             return fabs(a - b) <= ((maximum(fabs(a), fabs(b)) + 1.0f) * t);
 616         }
 617
 618         inline uint mul255(uint a, uint b)
 619         {
 620             uint t = a * b + 128;
 621             return (t + (t >> 8)) >> 8;
 622         }
 623
 624         inline uint clamp255(uint a)
 625         {
 626             if (a & 0xFFFFFF00U)
 627                 a = (~(static_cast<int>(a) >> 31)) & 0xFF;
 628             return a;
 629         }
 630
 631         inline uint32 get_float_bits(float f)
 632         {
 633             union
 634             {
 635                 float f;
 636                 uint32 u;
 637             } x;
 638             x.f = f;
 639             return x.u;
 640         }
 641
 642         inline uint64_t get_double_bits(double d)
 643         {
 644             union
 645             {
 646                 double d;
 647                 uint64_t u;
 648             } x;
 649             x.d = d;
 650             return x.u;
 651         }
 652
 653         inline uint32 get_float_mantissa(float f)
 654         {
 655             const uint32 u = get_float_bits(f);
 656             return u & cFloatFractMask;
 657         }
 658
 659         inline uint64_t get_double_mantissa(double d)
 660         {
 661             const uint64_t u = get_double_bits(d);
 662             return u & cDoubleFractMask;
 663         }
 664
 665         inline int get_float_exponent(float f)
 666         {
 667             const uint32 u = get_float_bits(f);
 668             const int exp = (u >> cFloatExpShift) & cFloatExpMask;
 669             return exp - cFloatExpBias;
 670         }
 671
 672         inline int get_double_exponent(double d)
 673         {
 674             const uint64_t u = get_double_bits(d);
 675             const int exp = (u >> cDoubleExpShift) & cDoubleExpMask;
 676             return exp - cDoubleExpBias;
 677         }
 678
 679         inline bool is_denormal(double d)
 680         {
 681             const uint64_t u = get_double_bits(d);
 682             const uint64_t exp = (u >> cDoubleExpShift) & cDoubleExpMask;
 683             const uint64_t mantissa = u & cDoubleFractMask;
 684             return (exp == 0) && (mantissa != 0);
 685         }
 686
 687         inline bool is_nan_or_inf(double d)
 688         {
 689             const uint64_t u = get_double_bits(d);
 690             const uint64_t exp = (u >> cDoubleExpShift) & cDoubleExpMask;
 691             return exp == cDoubleExpMask;
 692         }
 693
 694         inline bool is_denormal(float f)
 695         {
 696             const uint32 u = get_float_bits(f);
 697             const uint exp = (u >> cFloatExpShift) & cFloatExpMask;
 698             const uint mantissa = u & cFloatFractMask;
 699             return (exp == 0) && (mantissa != 0);
 700         }
 701
 702         inline bool is_nan_or_inf(float f)
 703         {
 704             uint32 u = get_float_bits(f);
 705             return ((u >> cFloatExpShift) & cFloatExpMask) == cFloatExpMask;
 706         }
 707
 708         inline bool is_float_signed(float f)
 709         {
 710             return (get_float_bits(f) & cFloatSignMask) != 0;
 711         }
 712
 713         inline bool is_double_signed(double d)
 714         {
 715             return (get_double_bits(d) & cDoubleSignMask) != 0;
 716         }
 717
 718         inline uint64_t combine_two_uint32s(uint32 l, uint32 h)
 719         {
 720             return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U);
 721         }
 722
 723         float gauss(int x, int y, float sigma_sqr);
 724
 725         enum
 726         {
 727             cComputeGaussianFlagNormalize = 1,
 728             cComputeGaussianFlagPrint = 2,
 729             cComputeGaussianFlagNormalizeCenterToOne = 4
 730         };
 731
 732         void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint flags);
 733     }
 734
 735 } // namespace vogl