1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 // File: vogl_rand.cpp
29 // http://www.ciphersbyritter.com/NEWS4/RANDC.HTM
30 // http://burtleburtle.net/bob/rand/smallprng.html
31 // http://www.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf
32 // See GPG7, page 120, or http://www.lomont.org/Math/Papers/2008/Lomont_PRNG_2008.pdf
33 // I've tested random::urand32() using the CISC test suite here: http://www.csis.hku.hk/cisc/download/idetect/
34 #include "vogl_core.h"
35 #include "vogl_rand.h"
36 #include "vogl_hash.h"
38 #include "vogl_bigint128.h"
40 #include "vogl_image_utils.h"
42 // TODO: Linux specific, needed by the seed_from_urand() method
44 #include <sys/syscall.h>
46 #define znew (z = 36969 * (z & 65535) + (z >> 16))
47 #define wnew (w = 18000 * (w & 65535) + (w >> 16))
48 #define MWC ((znew << 16) + wnew)
49 #define FIB ((b = a + b), (a = b - a))
50 #define KISS ((MWC ^ VOGL_CONG) + VOGL_SHR3)
51 #define LFIB4 (c++, t[c] = t[c] + t[UC(c + 58)] + t[UC(c + 119)] + t[UC(c + 178)])
52 #define SWB (c++, bro = (x < y), t[c] = (x = t[UC(c + 34)]) - (y = t[UC(c + 19)] + bro))
53 #define UNI (KISS * 2.328306e-10)
54 #define VNI ((long)KISS) * 4.656613e-10
55 #define UC (unsigned char) /*a cast operation*/
57 //#define ROTL(x,k) (((x)<<(k))|((x)>>(32-(k))))
58 #define ROTL(x, k) VOGL_ROTATE_LEFT(x, k)
63 fast_random g_fast_random;
64 thread_safe_random g_thread_safe_random;
66 static const double cNorm32 = 1.0 / (double)0x100000000ULL;
76 void kiss99::seed(uint32 i, uint32 j, uint32 k)
84 inline uint32 kiss99::next()
86 x = 69069 * x + 12345;
93 t += (698769069ULL * z);
94 c = static_cast<uint32>(t >> 32);
95 z = static_cast<uint32>(t);
100 inline uint32 ranctx::next()
102 uint32 e = a - ROTL(b, 27);
110 void ranctx::seed(uint32 seed)
112 a = 0xf1ea5eed, b = c = d = seed;
113 for (uint32 i = 0; i < 20; ++i)
122 void well512::seed(uint32 seed[well512::cStateSize])
124 memcpy(m_state, seed, sizeof(m_state));
128 void well512::seed(uint32 seed)
130 uint32 jsr = utils::swap32(seed) ^ 0xAAC29377;
132 for (uint i = 0; i < cStateSize; i++)
135 seed = bitmix32c(seed);
137 m_state[i] = seed ^ jsr;
142 void well512::seed(uint32 seed1, uint32 seed2, uint32 seed3)
145 uint32 jcong = seed3;
147 for (uint i = 0; i < cStateSize; i++)
150 seed1 = bitmix32c(seed1);
153 m_state[i] = seed1 ^ jsr ^ jcong;
158 void well512::seed(uint32 seed1, uint32 seed2, uint32 seed3, uint seed4)
161 uint32 jcong = seed3;
168 for (uint i = 4; i < cStateSize; i++)
173 m_state[i] = jsr ^ jcong ^ m_state[(i - 4) & (cStateSize - 1)];
179 inline uint32 well512::next()
182 a = m_state[m_index];
183 c = m_state[(m_index + 13) & 15];
184 b = a ^ c ^ (a << 16) ^ (c << 15);
185 c = m_state[(m_index + 9) & 15];
187 a = m_state[m_index] = b ^ c;
188 d = a ^ ((a << 5) & 0xDA442D20UL);
189 m_index = (m_index + 15) & 15;
190 a = m_state[m_index];
191 m_state[m_index] = a ^ b ^ d ^ (a << 2) ^ (b << 18) ^ (c << 28);
192 return m_state[m_index];
197 seed(12345, 65435, 34221);
200 random::random(uint32 i)
207 seed(12345, 65435, 34221);
210 void random::seed(uint32 i1, uint32 i2, uint32 i3)
212 m_ranctx.seed(i1 ^ i2 ^ i3);
214 m_kiss99.seed(i1, i2, i3);
216 m_well512.seed(i1, i2, i3);
218 m_pdes.seed((static_cast<uint64_t>(i1) | (static_cast<uint64_t>(i2) << 32U)) ^ (static_cast<uint64_t>(i3) << 10));
220 const uint warming_iters = 16;
221 for (uint i = 0; i < warming_iters; i++)
225 void random::seed(uint32 i1, uint32 i2, uint32 i3, uint i4)
227 m_ranctx.seed(i1 ^ i2 ^ i3);
229 m_kiss99.seed(i1, i2, i4);
231 m_well512.seed(i1, i2, i3, i4);
233 m_pdes.seed((static_cast<uint64_t>(i1) | (static_cast<uint64_t>(i4) << 32U)) ^ (static_cast<uint64_t>(i3) << 10));
235 const uint warming_iters = 16;
236 for (uint i = 0; i < warming_iters; i++)
240 void random::seed(const md5_hash &h)
242 seed(h[0], h[1], h[2], h[3]);
245 void random::seed(uint32 i)
247 md5_hash h(md5_hash_gen(&i, sizeof(i)).finalize());
248 seed(h[0], h[1], h[2], h[3]);
251 void random::seed_from_urandom()
254 uint32 buf[N] = { 0xABCDEF, 0x12345678, 0xFFFECABC, 0xABCDDEF0 };
255 FILE *fp = vogl_fopen("/dev/urandom", "rb");
258 size_t n = fread(buf, 1, sizeof(buf), fp);
263 // Hash thread, process ID's, etc. for good measure, or in case the call failed, or to make this easier to port. (we can just leave out the /dev/urandom read)
264 pid_t tid = (pid_t)syscall(SYS_gettid);
265 pid_t pid = getpid();
266 pid_t ppid = getppid();
268 buf[0] ^= static_cast<uint32>(time(NULL));
269 buf[1] ^= static_cast<uint32>(utils::RDTSC()) ^ static_cast<uint32>(ppid);
270 buf[2] ^= static_cast<uint32>(tid);
271 buf[3] ^= static_cast<uint32>(pid);
273 seed(buf[0], buf[1], buf[2], buf[3]);
276 uint32 random::urand32()
278 return m_kiss99.next() ^ m_ranctx.next() ^ m_well512.next() ^ m_pdes.next32();
281 uint64_t random::urand64()
283 uint64_t result = urand32();
289 uint32 random::fast_urand32()
291 return m_well512.next();
294 uint32 random::get_bit()
296 uint32 k = urand32();
297 return (k ^ (k >> 6) ^ (k >> 10) ^ (k >> 30)) & 1;
300 double random::drand(double l, double h)
306 double fract = (urand32() * (cNorm32 * cNorm32)) + (urand32() * cNorm32);
308 // clamp here shouldn't be necessary, but it ensures this function provably cannot return out of range results
309 return math::clamp(l + (h - l) * fract, l, h);
312 float random::frand(float l, float h)
318 float r = static_cast<float>(l + (h - l) * (urand32() * cNorm32));
320 // clamp here shouldn't be necessary, but it ensures this function provably cannot return out of range results
321 return math::clamp<float>(r, l, h);
324 static inline uint umul32_return_high(uint32 range, uint32 rnd)
326 #if defined(_M_IX86) && defined(_MSC_VER)
327 //uint32 rnd_range = static_cast<uint32>(__emulu(range, rnd) >> 32U);
329 *reinterpret_cast<uint64_t *>(x) = __emulu(range, rnd);
330 uint32 rnd_range = x[1];
332 uint32 rnd_range = static_cast<uint32>((range * static_cast<uint64_t>(rnd)) >> 32U);
338 int random::irand(int l, int h)
344 uint32 range = static_cast<uint32>(h - l);
346 uint32 rnd = urand32();
348 uint32 rnd_range = umul32_return_high(range, rnd);
350 int result = l + rnd_range;
351 VOGL_ASSERT((result >= l) && (result < h));
355 int random::irand_inclusive(int l, int h)
361 uint32 range = static_cast<uint32>(h - l);
363 uint32 rnd = urand32();
365 int result = static_cast<int>(rnd);
366 if (range != cUINT32_MAX)
369 uint32 rnd_range = umul32_return_high(range, rnd);
370 result = l + rnd_range;
373 VOGL_ASSERT((result >= l) && (result <= h));
377 uint random::urand(uint l, uint h)
383 uint32 range = h - l;
385 uint32 rnd = urand32();
387 uint32 rnd_range = umul32_return_high(range, rnd);
389 uint result = l + rnd_range;
390 VOGL_ASSERT((result >= l) && (result < h));
394 uint random::urand_inclusive(uint l, uint h)
400 uint32 range = h - l;
402 uint32 rnd = urand32();
405 if (range != cUINT32_MAX)
408 uint32 rnd_range = umul32_return_high(range, rnd);
409 result = l + rnd_range;
412 VOGL_ASSERT((result >= l) && (result <= h));
416 uint64_t random::urand64(uint64_t l, uint64_t h)
422 uint64_t range = h - l;
424 uint64_t rnd = urand64();
426 bigint128 mul_result;
427 bigint128::unsigned_multiply(range, rnd, mul_result);
429 uint64_t rnd_range = mul_result.get_qword(1);
431 uint64_t result = l + rnd_range;
432 VOGL_ASSERT((result >= l) && (result < h));
436 uint64_t random::urand64_inclusive(uint64_t l, uint64_t h)
442 uint64_t range = h - l;
444 uint64_t rnd = urand64();
446 uint64_t result = rnd;
447 if (range != cUINT64_MAX)
449 bigint128 mul_result;
450 bigint128::unsigned_multiply(range + 1, rnd, mul_result);
452 uint64_t rnd_range = mul_result.get_qword(1);
454 result = l + rnd_range;
457 VOGL_ASSERT((result >= l) && (result <= h));
462 ALGORITHM 712, COLLECTED ALGORITHMS FROM ACM.
463 THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE,
464 VOL. 18, NO. 4, DECEMBER, 1992, PP. 434-435.
465 The function returns a normally distributed pseudo-random number
466 with a given mean and standard devaiation. Calls are made to a
467 function subprogram which must return independent random
468 numbers uniform in the interval (0,1).
469 The algorithm uses the ratio of uniforms method of A.J. Kinderman
470 and J.F. Monahan augmented with quadratic bounding curves.
472 double random::gaussian(double mean, double stddev)
474 double q, u, v, x, y;
477 Generate P = (u,v) uniform in rect. enclosing acceptance region
478 Make sure that any random numbers <= 0 are rejected, since
479 gaussian() requires uniforms > 0, but RandomUniform() delivers >= 0.
485 if (u <= 0.0 || v <= 0.0)
490 v = 1.7156 * (v - 0.5);
492 /* Evaluate the quadratic form */
494 y = fabs(v) + 0.386595;
495 q = x * x + y * (0.19600 * y - 0.25472 * x);
497 /* Accept P if inside inner ellipse */
501 /* Reject P if outside outer ellipse, or outside acceptance region */
502 } while ((q > 0.27846) || (v * v > -4.0 * log(u) * u * u));
504 /* Return ratio of P's coordinates as the normal deviate */
505 return (mean + stddev * v / u);
508 thread_safe_random::thread_safe_random()
510 scoped_spinlock scoped_lock(m_spinlock);
514 thread_safe_random::thread_safe_random(uint32 i)
516 scoped_spinlock scoped_lock(m_spinlock);
520 void thread_safe_random::seed()
522 scoped_spinlock scoped_lock(m_spinlock);
526 void thread_safe_random::seed(uint32 i)
528 scoped_spinlock scoped_lock(m_spinlock);
532 void thread_safe_random::seed(uint32 i1, uint32 i2, uint32 i3)
534 scoped_spinlock scoped_lock(m_spinlock);
535 m_rm.seed(i1, i2, i3);
538 void thread_safe_random::seed(uint32 i1, uint32 i2, uint32 i3, uint32 i4)
540 scoped_spinlock scoped_lock(m_spinlock);
541 m_rm.seed(i1, i2, i3, i4);
544 void thread_safe_random::seed(const md5_hash &h)
546 scoped_spinlock scoped_lock(m_spinlock);
550 void thread_safe_random::seed_from_urandom()
552 scoped_spinlock scoped_lock(m_spinlock);
553 m_rm.seed_from_urandom();
556 uint8 thread_safe_random::urand8()
558 scoped_spinlock scoped_lock(m_spinlock);
559 return m_rm.urand8();
562 uint16 thread_safe_random::urand16()
564 scoped_spinlock scoped_lock(m_spinlock);
565 return m_rm.urand16();
568 uint32 thread_safe_random::urand32()
570 scoped_spinlock scoped_lock(m_spinlock);
571 return m_rm.urand32();
574 uint64_t thread_safe_random::urand64()
576 scoped_spinlock scoped_lock(m_spinlock);
577 return m_rm.urand64();
580 uint32 thread_safe_random::fast_urand32()
582 scoped_spinlock scoped_lock(m_spinlock);
583 return m_rm.fast_urand32();
586 uint32 thread_safe_random::get_bit()
588 scoped_spinlock scoped_lock(m_spinlock);
589 return m_rm.get_bit();
592 double thread_safe_random::drand(double l, double h)
594 scoped_spinlock scoped_lock(m_spinlock);
595 return m_rm.drand(l, h);
598 float thread_safe_random::frand(float l, float h)
600 scoped_spinlock scoped_lock(m_spinlock);
601 return m_rm.frand(l, h);
604 int thread_safe_random::irand(int l, int h)
606 scoped_spinlock scoped_lock(m_spinlock);
607 return m_rm.irand(l, h);
610 int thread_safe_random::irand_inclusive(int l, int h)
612 scoped_spinlock scoped_lock(m_spinlock);
613 return m_rm.irand_inclusive(l, h);
616 uint thread_safe_random::urand(uint l, uint h)
618 scoped_spinlock scoped_lock(m_spinlock);
619 return m_rm.urand(l, h);
622 uint thread_safe_random::urand_inclusive(uint l, uint h)
624 scoped_spinlock scoped_lock(m_spinlock);
625 return m_rm.urand_inclusive(l, h);
628 // Returns random 64-bit unsigned int between [l, h)
629 uint64_t thread_safe_random::urand64(uint64_t l, uint64_t h)
631 scoped_spinlock scoped_lock(m_spinlock);
632 return m_rm.urand64(l, h);
635 // Returns random 64-bit unsigned int between [l, h]
636 uint64_t thread_safe_random::urand64_inclusive(uint64_t l, uint64_t h)
638 scoped_spinlock scoped_lock(m_spinlock);
639 return m_rm.urand64_inclusive(l, h);
642 double thread_safe_random::gaussian(double mean, double stddev)
644 scoped_spinlock scoped_lock(m_spinlock);
645 return m_rm.gaussian(mean, stddev);
648 int fast_random::irand(int l, int h)
654 uint32 range = static_cast<uint32>(h - l);
656 uint32 rnd = urand32();
658 uint32 rnd_range = umul32_return_high(range, rnd);
660 int result = l + rnd_range;
661 VOGL_ASSERT((result >= l) && (result < h));
665 uint fast_random::urand(uint l, uint h)
671 uint32 range = h - l;
673 uint32 rnd = urand32();
675 uint32 rnd_range = umul32_return_high(range, rnd);
677 uint result = l + rnd_range;
678 VOGL_ASSERT((result >= l) && (result < h));
682 double fast_random::drand(double l, double h)
688 return math::clamp(l + (h - l) * (urand32() * cNorm32), l, h);
691 float fast_random::frand(float l, float h)
697 float r = static_cast<float>(l + (h - l) * (urand32() * cNorm32));
699 return math::clamp<float>(r, l, h);
702 // Got this from http://www.physics.buffalo.edu/phy410-505-2009/topic2/lec-2-1.pdf
703 template <typename T>
704 double chi_sqr_test(T gen)
706 int tries = 10000000;
707 const int BINS = 10000;
708 printf("Binning %i tries in %i bins\n", tries, BINS);
710 for (int b = 0; b < BINS; b++)
713 for (int i = 0; i < tries; i++)
716 VOGL_ASSERT(b < BINS);
722 double expect = tries / double(BINS);
723 for (int b = 0; b < BINS; b++)
725 double diff = bin[b] - expect;
726 chiSqr += diff * diff;
729 printf("chi-square: %f, chi-square/d.o.f. = %f\n", chiSqr, chiSqr / (BINS - 1));
730 return chiSqr / (BINS - 1);
733 struct fast_rand_test_obj
735 fast_rand_test_obj(fast_random &r)
738 printf("%s:\n", VOGL_METHOD_NAME);
740 int operator()(int limit)
742 return m_r.irand(0, limit);
749 rand_test_obj(random &r)
752 printf("%s:\n", VOGL_METHOD_NAME);
754 int operator()(int limit)
756 return m_r.irand(0, limit);
761 struct rand_test_obj2
763 rand_test_obj2(random &r)
766 printf("%s:\n", VOGL_METHOD_NAME);
768 int operator()(int limit)
771 for (uint i = 0; i < 32; i++)
772 b = (b << 1) | m_r.get_bit();
773 return (static_cast<double>(b) * limit) / (static_cast<double>(cUINT32_MAX) + 1.0f);
778 struct dbl_rand_test_obj
780 dbl_rand_test_obj(random &r)
783 printf("%s:\n", VOGL_METHOD_NAME);
785 int operator()(int limit)
787 return static_cast<int>(m_r.drand(0, 1.0f) * limit);
792 template <typename T>
793 double pi_estimate(const unsigned long points, T &func)
795 unsigned long hits = 0;
796 for (unsigned long i = 0; i < points; i++)
798 double x = func.drand(0.0f, 1.0f);
799 double y = func.drand(0.0f, 1.0f);
800 if ((x - 0.5) * (x - 0.5) + (y - 0.5) * (y - 0.5) < 0.25)
803 return 4.0f * double(hits) / double(points);
806 template <typename T>
808 const unsigned long trials,
809 const unsigned long points,
811 double &std_dev, T &func)
814 double squared_sum = 0;
815 for (unsigned long t = 0; t < trials; t++)
817 double pi = pi_estimate(points, func);
819 squared_sum += pi * pi;
821 average = sum / trials;
822 std_dev = math::maximum<double>(0.0f, squared_sum / trials - average * average);
823 std_dev = sqrt(std_dev / (trials - 1));
826 // Very basic tests, but better than nothing.
832 time_t seconds = time(NULL);
833 uint32 seed = static_cast<uint32>(seconds);
837 image_u8 eye_test_img(1024, 1024);
838 for (uint t = 0; t < 500; t++)
840 for (uint i = 0; i < 40000; i++)
845 x = static_cast<int>(rm.frand(0, 1.0f) * eye_test_img.get_width());
846 y = static_cast<int>(rm.frand(0, 1.0f) * eye_test_img.get_height());
850 x = rm.irand(0, eye_test_img.get_width());
851 y = rm.irand(0, eye_test_img.get_height());
853 uint l = rm.irand(1, 16);
854 color_quad_u8 c(eye_test_img(x, y));
855 int k = math::minimum<int>(c[0] + l, 255);
857 eye_test_img(x, y) = c;
860 dynamic_string filename(cVarArg, "rand_eye_test_%02i.png", t);
861 image_utils::write_to_file(filename.get_ptr(), eye_test_img, image_utils::cWriteFlagIgnoreAlpha);
862 printf("Wrote %s\n", filename.get_ptr());
865 chi_sqr_test(fast_rand_test_obj(frm));
866 chi_sqr_test(fast_rand_test_obj(frm));
868 chi_sqr_test(rand_test_obj(rm));
869 chi_sqr_test(rand_test_obj(rm));
870 chi_sqr_test(rand_test_obj2(rm));
872 chi_sqr_test(dbl_rand_test_obj(rm));
873 chi_sqr_test(dbl_rand_test_obj(rm));
875 for (uint i = 0; i < 5; i++)
878 measure_pi(10000, 10000, avg, std_dev, rm);
879 printf("pi avg: %1.15f, std dev: %1.15f\n", avg, std_dev);
881 // check for obviously bogus results
882 if ((fabs(avg) - 3.14f) > .01f)
885 if (fabs(std_dev) < .0001f)
891 const uint N = 1000000000;
892 printf("Running cycle test, %u tries:\n", N);
893 for (uint i = 0; i < N; i++)
896 if (memcmp(&cur_rm, &orig_rm, sizeof(cur_rm)) == 0)
898 printf("Cycle detected!\n");
901 if ((i & 0xFFFFFF) == 0xFFFFFF)
902 printf("%3.2f%%\n", (double)i / N * 100.0f);