From 8efecc5d6b8d4dcd6a7bdf9540a11355b4631782 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Sat, 29 Aug 2020 22:46:05 +0200 Subject: Updated crandom.h API! update to benchmark.c . Optimized cmap iter. --- examples/benchmark.c | 64 +++++++++++++++---- examples/birthday.c | 10 +-- examples/heap.c | 8 +-- examples/list.c | 6 +- examples/priority.c | 8 +-- examples/rngtest.c | 32 +++++----- stc/cdefs.h | 2 +- stc/cmap.h | 27 ++++---- stc/crandom.h | 170 ++++++++++++++++++++++++++++----------------------- 9 files changed, 190 insertions(+), 137 deletions(-) diff --git a/examples/benchmark.c b/examples/benchmark.c index 0b9c4f10..d4e1e82c 100644 --- a/examples/benchmark.c +++ b/examples/benchmark.c @@ -29,9 +29,9 @@ KHASH_MAP_INIT_INT64(ii, int64_t) size_t seed; static const float max_load_factor = 0.77f; -crandom_eng64_t rng; -#define SEED(s) rng = crandom_eng64_init(seed) -#define RAND(N) (crandom_i64(&rng) & ((1 << N) - 1)) +crand_rng64_t rng; +#define SEED(s) rng = crand_rng64_init(seed) +#define RAND(N) (crand_i64(&rng) & ((1 << N) - 1)) #define CMAP_SETUP(tt, Key, Value) cmap_##tt map = cmap_init \ @@ -41,6 +41,8 @@ crandom_eng64_t rng; #define CMAP_EMPLACE(tt, key, val) cmap_try_emplace(tt, &map, key, val) #define CMAP_ERASE(tt, key) cmap_##tt##_erase(&map, key) #define CMAP_FIND(tt, key) (cmap_##tt##_find(map, key) != NULL) +#define CMAP_FOR(tt, i) c_foreach (i, cmap_##tt, map) +#define CMAP_ITEM(tt, i) i.item->value #define CMAP_SIZE(tt) cmap_size(map) #define CMAP_BUCKETS(tt) cmap_bucket_count(map) #define CMAP_CLEAR(tt) cmap_##tt##_clear(&map) @@ -62,6 +64,8 @@ crandom_eng64_t rng; #define UMAP_EMPLACE(tt, key, val) map.emplace(key, val) #define UMAP_FIND(tt, key) (map.find(key) != map.end()) #define UMAP_ERASE(tt, key) map.erase(key) +#define UMAP_FOR(tt, i) for (auto i: map) +#define UMAP_ITEM(tt, i) i.second #define UMAP_SIZE(tt) map.size() #define UMAP_BUCKETS(tt) map.bucket_count() #define UMAP_CLEAR(tt) map.clear() @@ -73,6 +77,8 @@ crandom_eng64_t rng; #define BMAP_EMPLACE(tt, key, val) UMAP_EMPLACE(tt, key, val) #define BMAP_FIND(tt, key) UMAP_FIND(tt, key) #define BMAP_ERASE(tt, key) UMAP_ERASE(tt, key) +#define BMAP_FOR(tt, i) UMAP_FOR(tt, i) +#define BMAP_ITEM(tt, i) UMAP_ITEM(tt, i) #define BMAP_SIZE(tt) UMAP_SIZE(tt) #define BMAP_BUCKETS(tt) UMAP_BUCKETS(tt) #define BMAP_CLEAR(tt) UMAP_CLEAR(tt) @@ -84,6 +90,8 @@ crandom_eng64_t rng; #define FMAP_EMPLACE(tt, key, val) UMAP_EMPLACE(tt, key, val) #define FMAP_FIND(tt, key) UMAP_FIND(tt, key) #define FMAP_ERASE(tt, key) UMAP_ERASE(tt, key) +#define FMAP_FOR(tt, i) UMAP_FOR(tt, i) +#define FMAP_ITEM(tt, i) UMAP_ITEM(tt, i) #define FMAP_SIZE(tt) UMAP_SIZE(tt) #define FMAP_BUCKETS(tt) UMAP_BUCKETS(tt) #define FMAP_CLEAR(tt) UMAP_CLEAR(tt) @@ -95,6 +103,8 @@ crandom_eng64_t rng; #define HMAP_EMPLACE(tt, key, val) UMAP_EMPLACE(tt, key, val) #define HMAP_FIND(tt, key) UMAP_FIND(tt, key) #define HMAP_ERASE(tt, key) UMAP_ERASE(tt, key) +#define HMAP_FOR(tt, i) UMAP_FOR(tt, i) +#define HMAP_ITEM(tt, i) UMAP_ITEM(tt, i) #define HMAP_SIZE(tt) UMAP_SIZE(tt) #define HMAP_BUCKETS(tt) UMAP_BUCKETS(tt) #define HMAP_CLEAR(tt) UMAP_CLEAR(tt) @@ -106,6 +116,8 @@ crandom_eng64_t rng; #define RMAP_EMPLACE(tt, key, val) UMAP_EMPLACE(tt, key, val) #define RMAP_FIND(tt, key) UMAP_FIND(tt, key) #define RMAP_ERASE(tt, key) UMAP_ERASE(tt, key) +#define RMAP_FOR(tt, i) UMAP_FOR(tt, i) +#define RMAP_ITEM(tt, i) UMAP_ITEM(tt, i) #define RMAP_SIZE(tt) UMAP_SIZE(tt) #define RMAP_BUCKETS(tt) map.mask() #define RMAP_CLEAR(tt) UMAP_CLEAR(tt) @@ -117,15 +129,21 @@ crandom_eng64_t rng; #define SMAP_EMPLACE(tt, key, val) UMAP_EMPLACE(tt, key, val) #define SMAP_FIND(tt, key) UMAP_FIND(tt, key) #define SMAP_ERASE(tt, key) UMAP_ERASE(tt, key) +#define SMAP_FOR(tt, i) UMAP_FOR(tt, i) +#define SMAP_ITEM(tt, i) UMAP_ITEM(tt, i) #define SMAP_SIZE(tt) UMAP_SIZE(tt) #define SMAP_BUCKETS(tt) UMAP_BUCKETS(tt) #define SMAP_CLEAR(tt) UMAP_CLEAR(tt) #define SMAP_DTOR(tt) UMAP_DTOR(tt) -const size_t N1 = 10000000 * 5; -const size_t N2 = 10000000 * 5; -const size_t N3 = 10000000 * 5; -#define RR 20 +enum { + FAC = 5, + N1 = 10000000 * FAC, + N2 = 10000000 * FAC, + N3 = 10000000 * FAC, + N4 = 10000000 * FAC, + RR = 24 +}; int rr = RR; @@ -177,11 +195,28 @@ int rr = RR; M##_CLEAR(tt); \ } +#define MAP_TEST4(M, tt) \ +{ \ + M##_SETUP(tt, int64_t, int64_t); \ + size_t sum = 0; \ + SEED(seed); \ + for (size_t i = 0; i < N4; ++i) \ + M##_PUT(tt, RAND(rr), i); \ + clock_t difference, before = clock(); \ + for (int k=0; k<5; k++) M##_FOR (tt, i) \ + sum += M##_ITEM(tt, i); \ + difference = clock() - before; \ + printf(#M ": size: %zu, buckets: %8zu, time: %5.02f, sum %zu\n", \ + (size_t) M##_SIZE(tt), (size_t) M##_BUCKETS(tt), (float) difference / CLOCKS_PER_SEC, sum); \ + M##_CLEAR(tt); \ +} + + #ifndef __cplusplus -#define RUN_TEST(n) MAP_TEST##n(CMAP, ii) MAP_TEST##n(KMAP, ii) +#define RUN_TEST(n) MAP_TEST##n(CMAP, ii) /*MAP_TEST##n(KMAP, ii)*/ #else -#define RUN_TEST(n) MAP_TEST##n(CMAP, ii) MAP_TEST##n(KMAP, ii) MAP_TEST##n(UMAP, ii) MAP_TEST##n(SMAP, ii) \ - MAP_TEST##n(BMAP, ii) MAP_TEST##n(FMAP, ii) MAP_TEST##n(RMAP, ii) MAP_TEST##n(HMAP, ii) +#define RUN_TEST(n) MAP_TEST##n(CMAP, ii) /*MAP_TEST##n(KMAP, ii)*/ MAP_TEST##n(UMAP, ii) MAP_TEST##n(SMAP, ii) \ + MAP_TEST##n(BMAP, ii) MAP_TEST##n(FMAP, ii) /*MAP_TEST##n(RMAP, ii)*/ MAP_TEST##n(HMAP, ii) #endif int main(int argc, char* argv[]) @@ -189,12 +224,15 @@ int main(int argc, char* argv[]) rr = argc == 2 ? atoi(argv[1]) : RR; seed = time(NULL); printf("\nRandom keys are in range [0, 2^%d), seed = %zu:\n", rr, seed); - printf("\nUnordered maps: %zu repeats of Insert random key + try to remove a random key:\n", N1); + printf("\nUnordered maps: %d repeats of Insert random key + try to remove a random key:\n", N1); RUN_TEST(1) - printf("\nUnordered maps: Insert %zu index keys, then remove them in same order:\n", N2); + printf("\nUnordered maps: Insert %d index keys, then remove them in same order:\n", N2); RUN_TEST(2) - printf("\nUnordered maps: Insert %zu random keys, then remove them in same order:\n", N3); + printf("\nUnordered maps: Insert %d random keys, then remove them in same order:\n", N3); RUN_TEST(3) + + printf("\nUnordered maps: Iterate %d random keys, then remove them in same order:\n", N4); + RUN_TEST(4) } diff --git a/examples/birthday.c b/examples/birthday.c index 22ecdfae..4f6e7a8b 100644 --- a/examples/birthday.c +++ b/examples/birthday.c @@ -15,12 +15,12 @@ const static uint64_t mask = (1ull << 52) - 1; void repeats(void) { - crandom_eng64_t rng = crandom_eng64_init(seed); + crand_rng64_t rng = crand_rng64_init(seed); cmap_ic m = cmap_init; cmap_ic_reserve(&m, N); clock_t now = clock(); for (size_t i = 0; i < N; ++i) { - uint64_t k = crandom_i64(&rng) & mask; + uint64_t k = crand_i64(&rng) & mask; int v = ++cmap_ic_insert(&m, k, 0)->value; if (v > 1) printf("%zu: %llx - %d\n", i, k, v); } @@ -34,13 +34,13 @@ declare_cvec(x, uint64_t); void distribution(void) { - crandom_eng32_t rng = crandom_eng32_init(seed); // time(NULL), time(NULL)); + crand_rng32_t rng = crand_rng32_init(seed); // time(NULL), time(NULL)); const size_t N = 1ull << 28, M = 1ull << 9; // 1ull << 10; cmap_x map = cmap_x_with_capacity(M); clock_t now = clock(); - crandom_distrib_i32_t dist = crandom_uniform_i32_init(0, M); + crand_uniform_i32_t dist = crand_uniform_i32_init(rng, 0, M); for (size_t i = 0; i < N; ++i) { - ++cmap_x_insert(&map, crandom_uniform_i32(&rng, dist), 0)->value; + ++cmap_x_insert(&map, crand_uniform_i32(&dist), 0)->value; } float diff = (float) (clock() - now) / CLOCKS_PER_SEC; diff --git a/examples/heap.c b/examples/heap.c index e966451b..c6624819 100644 --- a/examples/heap.c +++ b/examples/heap.c @@ -9,12 +9,12 @@ declare_cvec_pqueue(f, >); int main() { uint32_t seed = time(NULL); - crandom_eng32_t pcg = crandom_eng32_init(seed); + crand_rng32_t pcg = crand_rng32_init(seed); int N = 30000000, M = 100; cvec_f vec = cvec_init; clock_t start = clock(); for (int i=0; ivalue); else break; diff --git a/examples/priority.c b/examples/priority.c index 2d5c23bd..598f448a 100644 --- a/examples/priority.c +++ b/examples/priority.c @@ -9,19 +9,19 @@ declare_cvec(i, int64_t); declare_cvec_pqueue(i, >); // min-heap (increasing values) int main() { - crandom_eng32_t pcg = crandom_eng32_init(time(NULL)); - crandom_distrib_i32_t dist = crandom_uniform_i32_init(0, 100000000); + crand_rng32_t pcg = crand_rng32_init(time(NULL)); + crand_uniform_i32_t dist = crand_uniform_i32_init(pcg, 0, 100000000); cvec_i heap = cvec_init; // Push ten million random numbers to priority queue for (int i=0; i<10000000; ++i) - cvec_i_pqueue_push(&heap, crandom_uniform_i32(&pcg, dist)); + cvec_i_pqueue_push(&heap, crand_uniform_i32(&dist)); // push some negative numbers too. c_push(&heap, cvec_i_pqueue, c_items(-231, -32, -873, -4, -343)); for (int i=0; i<10000000; ++i) - cvec_i_pqueue_push(&heap, crandom_uniform_i32(&pcg, dist)); + cvec_i_pqueue_push(&heap, crand_uniform_i32(&dist)); // Extract the hundred smallest. diff --git a/examples/rngtest.c b/examples/rngtest.c index 657e002d..6aa783e2 100644 --- a/examples/rngtest.c +++ b/examples/rngtest.c @@ -13,22 +13,22 @@ int main(void) clock_t difference, before; uint64_t v; - crandom_eng64_t sfc = crandom_eng64_init(time(NULL)); - crandom_distrib_i32_t i32dist = crandom_uniform_i32_init(10, 20); - crandom_distrib_f32_t f32dist = crandom_uniform_f32_init(10, 20); + crand_rng64_t stc = crand_rng64_init(time(NULL)); + crand_uniform_i64_t idist = crand_uniform_i64_init(stc, 10, 20); + crand_uniform_f64_t fdist = crand_uniform_f64_init(stc, 10, 20); - crandom_distrib_i64_t idist = crandom_uniform_i64_init(10, 20); - crandom_distrib_f64_t fdist = crandom_uniform_f64_init(10, 20); - - for (int i=0; i<30; ++i) printf("%02zd ", crandom_uniform_i64(&sfc, idist)); + for (int i=0; i<30; ++i) printf("%02zd ", crand_uniform_i64(&idist)); puts(""); - crandom_eng32_t pcg = crandom_eng32_init(time(NULL)); + crand_rng32_t pcg = crand_rng32_init(time(NULL)); + crand_uniform_i32_t i32dist = crand_uniform_i32_init(pcg, 10, 20); + crand_uniform_f32_t f32dist = crand_uniform_f32_init(pcg, 10, 20); + before = clock(); \ v = 0; for (size_t i=0; itable, map->table + map->bucket_count, map->_hashx}; \ + if (it._hx) while (*it._hx == 0) ++it.item, ++it._hx; \ + return it; \ +} \ +STC_FORCE_INLINE void \ +ctype##_##tag##_next(ctype##_##tag##_iter_t* it) { \ + while ((++it->item, *++it->_hx == 0)) ; \ +} \ \ STC_API uint32_t c_default_hash16(const void *data, size_t len); \ STC_API uint32_t c_default_hash32(const void* data, size_t len); \ @@ -386,18 +393,6 @@ ctype##_##tag##_erase(ctype##_##tag* self, ctype##_##tag##_rawkey_t rawKey) { \ uint32_t hx; \ size_t i = ctype##_##tag##_bucket(self, &rawKey, &hx); \ return ctype##_##tag##_erase_entry(self, self->table + i); \ -} \ - \ -STC_API ctype##_##tag##_iter_t \ -ctype##_##tag##_begin(ctype##_##tag* map) { \ - ctype##_##tag##_iter_t it = {map->table, map->table + map->bucket_count, map->_hashx}; \ - if (it._hx) while (*it._hx == 0) ++it.item, ++it._hx; \ - return it; \ -} \ - \ -STC_API void \ -ctype##_##tag##_next(ctype##_##tag##_iter_t* it) { \ - while ((++it->item, *++it->_hx == 0)) ; \ } /* https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/ */ diff --git a/stc/crandom.h b/stc/crandom.h index f4b609b7..5821b4e7 100644 --- a/stc/crandom.h +++ b/stc/crandom.h @@ -28,131 +28,138 @@ #include #include /* - crandom_eng32_t eng = crandom_eng32_init(seed); - crandom_distrib_f32_t fdist = crandom_uniform_f32_init(1.0f, 6.0f); - crandom_distrib_i32_t idist = crandom_uniform_i32_init(1, 6); + crand_rng32_t rng = crand_rng32_init(seed); + crand_uniform_f32_t fdist = crand_uniform_f32_init(rng, 1.0f, 6.0f); + crand_uniform_i32_t idist = crand_uniform_i32_init(rng, 1, 6); - uint32_t i = crandom_i32(&eng); - int j = crandom_uniform_i32(&eng, idist); - float r = crandom_uniform_f32(&eng, fdist); + uint32_t i = crand_i32(&rng); + int j = crand_uniform_i32(&idist); + float r = crand_uniform_f32(&fdist); */ -typedef struct {uint64_t state[2];} crandom_eng32_t; -typedef struct {int32_t offset, range;} crandom_distrib_i32_t; -typedef struct {float offset, range;} crandom_distrib_f32_t; +/* 32-BIT RANDOM NUMBER GENERATOR */ -/* 32 bit random number generator engine */ -STC_API crandom_eng32_t crandom_eng32_with_seq(uint64_t seed, uint64_t seq); -STC_INLINE crandom_eng32_t crandom_eng32_init(uint64_t seed) { - return crandom_eng32_with_seq(seed, seed); +typedef struct {uint64_t state[2];} crand_rng32_t; +typedef struct {crand_rng32_t rng; int32_t offset; uint32_t range;} crand_uniform_i32_t; +typedef struct {crand_rng32_t rng; float offset, range;} crand_uniform_f32_t; + +/* engine initializers */ +STC_API crand_rng32_t crand_rng32_with_seq(uint64_t seed, uint64_t seq); +STC_INLINE crand_rng32_t crand_rng32_init(uint64_t seed) { + return crand_rng32_with_seq(seed, seed); } /* int random number generator, range [0, 2^32) */ -STC_API uint32_t crandom_i32(crandom_eng32_t* rng); +STC_API uint32_t crand_i32(crand_rng32_t* rng); -STC_INLINE float crandom_f32(crandom_eng32_t* rng) { - union {uint32_t i; float f;} u = {0x3F800000u | (crandom_i32(rng) >> 9)}; +STC_INLINE float crand_f32(crand_rng32_t* rng) { + union {uint32_t i; float f;} u = {0x3F800000u | (crand_i32(rng) >> 9)}; return u.f - 1.0f; } /* int random number generator in range [low, high] */ -STC_INLINE crandom_distrib_i32_t crandom_uniform_i32_init(int32_t low, int32_t high) { - crandom_distrib_i32_t dist = {low, high - low + 1}; return dist; +STC_INLINE crand_uniform_i32_t crand_uniform_i32_init(crand_rng32_t rng, int32_t low, int32_t high) { + crand_uniform_i32_t dist = {rng, low, high - low + 1}; return dist; } -STC_INLINE int32_t crandom_uniform_i32(crandom_eng32_t* rng, crandom_distrib_i32_t dist) { - return dist.offset + (int32_t) (((uint64_t) crandom_i32(rng) * dist.range) >> 32); +STC_INLINE int32_t crand_uniform_i32(crand_uniform_i32_t* dist) { + return dist->offset + (int32_t) (((uint64_t) crand_i32(&dist->rng) * dist->range) >> 32); } +/* https://github.com/lemire/fastrange */ +STC_API uint32_t crand_unbiased_i32(crand_uniform_i32_t* dist); /* float random number in range [low, high). Note: 23 bit resolution. */ -STC_INLINE crandom_distrib_f32_t crandom_uniform_f32_init(float low, float high) { - crandom_distrib_f32_t dist = {low, high - low}; return dist; +STC_INLINE crand_uniform_f32_t crand_uniform_f32_init(crand_rng32_t rng, float low, float high) { + crand_uniform_f32_t dist = {rng, low, high - low}; return dist; } -STC_INLINE float crandom_uniform_f32(crandom_eng32_t* rng, crandom_distrib_f32_t dist) { - return dist.offset + crandom_f32(rng) * dist.range; +STC_INLINE float crand_uniform_f32(crand_uniform_f32_t* dist) { + return dist->offset + crand_f32(&dist->rng) * dist->range; } -typedef struct {uint64_t state[4];} crandom_eng64_t; -typedef struct {int64_t offset, range;} crandom_distrib_i64_t; -typedef struct {double offset, range;} crandom_distrib_f64_t; +/* 64 BIT RANDOM NUMBER GENERATOR */ + +typedef struct {uint64_t state[4];} crand_rng64_t; +typedef struct {crand_rng64_t rng; int64_t offset; uint64_t range;} crand_uniform_i64_t; +typedef struct {crand_rng64_t rng; double offset, range;} crand_uniform_f64_t; +typedef struct {crand_rng64_t rng; double mean, stddev, next; bool has_next;} crand_normal_f64_t; + -/* 64 bit random number generator engine */ -STC_API crandom_eng64_t crandom_eng64_with_seq(uint64_t seed, uint64_t seq); -STC_INLINE crandom_eng64_t crandom_eng64_init(uint64_t seed) { - return crandom_eng64_with_seq(seed, 1); +/* engine initializers */ +STC_API crand_rng64_t crand_rng64_with_seq(uint64_t seed, uint64_t seq); +STC_INLINE crand_rng64_t crand_rng64_init(uint64_t seed) { + return crand_rng64_with_seq(seed, 1); } -/* int random number generator, range [0, 2^64) */ -STC_API uint64_t crandom_i64(crandom_eng64_t* rng); +/* int random number generator, range [0, 2^64). PRNG copyright Tyge Løvset, NORCE Research, 2020 */ +STC_API uint64_t crand_i64(crand_rng64_t* rng); /* double random number in range [low, high). 52 bit resolution. */ -STC_INLINE double crandom_f64(crandom_eng64_t* rng) { - union {uint64_t i; double f;} u = {0x3FF0000000000000ull | (crandom_i64(rng) >> 12)}; +STC_INLINE double crand_f64(crand_rng64_t* rng) { + union {uint64_t i; double f;} u = {0x3FF0000000000000ull | (crand_i64(rng) >> 12)}; return u.f - 1.0; } /* int random number generator in range [low, high] */ -STC_INLINE crandom_distrib_i64_t crandom_uniform_i64_init(int64_t low, int64_t high) { - crandom_distrib_i64_t dist = {low, high - low + 1}; return dist; +STC_INLINE crand_uniform_i64_t crand_uniform_i64_init(crand_rng64_t rng, int64_t low, int64_t high) { + crand_uniform_i64_t dist = {rng, low, high - low + 1}; return dist; } #if defined(_MSC_VER) && defined(_WIN64) #include #endif -STC_INLINE int64_t crandom_uniform_i64(crandom_eng64_t* rng, crandom_distrib_i64_t dist) { +STC_INLINE int64_t crand_uniform_i64(crand_uniform_i64_t* dist) { #if defined(__SIZEOF_INT128__) - return dist.offset + (int64_t) (((__uint128_t) crandom_i64(rng) * dist.range) >> 64); + return dist->offset + (int64_t) (((__uint128_t) crand_i64(&dist->rng) * dist->range) >> 64); #elif defined(_MSC_VER) && defined(_WIN64) - uint64_t hi; _umul128(crandom_i64(rng), dist.range, &hi); return dist.offset + hi; + uint64_t hi; _umul128(crand_i64(&dist->rng), dist->range, &hi); return dist->offset + hi; #else - return dist.offset + crandom_i64(rng) % dist.range; // slower + return dist->offset + crand_i64(&dist->rng) % dist->range; // slower #endif } -STC_INLINE crandom_distrib_f64_t crandom_uniform_f64_init(double low, double high) { - crandom_distrib_f64_t dist = {low, high - low}; return dist; +STC_INLINE crand_uniform_f64_t crand_uniform_f64_init(crand_rng64_t rng, double low, double high) { + crand_uniform_f64_t dist = {rng, low, high - low}; return dist; } -STC_INLINE double crandom_uniform_f64(crandom_eng64_t* rng, crandom_distrib_f64_t dist) { - return dist.offset + crandom_f64(rng) * dist.range; +STC_INLINE double crand_uniform_f64(crand_uniform_f64_t* dist) { + return dist->offset + crand_f64(&dist->rng) * dist->range; } -STC_INLINE crandom_distrib_f64_t crandom_normal_f64_init(double mean, double std_dev) { - crandom_distrib_f64_t dist = {mean, std_dev}; return dist; +STC_INLINE crand_normal_f64_t crand_normal_f64_init(crand_rng64_t rng, double mean, double stddev) { + crand_normal_f64_t dist = {rng, mean, stddev, 0.0, false}; return dist; } -STC_API double crandom_normal_f64(crandom_eng64_t* rng, crandom_distrib_f64_t dist); +STC_API double crand_normal_f64(crand_normal_f64_t* dist); #if !defined(STC_HEADER) || defined(STC_IMPLEMENTATION) -/* PCG32 random number generator: https://www.pcg-random.org/download.html */ - -STC_API crandom_eng32_t crandom_eng32_with_seq(uint64_t seed, uint64_t seq) { - crandom_eng32_t rng = {0u, (seq << 1u) | 1u}; /* inc must be odd */ - crandom_i32(&rng); +STC_API crand_rng32_t crand_rng32_with_seq(uint64_t seed, uint64_t seq) { + crand_rng32_t rng = {0u, (seq << 1u) | 1u}; /* inc must be odd */ + crand_i32(&rng); rng.state[0] += seed; - crandom_i32(&rng); + crand_i32(&rng); return rng; } -STC_API uint32_t crandom_i32(crandom_eng32_t* rng) { +/* PCG32 https://www.pcg-random.org/download.html */ +STC_API uint32_t crand_i32(crand_rng32_t* rng) { uint64_t old = rng->state[0]; rng->state[0] = old * 6364136223846793005ull + rng->state[1]; - uint32_t xors = ((old >> 18u) ^ old) >> 27u; + uint32_t xors = (uint32_t) (((old >> 18u) ^ old) >> 27u); uint32_t rot = old >> 59u; return (xors >> rot) | (xors << ((-rot) & 31)); } -STC_API crandom_eng64_t crandom_eng64_with_seq(uint64_t seed, uint64_t seq) { - crandom_eng64_t rng = {seed, seed, seed, (seq << 1u) | 1u}; /* increment must be odd */ - for (int i = 0; i < 12; ++i) crandom_i64(&rng); - return rng; -} - /* PRNG copyright Tyge Løvset, NORCE Research, 2020 */ /* Extremely fast PRNG suited for parallel usage with Weyl-sequence parameter. */ -/* Updates only 192bit state. Parallel: Ensures unique sequence per seq (2^63) */ -/* Minimum period is 2^64 per seq, but high average per Weyl seq. */ -STC_API uint64_t crandom_i64(crandom_eng64_t* rng) { +/* Even faster than sfc64: updates only 192bit state. Better for parallel processing: */ +/* Guarantees 2^63 unique threads with minimum 2^64 period length ~ 2^160 average period. */ +/* Tested with PractRand to 8 TB output: no issues */ +STC_API crand_rng64_t crand_rng64_with_seq(uint64_t seed, uint64_t seq) { + crand_rng64_t rng = {seed, seed, seed, (seq << 1u) | 1u}; /* increment must be odd */ + for (int i = 0; i < 12; ++i) crand_i64(&rng); + return rng; +} +STC_API uint64_t crand_i64(crand_rng64_t* rng) { enum {LROT = 24, RSHIFT = 11, LSHIFT = 3}; uint64_t *s = rng->state; const uint64_t b = s[1], result = s[0] ^ (s[2] += s[3]|1); @@ -161,21 +168,34 @@ STC_API uint64_t crandom_i64(crandom_eng64_t* rng) { return result; } -STC_API double crandom_normal_f64(crandom_eng64_t* rng, crandom_distrib_f64_t dist) { - static bool spare = false; /* Marsaglia polar method: */ - static double u2; double u1, s, m; - if (spare) { - spare = false; - return u2 * dist.range + dist.offset; +/* Unbiased uniform https://github.com/lemire/fastrange */ +STC_API uint32_t crand_unbiased_i32(crand_uniform_i32_t* dist) { + uint32_t r = dist->range; + uint64_t m = (uint64_t) crand_i32(&dist->rng) * r; + uint32_t l = (uint32_t) m; + if (l < r) { + uint32_t t = -r; + if (t >= r) if ((t -= r) >= r) t %= r; + while (l < t) l = (uint32_t) (m = (uint64_t) crand_i32(&dist->rng) * r); + } + return dist->offset + (m >> 32); +} + +/* Marsaglia polar method for gaussian distribution. */ +STC_API double crand_normal_f64(crand_normal_f64_t* dist) { + double u1, u2, s, m; + if (dist->has_next) { + dist->has_next = false; + return dist->next * dist->stddev + dist->mean; } do { - u1 = 2.0 * crandom_f64(rng) - 1.0; - u2 = 2.0 * crandom_f64(rng) - 1.0; + u1 = 2.0 * crand_f64(&dist->rng) - 1.0; + u2 = 2.0 * crand_f64(&dist->rng) - 1.0; s = u1*u1 + u2*u2; } while (s >= 1.0 || s == 0.0); m = sqrt(-2.0 * log(s) / s); - u2 *= m; spare = true; - return (u1 * m) * dist.range + dist.offset; + dist->next = u2 * m, dist->has_next = true; + return (u1 * m) * dist->stddev + dist->mean; } #endif -- cgit v1.2.3