From 59d74d181e44dd05a8570b42fc6284745e225664 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Tue, 28 Mar 2023 19:29:05 +0200 Subject: Example changes. Added crand.h possible replacement for crandom.h --- include/stc/crand.h | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 include/stc/crand.h (limited to 'include/stc/crand.h') diff --git a/include/stc/crand.h b/include/stc/crand.h new file mode 100644 index 00000000..4ebc402d --- /dev/null +++ b/include/stc/crand.h @@ -0,0 +1,194 @@ +/* MIT License + * + * Copyright (c) 2023 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "ccommon.h" + +#ifndef CRAND_H_INCLUDED +#define CRAND_H_INCLUDED +/* +// crand: Pseudo-random number generator +#include "stc/crand.h" + +int main() { + uint64_t seed = 123456789; + crand_t rng = crand_from(seed); + crand_unif_t dist1 = crand_unif_from(1, 6); + crandf_unif_t dist2 = crandf_unif_from(1.0, 10.0); + crandf_norm_t dist3 = crandf_norm_from(1.0, 10.0); + + uint64_t i = crand_r(&rng); + int64_t iu = crand_unif(&rng, &dist1); + double xu = crandf_unif(&rng, &dist2); + double xn = crandf_norm(&rng, &dist3); +} +*/ +#include +#include + +typedef struct crand { uint64_t state[5]; } crand_t; +typedef struct crand_unif { int64_t lower; uint64_t range, threshold; } crand_unif_t; +typedef struct crandf_unif { double lower, range; } crandf_unif_t; +typedef struct crandf_norm { double mean, stddev, next; int has_next; } crandf_norm_t; + +/* PRNG crand_t. + * Very fast PRNG suited for parallel usage with Weyl-sequence parameter. + * 320-bit state, 256 bit is mutable. + * Noticable faster than xoshiro and pcg, slighly slower than wyrand64 and + * Romu, but these have restricted capacity for larger parallel jobs or unknown minimum periods. + * crand_t supports 2^63 unique threads with a minimum 2^64 period lengths each. + * Passes all statistical tests, e.g PractRand and correlation tests, i.e. interleaved + * streams with one-bit diff state. Even the 16-bit version (LR=6, RS=5, LS=3) passes + * PractRand to multiple TB input. + */ + +/* Global crand_t PRNGs */ +STC_API void csrand(uint64_t seed); +STC_API uint64_t crand(void); +STC_API double crandf(void); + +/* Init crand_t prng with a seed */ +STC_API crand_t crand_from(uint64_t seed); + +/* Unbiased bounded uniform distribution. range [low, high] */ +STC_API crand_unif_t crand_unif_from(int64_t low, int64_t high); +STC_API int64_t crand_unif(crand_t* rng, crand_unif_t* dist); + +/* Normal distribution PRNG */ +STC_API double crandf_norm(crand_t* rng, crandf_norm_t* dist); + + +/* Main crand_t prng */ +STC_INLINE uint64_t crand_r(crand_t* rng) { + enum {LR=24, RS=11, LS=3}; uint64_t *s = rng->state; + const uint64_t out = (s[0] ^ (s[3] += s[4])) + s[1]; + s[0] = s[1] ^ (s[1] >> RS); + s[1] = s[2] + (s[2] << LS); + s[2] = ((s[2] << LR) | (s[2] >> (64 - LR))) + out; + return out; +} + +/* Float64 random number in range [0.0, 1.0). */ +STC_INLINE double crandf_r(crand_t* rng) { + union {uint64_t i; double f;} u = {0x3FF0000000000000U | (crand_r(rng) >> 12)}; + return u.f - 1.0; +} + +/* Float64 uniform distributed RNG, range [low, high). */ +STC_INLINE double crandf_unif(crand_t* rng, crandf_unif_t* dist) { + return crandf_r(rng)*dist->range + dist->lower; +} + +/* Init uniform distributed float64 RNG, range [low, high). */ +STC_INLINE crandf_unif_t crandf_unif_from(double low, double high) { + return c_LITERAL(crandf_unif_t){low, high - low}; +} + +/* Marsaglia polar method for gaussian/normal distribution, float64. */ +STC_INLINE crandf_norm_t crandf_norm_from(double mean, double stddev) { + return c_LITERAL(crandf_norm_t){mean, stddev, 0.0, 0}; +} + +/* -------------------------- IMPLEMENTATION ------------------------- */ +#if defined(i_implement) + +/* Global random() */ +static crand_t crand_global = {{ + 0x26aa069ea2fb1a4d, 0x70c72c95cd592d04, + 0x504f333d3aa0b359, 0x9e3779b97f4a7c15, + 0x6a09e667a754166b +}}; + +STC_DEF void csrand(uint64_t seed) { + crand_global = crand_from(seed); +} + +STC_DEF uint64_t crand(void) { + return crand_r(&crand_global); +} + +STC_DEF double crandf(void) { + return crandf_r(&crand_global); +} + +/* rng.state[4] must be odd */ +STC_DEF crand_t crand_from(uint64_t seed) { + crand_t rng = {{seed + 0x26aa069ea2fb1a4d, + seed*0x9e3779b97f4a7c15 + 0x70c72c95cd592d04, + seed + 0x504f333d3aa0b359, + seed*0x6a09e667a754166b, seed<<1 | 1}}; + return rng; +} + +/* Init unbiased uniform uint RNG with bounds [low, high] */ +STC_DEF crand_unif_t crand_unif_from(int64_t low, int64_t high) { + crand_unif_t dist = {low, (uint64_t) (high - low + 1)}; + dist.threshold = (uint64_t)(0 - dist.range) % dist.range; + return dist; +} + +#if defined(__SIZEOF_INT128__) + #define c_umul128(a, b, lo, hi) \ + do { __uint128_t _z = (__uint128_t)(a)*(b); \ + *(lo) = (uint64_t)_z, *(hi) = (uint64_t)(_z >> 64U); } while(0) +#elif defined(_MSC_VER) && defined(_WIN64) + #include + #define c_umul128(a, b, lo, hi) ((void)(*(lo) = _umul128(a, b, hi))) +#elif defined(__x86_64__) + #define c_umul128(a, b, lo, hi) \ + asm("mulq %3" : "=a"(*(lo)), "=d"(*(hi)) : "a"(a), "rm"(b)) +#endif + +/* Int uniform distributed RNG, range [low, high]. */ +STC_DEF int64_t crand_unif(crand_t* rng, crand_unif_t* d) { +#ifdef c_umul128 + uint64_t lo, hi; + do { c_umul128(crand_r(rng), d->range, &lo, &hi); } while (lo < d->threshold); + return d->lower + (int64_t)hi; +#else + uint64_t x, r; + do { x = crand_r(rng); r = x % d->range; } while (x - r > -d->range); + return d->lower + r; +#endif +} + +/* Normal distribution PRNG */ +STC_DEF double crandf_norm(crand_t* rng, crandf_norm_t* dist) { + double u1, u2, s, m; + if (dist->has_next++ & 1) + return dist->next * dist->stddev + dist->mean; + do { + u1 = 2.0 * crandf_r(rng) - 1.0; + u2 = 2.0 * crandf_r(rng) - 1.0; + s = u1*u1 + u2*u2; + } while (s >= 1.0 || s == 0.0); + m = sqrt(-2.0 * log(s) / s); + dist->next = u2 * m; + return (u1 * m) * dist->stddev + dist->mean; +} + +#endif +#endif +#undef i_opt +#undef i_static +#undef i_header +#undef i_implement +#undef i_extern -- cgit v1.2.3 From 9a88ddd9cbf4c33664de258bcb5bcef6a746149a Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Wed, 29 Mar 2023 18:23:54 +0200 Subject: Some optimizations in hash func. --- include/stc/ccommon.h | 22 +++---- include/stc/crand.h | 105 +++++++++++++++------------------- misc/benchmarks/shootout_hashmaps.cpp | 1 + 3 files changed, 58 insertions(+), 70 deletions(-) (limited to 'include/stc/crand.h') diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h index 5e163875..362b09ce 100644 --- a/include/stc/ccommon.h +++ b/include/stc/ccommon.h @@ -140,20 +140,20 @@ typedef const char* crawstr; #define c_ROTL(x, k) (x << (k) | x >> (8*sizeof(x) - (k))) STC_INLINE uint64_t cfasthash(const void* key, intptr_t len) { - const uint8_t *x = (const uint8_t*) key; - uint64_t u8, h = 1; intptr_t n = len >> 3; - uint32_t u4; + uint32_t u4; uint64_t u8; + switch (len) { + case 8: memcpy(&u8, key, 8); return u8*0xc6a4a7935bd1e99d; + case 4: memcpy(&u4, key, 4); return u4*0xc6a4a7935bd1e99d; + case 0: return 1; + } + const uint8_t *x = (const uint8_t*)key; + uint64_t h = *x, n = (uint64_t)len >> 3; + len &= 7; while (n--) { memcpy(&u8, x, 8), x += 8; - h += (c_ROTL(u8, 26) ^ u8)*0xc6a4a7935bd1e99d; - } - switch (len &= 7) { - case 0: return h; - case 4: memcpy(&u4, x, 4); - return h + u4*0xc6a4a7935bd1e99d; + h += u8*0xc6a4a7935bd1e99d; } - h += *x++; - while (--len) h = (h << 10) - h + *x++; + while (len--) h = (h << 10) - h - *x++; return c_ROTL(h, 26) ^ h; } diff --git a/include/stc/crand.h b/include/stc/crand.h index 4ebc402d..f46f2bd5 100644 --- a/include/stc/crand.h +++ b/include/stc/crand.h @@ -30,15 +30,13 @@ int main() { uint64_t seed = 123456789; - crand_t rng = crand_from(seed); - crand_unif_t dist1 = crand_unif_from(1, 6); - crandf_unif_t dist2 = crandf_unif_from(1.0, 10.0); - crandf_norm_t dist3 = crandf_norm_from(1.0, 10.0); + crand_t rng = crand_init(seed); + crand_unif_t dist1 = crand_unif_init(1, 6); + crand_norm_t dist3 = crand_norm_init(1.0, 10.0); - uint64_t i = crand_r(&rng); + uint64_t i = crand_u64(&rng); int64_t iu = crand_unif(&rng, &dist1); - double xu = crandf_unif(&rng, &dist2); - double xn = crandf_norm(&rng, &dist3); + double xn = crand_norm(&rng, &dist3); } */ #include @@ -46,8 +44,7 @@ int main() { typedef struct crand { uint64_t state[5]; } crand_t; typedef struct crand_unif { int64_t lower; uint64_t range, threshold; } crand_unif_t; -typedef struct crandf_unif { double lower, range; } crandf_unif_t; -typedef struct crandf_norm { double mean, stddev, next; int has_next; } crandf_norm_t; +typedef struct crand_norm { double mean, stddev, next; int has_next; } crand_norm_t; /* PRNG crand_t. * Very fast PRNG suited for parallel usage with Weyl-sequence parameter. @@ -61,23 +58,25 @@ typedef struct crandf_norm { double mean, stddev, next; int has_next; } crandf_n */ /* Global crand_t PRNGs */ -STC_API void csrand(uint64_t seed); +STC_API void csrand(uint64_t seed); STC_API uint64_t crand(void); -STC_API double crandf(void); +STC_API double crandf(void); /* Init crand_t prng with a seed */ -STC_API crand_t crand_from(uint64_t seed); +STC_API crand_t crand_init(uint64_t seed); /* Unbiased bounded uniform distribution. range [low, high] */ -STC_API crand_unif_t crand_unif_from(int64_t low, int64_t high); +STC_API crand_unif_t crand_unif_init(int64_t low, int64_t high); STC_API int64_t crand_unif(crand_t* rng, crand_unif_t* dist); -/* Normal distribution PRNG */ -STC_API double crandf_norm(crand_t* rng, crandf_norm_t* dist); +/* Normal/gaussian distribution. */ +STC_INLINE crand_norm_t crand_norm_init(double mean, double stddev) + { crand_norm_t r = {mean, stddev, 0.0, 0}; return r; } +STC_API double crand_norm(crand_t* rng, crand_norm_t* dist); /* Main crand_t prng */ -STC_INLINE uint64_t crand_r(crand_t* rng) { +STC_INLINE uint64_t crand_u64(crand_t* rng) { enum {LR=24, RS=11, LS=3}; uint64_t *s = rng->state; const uint64_t out = (s[0] ^ (s[3] += s[4])) + s[1]; s[0] = s[1] ^ (s[1] >> RS); @@ -87,26 +86,11 @@ STC_INLINE uint64_t crand_r(crand_t* rng) { } /* Float64 random number in range [0.0, 1.0). */ -STC_INLINE double crandf_r(crand_t* rng) { - union {uint64_t i; double f;} u = {0x3FF0000000000000U | (crand_r(rng) >> 12)}; +STC_INLINE double crand_f64(crand_t* rng) { + union {uint64_t i; double f;} u = {0x3FF0000000000000U | (crand_u64(rng) >> 12)}; return u.f - 1.0; } -/* Float64 uniform distributed RNG, range [low, high). */ -STC_INLINE double crandf_unif(crand_t* rng, crandf_unif_t* dist) { - return crandf_r(rng)*dist->range + dist->lower; -} - -/* Init uniform distributed float64 RNG, range [low, high). */ -STC_INLINE crandf_unif_t crandf_unif_from(double low, double high) { - return c_LITERAL(crandf_unif_t){low, high - low}; -} - -/* Marsaglia polar method for gaussian/normal distribution, float64. */ -STC_INLINE crandf_norm_t crandf_norm_from(double mean, double stddev) { - return c_LITERAL(crandf_norm_t){mean, stddev, 0.0, 0}; -} - /* -------------------------- IMPLEMENTATION ------------------------- */ #if defined(i_implement) @@ -117,29 +101,34 @@ static crand_t crand_global = {{ 0x6a09e667a754166b }}; -STC_DEF void csrand(uint64_t seed) { - crand_global = crand_from(seed); -} +STC_DEF void csrand(uint64_t seed) + { crand_global = crand_init(seed); } -STC_DEF uint64_t crand(void) { - return crand_r(&crand_global); -} +STC_DEF uint64_t crand(void) + { return crand_u64(&crand_global); } + +STC_DEF double crandf(void) + { return crand_f64(&crand_global); } -STC_DEF double crandf(void) { - return crandf_r(&crand_global); +STC_INLINE uint64_t splitmix64(uint64_t s[1]) { + uint64_t z = (s[0] += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); } -/* rng.state[4] must be odd */ -STC_DEF crand_t crand_from(uint64_t seed) { +STC_DEF crand_t crand_init(uint64_t seed) { + /* rng.state[4] must be odd */ crand_t rng = {{seed + 0x26aa069ea2fb1a4d, seed*0x9e3779b97f4a7c15 + 0x70c72c95cd592d04, seed + 0x504f333d3aa0b359, - seed*0x6a09e667a754166b, seed<<1 | 1}}; + seed, seed<<1 | 1}}; + crand_u64(&rng); return rng; } /* Init unbiased uniform uint RNG with bounds [low, high] */ -STC_DEF crand_unif_t crand_unif_from(int64_t low, int64_t high) { +STC_DEF crand_unif_t crand_unif_init(int64_t low, int64_t high) { crand_unif_t dist = {low, (uint64_t) (high - low + 1)}; dist.threshold = (uint64_t)(0 - dist.range) % dist.range; return dist; @@ -157,32 +146,30 @@ STC_DEF crand_unif_t crand_unif_from(int64_t low, int64_t high) { asm("mulq %3" : "=a"(*(lo)), "=d"(*(hi)) : "a"(a), "rm"(b)) #endif -/* Int uniform distributed RNG, range [low, high]. */ +/* Int64 uniform distributed RNG, range [low, high]. */ STC_DEF int64_t crand_unif(crand_t* rng, crand_unif_t* d) { -#ifdef c_umul128 uint64_t lo, hi; - do { c_umul128(crand_r(rng), d->range, &lo, &hi); } while (lo < d->threshold); - return d->lower + (int64_t)hi; +#ifdef c_umul128 + do { c_umul128(crand_u64(rng), d->range, &lo, &hi); } while (lo < d->threshold); #else - uint64_t x, r; - do { x = crand_r(rng); r = x % d->range; } while (x - r > -d->range); - return d->lower + r; + do { lo = crand_u64(rng); hi = lo % d->range; } while (lo - hi > -d->range); #endif + return d->lower + (int64_t)hi; } -/* Normal distribution PRNG */ -STC_DEF double crandf_norm(crand_t* rng, crandf_norm_t* dist) { +/* Normal distribution PRNG. Marsaglia polar method */ +STC_DEF double crand_norm(crand_t* rng, crand_norm_t* dist) { double u1, u2, s, m; if (dist->has_next++ & 1) - return dist->next * dist->stddev + dist->mean; + return dist->next*dist->stddev + dist->mean; do { - u1 = 2.0 * crandf_r(rng) - 1.0; - u2 = 2.0 * crandf_r(rng) - 1.0; + u1 = 2.0 * crand_f64(rng) - 1.0; + u2 = 2.0 * crand_f64(rng) - 1.0; s = u1*u1 + u2*u2; } while (s >= 1.0 || s == 0.0); m = sqrt(-2.0 * log(s) / s); - dist->next = u2 * m; - return (u1 * m) * dist->stddev + dist->mean; + dist->next = u2*m; + return (u1*m)*dist->stddev + dist->mean; } #endif diff --git a/misc/benchmarks/shootout_hashmaps.cpp b/misc/benchmarks/shootout_hashmaps.cpp index 39ad1786..947a35b4 100644 --- a/misc/benchmarks/shootout_hashmaps.cpp +++ b/misc/benchmarks/shootout_hashmaps.cpp @@ -35,6 +35,7 @@ KHASH_MAP_INIT_INT64(ii, IValue) // cmap template expansion #define i_key IKey #define i_val IValue +#define i_ssize int32_t // enable 2^K buckets like the rest. #define i_tag ii #define i_max_load_factor MAX_LOAD_FACTOR / 100.0f #include -- cgit v1.2.3 From 32df5677c9906661e91aad294e45a258e2eaab18 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Thu, 30 Mar 2023 08:11:29 +0200 Subject: removed unneeded code --- include/stc/crand.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/stc/crand.h') diff --git a/include/stc/crand.h b/include/stc/crand.h index f46f2bd5..191d578a 100644 --- a/include/stc/crand.h +++ b/include/stc/crand.h @@ -110,13 +110,6 @@ STC_DEF uint64_t crand(void) STC_DEF double crandf(void) { return crand_f64(&crand_global); } -STC_INLINE uint64_t splitmix64(uint64_t s[1]) { - uint64_t z = (s[0] += 0x9e3779b97f4a7c15); - z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; - z = (z ^ (z >> 27)) * 0x94d049bb133111eb; - return z ^ (z >> 31); -} - STC_DEF crand_t crand_init(uint64_t seed) { /* rng.state[4] must be odd */ crand_t rng = {{seed + 0x26aa069ea2fb1a4d, -- cgit v1.2.3 From 4f1d00baf916ceaa27b1a29a80117abdb662d656 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Fri, 31 Mar 2023 14:00:13 +0200 Subject: Small change in crand_u64(). Use - instead of ^ in result. --- include/stc/crand.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/stc/crand.h') diff --git a/include/stc/crand.h b/include/stc/crand.h index 191d578a..122c1f21 100644 --- a/include/stc/crand.h +++ b/include/stc/crand.h @@ -77,12 +77,12 @@ STC_API double crand_norm(crand_t* rng, crand_norm_t* dist); /* Main crand_t prng */ STC_INLINE uint64_t crand_u64(crand_t* rng) { - enum {LR=24, RS=11, LS=3}; uint64_t *s = rng->state; - const uint64_t out = (s[0] ^ (s[3] += s[4])) + s[1]; - s[0] = s[1] ^ (s[1] >> RS); - s[1] = s[2] + (s[2] << LS); - s[2] = ((s[2] << LR) | (s[2] >> (64 - LR))) + out; - return out; + uint64_t *s = rng->state; + const uint64_t result = s[0] + s[1] - (s[3] += s[4]); + s[0] = s[1] ^ (s[1] >> 11); + s[1] = s[2] + (s[2] << 3); + s[2] = ((s[2] << 24) | (s[2] >> (64 - 24))) + result; + return result; } /* Float64 random number in range [0.0, 1.0). */ -- cgit v1.2.3 From 56c394ede691143a32d53f4094df37dc49dc0a29 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Fri, 31 Mar 2023 18:58:36 +0200 Subject: Change in crand. --- include/stc/crand.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/stc/crand.h') diff --git a/include/stc/crand.h b/include/stc/crand.h index 122c1f21..a1b7250d 100644 --- a/include/stc/crand.h +++ b/include/stc/crand.h @@ -78,7 +78,7 @@ STC_API double crand_norm(crand_t* rng, crand_norm_t* dist); /* Main crand_t prng */ STC_INLINE uint64_t crand_u64(crand_t* rng) { uint64_t *s = rng->state; - const uint64_t result = s[0] + s[1] - (s[3] += s[4]); + const uint64_t result = (s[0] ^ (s[3] += s[4])) + s[1]; s[0] = s[1] ^ (s[1] >> 11); s[1] = s[2] + (s[2] << 3); s[2] = ((s[2] << 24) | (s[2] >> (64 - 24))) + result; @@ -111,12 +111,12 @@ STC_DEF double crandf(void) { return crand_f64(&crand_global); } STC_DEF crand_t crand_init(uint64_t seed) { - /* rng.state[4] must be odd */ - crand_t rng = {{seed + 0x26aa069ea2fb1a4d, - seed*0x9e3779b97f4a7c15 + 0x70c72c95cd592d04, - seed + 0x504f333d3aa0b359, - seed, seed<<1 | 1}}; - crand_u64(&rng); + crand_t rng; uint64_t* s = rng.state; + s[0] = seed + 0x9e3779b97f4a7c15; + s[1] = (s[0] ^ (s[0] >> 30))*0xbf58476d1ce4e5b9; + s[2] = (s[1] ^ (s[1] >> 27))*0x94d049bb133111eb; + s[3] = (s[2] ^ (s[2] >> 31)); + s[4] = ((seed + 0x6aa069ea2fb1a4d) << 1) | 1; return rng; } -- cgit v1.2.3