diff options
| -rw-r--r-- | docs/crandom_api.md | 28 | ||||
| -rw-r--r-- | misc/benchmarks/various/prng_bench.cpp | 31 | ||||
| -rw-r--r-- | misc/examples/smartpointers/arcvec_erase.c | 3 |
3 files changed, 28 insertions, 34 deletions
diff --git a/docs/crandom_api.md b/docs/crandom_api.md index 88924784..c6491243 100644 --- a/docs/crandom_api.md +++ b/docs/crandom_api.md @@ -8,27 +8,23 @@ See [random](https://en.cppreference.com/w/cpp/header/random) for similar c++ fu ## Description -**crand64** is a novel, very fast PRNG, suited for parallel usage. It features a -Weyl-sequence as part of its state. It is based on *sfc64*, but has a different output function -and state size. +**crand64** is a very fast PRNG, suited for parallel usage. It is based on *sfc64*, but has a +different output function and state size. It features a Weyl-sequence as part of its state. -**sfc64** is the fastest among *pcg*, *xoshiro`**`*, and *lehmer*. It is equally fast or faster than -*sfc64* on most platforms. *wyrand* is faster on platforms with fast 128-bit multiplication, and has -2^64 period (https://github.com/lemire/SwiftWyhash/issues/10). *wyrand* is not suited for massive -parallel usage due to its limited minimal period. +**crand64** is faster or equally fast as *wyrand*, *xoshiro\*\**, *sfc64*, and *romu_trio* +with both **clang 16.0** and **gcc 13.1** from the [prng_bench.cpp](../misc/benchmarks/various/prng_bench.cpp) +on windows 11, Ryzen 7 5700X. (clang does not optimize *xoshiro\*\** and *sfc64* as well as gcc does). -**crand64** does not require multiplication or 128-bit integer operations. It has 320 bit state, -where 64-bits are constant per prng instance created. - -There is no *jump function*, but each odd number Weyl-increment (state[4]) starts a new +**crand64** has no *jump function*, but each odd number Weyl-increment (state[4]) starts a new unique 2^64 *minimum* length period, i.e. virtually unlimited number of unique threads. +In contrast, *wyrand* and *sfc64* have only a (total) minimum period of 2^64 (*romu_trio* has +no guarantees), and may therefore not be suited for massive parallel usage (for purists). 
-**crand64** passes *PractRand* (tested up to 8TB output), Vigna's Hamming weight test, and simple -correlation tests, i.e. *n* interleaved streams with only one-bit differences in initial state. -Also 32-bit and 16-bit versions passes PractRand up to their size limits. +**crand64** does not require multiplication or 128-bit integer operations. It has 320 bit state, +where 64-bits are constant per instance. -For more, see the PRNG shootout by Vigna: http://prng.di.unimi.it and a debate between the authors of -xoshiro and pcg (Vigna/O'Neill) PRNGs: https://www.pcg-random.org/posts/on-vignas-pcg-critique.html +**crand64** passes *PractRand* (tested up to 8TB output), Vigna's Hamming weight test, and simple +correlation tests. The 16- and 32-bit variants also pass PractRand up to their size limits. ## Header file diff --git a/misc/benchmarks/various/prng_bench.cpp b/misc/benchmarks/various/prng_bench.cpp index 234e3805..cd43ff36 100644 --- a/misc/benchmarks/various/prng_bench.cpp +++ b/misc/benchmarks/various/prng_bench.cpp @@ -66,7 +66,7 @@ uint32_t pcg32(uint32_t s[2]) { } -/* xoshiro128+ */ +/* xo(ro)shiro */ uint64_t xoroshiro128plus(uint64_t s[2]) { const uint64_t s0 = s[0]; @@ -80,9 +80,6 @@ uint64_t xoroshiro128plus(uint64_t s[2]) { return result; } - -/* xoshiro256** */ - static inline uint64_t xoshiro256starstar(uint64_t s[4]) { const uint64_t result = rotl64(s[1] * 5, 7) * 9; const uint64_t t = s[1] << 17; @@ -95,7 +92,7 @@ static inline uint64_t xoshiro256starstar(uint64_t s[4]) { return result; } -// wyrand - 2020-12-07 +/* wyrand - 2020-12-07 */ static inline void _wymum(uint64_t *A, uint64_t *B){ #if defined(__SIZEOF_INT128__) __uint128_t r = *A; r *= *B; @@ -136,44 +133,44 @@ int main(void) for (size_t ti = 0; ti < 2; ti++) { init_state(rng.state, 12345123); cout << endl << "ROUND " << ti+1 << " ---------" << endl; - +/* beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = romu_trio(rng.state); + recipient[i] = sfc32((uint32_t *)rng.state); 
end = clock(); - cout << "romu_trio:\t" + cout << "sfc32:\t\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = wyrand64(rng.state); + recipient[i] = stc32((uint32_t *)rng.state); end = clock(); - cout << "wyrand64:\t" + cout << "stc32:\t\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = sfc32((uint32_t *)rng.state); + recipient[i] = pcg32((uint32_t *)rng.state); end = clock(); - cout << "sfc32:\t\t" + cout << "pcg32:\t\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; - +*/ beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = stc32((uint32_t *)rng.state); + recipient[i] = romu_trio(rng.state); end = clock(); - cout << "stc32:\t\t" + cout << "romu_trio:\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = pcg32((uint32_t *)rng.state); + recipient[i] = wyrand64(rng.state); end = clock(); - cout << "pcg32:\t\t" + cout << "wyrand64:\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; diff --git a/misc/examples/smartpointers/arcvec_erase.c b/misc/examples/smartpointers/arcvec_erase.c index ba54c1c7..9d757533 100644 --- a/misc/examples/smartpointers/arcvec_erase.c +++ b/misc/examples/smartpointers/arcvec_erase.c @@ -19,7 +19,8 @@ int main(void) // clone the second 2012 and push it back. // note: cloning make sure that vec.data[2] has ref count 2. - Vec_push(&vec, Arc_clone(vec.data[2])); + Vec_push(&vec, Arc_clone(vec.data[2])); // => share vec.data[2] + Vec_emplace(&vec, *vec.data[2].get); // => deep-copy vec.data[2] printf("vec before erase :"); c_foreach (i, Vec, vec) |
