summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authortylov <[email protected]>2023-07-28 11:20:25 +0200
committertylov <[email protected]>2023-07-28 11:20:25 +0200
commit078f20f2e378543f078e86f8ad256887378ce92b (patch)
tree8d23525993717b1b2bc7130a39f99b60c308281b
parenta64d171f0eb76c8f208ffc7d8501baa8222634d3 (diff)
downloadSTC-modified-078f20f2e378543f078e86f8ad256887378ce92b.tar.gz
STC-modified-078f20f2e378543f078e86f8ad256887378ce92b.zip
Improved crand docs and commented out the irrelevant 32-bit variants in the benchmark.
-rw-r--r--docs/crandom_api.md28
-rw-r--r--misc/benchmarks/various/prng_bench.cpp31
-rw-r--r--misc/examples/smartpointers/arcvec_erase.c3
3 files changed, 28 insertions, 34 deletions
diff --git a/docs/crandom_api.md b/docs/crandom_api.md
index 88924784..c6491243 100644
--- a/docs/crandom_api.md
+++ b/docs/crandom_api.md
@@ -8,27 +8,23 @@ See [random](https://en.cppreference.com/w/cpp/header/random) for similar c++ fu
## Description
-**crand64** is a novel, very fast PRNG, suited for parallel usage. It features a
-Weyl-sequence as part of its state. It is based on *sfc64*, but has a different output function
-and state size.
+**crand64** is a very fast PRNG, suited for parallel usage. It is based on *sfc64*, but has a
+different output function and state size. It features a Weyl-sequence as part of its state.
-**sfc64** is the fastest among *pcg*, *xoshiro`**`*, and *lehmer*. It is equally fast or faster than
-*sfc64* on most platforms. *wyrand* is faster on platforms with fast 128-bit multiplication, and has
-2^64 period (https://github.com/lemire/SwiftWyhash/issues/10). *wyrand* is not suited for massive
-parallel usage due to its limited minimal period.
+**crand64** is faster or equally fast as *wyrand*, *xoshiro\*\**, *sfc64*, and *romu_trio*
+with both **clang 16.0** and **gcc 13.1** from the [prng_bench.cpp](../misc/benchmarks/various/prng_bench.cpp)
+on windows 11, Ryzen 7 5700X. (clang does not optimize *xoshiro\*\** and *sfc64* as well as gcc does).
-**crand64** does not require multiplication or 128-bit integer operations. It has 320 bit state,
-where 64-bits are constant per prng instance created.
-
-There is no *jump function*, but each odd number Weyl-increment (state[4]) starts a new
+**crand64** has no *jump function*, but each odd number Weyl-increment (state[4]) starts a new
unique 2^64 *minimum* length period, i.e. a virtually unlimited number of unique threads.
+In contrast, *wyrand* and *sfc64* have only a (total) minimum period of 2^64 (*romu_trio* has
+no guarantees), and may therefore not be suited for massive parallel usage (for purists).
-**crand64** passes *PractRand* (tested up to 8TB output), Vigna's Hamming weight test, and simple
-correlation tests, i.e. *n* interleaved streams with only one-bit differences in initial state.
-Also 32-bit and 16-bit versions passes PractRand up to their size limits.
+**crand64** does not require multiplication or 128-bit integer operations. It has 320 bit state,
+where 64-bits are constant per instance.
-For more, see the PRNG shootout by Vigna: http://prng.di.unimi.it and a debate between the authors of
-xoshiro and pcg (Vigna/O'Neill) PRNGs: https://www.pcg-random.org/posts/on-vignas-pcg-critique.html
+**crand64** passes *PractRand* (tested up to 8TB output), Vigna's Hamming weight test, and simple
+correlation tests. The 16- and 32-bit variants also pass PractRand up to their size limits.
## Header file
diff --git a/misc/benchmarks/various/prng_bench.cpp b/misc/benchmarks/various/prng_bench.cpp
index 234e3805..cd43ff36 100644
--- a/misc/benchmarks/various/prng_bench.cpp
+++ b/misc/benchmarks/various/prng_bench.cpp
@@ -66,7 +66,7 @@ uint32_t pcg32(uint32_t s[2]) {
}
-/* xoshiro128+ */
+/* xo(ro)shiro */
uint64_t xoroshiro128plus(uint64_t s[2]) {
const uint64_t s0 = s[0];
@@ -80,9 +80,6 @@ uint64_t xoroshiro128plus(uint64_t s[2]) {
return result;
}
-
-/* xoshiro256** */
-
static inline uint64_t xoshiro256starstar(uint64_t s[4]) {
const uint64_t result = rotl64(s[1] * 5, 7) * 9;
const uint64_t t = s[1] << 17;
@@ -95,7 +92,7 @@ static inline uint64_t xoshiro256starstar(uint64_t s[4]) {
return result;
}
-// wyrand - 2020-12-07
+/* wyrand - 2020-12-07 */
static inline void _wymum(uint64_t *A, uint64_t *B){
#if defined(__SIZEOF_INT128__)
__uint128_t r = *A; r *= *B;
@@ -136,44 +133,44 @@ int main(void)
for (size_t ti = 0; ti < 2; ti++) {
init_state(rng.state, 12345123);
cout << endl << "ROUND " << ti+1 << " ---------" << endl;
-
+/*
beg = clock();
for (size_t i = 0; i < N; i++)
- recipient[i] = romu_trio(rng.state);
+ recipient[i] = sfc32((uint32_t *)rng.state);
end = clock();
- cout << "romu_trio:\t"
+ cout << "sfc32:\t\t"
<< (float(end - beg) / CLOCKS_PER_SEC)
<< "s: " << recipient[312] << endl;
beg = clock();
for (size_t i = 0; i < N; i++)
- recipient[i] = wyrand64(rng.state);
+ recipient[i] = stc32((uint32_t *)rng.state);
end = clock();
- cout << "wyrand64:\t"
+ cout << "stc32:\t\t"
<< (float(end - beg) / CLOCKS_PER_SEC)
<< "s: " << recipient[312] << endl;
beg = clock();
for (size_t i = 0; i < N; i++)
- recipient[i] = sfc32((uint32_t *)rng.state);
+ recipient[i] = pcg32((uint32_t *)rng.state);
end = clock();
- cout << "sfc32:\t\t"
+ cout << "pcg32:\t\t"
<< (float(end - beg) / CLOCKS_PER_SEC)
<< "s: " << recipient[312] << endl;
-
+*/
beg = clock();
for (size_t i = 0; i < N; i++)
- recipient[i] = stc32((uint32_t *)rng.state);
+ recipient[i] = romu_trio(rng.state);
end = clock();
- cout << "stc32:\t\t"
+ cout << "romu_trio:\t"
<< (float(end - beg) / CLOCKS_PER_SEC)
<< "s: " << recipient[312] << endl;
beg = clock();
for (size_t i = 0; i < N; i++)
- recipient[i] = pcg32((uint32_t *)rng.state);
+ recipient[i] = wyrand64(rng.state);
end = clock();
- cout << "pcg32:\t\t"
+ cout << "wyrand64:\t"
<< (float(end - beg) / CLOCKS_PER_SEC)
<< "s: " << recipient[312] << endl;
diff --git a/misc/examples/smartpointers/arcvec_erase.c b/misc/examples/smartpointers/arcvec_erase.c
index ba54c1c7..9d757533 100644
--- a/misc/examples/smartpointers/arcvec_erase.c
+++ b/misc/examples/smartpointers/arcvec_erase.c
@@ -19,7 +19,8 @@ int main(void)
// clone the second 2012 and push it back.
// note: cloning make sure that vec.data[2] has ref count 2.
- Vec_push(&vec, Arc_clone(vec.data[2]));
+ Vec_push(&vec, Arc_clone(vec.data[2])); // => share vec.data[2]
+ Vec_emplace(&vec, *vec.data[2].get); // => deep-copy vec.data[2]
printf("vec before erase :");
c_foreach (i, Vec, vec)