summaryrefslogtreecommitdiffhomepage
path: root/stc
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2020-08-29 22:46:05 +0200
committerTyge Løvset <[email protected]>2020-08-29 22:46:05 +0200
commit8efecc5d6b8d4dcd6a7bdf9540a11355b4631782 (patch)
tree701373837cfce4bd8dd0115a76517656a79c9c32 /stc
parent47de9987a623e8374915af40d67d72dcb56e4283 (diff)
downloadSTC-modified-8efecc5d6b8d4dcd6a7bdf9540a11355b4631782.tar.gz
STC-modified-8efecc5d6b8d4dcd6a7bdf9540a11355b4631782.zip
Updated crandom.h API! update to benchmark.c . Optimized cmap iter.
Diffstat (limited to 'stc')
-rw-r--r--stc/cdefs.h2
-rw-r--r--stc/cmap.h27
-rw-r--r--stc/crandom.h170
3 files changed, 107 insertions, 92 deletions
diff --git a/stc/cdefs.h b/stc/cdefs.h
index 5fc9cfc3..c8b961a4 100644
--- a/stc/cdefs.h
+++ b/stc/cdefs.h
@@ -30,7 +30,7 @@
#define STC_INLINE static inline
#if defined(_MSC_VER)
#define STC_FORCE_INLINE static __forceinline
-#elif defined(__GNUC__)
+#elif defined(__GNUC__) || defined(__clang__)
#define STC_FORCE_INLINE static inline __attribute((always_inline))
#else
#define STC_FORCE_INLINE static inline
diff --git a/stc/cmap.h b/stc/cmap.h
index 76edc94b..86ed5f75 100644
--- a/stc/cmap.h
+++ b/stc/cmap.h
@@ -233,10 +233,17 @@ STC_API bool \
ctype##_##tag##_erase_entry(ctype##_##tag* self, ctype##_##tag##_entry_t* entry); \
STC_API bool \
ctype##_##tag##_erase(ctype##_##tag* self, ctype##_##tag##_rawkey_t rawKey); \
-STC_API ctype##_##tag##_iter_t \
-ctype##_##tag##_begin(ctype##_##tag* map); \
-STC_API void \
-ctype##_##tag##_next(ctype##_##tag##_iter_t* it); \
+ \
+STC_FORCE_INLINE ctype##_##tag##_iter_t \
+ctype##_##tag##_begin(ctype##_##tag* map) { \
+ ctype##_##tag##_iter_t it = {map->table, map->table + map->bucket_count, map->_hashx}; \
+ if (it._hx) while (*it._hx == 0) ++it.item, ++it._hx; \
+ return it; \
+} \
+STC_FORCE_INLINE void \
+ctype##_##tag##_next(ctype##_##tag##_iter_t* it) { \
+ while ((++it->item, *++it->_hx == 0)) ; \
+} \
\
STC_API uint32_t c_default_hash16(const void *data, size_t len); \
STC_API uint32_t c_default_hash32(const void* data, size_t len); \
@@ -386,18 +393,6 @@ ctype##_##tag##_erase(ctype##_##tag* self, ctype##_##tag##_rawkey_t rawKey) { \
uint32_t hx; \
size_t i = ctype##_##tag##_bucket(self, &rawKey, &hx); \
return ctype##_##tag##_erase_entry(self, self->table + i); \
-} \
- \
-STC_API ctype##_##tag##_iter_t \
-ctype##_##tag##_begin(ctype##_##tag* map) { \
- ctype##_##tag##_iter_t it = {map->table, map->table + map->bucket_count, map->_hashx}; \
- if (it._hx) while (*it._hx == 0) ++it.item, ++it._hx; \
- return it; \
-} \
- \
-STC_API void \
-ctype##_##tag##_next(ctype##_##tag##_iter_t* it) { \
- while ((++it->item, *++it->_hx == 0)) ; \
}
/* https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/ */
diff --git a/stc/crandom.h b/stc/crandom.h
index f4b609b7..5821b4e7 100644
--- a/stc/crandom.h
+++ b/stc/crandom.h
@@ -28,131 +28,138 @@
#include <string.h>
#include <math.h>
/*
- crandom_eng32_t eng = crandom_eng32_init(seed);
- crandom_distrib_f32_t fdist = crandom_uniform_f32_init(1.0f, 6.0f);
- crandom_distrib_i32_t idist = crandom_uniform_i32_init(1, 6);
+ crand_rng32_t rng = crand_rng32_init(seed);
+ crand_uniform_f32_t fdist = crand_uniform_f32_init(rng, 1.0f, 6.0f);
+ crand_uniform_i32_t idist = crand_uniform_i32_init(rng, 1, 6);
- uint32_t i = crandom_i32(&eng);
- int j = crandom_uniform_i32(&eng, idist);
- float r = crandom_uniform_f32(&eng, fdist);
+ uint32_t i = crand_i32(&rng);
+ int j = crand_uniform_i32(&idist);
+ float r = crand_uniform_f32(&fdist);
*/
-typedef struct {uint64_t state[2];} crandom_eng32_t;
-typedef struct {int32_t offset, range;} crandom_distrib_i32_t;
-typedef struct {float offset, range;} crandom_distrib_f32_t;
+/* 32-BIT RANDOM NUMBER GENERATOR */
-/* 32 bit random number generator engine */
-STC_API crandom_eng32_t crandom_eng32_with_seq(uint64_t seed, uint64_t seq);
-STC_INLINE crandom_eng32_t crandom_eng32_init(uint64_t seed) {
- return crandom_eng32_with_seq(seed, seed);
+typedef struct {uint64_t state[2];} crand_rng32_t;
+typedef struct {crand_rng32_t rng; int32_t offset; uint32_t range;} crand_uniform_i32_t;
+typedef struct {crand_rng32_t rng; float offset, range;} crand_uniform_f32_t;
+
+/* engine initializers */
+STC_API crand_rng32_t crand_rng32_with_seq(uint64_t seed, uint64_t seq);
+STC_INLINE crand_rng32_t crand_rng32_init(uint64_t seed) {
+ return crand_rng32_with_seq(seed, seed);
}
/* int random number generator, range [0, 2^32) */
-STC_API uint32_t crandom_i32(crandom_eng32_t* rng);
+STC_API uint32_t crand_i32(crand_rng32_t* rng);
-STC_INLINE float crandom_f32(crandom_eng32_t* rng) {
- union {uint32_t i; float f;} u = {0x3F800000u | (crandom_i32(rng) >> 9)};
+STC_INLINE float crand_f32(crand_rng32_t* rng) {
+ union {uint32_t i; float f;} u = {0x3F800000u | (crand_i32(rng) >> 9)};
return u.f - 1.0f;
}
/* int random number generator in range [low, high] */
-STC_INLINE crandom_distrib_i32_t crandom_uniform_i32_init(int32_t low, int32_t high) {
- crandom_distrib_i32_t dist = {low, high - low + 1}; return dist;
+STC_INLINE crand_uniform_i32_t crand_uniform_i32_init(crand_rng32_t rng, int32_t low, int32_t high) {
+ crand_uniform_i32_t dist = {rng, low, high - low + 1}; return dist;
}
-STC_INLINE int32_t crandom_uniform_i32(crandom_eng32_t* rng, crandom_distrib_i32_t dist) {
- return dist.offset + (int32_t) (((uint64_t) crandom_i32(rng) * dist.range) >> 32);
+STC_INLINE int32_t crand_uniform_i32(crand_uniform_i32_t* dist) {
+ return dist->offset + (int32_t) (((uint64_t) crand_i32(&dist->rng) * dist->range) >> 32);
}
+/* https://github.com/lemire/fastrange */
+STC_API uint32_t crand_unbiased_i32(crand_uniform_i32_t* dist);
/* float random number in range [low, high). Note: 23 bit resolution. */
-STC_INLINE crandom_distrib_f32_t crandom_uniform_f32_init(float low, float high) {
- crandom_distrib_f32_t dist = {low, high - low}; return dist;
+STC_INLINE crand_uniform_f32_t crand_uniform_f32_init(crand_rng32_t rng, float low, float high) {
+ crand_uniform_f32_t dist = {rng, low, high - low}; return dist;
}
-STC_INLINE float crandom_uniform_f32(crandom_eng32_t* rng, crandom_distrib_f32_t dist) {
- return dist.offset + crandom_f32(rng) * dist.range;
+STC_INLINE float crand_uniform_f32(crand_uniform_f32_t* dist) {
+ return dist->offset + crand_f32(&dist->rng) * dist->range;
}
-typedef struct {uint64_t state[4];} crandom_eng64_t;
-typedef struct {int64_t offset, range;} crandom_distrib_i64_t;
-typedef struct {double offset, range;} crandom_distrib_f64_t;
+/* 64 BIT RANDOM NUMBER GENERATOR */
+
+typedef struct {uint64_t state[4];} crand_rng64_t;
+typedef struct {crand_rng64_t rng; int64_t offset; uint64_t range;} crand_uniform_i64_t;
+typedef struct {crand_rng64_t rng; double offset, range;} crand_uniform_f64_t;
+typedef struct {crand_rng64_t rng; double mean, stddev, next; bool has_next;} crand_normal_f64_t;
+
-/* 64 bit random number generator engine */
-STC_API crandom_eng64_t crandom_eng64_with_seq(uint64_t seed, uint64_t seq);
-STC_INLINE crandom_eng64_t crandom_eng64_init(uint64_t seed) {
- return crandom_eng64_with_seq(seed, 1);
+/* engine initializers */
+STC_API crand_rng64_t crand_rng64_with_seq(uint64_t seed, uint64_t seq);
+STC_INLINE crand_rng64_t crand_rng64_init(uint64_t seed) {
+ return crand_rng64_with_seq(seed, 1);
}
-/* int random number generator, range [0, 2^64) */
-STC_API uint64_t crandom_i64(crandom_eng64_t* rng);
+/* int random number generator, range [0, 2^64). PRNG copyright Tyge Løvset, NORCE Research, 2020 */
+STC_API uint64_t crand_i64(crand_rng64_t* rng);
/* double random number in range [low, high). 52 bit resolution. */
-STC_INLINE double crandom_f64(crandom_eng64_t* rng) {
- union {uint64_t i; double f;} u = {0x3FF0000000000000ull | (crandom_i64(rng) >> 12)};
+STC_INLINE double crand_f64(crand_rng64_t* rng) {
+ union {uint64_t i; double f;} u = {0x3FF0000000000000ull | (crand_i64(rng) >> 12)};
return u.f - 1.0;
}
/* int random number generator in range [low, high] */
-STC_INLINE crandom_distrib_i64_t crandom_uniform_i64_init(int64_t low, int64_t high) {
- crandom_distrib_i64_t dist = {low, high - low + 1}; return dist;
+STC_INLINE crand_uniform_i64_t crand_uniform_i64_init(crand_rng64_t rng, int64_t low, int64_t high) {
+ crand_uniform_i64_t dist = {rng, low, high - low + 1}; return dist;
}
#if defined(_MSC_VER) && defined(_WIN64)
#include <intrin.h>
#endif
-STC_INLINE int64_t crandom_uniform_i64(crandom_eng64_t* rng, crandom_distrib_i64_t dist) {
+STC_INLINE int64_t crand_uniform_i64(crand_uniform_i64_t* dist) {
#if defined(__SIZEOF_INT128__)
- return dist.offset + (int64_t) (((__uint128_t) crandom_i64(rng) * dist.range) >> 64);
+ return dist->offset + (int64_t) (((__uint128_t) crand_i64(&dist->rng) * dist->range) >> 64);
#elif defined(_MSC_VER) && defined(_WIN64)
- uint64_t hi; _umul128(crandom_i64(rng), dist.range, &hi); return dist.offset + hi;
+ uint64_t hi; _umul128(crand_i64(&dist->rng), dist->range, &hi); return dist->offset + hi;
#else
- return dist.offset + crandom_i64(rng) % dist.range; // slower
+ return dist->offset + crand_i64(&dist->rng) % dist->range; // slower
#endif
}
-STC_INLINE crandom_distrib_f64_t crandom_uniform_f64_init(double low, double high) {
- crandom_distrib_f64_t dist = {low, high - low}; return dist;
+STC_INLINE crand_uniform_f64_t crand_uniform_f64_init(crand_rng64_t rng, double low, double high) {
+ crand_uniform_f64_t dist = {rng, low, high - low}; return dist;
}
-STC_INLINE double crandom_uniform_f64(crandom_eng64_t* rng, crandom_distrib_f64_t dist) {
- return dist.offset + crandom_f64(rng) * dist.range;
+STC_INLINE double crand_uniform_f64(crand_uniform_f64_t* dist) {
+ return dist->offset + crand_f64(&dist->rng) * dist->range;
}
-STC_INLINE crandom_distrib_f64_t crandom_normal_f64_init(double mean, double std_dev) {
- crandom_distrib_f64_t dist = {mean, std_dev}; return dist;
+STC_INLINE crand_normal_f64_t crand_normal_f64_init(crand_rng64_t rng, double mean, double stddev) {
+ crand_normal_f64_t dist = {rng, mean, stddev, 0.0, false}; return dist;
}
-STC_API double crandom_normal_f64(crandom_eng64_t* rng, crandom_distrib_f64_t dist);
+STC_API double crand_normal_f64(crand_normal_f64_t* dist);
#if !defined(STC_HEADER) || defined(STC_IMPLEMENTATION)
-/* PCG32 random number generator: https://www.pcg-random.org/download.html */
-
-STC_API crandom_eng32_t crandom_eng32_with_seq(uint64_t seed, uint64_t seq) {
- crandom_eng32_t rng = {0u, (seq << 1u) | 1u}; /* inc must be odd */
- crandom_i32(&rng);
+STC_API crand_rng32_t crand_rng32_with_seq(uint64_t seed, uint64_t seq) {
+ crand_rng32_t rng = {0u, (seq << 1u) | 1u}; /* inc must be odd */
+ crand_i32(&rng);
rng.state[0] += seed;
- crandom_i32(&rng);
+ crand_i32(&rng);
return rng;
}
-STC_API uint32_t crandom_i32(crandom_eng32_t* rng) {
+/* PCG32 https://www.pcg-random.org/download.html */
+STC_API uint32_t crand_i32(crand_rng32_t* rng) {
uint64_t old = rng->state[0];
rng->state[0] = old * 6364136223846793005ull + rng->state[1];
- uint32_t xors = ((old >> 18u) ^ old) >> 27u;
+ uint32_t xors = (uint32_t) (((old >> 18u) ^ old) >> 27u);
uint32_t rot = old >> 59u;
return (xors >> rot) | (xors << ((-rot) & 31));
}
-STC_API crandom_eng64_t crandom_eng64_with_seq(uint64_t seed, uint64_t seq) {
- crandom_eng64_t rng = {seed, seed, seed, (seq << 1u) | 1u}; /* increment must be odd */
- for (int i = 0; i < 12; ++i) crandom_i64(&rng);
- return rng;
-}
-
/* PRNG copyright Tyge Løvset, NORCE Research, 2020 */
/* Extremely fast PRNG suited for parallel usage with Weyl-sequence parameter. */
-/* Updates only 192bit state. Parallel: Ensures unique sequence per seq (2^63) */
-/* Minimum period is 2^64 per seq, but high average per Weyl seq. */
-STC_API uint64_t crandom_i64(crandom_eng64_t* rng) {
+/* Even faster than sfc64: updates only 192bit state. Better for parallel processing: */
+/* Guarantees 2^63 unique threads with minimum 2^64 period length ~ 2^160 average period. */
+/* Tested with PractRand to 8 TB output: no issues */
+STC_API crand_rng64_t crand_rng64_with_seq(uint64_t seed, uint64_t seq) {
+ crand_rng64_t rng = {seed, seed, seed, (seq << 1u) | 1u}; /* increment must be odd */
+ for (int i = 0; i < 12; ++i) crand_i64(&rng);
+ return rng;
+}
+STC_API uint64_t crand_i64(crand_rng64_t* rng) {
enum {LROT = 24, RSHIFT = 11, LSHIFT = 3};
uint64_t *s = rng->state;
const uint64_t b = s[1], result = s[0] ^ (s[2] += s[3]|1);
@@ -161,21 +168,34 @@ STC_API uint64_t crandom_i64(crandom_eng64_t* rng) {
return result;
}
-STC_API double crandom_normal_f64(crandom_eng64_t* rng, crandom_distrib_f64_t dist) {
- static bool spare = false; /* Marsaglia polar method: */
- static double u2; double u1, s, m;
- if (spare) {
- spare = false;
- return u2 * dist.range + dist.offset;
+/* Unbiased uniform https://github.com/lemire/fastrange */
+STC_API uint32_t crand_unbiased_i32(crand_uniform_i32_t* dist) {
+ uint32_t r = dist->range;
+ uint64_t m = (uint64_t) crand_i32(&dist->rng) * r;
+ uint32_t l = (uint32_t) m;
+ if (l < r) {
+ uint32_t t = -r;
+ if (t >= r) if ((t -= r) >= r) t %= r;
+ while (l < t) l = (uint32_t) (m = (uint64_t) crand_i32(&dist->rng) * r);
+ }
+ return dist->offset + (m >> 32);
+}
+
+/* Marsaglia polar method for gaussian distribution. */
+STC_API double crand_normal_f64(crand_normal_f64_t* dist) {
+ double u1, u2, s, m;
+ if (dist->has_next) {
+ dist->has_next = false;
+ return dist->next * dist->stddev + dist->mean;
}
do {
- u1 = 2.0 * crandom_f64(rng) - 1.0;
- u2 = 2.0 * crandom_f64(rng) - 1.0;
+ u1 = 2.0 * crand_f64(&dist->rng) - 1.0;
+ u2 = 2.0 * crand_f64(&dist->rng) - 1.0;
s = u1*u1 + u2*u2;
} while (s >= 1.0 || s == 0.0);
m = sqrt(-2.0 * log(s) / s);
- u2 *= m; spare = true;
- return (u1 * m) * dist.range + dist.offset;
+ dist->next = u2 * m, dist->has_next = true;
+ return (u1 * m) * dist->stddev + dist->mean;
}
#endif