From 900295256d825fc323149cd223c49787f32a3696 Mon Sep 17 00:00:00 2001 From: tylov Date: Thu, 20 Jul 2023 15:09:10 +0200 Subject: Moved examples to sub-directories. Added cotask1.c cotask2.c examples. --- misc/examples/spans/multidim.c | 67 +++++++++++++++++++++++++++++++++++++++++ misc/examples/spans/printspan.c | 52 ++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 misc/examples/spans/multidim.c create mode 100644 misc/examples/spans/printspan.c (limited to 'misc/examples/spans') diff --git a/misc/examples/spans/multidim.c b/misc/examples/spans/multidim.c new file mode 100644 index 00000000..798a1126 --- /dev/null +++ b/misc/examples/spans/multidim.c @@ -0,0 +1,67 @@ +// Example based on https://en.cppreference.com/w/cpp/container/mdspan +#define i_val int +#include +#include +#include + +using_cspan3(ispan, int); + +int main(void) +{ + cstack_int v = c_init(cstack_int, {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}); + + // View data as contiguous memory representing 24 ints + ispan ms1 = cspan_from(&v); + + // View the same data as a 3D array 2 x 3 x 4 + ispan3 ms3 = cspan_md(v.data, 2, 3, 4); + + puts("ms3:"); + for (int i=0; i != ms3.shape[0]; i++) { + for (int j=0; j != ms3.shape[1]; j++) { + for (int k=0; k != ms3.shape[2]; k++) { + printf(" %2d", *cspan_at(&ms3, i, j, k)); + } + puts(""); + } + puts(""); + } + puts("ss3 = ms3[:, 1:3, 1:3]"); + ispan3 ss3 = ms3; + ss3 = cspan_slice(ispan3, &ms3, {c_ALL}, {1,3}, {1,3}); + + for (int i=0; i != ss3.shape[0]; i++) { + for (int j=0; j != ss3.shape[1]; j++) { + for (int k=0; k != ss3.shape[2]; k++) { + printf(" %2d", *cspan_at(&ss3, i, j, k)); + } + puts(""); + } + puts(""); + } + + puts("Iterate ss3 flat:"); + c_foreach (i, ispan3, ss3) + printf(" %d", *i.ref); + puts(""); + + ispan2 ms2 = cspan_submd3(&ms3, 0); + + // write data using 2D view + for (int i=0; i != ms2.shape[0]; i++) + for (int j=0; j != ms2.shape[1]; j++) + *cspan_at(&ms2, i, j) = i*1000 + 
j; + + puts("\nview data as 1D view:"); + for (int i=0; i != cspan_size(&ms1); i++) + printf(" %d", *cspan_at(&ms1, i)); + puts(""); + + puts("iterate subspan ms3[1]:"); + ispan2 sub = cspan_submd3(&ms3, 1); + c_foreach (i, ispan2, sub) + printf(" %d", *i.ref); + puts(""); + + cstack_int_drop(&v); +} diff --git a/misc/examples/spans/printspan.c b/misc/examples/spans/printspan.c new file mode 100644 index 00000000..cd3c5f4f --- /dev/null +++ b/misc/examples/spans/printspan.c @@ -0,0 +1,52 @@ +// printspan.c + +#include +#define i_implement +#include +#define i_key int +#include +#define i_key int +#include +#define i_key_str +#include + +#include +using_cspan(intspan, int, 1); + +void printMe(intspan container) { + printf("%d:", (int)cspan_size(&container)); + c_foreach (e, intspan, container) + printf(" %d", *e.ref); + puts(""); +} + +int main(void) +{ + intspan sp1 = cspan_init(intspan, {1, 2}); + printMe( sp1 ); + + printMe( c_init(intspan, {1, 2, 3}) ); + + int arr[] = {1, 2, 3, 4, 5, 6}; + intspan sp2 = cspan_from_array(arr); + printMe( c_LITERAL(intspan)cspan_subspan(&sp2, 1, 4) ); + + cvec_int vec = c_init(cvec_int, {1, 2, 3, 4, 5}); + printMe( c_LITERAL(intspan)cspan_from(&vec) ); + + printMe( sp2 ); + + cstack_int stk = c_init(cstack_int, {1, 2, 3, 4, 5, 6, 7}); + printMe( c_LITERAL(intspan)cspan_from(&stk) ); + + csset_str set = c_init(csset_str, {"5", "7", "4", "3", "8", "2", "1", "9", "6"}); + printf("%d:", (int)csset_str_size(&set)); + c_foreach (e, csset_str, set) + printf(" %s", cstr_str(e.ref)); + puts(""); + + // cleanup + cvec_int_drop(&vec); + cstack_int_drop(&stk); + csset_str_drop(&set); +} -- cgit v1.2.3 From 2d67f4040f6eecd41f1b864b43c62823ed75aff0 Mon Sep 17 00:00:00 2001 From: tylov Date: Fri, 21 Jul 2023 00:37:28 +0200 Subject: Renamed badly abbreviated names in crand.h. Moved coroutine.h from algo subfolder to stc. Updated coroutine.h and docs. 
--- docs/ccommon_api.md | 7 +- docs/cpque_api.md | 4 +- docs/crandom_api.md | 47 +++-- include/stc/algo/coroutine.h | 274 ------------------------- include/stc/algorithm.h | 8 + include/stc/calgo.h | 9 - include/stc/coroutine.h | 273 ++++++++++++++++++++++++ include/stc/crand.h | 43 ++-- misc/examples/algorithms/forfilter.c | 3 +- misc/examples/algorithms/forloops.c | 2 +- misc/examples/algorithms/random.c | 20 +- misc/examples/bitsets/prime.c | 22 +- misc/examples/coroutines/cointerleave.c | 2 +- misc/examples/coroutines/coread.c | 2 +- misc/examples/coroutines/coroutines.c | 2 +- misc/examples/coroutines/cotasks1.c | 2 +- misc/examples/coroutines/cotasks2.c | 12 +- misc/examples/coroutines/dining_philosophers.c | 2 +- misc/examples/coroutines/generator.c | 2 +- misc/examples/coroutines/scheduler.c | 2 +- misc/examples/coroutines/triples.c | 2 +- misc/examples/linkedlists/list.c | 2 +- misc/examples/make.sh | 8 +- misc/examples/priorityqueues/priority.c | 6 +- misc/examples/queues/new_queue.c | 6 +- misc/examples/queues/queue.c | 8 +- misc/examples/sortedmaps/gauss2.c | 4 +- misc/examples/spans/mdspan.c | 51 +++++ 28 files changed, 436 insertions(+), 389 deletions(-) delete mode 100644 include/stc/algo/coroutine.h create mode 100644 include/stc/algorithm.h delete mode 100644 include/stc/calgo.h create mode 100644 include/stc/coroutine.h create mode 100644 misc/examples/spans/mdspan.c (limited to 'misc/examples/spans') diff --git a/docs/ccommon_api.md b/docs/ccommon_api.md index 1e1ae1aa..9189d7e8 100644 --- a/docs/ccommon_api.md +++ b/docs/ccommon_api.md @@ -392,8 +392,7 @@ cco_routine scope; Use `if-else-if` constructs instead. 
| | `cco_return;` | Return from coroutine (inside cco_routine) | | | Task objects: | | | | `cco_task_struct(Name, ...);` | Define a coroutine task struct | -| | `cco_await_task(task, ...);` | Await for task to finish or optionally yield a value | -| | `cco_block_task(task);` | Run blocking until task is finished (stackless) | +| | `cco_task_await(task, ...);` | Await for task to finish or optionally yield a value | | | Semaphores: | | | | `cco_sem` | Semaphore type | | `cco_sem` | `cco_sem_from(long value)` | Create semaphore | @@ -414,7 +413,9 @@ cco_routine scope; Use `if-else-if` constructs instead. | `void` | `cco_stop(co)` | Next call of coroutine finalizes | | `void` | `cco_reset(co)` | Reset state to initial (for reuse) | | `void` | `cco_block_on(cocall) { }` | Run blocking until cocall is finished | -| `void` | `cco_block_on(cocall, int *result) { }`| Run blocking until cocall is finished | +| `void` | `cco_block_on(cocall, int *result) {}`| Run blocking until cocall is finished | +| | `cco_task_block_on(task) {}` | Run blocking until task is finished | +| | `cco_task_block_on(task, rt, STACKSZ) {}`| Run blocking until task is finished | | | Time functions: | | | `double` | `cco_time(void)` | Return secs with usec prec. since Epoch | | | `cco_sleep(double sec)` | Sleep for seconds (msec or usec prec.) | diff --git a/docs/cpque_api.md b/docs/cpque_api.md index 4cde927b..247424b4 100644 --- a/docs/cpque_api.md +++ b/docs/cpque_api.md @@ -72,14 +72,14 @@ int main(void) { intptr_t N = 10000000; crand_t rng = crand_init(1234); - crand_unif_t dist = crand_unif_init(0, N * 10); + crand_uniform_t dist = crand_uniform_init(0, N * 10); // Define heap cpque_i heap = {0}; // Push ten million random numbers to priority queue. c_forrange (N) - cpque_i_push(&heap, crand_unif(&rng, &dist)); + cpque_i_push(&heap, crand_uniform(&rng, &dist)); // Add some negative ones. 
int nums[] = {-231, -32, -873, -4, -343}; diff --git a/docs/crandom_api.md b/docs/crandom_api.md index 22a4f4dd..88924784 100644 --- a/docs/crandom_api.md +++ b/docs/crandom_api.md @@ -1,30 +1,29 @@ # STC [crand](../include/stc/crand.h): Pseudo Random Number Generator ![Random](pics/random.jpg) -This features a *64-bit PRNG* named **stc64**, and can generate bounded uniform and normal +This features a *64-bit PRNG* named **crand64**, and can generate bounded uniform and normal distributed random numbers. See [random](https://en.cppreference.com/w/cpp/header/random) for similar c++ functionality. ## Description -**stc64** is a novel, extremely fast PRNG by Tyge Løvset, suited for parallel usage. It features -Weyl-sequences as part of its state. It is inspired on *sfc64*, but has a different output function +**crand64** is a novel, very fast PRNG, suited for parallel usage. It features a +Weyl-sequence as part of its state. It is based on *sfc64*, but has a different output function and state size. -**sfc64** is the fastest among *pcg*, *xoshiro`**`*, and *lehmer*. It is equally fast as *sfc64* on -most platforms. *wyrand* is faster on platforms with fast 128-bit multiplication, and has 2^64 period -length (https://github.com/lemire/SwiftWyhash/issues/10). However, *wyrand* is not suited for massive -parallel usage due to its limited total minimal period length. +**crand64** is the fastest among *pcg*, *xoshiro`**`*, and *lehmer*. It is equally fast or faster than +*sfc64* on most platforms. *wyrand* is faster on platforms with fast 128-bit multiplication, and has +2^64 period (https://github.com/lemire/SwiftWyhash/issues/10). *wyrand* is not suited for massive +parallel usage due to its limited minimal period. -**stc64** does not require multiplication or 128-bit integer operations. It has 320 bit state, -but updates only 256 bit per generated number. +**crand64** does not require multiplication or 128-bit integer operations. 
It has 320 bit state, +where 64-bits are constant per prng instance created. There is no *jump function*, but each odd number Weyl-increment (state[4]) starts a new -unique 2^64 *minimum* length period. For a single thread, a minimum period of 2^127 is generated -when the Weyl-increment is incremented by 2 every 2^64 output. +unique 2^64 *minimum* length period, i.e. a virtually unlimited number of unique threads. -**stc64** passes *PractRand* (tested up to 8TB output), Vigna's Hamming weight test, and simple +**crand64** passes *PractRand* (tested up to 8TB output), Vigna's Hamming weight test, and simple correlation tests, i.e. *n* interleaved streams with only one-bit differences in initial state. Also 32-bit and 16-bit versions passes PractRand up to their size limits. @@ -41,27 +40,27 @@ All crand definitions and prototypes are available by including a single header ## Methods ```c -void csrand(uint64_t seed); // seed global stc64 prng +void csrand(uint64_t seed); // seed global crand64 prng uint64_t crand(void); // global crand_u64(rng) double crandf(void); // global crand_f64(rng) -crand_t crand_init(uint64_t seed); // stc64_init(s) is deprecated +crand_t crand_init(uint64_t seed); uint64_t crand_u64(crand_t* rng); // range [0, 2^64 - 1] double crand_f64(crand_t* rng); // range [0.0, 1.0) -crand_unif_t crand_unif_init(int64_t low, int64_t high); // uniform-distribution -int64_t crand_unif(crand_t* rng, crand_unif_t* dist); // range [low, high] +crand_uniform_t crand_uniform_init(int64_t low, int64_t high); // uniform-distribution range +int64_t crand_uniform(crand_t* rng, crand_uniform_t* dist); -crand_norm_t crand_norm_init(double mean, double stddev); // normal-distribution -double crand_norm(crand_t* rng, crand_norm_t* dist); +crand_normal_t crand_normal_init(double mean, double stddev); // normal-gauss distribution +double crand_normal(crand_t* rng, crand_normal_t* dist); ``` ## Types | Name | Type definition | Used to represent... 
| |:-------------------|:------------------------------------------|:-----------------------------| | `crand_t` | `struct {uint64_t state[4];}` | The PRNG engine type | -| `crand_unif_t` | `struct {int64_t lower; uint64_t range;}` | Integer uniform distribution | -| `crand_norm_t` | `struct {double mean, stddev;}` | Normal distribution type | +| `crand_uniform_t` | `struct {int64_t lower; uint64_t range;}` | Integer uniform distribution | +| `crand_normal_t` | `struct {double mean, stddev;}` | Normal distribution type | ## Example ```c @@ -86,17 +85,17 @@ int main(void) // Setup random engine with normal distribution. uint64_t seed = time(NULL); crand_t rng = crand_init(seed); - crand_norm_t dist = crand_norm_init(Mean, StdDev); + crand_normal_t dist = crand_normal_init(Mean, StdDev); // Create histogram map - csmap_i mhist = csmap_i_init(); + csmap_i mhist = {0}; c_forrange (N) { - int index = (int)round(crand_norm(&rng, &dist)); + int index = (int)round(crand_normal(&rng, &dist)); csmap_i_emplace(&mhist, index, 0).ref->second += 1; } // Print the gaussian bar chart - cstr bar = cstr_init(); + cstr bar = {0}; c_foreach (i, csmap_i, mhist) { int n = (int)(i.ref->second * StdDev * Scale * 2.5 / N); if (n > 0) { diff --git a/include/stc/algo/coroutine.h b/include/stc/algo/coroutine.h deleted file mode 100644 index 7c6989c3..00000000 --- a/include/stc/algo/coroutine.h +++ /dev/null @@ -1,274 +0,0 @@ -/* MIT License - * - * Copyright (c) 2023 Tyge Løvset - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice 
shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef STC_COROUTINE_INCLUDED -#define STC_COROUTINE_INCLUDED -/* -#include -#include - -struct iterpair { - int max_x, max_y; - int x, y; - int cco_state; // required member -}; - -int iterpair(struct iterpair* I) { - cco_routine(I) { - for (I->x = 0; I->x < I->max_x; I->x++) - for (I->y = 0; I->y < I->max_y; I->y++) - cco_yield(); - - cco_cleanup: // required if there is cleanup code - puts("final"); - } - return 0; // CCO_DONE -} - -int main(void) { - struct iterpair it = {.max_x=3, .max_y=3}; - int n = 0; - while (iterpair(&it)) - { - printf("%d %d\n", it.x, it.y); - // example of early stop: - if (++n == 7) cco_stop(&it); // signal to stop/finalize in next - } - return 0; -} -*/ -#include "../ccommon.h" - -enum { - CCO_STATE_CLEANUP = -1, - CCO_STATE_DONE = -2, -}; -typedef enum { - CCO_DONE = 0, - CCO_AWAIT = 1<<0, - CCO_YIELD = 1<<1, -} cco_result; - -#define cco_initial(co) ((co)->cco_state == 0) -#define cco_suspended(co) ((co)->cco_state > 0) -#define cco_done(co) ((co)->cco_state == CCO_STATE_DONE) - -#define cco_routine(co) \ - for (int *_state = &(co)->cco_state; *_state != CCO_STATE_DONE; *_state = CCO_STATE_DONE) \ - _resume: switch (*_state) case 0: // thanks, @liigo! 
- -#define cco_yield() cco_yield_v(CCO_YIELD) -#define cco_yield_v(ret) \ - do { \ - *_state = __LINE__; return ret; goto _resume; \ - case __LINE__:; \ - } while (0) - -#define cco_await(promise) cco_await_v_2(promise, CCO_AWAIT) -#define cco_await_v(...) c_MACRO_OVERLOAD(cco_await_v, __VA_ARGS__) -#define cco_await_v_1(promise) cco_await_v_2(promise, ) -#define cco_await_v_2(promise, ret) \ - do { \ - *_state = __LINE__; \ - case __LINE__: if (!(promise)) {return ret; goto _resume;} \ - } while (0) - -/* cco_await_on(): assumes coroutine returns a cco_result value (int) */ -#define cco_await_on(corocall) \ - do { \ - *_state = __LINE__; \ - case __LINE__: { int _r = corocall; if (_r != CCO_DONE) {return _r; goto _resume;} } \ - } while (0) - -/* cco_block_on(): assumes coroutine returns a cco_result value (int) */ -#define cco_block_on(...) c_MACRO_OVERLOAD(cco_block_on, __VA_ARGS__) -#define cco_block_on_1(corocall) while ((corocall) != CCO_DONE) -#define cco_block_on_2(corocall, result) while ((*(result) = (corocall)) != CCO_DONE) - -#define cco_cleanup \ - *_state = CCO_STATE_CLEANUP; case CCO_STATE_CLEANUP - -#define cco_return \ - do { \ - *_state = *_state >= 0 ? CCO_STATE_CLEANUP : CCO_STATE_DONE; \ - goto _resume; \ - } while (0) - -#define cco_yield_final() cco_yield_final_v(CCO_YIELD) -#define cco_yield_final_v(value) \ - do { \ - *_state = *_state >= 0 ? CCO_STATE_CLEANUP : CCO_STATE_DONE; \ - return value; \ - } while (0) - -#define cco_stop(co) \ - do { \ - int* _s = &(co)->cco_state; \ - if (*_s > 0) *_s = CCO_STATE_CLEANUP; \ - else if (*_s == 0) *_s = CCO_STATE_DONE; \ - } while (0) - -#define cco_reset(co) \ - (void)((co)->cco_state = 0) - -/* - * Tasks (optional) - */ - -struct cco_runtime; - -#define cco_task_struct(Name, ...) 
\ - struct Name { \ - int (*cco_fn)(struct Name*, struct cco_runtime*); \ - int cco_state, cco_expect; \ - __VA_ARGS__ \ - } - -typedef cco_task_struct(cco_task, /**/) cco_task; - -typedef struct cco_runtime { - int result, top; - cco_task* stack[]; -} cco_runtime; - -#define cco_cast_task(task) \ - ((cco_task *)(task) + 0*sizeof((task)->cco_fn(task, (cco_runtime*)0) + ((int*)0 == &(task)->cco_state))) - -#define cco_resume(task, rt) \ - (task)->cco_fn(task, rt) - -#define cco_block_task(...) c_MACRO_OVERLOAD(cco_block_task, __VA_ARGS__) -#define cco_block_task_1(task) cco_block_task_3(task, rt, 16) -#define cco_block_task_3(task, rt, STACKDEPTH) \ - for (struct { int result, top; cco_task* stack[STACKDEPTH]; } rt = {.stack={cco_cast_task(task)}}; \ - (((rt.result = cco_resume(rt.stack[rt.top], (cco_runtime*)&rt)) & rt.stack[rt.top]->cco_expect) || --rt.top >= 0); ) - -#define cco_await_task(...) c_MACRO_OVERLOAD(cco_await_task, __VA_ARGS__) -#define cco_await_task_2(task, rt) cco_await_task_3(task, rt, CCO_DONE) -#define cco_await_task_3(task, rt, resultbits) \ - do { \ - cco_runtime* _rt = rt; \ - (_rt->stack[++_rt->top] = cco_cast_task(task))->cco_expect = ~(resultbits); \ - cco_yield_v(CCO_AWAIT); \ - } while (0) - -/* - * Semaphore - */ - -typedef struct { intptr_t count; } cco_sem; - -#define cco_sem_await(sem) cco_sem_await_v_2(sem, CCO_AWAIT) -#define cco_sem_await_v(...) 
c_MACRO_OVERLOAD(cco_sem_await_v, __VA_ARGS__) -#define cco_sem_await_v_1(sem) cco_sem_await_v_2(sem, ) -#define cco_sem_await_v_2(sem, ret) \ - do { \ - cco_await_v_2((sem)->count > 0, ret); \ - --(sem)->count; \ - } while (0) - -#define cco_sem_release(sem) ++(sem)->count -#define cco_sem_from(value) ((cco_sem){value}) -#define cco_sem_set(sem, value) ((sem)->count = value) - -/* - * Timer - */ - -#ifdef _WIN32 - #ifdef __cplusplus - #define _c_LINKC extern "C" __declspec(dllimport) - #else - #define _c_LINKC __declspec(dllimport) - #endif - #if 1 // _WIN32_WINNT < _WIN32_WINNT_WIN8 || defined __TINYC__ - #define _c_getsystime GetSystemTimeAsFileTime - #else - #define _c_getsystime GetSystemTimePreciseAsFileTime - #endif - struct _FILETIME; - _c_LINKC void _c_getsystime(struct _FILETIME*); - _c_LINKC void Sleep(unsigned long); - - static inline double cco_time(void) { /* seconds since epoch */ - unsigned long long quad; /* 64-bit value representing 1/10th usecs since Jan 1 1601, 00:00 UTC */ - _c_getsystime((struct _FILETIME*)&quad); - return (double)(quad - 116444736000000000ULL)*1e-7; /* time diff Jan 1 1601-Jan 1 1970 in 1/10th usecs */ - } - - static inline void cco_sleep(double sec) { - Sleep((unsigned long)(sec*1000.0)); - } -#else - #include - static inline double cco_time(void) { /* seconds since epoch */ - struct timeval tv; - gettimeofday(&tv, NULL); - return (double)tv.tv_sec + (double)tv.tv_usec*1e-6; - } - - static inline void cco_sleep(double sec) { - struct timeval tv; - tv.tv_sec = (time_t)sec; - tv.tv_usec = (suseconds_t)((sec - (double)(long)sec)*1e6); - select(0, NULL, NULL, NULL, &tv); - } -#endif - -typedef struct { double interval, start; } cco_timer; - -#define cco_timer_await(tm, sec) cco_timer_await_v_3(tm, sec, CCO_AWAIT) -#define cco_timer_await_v(...) 
c_MACRO_OVERLOAD(cco_timer_await_v, __VA_ARGS__) -#define cco_timer_await_v_2(tm, sec) cco_timer_await_v_3(tm, sec, ) -#define cco_timer_await_v_3(tm, sec, ret) \ - do { \ - cco_timer_start(tm, sec); \ - cco_await_v_2(cco_timer_expired(tm), ret); \ - } while (0) - -static inline void cco_timer_start(cco_timer* tm, double sec) { - tm->interval = sec; - tm->start = cco_time(); -} - -static inline cco_timer cco_timer_from(double sec) { - cco_timer tm = {.interval=sec, .start=cco_time()}; - return tm; -} - -static inline void cco_timer_restart(cco_timer* tm) { - tm->start = cco_time(); -} - -static inline bool cco_timer_expired(cco_timer* tm) { - return cco_time() - tm->start >= tm->interval; -} - -static inline double cco_timer_elapsed(cco_timer* tm) { - return cco_time() - tm->start; -} - -static inline double cco_timer_remaining(cco_timer* tm) { - return tm->start + tm->interval - cco_time(); -} - -#endif diff --git a/include/stc/algorithm.h b/include/stc/algorithm.h new file mode 100644 index 00000000..cf3ab328 --- /dev/null +++ b/include/stc/algorithm.h @@ -0,0 +1,8 @@ +#ifndef STC_CALGO_INCLUDED +#define STC_CALGO_INCLUDED + +#include "algo/raii.h" +#include "algo/crange.h" +#include "algo/filter.h" + +#endif diff --git a/include/stc/calgo.h b/include/stc/calgo.h deleted file mode 100644 index 63ef97b9..00000000 --- a/include/stc/calgo.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef STC_CALGO_INCLUDED -#define STC_CALGO_INCLUDED - -#include "algo/raii.h" -#include "algo/crange.h" -#include "algo/filter.h" -#include "algo/coroutine.h" - -#endif diff --git a/include/stc/coroutine.h b/include/stc/coroutine.h new file mode 100644 index 00000000..f89d20af --- /dev/null +++ b/include/stc/coroutine.h @@ -0,0 +1,273 @@ +/* MIT License + * + * Copyright (c) 2023 Tyge Løvset + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, 
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef STC_COROUTINE_INCLUDED +#define STC_COROUTINE_INCLUDED +/* +#include +#include + +struct iterpair { + int max_x, max_y; + int x, y; + int cco_state; // required member +}; + +int iterpair(struct iterpair* I) { + cco_routine(I) { + for (I->x = 0; I->x < I->max_x; I->x++) + for (I->y = 0; I->y < I->max_y; I->y++) + cco_yield(); + + cco_cleanup: // required if there is cleanup code + puts("final"); + } + return 0; // CCO_DONE +} + +int main(void) { + struct iterpair it = {.max_x=3, .max_y=3}; + int n = 0; + while (iterpair(&it)) + { + printf("%d %d\n", it.x, it.y); + // example of early stop: + if (++n == 7) cco_stop(&it); // signal to stop/finalize in next + } + return 0; +} +*/ +#include "ccommon.h" + +enum { + CCO_STATE_CLEANUP = -1, + CCO_STATE_DONE = -2, +}; +typedef enum { + CCO_DONE = 0, + CCO_AWAIT = 1<<0, + CCO_YIELD = 1<<1, +} cco_result; + +#define cco_initial(co) ((co)->cco_state == 0) +#define cco_suspended(co) ((co)->cco_state > 0) +#define cco_done(co) ((co)->cco_state == CCO_STATE_DONE) + +#define cco_routine(co) \ + for (int* _state = &(co)->cco_state; *_state != 
CCO_STATE_DONE; *_state = CCO_STATE_DONE) \ + _resume: switch (*_state) case 0: // thanks, @liigo! + +#define cco_yield() cco_yield_v(CCO_YIELD) +#define cco_yield_v(ret) \ + do { \ + *_state = __LINE__; return ret; goto _resume; \ + case __LINE__:; \ + } while (0) + +#define cco_await(promise) cco_await_v_2(promise, CCO_AWAIT) +#define cco_await_v(...) c_MACRO_OVERLOAD(cco_await_v, __VA_ARGS__) +#define cco_await_v_1(promise) cco_await_v_2(promise, ) +#define cco_await_v_2(promise, ret) \ + do { \ + *_state = __LINE__; \ + case __LINE__: if (!(promise)) {return ret; goto _resume;} \ + } while (0) + +/* cco_await_on(): assumes coroutine returns a cco_result value (int) */ +#define cco_await_on(corocall) \ + do { \ + *_state = __LINE__; \ + case __LINE__: { int _r = corocall; if (_r != CCO_DONE) {return _r; goto _resume;} } \ + } while (0) + +/* cco_block_on(): assumes coroutine returns a cco_result value (int) */ +#define cco_block_on(...) c_MACRO_OVERLOAD(cco_block_on, __VA_ARGS__) +#define cco_block_on_1(corocall) while ((corocall) != CCO_DONE) +#define cco_block_on_2(corocall, result) while ((*(result) = (corocall)) != CCO_DONE) + +#define cco_cleanup \ + *_state = CCO_STATE_CLEANUP; case CCO_STATE_CLEANUP + +#define cco_return \ + do { \ + *_state = *_state >= 0 ? CCO_STATE_CLEANUP : CCO_STATE_DONE; \ + goto _resume; \ + } while (0) + +#define cco_yield_final() cco_yield_final_v(CCO_YIELD) +#define cco_yield_final_v(value) \ + do { \ + *_state = *_state >= 0 ? CCO_STATE_CLEANUP : CCO_STATE_DONE; \ + return value; \ + } while (0) + +#define cco_stop(co) \ + do { \ + int* _s = &(co)->cco_state; \ + if (*_s > 0) *_s = CCO_STATE_CLEANUP; \ + else if (*_s == 0) *_s = CCO_STATE_DONE; \ + } while (0) + +#define cco_reset(co) \ + (void)((co)->cco_state = 0) + +/* + * Tasks (optional) + */ + +struct cco_runtime; + +#define cco_task_struct(Name, ...) 
\ + struct Name { \ + int (*cco_func)(struct Name*, struct cco_runtime*); \ + int cco_state, cco_expect; \ + __VA_ARGS__ \ + } + +typedef cco_task_struct(cco_task, /**/) cco_task; + +typedef struct cco_runtime { + int result, top; cco_task* stack[]; +} cco_runtime; + +#define cco_cast_task(task) \ + ((cco_task *)(task) + 0*sizeof((task)->cco_func(task, (cco_runtime*)0) + ((int*)0 == &(task)->cco_state))) + +#define cco_resume(task, rt) \ + (task)->cco_func(task, rt) + +#define cco_task_await(...) c_MACRO_OVERLOAD(cco_task_await, __VA_ARGS__) +#define cco_task_await_2(task, rt) cco_task_await_3(task, rt, CCO_DONE) +#define cco_task_await_3(task, rt, resultbits) \ + do { \ + cco_runtime* _rt = rt; \ + (_rt->stack[++_rt->top] = cco_cast_task(task))->cco_expect = (resultbits); \ + cco_yield_v(CCO_AWAIT); \ + } while (0) + +#define cco_task_block_on(...) c_MACRO_OVERLOAD(cco_task_block_on, __VA_ARGS__) +#define cco_task_block_on_1(task) cco_task_block_on_3(task, _rt, 16) +#define cco_task_block_on_3(task, rt, STACKDEPTH) \ + for (struct { int result, top; cco_task* stack[STACKDEPTH]; } rt = {.stack={cco_cast_task(task)}}; \ + (((rt.result = cco_resume(rt.stack[rt.top], (cco_runtime*)&rt)) & ~rt.stack[rt.top]->cco_expect) || --rt.top >= 0); ) + +/* + * Semaphore + */ + +typedef struct { intptr_t count; } cco_sem; + +#define cco_sem_await(sem) cco_sem_await_v_2(sem, CCO_AWAIT) +#define cco_sem_await_v(...) 
c_MACRO_OVERLOAD(cco_sem_await_v, __VA_ARGS__) +#define cco_sem_await_v_1(sem) cco_sem_await_v_2(sem, ) +#define cco_sem_await_v_2(sem, ret) \ + do { \ + cco_await_v_2((sem)->count > 0, ret); \ + --(sem)->count; \ + } while (0) + +#define cco_sem_release(sem) ++(sem)->count +#define cco_sem_from(value) ((cco_sem){value}) +#define cco_sem_set(sem, value) ((sem)->count = value) + +/* + * Timer + */ + +#ifdef _WIN32 + #ifdef __cplusplus + #define _c_LINKC extern "C" __declspec(dllimport) + #else + #define _c_LINKC __declspec(dllimport) + #endif + #if 1 // _WIN32_WINNT < _WIN32_WINNT_WIN8 || defined __TINYC__ + #define _c_getsystime GetSystemTimeAsFileTime + #else + #define _c_getsystime GetSystemTimePreciseAsFileTime + #endif + struct _FILETIME; + _c_LINKC void _c_getsystime(struct _FILETIME*); + _c_LINKC void Sleep(unsigned long); + + static inline double cco_time(void) { /* seconds since epoch */ + unsigned long long quad; /* 64-bit value representing 1/10th usecs since Jan 1 1601, 00:00 UTC */ + _c_getsystime((struct _FILETIME*)&quad); + return (double)(quad - 116444736000000000ULL)*1e-7; /* time diff Jan 1 1601-Jan 1 1970 in 1/10th usecs */ + } + + static inline void cco_sleep(double sec) { + Sleep((unsigned long)(sec*1000.0)); + } +#else + #include + static inline double cco_time(void) { /* seconds since epoch */ + struct timeval tv; + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec*1e-6; + } + + static inline void cco_sleep(double sec) { + struct timeval tv; + tv.tv_sec = (time_t)sec; + tv.tv_usec = (suseconds_t)((sec - (double)(long)sec)*1e6); + select(0, NULL, NULL, NULL, &tv); + } +#endif + +typedef struct { double interval, start; } cco_timer; + +#define cco_timer_await(tm, sec) cco_timer_await_v_3(tm, sec, CCO_AWAIT) +#define cco_timer_await_v(...) 
c_MACRO_OVERLOAD(cco_timer_await_v, __VA_ARGS__) +#define cco_timer_await_v_2(tm, sec) cco_timer_await_v_3(tm, sec, ) +#define cco_timer_await_v_3(tm, sec, ret) \ + do { \ + cco_timer_start(tm, sec); \ + cco_await_v_2(cco_timer_expired(tm), ret); \ + } while (0) + +static inline void cco_timer_start(cco_timer* tm, double sec) { + tm->interval = sec; + tm->start = cco_time(); +} + +static inline cco_timer cco_timer_from(double sec) { + cco_timer tm = {.interval=sec, .start=cco_time()}; + return tm; +} + +static inline void cco_timer_restart(cco_timer* tm) { + tm->start = cco_time(); +} + +static inline bool cco_timer_expired(cco_timer* tm) { + return cco_time() - tm->start >= tm->interval; +} + +static inline double cco_timer_elapsed(cco_timer* tm) { + return cco_time() - tm->start; +} + +static inline double cco_timer_remaining(cco_timer* tm) { + return tm->start + tm->interval - cco_time(); +} + +#endif diff --git a/include/stc/crand.h b/include/stc/crand.h index 0a6aa9e0..32722762 100644 --- a/include/stc/crand.h +++ b/include/stc/crand.h @@ -32,20 +32,20 @@ int main(void) { uint64_t seed = 123456789; crand_t rng = crand_init(seed); - crand_unif_t dist1 = crand_unif_init(1, 6); - crand_norm_t dist3 = crand_norm_init(1.0, 10.0); + crand_uniform_t dist1 = crand_uniform_init(1, 6); + crand_normal_t dist3 = crand_normal_init(1.0, 10.0); uint64_t i = crand_u64(&rng); - int64_t iu = crand_unif(&rng, &dist1); - double xn = crand_norm(&rng, &dist3); + int64_t iu = crand_uniform(&rng, &dist1); + double xn = crand_normal(&rng, &dist3); } */ #include #include typedef struct crand { uint64_t state[5]; } crand_t; -typedef struct crand_unif { int64_t lower; uint64_t range, threshold; } crand_unif_t; -typedef struct crand_norm { double mean, stddev, next; int has_next; } crand_norm_t; +typedef struct crand_uniform { int64_t lower; uint64_t range, threshold; } crand_uniform_t; +typedef struct crand_normal { double mean, stddev, next; int has_next; } crand_normal_t; /* PRNG 
crand_t. * Very fast PRNG suited for parallel usage with Weyl-sequence parameter. @@ -67,14 +67,14 @@ STC_API double crandf(void); STC_API crand_t crand_init(uint64_t seed); /* Unbiased bounded uniform distribution. range [low, high] */ -STC_API crand_unif_t crand_unif_init(int64_t low, int64_t high); -STC_API int64_t crand_unif(crand_t* rng, crand_unif_t* dist); +STC_API crand_uniform_t crand_uniform_init(int64_t low, int64_t high); +STC_API int64_t crand_uniform(crand_t* rng, crand_uniform_t* dist); /* Normal/gaussian distribution. */ -STC_INLINE crand_norm_t crand_norm_init(double mean, double stddev) - { crand_norm_t r = {mean, stddev, 0.0, 0}; return r; } +STC_INLINE crand_normal_t crand_normal_init(double mean, double stddev) + { crand_normal_t r = {mean, stddev, 0.0, 0}; return r; } -STC_API double crand_norm(crand_t* rng, crand_norm_t* dist); +STC_API double crand_normal(crand_t* rng, crand_normal_t* dist); /* Main crand_t prng */ STC_INLINE uint64_t crand_u64(crand_t* rng) { @@ -95,11 +95,10 @@ STC_INLINE double crand_f64(crand_t* rng) { /* -------------------------- IMPLEMENTATION ------------------------- */ #if defined(i_implement) || defined(i_static) -/* Global random() */ -static crand_t crand_global = {{ - 0x26aa069ea2fb1a4d, 0x70c72c95cd592d04, - 0x504f333d3aa0b359, 0x9e3779b97f4a7c15, - 0x6a09e667a754166b +/* Global random seed */ +static crand_t crand_global = {{ // csrand(0) + 0x9e3779b97f4a7c15, 0x6f68261b57e7a770, + 0xe220a838bf5c9dde, 0x7c17d1800457b1ba, 0x1, }}; STC_DEF void csrand(uint64_t seed) @@ -116,20 +115,20 @@ STC_DEF crand_t crand_init(uint64_t seed) { s[0] = seed + 0x9e3779b97f4a7c15; s[1] = (s[0] ^ (s[0] >> 30))*0xbf58476d1ce4e5b9; s[2] = (s[1] ^ (s[1] >> 27))*0x94d049bb133111eb; - s[3] = (s[2] ^ (s[2] >> 31)); - s[4] = ((seed + 0x6aa069ea2fb1a4d) << 1) | 1; + s[3] = s[0] ^ s[2] ^ (s[2] >> 31); + s[4] = (seed << 1) | 1; return rng; } /* Init unbiased uniform uint RNG with bounds [low, high] */ -STC_DEF crand_unif_t 
crand_unif_init(int64_t low, int64_t high) { - crand_unif_t dist = {low, (uint64_t) (high - low + 1)}; +STC_DEF crand_uniform_t crand_uniform_init(int64_t low, int64_t high) { + crand_uniform_t dist = {low, (uint64_t) (high - low + 1)}; dist.threshold = (uint64_t)(0 - dist.range) % dist.range; return dist; } /* Int64 uniform distributed RNG, range [low, high]. */ -STC_DEF int64_t crand_unif(crand_t* rng, crand_unif_t* d) { +STC_DEF int64_t crand_uniform(crand_t* rng, crand_uniform_t* d) { uint64_t lo, hi; #ifdef c_umul128 do { c_umul128(crand_u64(rng), d->range, &lo, &hi); } while (lo < d->threshold); @@ -140,7 +139,7 @@ STC_DEF int64_t crand_unif(crand_t* rng, crand_unif_t* d) { } /* Normal distribution PRNG. Marsaglia polar method */ -STC_DEF double crand_norm(crand_t* rng, crand_norm_t* dist) { +STC_DEF double crand_normal(crand_t* rng, crand_normal_t* dist) { double u1, u2, s, m; if (dist->has_next++ & 1) return dist->next*dist->stddev + dist->mean; diff --git a/misc/examples/algorithms/forfilter.c b/misc/examples/algorithms/forfilter.c index f3c008b3..644b8459 100644 --- a/misc/examples/algorithms/forfilter.c +++ b/misc/examples/algorithms/forfilter.c @@ -3,8 +3,7 @@ #include #define i_implement #include -#include -#include +#include #define i_type IVec #define i_key int diff --git a/misc/examples/algorithms/forloops.c b/misc/examples/algorithms/forloops.c index 72d745f8..300eee18 100644 --- a/misc/examples/algorithms/forloops.c +++ b/misc/examples/algorithms/forloops.c @@ -1,5 +1,5 @@ #include -#include +#include #define i_type IVec #define i_key int diff --git a/misc/examples/algorithms/random.c b/misc/examples/algorithms/random.c index b7c0f277..ccd0711d 100644 --- a/misc/examples/algorithms/random.c +++ b/misc/examples/algorithms/random.c @@ -4,11 +4,11 @@ int main(void) { - const int N = 1000000000; + long long N = 1000000000; const uint64_t seed = (uint64_t)time(NULL), range = 1000000; crand_t rng = crand_init(seed); - int64_t sum; + long long sum; 
clock_t diff, before; printf("Compare speed of full and unbiased ranged random numbers...\n"); @@ -18,19 +18,19 @@ int main(void) sum += (uint32_t)crand_u64(&rng); } diff = clock() - before; - printf("full range\t\t: %f secs, %d, avg: %f\n", - (double)diff/CLOCKS_PER_SEC, N, (double)sum/N); + printf("full range\t\t: %f secs, %lld, avg: %f\n", + (double)diff/CLOCKS_PER_SEC, N, (double)(sum/N)); - crand_unif_t dist1 = crand_unif_init(0, range); + crand_uniform_t dist1 = crand_uniform_init(0, range); rng = crand_init(seed); sum = 0; before = clock(); c_forrange (N) { - sum += crand_unif(&rng, &dist1); // unbiased + sum += crand_uniform(&rng, &dist1); // unbiased } diff = clock() - before; - printf("unbiased 0-%" PRIu64 "\t: %f secs, %d, avg: %f\n", - range, (double)diff/CLOCKS_PER_SEC, N, (double)sum/N); + printf("unbiased 0-%" PRIu64 "\t: %f secs, %lld, avg: %f\n", + range, (double)diff/CLOCKS_PER_SEC, N, (double)(sum/N)); sum = 0; rng = crand_init(seed); @@ -39,7 +39,7 @@ int main(void) sum += (int64_t)(crand_u64(&rng) % (range + 1)); // biased } diff = clock() - before; - printf("biased 0-%" PRIu64 " \t: %f secs, %d, avg: %f\n", - range, (double)diff/CLOCKS_PER_SEC, N, (double)sum/N); + printf("biased 0-%" PRIu64 " \t: %f secs, %lld, avg: %f\n", + range, (double)diff/CLOCKS_PER_SEC, N, (double)(sum/N)); } diff --git a/misc/examples/bitsets/prime.c b/misc/examples/bitsets/prime.c index cb3b095a..e5764d83 100644 --- a/misc/examples/bitsets/prime.c +++ b/misc/examples/bitsets/prime.c @@ -2,23 +2,23 @@ #include #include #include -#include -#include +#include +typedef long long llong; -cbits sieveOfEratosthenes(int64_t n) +cbits sieveOfEratosthenes(llong n) { cbits bits = cbits_with_size(n/2 + 1, true); - int64_t q = (int64_t)sqrt((double) n) + 1; - for (int64_t i = 3; i < q; i += 2) { - int64_t j = i; + llong q = (llong)sqrt((double) n) + 1; + for (llong i = 3; i < q; i += 2) { + llong j = i; for (; j < n; j += 2) { if (cbits_test(&bits, j>>1)) { i = j; break; } } - 
for (int64_t j = i*i; j < n; j += i*2) + for (llong j = i*i; j < n; j += i*2) cbits_reset(&bits, j>>1); } return bits; @@ -26,12 +26,12 @@ cbits sieveOfEratosthenes(int64_t n) int main(void) { - int n = 1000000000; - printf("Computing prime numbers up to %d\n", n); + llong n = 100000000; + printf("Computing prime numbers up to %lld\n", n); clock_t t = clock(); cbits primes = sieveOfEratosthenes(n + 1); - int np = (int)cbits_count(&primes); + llong np = cbits_count(&primes); t = clock() - t; puts("Show all the primes in the range [2, 1000):"); @@ -50,7 +50,7 @@ int main(void) printf("%lld ", *i.ref); if (c_flt_getcount(i) % 10 == 0) puts(""); } - printf("Number of primes: %d, time: %.2f\n\n", np, (double)t/CLOCKS_PER_SEC); + printf("Number of primes: %lld, time: %.2f\n\n", np, (double)t/CLOCKS_PER_SEC); cbits_drop(&primes); } diff --git a/misc/examples/coroutines/cointerleave.c b/misc/examples/coroutines/cointerleave.c index 599ceaab..ea0d4dac 100644 --- a/misc/examples/coroutines/cointerleave.c +++ b/misc/examples/coroutines/cointerleave.c @@ -1,6 +1,6 @@ // https://www.youtube.com/watch?v=8sEe-4tig_A #include -#include +#include #define i_type IVec #define i_key int #include diff --git a/misc/examples/coroutines/coread.c b/misc/examples/coroutines/coread.c index a13f6be5..56248108 100644 --- a/misc/examples/coroutines/coread.c +++ b/misc/examples/coroutines/coread.c @@ -1,6 +1,6 @@ #define i_implement #include -#include +#include #include // Read file line by line using coroutines: diff --git a/misc/examples/coroutines/coroutines.c b/misc/examples/coroutines/coroutines.c index b8dfaa13..de0fcda5 100644 --- a/misc/examples/coroutines/coroutines.c +++ b/misc/examples/coroutines/coroutines.c @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/misc/examples/coroutines/cotasks1.c b/misc/examples/coroutines/cotasks1.c index c87582f1..27999ccf 100644 --- a/misc/examples/coroutines/cotasks1.c +++ b/misc/examples/coroutines/cotasks1.c @@ -4,7 +4,7 @@ 
#include #define i_static #include -#include +#include struct next_value { int val; diff --git a/misc/examples/coroutines/cotasks2.c b/misc/examples/coroutines/cotasks2.c index 293583bc..9ca69bda 100644 --- a/misc/examples/coroutines/cotasks2.c +++ b/misc/examples/coroutines/cotasks2.c @@ -4,7 +4,7 @@ #include #define i_static #include -#include +#include cco_task_struct (next_value, int val; @@ -45,7 +45,7 @@ int produce_items(struct produce_items* p, cco_runtime* rt) while (true) { // await for next CCO_YIELD in next_value() - cco_await_task(&p->next, rt, CCO_YIELD); + cco_task_await(&p->next, rt, CCO_YIELD); cstr_printf(&p->str, "item %d", p->next.val); print_time(); printf("produced %s\n", cstr_str(&p->str)); @@ -71,7 +71,7 @@ int consume_items(struct consume_items* c, cco_runtime* rt) for (c->i = 1; c->i <= c->n; ++c->i) { printf("consume #%d\n", c->i); - cco_await_task(&c->produce, rt, CCO_YIELD); + cco_task_await(&c->produce, rt, CCO_YIELD); print_time(); printf("consumed %s\n", cstr_str(&c->produce.str)); } @@ -87,12 +87,12 @@ int main(void) { struct consume_items consume = { .n=5, - .cco_fn=consume_items, - .produce={.cco_fn=produce_items, .next={.cco_fn=next_value}}, + .cco_func=consume_items, + .produce={.cco_func=produce_items, .next={.cco_func=next_value}}, }; int count = 0; - cco_block_task(&consume) + cco_task_block_on(&consume) { ++count; //cco_sleep(0.001); diff --git a/misc/examples/coroutines/dining_philosophers.c b/misc/examples/coroutines/dining_philosophers.c index a5063a42..abe09204 100644 --- a/misc/examples/coroutines/dining_philosophers.c +++ b/misc/examples/coroutines/dining_philosophers.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include // Define the number of philosophers and forks enum { diff --git a/misc/examples/coroutines/generator.c b/misc/examples/coroutines/generator.c index a15f9ba5..f9e59fea 100644 --- a/misc/examples/coroutines/generator.c +++ b/misc/examples/coroutines/generator.c @@ -1,7 +1,7 @@ // 
https://quuxplusone.github.io/blog/2019/03/06/pythagorean-triples/ -#include #include +#include typedef struct { int size; diff --git a/misc/examples/coroutines/scheduler.c b/misc/examples/coroutines/scheduler.c index 38defd0f..78461277 100644 --- a/misc/examples/coroutines/scheduler.c +++ b/misc/examples/coroutines/scheduler.c @@ -1,6 +1,6 @@ // https://www.youtube.com/watch?v=8sEe-4tig_A #include -#include +#include struct Task { int (*fn)(struct Task*); diff --git a/misc/examples/coroutines/triples.c b/misc/examples/coroutines/triples.c index 9f2fcc1e..fe1ca7c3 100644 --- a/misc/examples/coroutines/triples.c +++ b/misc/examples/coroutines/triples.c @@ -1,7 +1,7 @@ // https://quuxplusone.github.io/blog/2019/03/06/pythagorean-triples/ -#include #include +#include int gcd(int a, int b) { while (b) { diff --git a/misc/examples/linkedlists/list.c b/misc/examples/linkedlists/list.c index ad8bebb8..09591314 100644 --- a/misc/examples/linkedlists/list.c +++ b/misc/examples/linkedlists/list.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #define i_type DList diff --git a/misc/examples/make.sh b/misc/examples/make.sh index 7135ffdf..b362f275 100755 --- a/misc/examples/make.sh +++ b/misc/examples/make.sh @@ -38,8 +38,8 @@ fi if [ $run = 0 ] ; then for i in */*.c ; do - #out=$(basename $i .c).exe - out=$(dirname $i)/$(basename $i .c).exe + out=$(basename $i .c).exe + #out=$(dirname $i)/$(basename $i .c).exe echo $comp -I../../include $i $clibs $oflag$out $comp -I../../include $i $clibs $oflag$out done @@ -47,8 +47,8 @@ else for i in */*.c ; do echo $comp -I../../include $i $clibs $comp -I../../include $i $clibs - #out=$(basename $i .c).exe - out=$(dirname $i)/$(basename $i .c).exe + out=$(basename $i .c).exe + #out=$(dirname $i)/$(basename $i .c).exe if [ -f $out ]; then ./$out; fi done fi diff --git a/misc/examples/priorityqueues/priority.c b/misc/examples/priorityqueues/priority.c index bf2e188a..18684e73 100644 --- 
a/misc/examples/priorityqueues/priority.c +++ b/misc/examples/priorityqueues/priority.c @@ -11,21 +11,21 @@ int main(void) { intptr_t N = 10000000; crand_t rng = crand_init((uint64_t)time(NULL)); - crand_unif_t dist = crand_unif_init(0, N * 10); + crand_uniform_t dist = crand_uniform_init(0, N * 10); cpque_i heap = {0}; // Push ten million random numbers to priority queue printf("Push %" c_ZI " numbers\n", N); c_forrange (N) - cpque_i_push(&heap, crand_unif(&rng, &dist)); + cpque_i_push(&heap, crand_uniform(&rng, &dist)); // push some negative numbers too. c_forlist (i, int, {-231, -32, -873, -4, -343}) cpque_i_push(&heap, *i.ref); c_forrange (N) - cpque_i_push(&heap, crand_unif(&rng, &dist)); + cpque_i_push(&heap, crand_uniform(&rng, &dist)); puts("Extract the hundred smallest."); c_forrange (100) { diff --git a/misc/examples/queues/new_queue.c b/misc/examples/queues/new_queue.c index f3592df6..3904c50c 100644 --- a/misc/examples/queues/new_queue.c +++ b/misc/examples/queues/new_queue.c @@ -23,19 +23,19 @@ int point_cmp(const Point* a, const Point* b) { int main(void) { int n = 50000000; crand_t rng = crand_init((uint64_t)time(NULL)); - crand_unif_t dist = crand_unif_init(0, n); + crand_uniform_t dist = crand_uniform_init(0, n); IQ Q = {0}; // Push 50'000'000 random numbers onto the queue. 
c_forrange (n) - IQ_push(&Q, (int)crand_unif(&rng, &dist)); + IQ_push(&Q, (int)crand_uniform(&rng, &dist)); // Push or pop on the queue 50 million times printf("befor: size %" c_ZI ", capacity %" c_ZI "\n", IQ_size(&Q), IQ_capacity(&Q)); c_forrange (n) { - int r = (int)crand_unif(&rng, &dist); + int r = (int)crand_uniform(&rng, &dist); if (r & 3) IQ_push(&Q, r); else diff --git a/misc/examples/queues/queue.c b/misc/examples/queues/queue.c index 56b5beb9..913524cc 100644 --- a/misc/examples/queues/queue.c +++ b/misc/examples/queues/queue.c @@ -7,20 +7,20 @@ int main(void) { int n = 100000000; - crand_unif_t dist; + crand_uniform_t dist; crand_t rng = crand_init(1234); - dist = crand_unif_init(0, n); + dist = crand_uniform_init(0, n); cqueue_i queue = {0}; // Push ten million random numbers onto the queue. c_forrange (n) - cqueue_i_push(&queue, (int)crand_unif(&rng, &dist)); + cqueue_i_push(&queue, (int)crand_uniform(&rng, &dist)); // Push or pop on the queue ten million times printf("%d\n", n); c_forrange (n) { // forrange uses initial n only. - int r = (int)crand_unif(&rng, &dist); + int r = (int)crand_uniform(&rng, &dist); if (r & 1) ++n, cqueue_i_push(&queue, r); else diff --git a/misc/examples/sortedmaps/gauss2.c b/misc/examples/sortedmaps/gauss2.c index 1ab8ade5..02ce4bc5 100644 --- a/misc/examples/sortedmaps/gauss2.c +++ b/misc/examples/sortedmaps/gauss2.c @@ -21,14 +21,14 @@ int main(void) printf("Mean %f, StdDev %f\n", Mean, StdDev); // Setup random engine with normal distribution. 
- crand_norm_t dist = crand_norm_init(Mean, StdDev); + crand_normal_t dist = crand_normal_init(Mean, StdDev); // Create and init histogram map with defered destruct csmap_int hist = {0}; cstr bar = {0}; c_forrange (N) { - int index = (int)round(crand_norm(&rng, &dist)); + int index = (int)round(crand_normal(&rng, &dist)); csmap_int_insert(&hist, index, 0).ref->second += 1; } diff --git a/misc/examples/spans/mdspan.c b/misc/examples/spans/mdspan.c new file mode 100644 index 00000000..4427299c --- /dev/null +++ b/misc/examples/spans/mdspan.c @@ -0,0 +1,51 @@ +#include +#include +#include + +using_cspan3(DSpan, double); + +int main(void) { + const int nx=5, ny=4, nz=3; + double* data = c_new_n(double, nx*ny*nz); + + printf("\nMultidim span ms[5, 4, 3], fortran ordered"); + DSpan3 ms = cspan_md_order('F', data, nx, ny, nz); // Fortran, not 'C' + + int idx = 0; + c_forrange (i, ms.shape[0]) + c_forrange (j, ms.shape[1]) + c_forrange (k, ms.shape[2]) + *cspan_at(&ms, i, j, k) = ++idx; + + cspan_transpose(&ms); + + printf(", transposed:\n\n"); + c_forrange (i, ms.shape[0]) { + c_forrange (j, ms.shape[1]) { + c_forrange (k, ms.shape[2]) + printf(" %3g", *cspan_at(&ms, i, j, k)); + puts(""); + } + puts(""); + } + + DSpan2 sub; + + puts("Slicing:"); + printf("\nms[0, :, :] "); + sub = cspan_slice(DSpan2, &ms, {0}, {c_ALL}, {c_ALL}); + c_foreach (i, DSpan2, sub) printf(" %g", *i.ref); + puts(""); + + printf("\nms[:, 0, :] "); + sub = cspan_slice(DSpan2, &ms, {c_ALL}, {0}, {c_ALL}); + c_foreach (i, DSpan2, sub) printf(" %g", *i.ref); + puts(""); + + sub = cspan_slice(DSpan2, &ms, {c_ALL}, {c_ALL}, {0}); + printf("\nms[:, :, 0] "); + c_foreach (i, DSpan2, sub) printf(" %g", *i.ref); + puts(""); + + free(data); +} -- cgit v1.2.3 From dbcc13635402bd466675f4f41e865d02abc6f918 Mon Sep 17 00:00:00 2001 From: tylov Date: Fri, 21 Jul 2023 10:49:45 +0200 Subject: NB! Changed some coroutine API for consistency/simplicity: Added full task support. 
--- docs/ccommon_api.md | 30 ++++++-------- include/stc/ccommon.h | 2 +- include/stc/coroutine.h | 44 ++++++++++---------- misc/examples/coroutines/cointerleave.c | 2 +- misc/examples/coroutines/coread.c | 2 +- misc/examples/coroutines/coroutines.c | 8 ++-- misc/examples/coroutines/cotasks1.c | 2 +- misc/examples/coroutines/cotasks2.c | 4 +- misc/examples/smartpointers/map_box.c | 34 +++++++++++++++ misc/examples/smartpointers/map_ptr.c | 34 +++++++++++++++ misc/examples/smartpointers/rawptr_elements.c | 59 --------------------------- misc/examples/spans/mdspan.c | 12 +++--- 12 files changed, 119 insertions(+), 114 deletions(-) create mode 100644 misc/examples/smartpointers/map_box.c create mode 100644 misc/examples/smartpointers/map_ptr.c delete mode 100644 misc/examples/smartpointers/rawptr_elements.c (limited to 'misc/examples/spans') diff --git a/docs/ccommon_api.md b/docs/ccommon_api.md index 9189d7e8..0752beb5 100644 --- a/docs/ccommon_api.md +++ b/docs/ccommon_api.md @@ -374,36 +374,32 @@ cco_routine scope; Use `if-else-if` constructs instead. | | Function / operator | Description | |:----------|:-------------------------------------|:----------------------------------------| -| | Function / 'keywords': | | -|`cco_result` | Enum `CCO_DONE=0`, `CCO_YIELD`, `CCO_AWAIT` | Recommended return values in coroutines | -| | Function / 'keywords': | | +|`cco_result` | `CCO_DONE`, `CCO_AWAIT`, `CCO_YIELD` | Default set of return values from coroutines | | | `cco_cleanup:` | Label for cleanup position in coroutine | | `bool` | `cco_done(co)` | Is coroutine done? 
| -| | `cco_routine(co) { }` | The coroutine scope | +| | `cco_routine(co) {}` | The coroutine scope | | | `cco_yield();` | Yield/suspend execution (return CCO_YIELD)| -| | `cco_yield_v();` | Yield/suspend execution (return void) | | | `cco_yield_v(ret);` | Yield/suspend execution (return ret) | -| | `cco_yield_final();` | Yield final time, enables cleanup-state | -| | `cco_yield_final(ret);` | Yield a final value (e.g. CCO_ERROR) | +| | `cco_yield_final();` | Yield final suspend, enter cleanup-state | +| | `cco_yield_final(ret);` | Yield a final value | | | `cco_await(condition);` | Suspend until condition is true (return CCO_AWAIT)| -| | `cco_await_v(condition);` | Suspend until condition is true (return void) | -| | `cco_await_v(condition, ret);` | Suspend until condition is true (return ret)| -| | `cco_await_on(cocall);` | Await on sub-coroutine to finish (return its ret) | +| | `cco_call_await(cocall);` | Await for subcoro to finish (returns its ret value) | +| | `cco_call_await(cocall, retbit);` | Await for subcoro's return to be in (retbit \| CCO_DONE) | | | `cco_return;` | Return from coroutine (inside cco_routine) | | | Task objects: | | | | `cco_task_struct(Name, ...);` | Define a coroutine task struct | -| | `cco_task_await(task, ...);` | Await for task to finish or optionally yield a value | +| | `cco_task_await(task, cco_runtime* rt);`| Await for task to finish | +| | `cco_task_await(task, rt, retbit);` | Await for task's return to be in (retbit \| CCO_DONE) | +|`cco_result`| `cco_task_resume(task, rt);` | Resume suspended task | | | Semaphores: | | | | `cco_sem` | Semaphore type | | `cco_sem` | `cco_sem_from(long value)` | Create semaphore | | | `cco_sem_set(sem, long value)` | Set semaphore value | | | `cco_sem_await(sem)` | Await for the semaphore count > 0 | -| | `cco_sem_await(sem, ret)` | Await with ret on the semaphore | | | `cco_sem_release(sem)` | Signal the semaphore (count += 1) | | | Timers: | | | | `cco_timer` | Timer type | | | 
`cco_timer_await(tm, double sec)` | Await secs for timer to expire (usec prec.)| -| | `cco_timer_await(tm, double sec, ret)`| Await secs for timer with ret value | | | `cco_timer_start(tm, double sec)` | Start timer for secs duration | | | `cco_timer_restart(tm)` | Restart timer with same duration | | `bool` | `cco_timer_expired(tm)` | Return true if timer is expired | @@ -412,10 +408,10 @@ cco_routine scope; Use `if-else-if` constructs instead. | | From caller side: | | | `void` | `cco_stop(co)` | Next call of coroutine finalizes | | `void` | `cco_reset(co)` | Reset state to initial (for reuse) | -| `void` | `cco_block_on(cocall) { }` | Run blocking until cocall is finished | -| `void` | `cco_block_on(cocall, int *result) {}`| Run blocking until cocall is finished | -| | `cco_task_block_on(task) {}` | Run blocking until task is finished | -| | `cco_task_block_on(task, rt, STACKSZ) {}`| Run blocking until task is finished | +| `void` | `cco_call_blocking(cocall) {}` | Run blocking until cocall is finished | +| `void` | `cco_call_blocking(cocall, int* outres) {}`| Run blocking until cocall is finished | +| | `cco_task_blocking(task) {}` | Run blocking until task is finished | +| | `cco_task_blocking(task, rt, STACKSZ) {}`| Run blocking until task is finished | | | Time functions: | | | `double` | `cco_time(void)` | Return secs with usec prec. since Epoch | | | `cco_sleep(double sec)` | Sleep for seconds (msec or usec prec.) | diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h index 1f9ea80d..316a8ee7 100644 --- a/include/stc/ccommon.h +++ b/include/stc/ccommon.h @@ -69,7 +69,7 @@ typedef long long _llong; #define c_new(T, ...) 
((T*)memcpy(malloc(sizeof(T)), ((T[]){__VA_ARGS__}), sizeof(T))) #define c_LITERAL(T) (T) #endif -#define c_new_n(T, n) ((T*)malloc(sizeof(T)*(n))) +#define c_new_n(T, n) ((T*)malloc(sizeof(T)*(size_t)(n))) #define c_malloc(sz) malloc(c_i2u(sz)) #define c_calloc(n, sz) calloc(c_i2u(n), c_i2u(sz)) #define c_realloc(p, sz) realloc(p, c_i2u(sz)) diff --git a/include/stc/coroutine.h b/include/stc/coroutine.h index f89d20af..42905744 100644 --- a/include/stc/coroutine.h +++ b/include/stc/coroutine.h @@ -83,26 +83,27 @@ typedef enum { case __LINE__:; \ } while (0) -#define cco_await(promise) cco_await_v_2(promise, CCO_AWAIT) -#define cco_await_v(...) c_MACRO_OVERLOAD(cco_await_v, __VA_ARGS__) -#define cco_await_v_1(promise) cco_await_v_2(promise, ) -#define cco_await_v_2(promise, ret) \ +#define cco_await(promise) cco_await_and_return(promise, CCO_AWAIT) +#define cco_await_v(promise) cco_await_and_return(promise, ) +#define cco_await_and_return(promise, ret) \ do { \ *_state = __LINE__; \ case __LINE__: if (!(promise)) {return ret; goto _resume;} \ } while (0) -/* cco_await_on(): assumes coroutine returns a cco_result value (int) */ -#define cco_await_on(corocall) \ +/* cco_call_await(): assumes coroutine returns a cco_result value (int) */ +#define cco_call_await(...) c_MACRO_OVERLOAD(cco_call_await, __VA_ARGS__) +#define cco_call_await_1(corocall) cco_call_await_2(corocall, CCO_DONE) +#define cco_call_await_2(corocall, resultbits) \ do { \ *_state = __LINE__; \ - case __LINE__: { int _r = corocall; if (_r != CCO_DONE) {return _r; goto _resume;} } \ + case __LINE__: { int _r = corocall; if (!(_r & ~(resultbits))) {return _r; goto _resume;} } \ } while (0) -/* cco_block_on(): assumes coroutine returns a cco_result value (int) */ -#define cco_block_on(...) 
c_MACRO_OVERLOAD(cco_block_on, __VA_ARGS__) -#define cco_block_on_1(corocall) while ((corocall) != CCO_DONE) -#define cco_block_on_2(corocall, result) while ((*(result) = (corocall)) != CCO_DONE) +/* cco_call_blocking(): assumes coroutine returns a cco_result value (int) */ +#define cco_call_blocking(...) c_MACRO_OVERLOAD(cco_call_blocking, __VA_ARGS__) +#define cco_call_blocking_1(corocall) while ((corocall) != CCO_DONE) +#define cco_call_blocking_2(corocall, result) while ((*(result) = (corocall)) != CCO_DONE) #define cco_cleanup \ *_state = CCO_STATE_CLEANUP; case CCO_STATE_CLEANUP @@ -152,7 +153,7 @@ typedef struct cco_runtime { #define cco_cast_task(task) \ ((cco_task *)(task) + 0*sizeof((task)->cco_func(task, (cco_runtime*)0) + ((int*)0 == &(task)->cco_state))) -#define cco_resume(task, rt) \ +#define cco_task_resume(task, rt) \ (task)->cco_func(task, rt) #define cco_task_await(...) c_MACRO_OVERLOAD(cco_task_await, __VA_ARGS__) @@ -164,11 +165,11 @@ typedef struct cco_runtime { cco_yield_v(CCO_AWAIT); \ } while (0) -#define cco_task_block_on(...) c_MACRO_OVERLOAD(cco_task_block_on, __VA_ARGS__) -#define cco_task_block_on_1(task) cco_task_block_on_3(task, _rt, 16) -#define cco_task_block_on_3(task, rt, STACKDEPTH) \ +#define cco_task_blocking(...) c_MACRO_OVERLOAD(cco_task_blocking, __VA_ARGS__) +#define cco_task_blocking_1(task) cco_task_blocking_3(task, _rt, 16) +#define cco_task_blocking_3(task, rt, STACKDEPTH) \ for (struct { int result, top; cco_task* stack[STACKDEPTH]; } rt = {.stack={cco_cast_task(task)}}; \ - (((rt.result = cco_resume(rt.stack[rt.top], (cco_runtime*)&rt)) & ~rt.stack[rt.top]->cco_expect) || --rt.top >= 0); ) + (((rt.result = cco_task_resume(rt.stack[rt.top], (cco_runtime*)&rt)) & ~rt.stack[rt.top]->cco_expect) || --rt.top >= 0); ) /* * Semaphore @@ -176,12 +177,11 @@ typedef struct cco_runtime { typedef struct { intptr_t count; } cco_sem; -#define cco_sem_await(sem) cco_sem_await_v_2(sem, CCO_AWAIT) -#define cco_sem_await_v(...) 
c_MACRO_OVERLOAD(cco_sem_await_v, __VA_ARGS__) -#define cco_sem_await_v_1(sem) cco_sem_await_v_2(sem, ) -#define cco_sem_await_v_2(sem, ret) \ +#define cco_sem_await(sem) cco_sem_await_and_return(sem, CCO_AWAIT) +#define cco_sem_await_v(sem) cco_sem_await_and_return(sem, ) +#define cco_sem_await_and_return(sem, ret) \ do { \ - cco_await_v_2((sem)->count > 0, ret); \ + cco_await_and_return((sem)->count > 0, ret); \ --(sem)->count; \ } while (0) @@ -241,7 +241,7 @@ typedef struct { double interval, start; } cco_timer; #define cco_timer_await_v_3(tm, sec, ret) \ do { \ cco_timer_start(tm, sec); \ - cco_await_v_2(cco_timer_expired(tm), ret); \ + cco_await_and_return(cco_timer_expired(tm), ret); \ } while (0) static inline void cco_timer_start(cco_timer* tm, double sec) { diff --git a/misc/examples/coroutines/cointerleave.c b/misc/examples/coroutines/cointerleave.c index ea0d4dac..f3710ba3 100644 --- a/misc/examples/coroutines/cointerleave.c +++ b/misc/examples/coroutines/cointerleave.c @@ -49,7 +49,7 @@ void Use(void) struct Generator g = {{&a}, {&b}}; - cco_block_on(interleaved(&g)) { + cco_call_blocking(interleaved(&g)) { printf("%d ", g.value); } puts(""); diff --git a/misc/examples/coroutines/coread.c b/misc/examples/coroutines/coread.c index 56248108..ebaaf19d 100644 --- a/misc/examples/coroutines/coread.c +++ b/misc/examples/coroutines/coread.c @@ -33,7 +33,7 @@ int main(void) { struct file_read g = {__FILE__}; int n = 0; - cco_block_on(file_read(&g)) + cco_call_blocking(file_read(&g)) { printf("%3d %s\n", ++n, cstr_str(&g.line)); //if (n == 10) cco_stop(&g); diff --git a/misc/examples/coroutines/coroutines.c b/misc/examples/coroutines/coroutines.c index de0fcda5..489c3ed6 100644 --- a/misc/examples/coroutines/coroutines.c +++ b/misc/examples/coroutines/coroutines.c @@ -84,13 +84,13 @@ struct combined { int combined(struct combined* g) { cco_routine(g) { - cco_await_on(prime(&g->prm)); - cco_await_on(fibonacci(&g->fib)); + cco_call_await(prime(&g->prm)); + 
cco_call_await(fibonacci(&g->fib)); // Reuse the g->prm context and extend the count: g->prm.count = 8, g->prm.result += 2; cco_reset(&g->prm); - cco_await_on(prime(&g->prm)); + cco_call_await(prime(&g->prm)); cco_cleanup: puts("final combined"); @@ -103,7 +103,7 @@ int main(void) struct combined c = {.prm={.count=8}, .fib={14}}; int res; - cco_block_on(combined(&c), &res) { + cco_call_blocking(combined(&c), &res) { if (res == CCO_YIELD) printf("Prime(%d)=%lld, Fib(%d)=%lld\n", c.prm.idx, c.prm.result, diff --git a/misc/examples/coroutines/cotasks1.c b/misc/examples/coroutines/cotasks1.c index 27999ccf..e4afbe2b 100644 --- a/misc/examples/coroutines/cotasks1.c +++ b/misc/examples/coroutines/cotasks1.c @@ -88,7 +88,7 @@ int main(void) struct consume_items consume = {.n=5}; int count = 0; - cco_block_on(consume_items(&consume, &produce)) + cco_call_blocking(consume_items(&consume, &produce)) { ++count; //cco_sleep(0.001); diff --git a/misc/examples/coroutines/cotasks2.c b/misc/examples/coroutines/cotasks2.c index 9ca69bda..24a9f23f 100644 --- a/misc/examples/coroutines/cotasks2.c +++ b/misc/examples/coroutines/cotasks2.c @@ -77,7 +77,7 @@ int consume_items(struct consume_items* c, cco_runtime* rt) } cco_cleanup: cco_stop(&c->produce); - cco_resume(&c->produce, rt); + cco_task_resume(&c->produce, rt); puts("done consume"); } return 0; @@ -92,7 +92,7 @@ int main(void) }; int count = 0; - cco_task_block_on(&consume) + cco_task_blocking(&consume) { ++count; //cco_sleep(0.001); diff --git a/misc/examples/smartpointers/map_box.c b/misc/examples/smartpointers/map_box.c new file mode 100644 index 00000000..f651b302 --- /dev/null +++ b/misc/examples/smartpointers/map_box.c @@ -0,0 +1,34 @@ +#include +#include +#define i_implement +#include + +#define i_type IBox +#define i_key long +#include // unique_ptr alike. 
+ +// cmap of cstr => IBox +#define i_type Boxmap +#define i_key_str +#define i_valboxed IBox // i_valboxed: use properties from IBox automatically +#include + + +int main(void) +{ + Boxmap map = {0}; + + puts("Map cstr => IBox:"); + Boxmap_insert(&map, cstr_from("Test1"), IBox_make(1)); + Boxmap_insert(&map, cstr_from("Test2"), IBox_make(2)); + + // Simpler: emplace() implicitly creates cstr from const char* and IBox from long! + Boxmap_emplace(&map, "Test3", 3); + Boxmap_emplace(&map, "Test4", 4); + + c_forpair (name, number, Boxmap, map) + printf("%s: %ld\n", cstr_str(_.name), *_.number->get); + puts(""); + + Boxmap_drop(&map); +} diff --git a/misc/examples/smartpointers/map_ptr.c b/misc/examples/smartpointers/map_ptr.c new file mode 100644 index 00000000..453322c5 --- /dev/null +++ b/misc/examples/smartpointers/map_ptr.c @@ -0,0 +1,34 @@ +#include +#include +#define i_implement +#include + +// cmap of cstr => long* +#define i_type Ptrmap +#define i_key_str +#define i_val long* +#define i_valraw long +#define i_valfrom(raw) c_new(long, raw) +#define i_valto(x) **x +#define i_valclone(x) c_new(long, *x) +#define i_valdrop(x) c_free(*x) +#include + +int main(void) +{ + Ptrmap map = {0}; + + puts("Map cstr => long*:"); + Ptrmap_insert(&map, cstr_from("Test1"), c_new(long, 1)); + Ptrmap_insert(&map, cstr_from("Test2"), c_new(long, 2)); + + // Simple: emplace() implicitly creates cstr from const char* and an owned long* from long! 
+ Ptrmap_emplace(&map, "Test3", 3); + Ptrmap_emplace(&map, "Test4", 4); + + c_forpair (name, number, Ptrmap, map) + printf("%s: %ld\n", cstr_str(_.name), **_.number); + puts(""); + + Ptrmap_drop(&map); +} diff --git a/misc/examples/smartpointers/rawptr_elements.c b/misc/examples/smartpointers/rawptr_elements.c deleted file mode 100644 index 694ce12e..00000000 --- a/misc/examples/smartpointers/rawptr_elements.c +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include -#define i_implement -#include - -// Create cmap of cstr => long* -#define i_type SIPtrMap -#define i_key_str -#define i_val long* -#define i_valraw long -#define i_valfrom(raw) c_new(long, raw) -#define i_valto(x) **x -#define i_valclone(x) c_new(long, *x) -#define i_valdrop(x) c_free(*x) -#include - -// Alternatively, using cbox: -#define i_type IBox -#define i_key long -#include // unique_ptr alike. - -// cmap of cstr => IBox -#define i_type SIBoxMap -#define i_key_str -#define i_valboxed IBox // i_valboxed: use properties from IBox automatically -#include - -int main(void) -{ - // These have the same behaviour, except IBox has a get member: - SIPtrMap map1 = {0}; - SIBoxMap map2 = {0}; - - printf("\nMap cstr => long*:\n"); - SIPtrMap_insert(&map1, cstr_from("Test1"), c_new(long, 1)); - SIPtrMap_insert(&map1, cstr_from("Test2"), c_new(long, 2)); - - // Emplace implicitly creates cstr from const char* and an owned long* from long! - SIPtrMap_emplace(&map1, "Test3", 3); - SIPtrMap_emplace(&map1, "Test4", 4); - - c_forpair (name, number, SIPtrMap, map1) - printf("%s: %ld\n", cstr_str(_.name), **_.number); - - puts("\nMap cstr => IBox:"); - SIBoxMap_insert(&map2, cstr_from("Test1"), IBox_make(1)); - SIBoxMap_insert(&map2, cstr_from("Test2"), IBox_make(2)); - - // Emplace implicitly creates cstr from const char* and IBox from long! 
- SIBoxMap_emplace(&map2, "Test3", 3); - SIBoxMap_emplace(&map2, "Test4", 4); - - c_forpair (name, number, SIBoxMap, map2) - printf("%s: %ld\n", cstr_str(_.name), *_.number->get); - puts(""); - - SIPtrMap_drop(&map1); - SIBoxMap_drop(&map2); -} diff --git a/misc/examples/spans/mdspan.c b/misc/examples/spans/mdspan.c index 4427299c..db601850 100644 --- a/misc/examples/spans/mdspan.c +++ b/misc/examples/spans/mdspan.c @@ -12,17 +12,17 @@ int main(void) { DSpan3 ms = cspan_md_order('F', data, nx, ny, nz); // Fortran, not 'C' int idx = 0; - c_forrange (i, ms.shape[0]) - c_forrange (j, ms.shape[1]) - c_forrange (k, ms.shape[2]) + for (int i = 0; i < ms.shape[0]; ++i) + for (int j = 0; j < ms.shape[1]; ++j) + for (int k = 0; k < ms.shape[2]; ++k) *cspan_at(&ms, i, j, k) = ++idx; cspan_transpose(&ms); printf(", transposed:\n\n"); - c_forrange (i, ms.shape[0]) { - c_forrange (j, ms.shape[1]) { - c_forrange (k, ms.shape[2]) + for (int i = 0; i < ms.shape[0]; ++i) { + for (int j = 0; j < ms.shape[1]; ++j) { + for (int k = 0; k < ms.shape[2]; ++k) printf(" %3g", *cspan_at(&ms, i, j, k)); puts(""); } -- cgit v1.2.3 From f1f0c01e798eb3217e62a43de660723173984547 Mon Sep 17 00:00:00 2001 From: tylov Date: Mon, 24 Jul 2023 12:54:09 +0200 Subject: Improved an issue with cspan. --- docs/cspan_api.md | 10 ++++++++++ include/stc/cspan.h | 18 +++++++++++------- misc/examples/spans/printspan.c | 2 +- 3 files changed, 22 insertions(+), 8 deletions(-) (limited to 'misc/examples/spans') diff --git a/docs/cspan_api.md b/docs/cspan_api.md index 1312ae6d..51d72856 100644 --- a/docs/cspan_api.md +++ b/docs/cspan_api.md @@ -29,6 +29,7 @@ by default (define `STC_NDEBUG` or `NDEBUG` to disable). 
```c SpanType cspan_init(TYPE SpanType, {v1, v2, ...}); // make a 1-d cspan from values SpanType cspan_from(STCContainer* cnt); // make a 1-d cspan from a cvec, cstack, cpque (heap) +SpanType cspan_from_n(ValueType* ptr, intptr_t n); // make a 1-d cspan from a pointer and length SpanType cspan_from_array(ValueType array[]); // make a 1-d cspan from a C array intptr_t cspan_size(const SpanTypeN* self); // return number of elements @@ -144,6 +145,7 @@ Slicing cspan without and with reducing the rank: ```c #define i_implement #include +#include #include using_cspan3(Span, int); // Shorthand to define Span, Span2, and Span3 @@ -164,6 +166,14 @@ int main(void) fmt_print(" {}", *i.ref); puts(""); + // create span on-the-fly + int array[] = {3, 65, 4, 3, 7, 87, 45}; + c_forfilter (i, ISpan, (ISpan)cspan_from_array(array), + c_flt_skip(i, 2) && + c_flt_take(i, 3)) + fmt_print(" {}", *i.ref); + puts(""); + // slice without reducing rank: Span3 ss3 = cspan_slice(Span3, &span3, {c_ALL}, {3,4}, {c_ALL}); diff --git a/include/stc/cspan.h b/include/stc/cspan.h index 08045010..1b57d4d4 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -26,7 +26,7 @@ #include #include using_cspan(Span2f, float, 2); -using_cspan(Intspan, int, 1); +using_cspan(Intspan, int); int demo1() { float raw[4*5]; @@ -65,7 +65,11 @@ int demo2() { #define using_cspan(...) 
c_MACRO_OVERLOAD(using_cspan, __VA_ARGS__) #define using_cspan_2(Self, T) \ - using_cspan_3(Self, T, 1) + using_cspan_3(Self, T, 1); \ + STC_INLINE Self Self##_from_n(Self##_raw* raw, const intptr_t n) { \ + return (Self){.data=raw, .shape={(int32_t)n}, .stride={.d={1}}}; \ + } \ + struct stc_nostruct #define using_cspan_3(Self, T, RANK) \ typedef T Self##_value; typedef T Self##_raw; \ @@ -77,9 +81,6 @@ int demo2() { \ typedef struct { Self##_value *ref; int32_t pos[RANK]; const Self *_s; } Self##_iter; \ \ - STC_INLINE Self Self##_from_n(Self##_raw* raw, const intptr_t n) { \ - return (Self){.data=raw, .shape={(int32_t)n}}; \ - } \ STC_INLINE Self Self##_slice_(Self##_value* d, const int32_t shape[], const int32_t stri[], \ const int rank, const int32_t a[][2]) { \ Self s; int outrank; \ @@ -104,7 +105,7 @@ int demo2() { } \ struct stc_nostruct -#define using_cspan2(Self, T) using_cspan_3(Self, T, 1); using_cspan_3(Self##2, T, 2) +#define using_cspan2(Self, T) using_cspan_2(Self, T); using_cspan_3(Self##2, T, 2) #define using_cspan3(Self, T) using_cspan2(Self, T); using_cspan_3(Self##3, T, 3) #define using_cspan4(Self, T) using_cspan3(Self, T); using_cspan_3(Self##4, T, 4) #define using_cspan_tuple(N) typedef struct { int32_t d[N]; } cspan_tuple##N @@ -124,8 +125,11 @@ using_cspan_tuple(7); using_cspan_tuple(8); #define cspan_from(container) \ {.data=(container)->data, .shape={(int32_t)(container)->_len}, .stride={.d={1}}} +#define cspan_from_n(ptr, n) \ + {.data=(ptr), .shape={n}, .stride={.d={1}}} + #define cspan_from_array(array) \ - {.data=(array), .shape={c_arraylen(array)}, .stride={.d={1}}} + cspan_from_n(array, c_arraylen(array)) #define cspan_size(self) _cspan_size((self)->shape, cspan_rank(self)) #define cspan_rank(self) c_arraylen((self)->shape) diff --git a/misc/examples/spans/printspan.c b/misc/examples/spans/printspan.c index cd3c5f4f..eb9d80e3 100644 --- a/misc/examples/spans/printspan.c +++ b/misc/examples/spans/printspan.c @@ -11,7 +11,7 @@ 
#include #include -using_cspan(intspan, int, 1); +using_cspan(intspan, int); void printMe(intspan container) { printf("%d:", (int)cspan_size(&container)); -- cgit v1.2.3 From b6f7896ff21002e58e9af12fd553da32bda5f6d1 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Mon, 14 Aug 2023 21:19:28 +0200 Subject: Simplified printspan.c example --- misc/examples/spans/printspan.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) (limited to 'misc/examples/spans') diff --git a/misc/examples/spans/printspan.c b/misc/examples/spans/printspan.c index eb9d80e3..b6999b61 100644 --- a/misc/examples/spans/printspan.c +++ b/misc/examples/spans/printspan.c @@ -1,18 +1,16 @@ -// printspan.c +// https://www.modernescpp.com/index.php/c-20-std-span/ #include -#define i_implement -#include #define i_key int #include + #define i_key int #include -#define i_key_str -#include #include using_cspan(intspan, int); + void printMe(intspan container) { printf("%d:", (int)cspan_size(&container)); c_foreach (e, intspan, container) @@ -20,33 +18,24 @@ void printMe(intspan container) { puts(""); } + int main(void) { - intspan sp1 = cspan_init(intspan, {1, 2}); - printMe( sp1 ); + printMe( c_init(intspan, {1, 2, 3, 4}) ); - printMe( c_init(intspan, {1, 2, 3}) ); + int arr[] = {1, 2, 3, 4, 5}; + printMe( (intspan)cspan_from_array(arr) ); - int arr[] = {1, 2, 3, 4, 5, 6}; - intspan sp2 = cspan_from_array(arr); - printMe( c_LITERAL(intspan)cspan_subspan(&sp2, 1, 4) ); - - cvec_int vec = c_init(cvec_int, {1, 2, 3, 4, 5}); - printMe( c_LITERAL(intspan)cspan_from(&vec) ); - - printMe( sp2 ); + cvec_int vec = c_init(cvec_int, {1, 2, 3, 4, 5, 6}); + printMe( (intspan)cspan_from(&vec) ); cstack_int stk = c_init(cstack_int, {1, 2, 3, 4, 5, 6, 7}); - printMe( c_LITERAL(intspan)cspan_from(&stk) ); + printMe( (intspan)cspan_from(&stk) ); - csset_str set = c_init(csset_str, {"5", "7", "4", "3", "8", "2", "1", "9", "6"}); - printf("%d:", (int)csset_str_size(&set)); - c_foreach 
(e, csset_str, set) - printf(" %s", cstr_str(e.ref)); - puts(""); + intspan spn = c_init(intspan, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + printMe( (intspan)cspan_subspan(&spn, 2, 8) ); // cleanup cvec_int_drop(&vec); cstack_int_drop(&stk); - csset_str_drop(&set); } -- cgit v1.2.3 From 541ce2af6bda0bb21393bdee3fed1e70f9ce40f1 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Wed, 16 Aug 2023 17:15:37 +0200 Subject: Added recursive matrix multiplication example for cspan. --- include/c11/fmt.h | 6 +-- misc/examples/spans/matmult.c | 90 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 misc/examples/spans/matmult.c (limited to 'misc/examples/spans') diff --git a/include/c11/fmt.h b/include/c11/fmt.h index d7c10cbe..df96bae3 100644 --- a/include/c11/fmt.h +++ b/include/c11/fmt.h @@ -25,7 +25,7 @@ void fmt_close(fmt_stream* ss); * C11 or higher required. * MAX 255 chars fmt string by default. MAX 12 arguments after fmt string. -* Define FMT_IMPLEMENT or i_implement prior to #include in one translation unit. +* Define FMT_IMPLEMENT, STC_IMPLEMENT or i_implement prior to #include in one translation unit. * Define FMT_SHORTS to add print(), println() and printd() macros, without fmt_ prefix. * (c) operamint, 2022, MIT License. ----------------------------------------------------------------------------------- @@ -84,7 +84,7 @@ int main(void) { #define _fmt_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \ _14, _15, _16, N, ...) 
N -#if defined FMT_NDEBUG || defined NDEBUG +#if defined FMT_NDEBUG || defined STC_NDEBUG || defined NDEBUG # define fmt_OK(exp) (void)(exp) #else # define fmt_OK(exp) assert(exp) @@ -196,7 +196,7 @@ void _fmt_bprint(fmt_stream*, const char* fmt, ...); const wchar_t*: "ls", \ const void*: "p") -#if defined FMT_IMPLEMENT || defined i_implement +#if defined FMT_IMPLEMENT || defined STC_IMPLEMENT || defined i_implement #include #include diff --git a/misc/examples/spans/matmult.c b/misc/examples/spans/matmult.c new file mode 100644 index 00000000..62c0c26b --- /dev/null +++ b/misc/examples/spans/matmult.c @@ -0,0 +1,90 @@ +// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p2642r2.html +// C99: +#include +#include +#include + +using_cspan3(Mat, double); +typedef Mat2 OutMat; +typedef struct { Mat2 m00, m01, m10, m11; } Partition; + +Partition partition(Mat2 A) +{ + int32_t M = A.shape[0]; + int32_t N = A.shape[1]; + return (Partition){ + .m00 = cspan_slice(Mat2, &A, {0, M/2}, {0, N/2}), + .m01 = cspan_slice(Mat2, &A, {0, M/2}, {N/2, N}), + .m10 = cspan_slice(Mat2, &A, {M/2, M}, {0, N/2}), + .m11 = cspan_slice(Mat2, &A, {M/2, M}, {N/2, N}), + }; +} + +// Slow generic implementation +void base_case_matrix_product(Mat2 A, Mat2 B, OutMat C) +{ + for (int j = 0; j < C.shape[1]; ++j) { + for (int i = 0; i < C.shape[0]; ++i) { + Mat2_value C_ij = 0; + for (int k = 0; k < A.shape[1]; ++k) { + C_ij += *cspan_at(&A, i,k) * *cspan_at(&B, k,j); + } + *cspan_at(&C, i,j) += C_ij; + } + } +} + +void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C) +{ + // Some hardware-dependent constant + enum {recursion_threshold = 16}; + if (C.shape[0] <= recursion_threshold || C.shape[1] <= recursion_threshold) { + base_case_matrix_product(A, B, C); + } else { + Partition c = partition(C), + a = partition(A), + b = partition(B); + recursive_matrix_product(a.m00, b.m00, c.m00); + recursive_matrix_product(a.m01, b.m10, c.m00); + recursive_matrix_product(a.m10, b.m00, c.m10); + 
recursive_matrix_product(a.m11, b.m10, c.m10); + recursive_matrix_product(a.m00, b.m01, c.m01); + recursive_matrix_product(a.m01, b.m11, c.m01); + recursive_matrix_product(a.m10, b.m01, c.m11); + recursive_matrix_product(a.m11, b.m11, c.m11); + } +} + + +#define i_type Values +#define i_val double +#include +#include + +int main(void) +{ + enum {N = 10, D1 = 256, D2 = D1}; + + Values values = {0}; + for (int i=0; i < N*D1*D2; ++i) + Values_push(&values, (crandf() - 0.5f)*4.0f); + + double out[D1*D2]; + Mat3 data = cspan_md_order('C', values.data, N, D1, D2); + OutMat c = cspan_md_order('C', out, D1, D2); + Mat2 a = cspan_submd3(&data, 0); + double sum = 0.0; + clock_t t = clock(); + + for (int i=1; i Date: Wed, 16 Aug 2023 17:25:08 +0200 Subject: matmult.c cleanup. --- misc/examples/spans/matmult.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'misc/examples/spans') diff --git a/misc/examples/spans/matmult.c b/misc/examples/spans/matmult.c index 62c0c26b..b28e6459 100644 --- a/misc/examples/spans/matmult.c +++ b/misc/examples/spans/matmult.c @@ -67,7 +67,7 @@ int main(void) Values values = {0}; for (int i=0; i < N*D1*D2; ++i) - Values_push(&values, (crandf() - 0.5f)*4.0f); + Values_push(&values, (crandf() - 0.5)*4.0); double out[D1*D2]; Mat3 data = cspan_md_order('C', values.data, N, D1, D2); @@ -85,6 +85,6 @@ int main(void) } t = clock() - t; - printf("%.16g: %f\n", sum, (float)t*1000.0f/CLOCKS_PER_SEC); + printf("%.16g: %f\n", sum, (double)t*1000.0/CLOCKS_PER_SEC); Values_drop(&values); } -- cgit v1.2.3 From e51376c9b72448dad947c3cd3760ab013ca8e4a5 Mon Sep 17 00:00:00 2001 From: Tyge Lovset Date: Sat, 19 Aug 2023 08:57:53 +0200 Subject: Moved cspan_next() to shared implementation (if chosen). 
--- include/stc/cspan.h | 26 ++++++++++++++------------ misc/examples/spans/matmult.c | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'misc/examples/spans') diff --git a/include/stc/cspan.h b/include/stc/cspan.h index cca5486a..b8b191f1 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -223,18 +223,7 @@ STC_INLINE intptr_t _cspan_idxN(int rank, const int32_t shape[], const int32_t s return off; } -STC_INLINE intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done) { - int i, inc; - if (stride[0] < stride[rank - 1]) i = rank - 1, inc = -1; else i = 0, inc = 1; - intptr_t off = stride[i]; - ++pos[i]; - for (; --rank && pos[i] == shape[i]; i += inc) { - pos[i] = 0; ++pos[i + inc]; - off += stride[i + inc] - stride[i]*shape[i]; - } - *done = pos[i] == shape[i]; - return off; -} +STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done); #define _cspan_next1(pos, shape, stride, rank, done) (*done = ++pos[0]==shape[0], stride[0]) #define _cspan_next3 _cspan_next2 #define _cspan_next4 _cspan_next2 @@ -253,6 +242,19 @@ STC_API int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank); /* --------------------- IMPLEMENTATION --------------------- */ #if defined(i_implement) || defined(i_static) +STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done) { + int i, inc; + if (stride[0] < stride[rank - 1]) i = rank - 1, inc = -1; else i = 0, inc = 1; + intptr_t off = stride[i]; + ++pos[i]; + for (; --rank && pos[i] == shape[i]; i += inc) { + pos[i] = 0; ++pos[i + inc]; + off += stride[i + inc] - stride[i]*shape[i]; + } + *done = pos[i] == shape[i]; + return off; +} + STC_DEF int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank) { int32_t k = 1, i, j, inc, s1, s2; if (order == 'F') i = 0, j = rank, inc = 1; diff --git a/misc/examples/spans/matmult.c 
b/misc/examples/spans/matmult.c index b28e6459..35dad7a9 100644 --- a/misc/examples/spans/matmult.c +++ b/misc/examples/spans/matmult.c @@ -41,7 +41,7 @@ void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C) if (C.shape[0] <= recursion_threshold || C.shape[1] <= recursion_threshold) { base_case_matrix_product(A, B, C); } else { - Partition c = partition(C), + Partition c = partition(C), a = partition(A), b = partition(B); recursive_matrix_product(a.m00, b.m00, c.m00); -- cgit v1.2.3 From ac7afe963527eb66a12962c638242f0426b39575 Mon Sep 17 00:00:00 2001 From: tylov Date: Sat, 19 Aug 2023 18:55:41 +0200 Subject: Polishing cspan.h. Updated multidim.c cspan example. --- docs/cstr_api.md | 6 ++-- include/stc/cspan.h | 19 +++++++---- misc/examples/spans/multidim.c | 75 ++++++++++++++++++++++-------------------- 3 files changed, 55 insertions(+), 45 deletions(-) (limited to 'misc/examples/spans') diff --git a/docs/cstr_api.md b/docs/cstr_api.md index 5f6ce9e4..397634ec 100644 --- a/docs/cstr_api.md +++ b/docs/cstr_api.md @@ -120,14 +120,14 @@ cstr_iter cstr_advance(cstr_iter it, intptr_t n); // utf8 functions requires linking with src/utf8code.c symbols: bool cstr_valid_utf8(const cstr* self); // check if str is valid utf8 -cstr cstr_casefold_sv(csview sv); // returns new casefolded utf8 cstr +cstr cstr_casefold_sv(csview sv); // returns new casefolded utf8 cstr cstr cstr_tolower(const char* str); // returns new lowercase utf8 cstr -cstr cstr_tolower_sv(csview sv); // returns new lowercase utf8 cstr +cstr cstr_tolower_sv(csview sv); // returns new lowercase utf8 cstr void cstr_lowercase(cstr* self); // transform cstr to lowercase utf8 cstr cstr_toupper(const char* str); // returns new uppercase utf8 cstr -cstr cstr_toupper_sv(csview sv); // returns new uppercase utf8 cstr +cstr cstr_toupper_sv(csview sv); // returns new uppercase utf8 cstr void cstr_uppercase(cstr* self); // transform cstr to uppercase utf8 int cstr_icmp(const cstr* s1, const cstr* s2); // utf8 
case-insensitive comparison diff --git a/include/stc/cspan.h b/include/stc/cspan.h index b8b191f1..32921390 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -244,24 +244,29 @@ STC_API int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank); STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done) { int i, inc; - if (stride[0] < stride[rank - 1]) i = rank - 1, inc = -1; else i = 0, inc = 1; + if (stride[0] < stride[rank - 1]) i = rank - 1, inc = -1; + else /* order 'C' */ i = 0, inc = 1; + intptr_t off = stride[i]; ++pos[i]; - for (; --rank && pos[i] == shape[i]; i += inc) { + while (--rank && pos[i] == shape[i]) { pos[i] = 0; ++pos[i + inc]; off += stride[i + inc] - stride[i]*shape[i]; + i += inc; } *done = pos[i] == shape[i]; return off; } STC_DEF int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank) { - int32_t k = 1, i, j, inc, s1, s2; - if (order == 'F') i = 0, j = rank, inc = 1; - else /* 'C' */ i = rank - 1, j = -1, inc = -1; - s1 = shape[i]; shape[i] = 1; + int i, inc; + if (order == 'F') i = 0, inc = 1; + else i = rank - 1, inc = -1; + int32_t k = 1, s1 = shape[i], s2; - for (i += inc; i != j; i += inc) { + shape[i] = 1; + while (--rank) { + i += inc; s2 = shape[i]; shape[i] = (k *= s1); s1 = s2; diff --git a/misc/examples/spans/multidim.c b/misc/examples/spans/multidim.c index 798a1126..ebc05a70 100644 --- a/misc/examples/spans/multidim.c +++ b/misc/examples/spans/multidim.c @@ -1,66 +1,71 @@ // Example based on https://en.cppreference.com/w/cpp/container/mdspan #define i_val int #include +#define i_implement #include #include using_cspan3(ispan, int); +void print2d(ispan2 ms2) { + for (int i=0; i < ms2.shape[0]; i++) { + for (int j=0; j < ms2.shape[1]; j++) + printf(" %3d", *cspan_at(&ms2, i, j)); + puts(""); + } +} + +void print3d(ispan3 ms3) { + for (int i=0; i < ms3.shape[0]; i++) { + for (int j=0; j < ms3.shape[1]; j++) { + for (int k=0; k < ms3.shape[2]; 
k++) + printf(" %3d", *cspan_at(&ms3, i, j, k)); + puts(""); + } + puts(""); + } +} + int main(void) { cstack_int v = c_init(cstack_int, {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}); - // View data as contiguous memory representing 24 ints + // Create 1d span from a compatibel container ispan ms1 = cspan_from(&v); - // View the same data as a 3D array 2 x 3 x 4 + // Create a 3D mdspan 2 x 3 x 4 ispan3 ms3 = cspan_md(v.data, 2, 3, 4); puts("ms3:"); - for (int i=0; i != ms3.shape[0]; i++) { - for (int j=0; j != ms3.shape[1]; j++) { - for (int k=0; k != ms3.shape[2]; k++) { - printf(" %2d", *cspan_at(&ms3, i, j, k)); - } - puts(""); - } - puts(""); - } - puts("ss3 = ms3[:, 1:3, 1:3]"); - ispan3 ss3 = ms3; - ss3 = cspan_slice(ispan3, &ms3, {c_ALL}, {1,3}, {1,3}); + print3d(ms3); - for (int i=0; i != ss3.shape[0]; i++) { - for (int j=0; j != ss3.shape[1]; j++) { - for (int k=0; k != ss3.shape[2]; k++) { - printf(" %2d", *cspan_at(&ss3, i, j, k)); - } - puts(""); - } - puts(""); - } + // Take a slice of md3 + ispan3 ss3 = cspan_slice(ispan3, &ms3, {c_ALL}, {1,3}, {1,3}); + puts("ss3 = ms3[:, 1:3, 1:3]"); + print3d(ss3); puts("Iterate ss3 flat:"); - c_foreach (i, ispan3, ss3) - printf(" %d", *i.ref); + c_foreach (i, ispan3, ss3) printf(" %d", *i.ref); puts(""); - ispan2 ms2 = cspan_submd3(&ms3, 0); + // submd3 span reduces rank depending on number of arguments + ispan2 ms2 = cspan_submd3(&ms3, 1); - // write data using 2D view + // Change data on the 2d subspan for (int i=0; i != ms2.shape[0]; i++) for (int j=0; j != ms2.shape[1]; j++) - *cspan_at(&ms2, i, j) = i*1000 + j; + *cspan_at(&ms2, i, j) = (i + 1)*100 + j; + + puts("\nms2 = ms3[1] with updated data:"); + print2d(ms2); + puts(""); - puts("\nview data as 1D view:"); - for (int i=0; i != cspan_size(&ms1); i++) - printf(" %d", *cspan_at(&ms1, i)); + puts("\nOriginal s1 span with updated data:"); + c_foreach (i, ispan, ms1) printf(" %d", *i.ref); puts(""); - puts("iterate subspan ms3[1]:"); - 
ispan2 sub = cspan_submd3(&ms3, 1); - c_foreach (i, ispan2, sub) - printf(" %d", *i.ref); + puts("\nOriginal ms3 span with updated data:"); + print3d(ms3); puts(""); cstack_int_drop(&v); -- cgit v1.2.3 From 7cf924a174a509bee2162169adc5c82d011c7116 Mon Sep 17 00:00:00 2001 From: Tyge Lovset Date: Mon, 21 Aug 2023 08:58:24 +0200 Subject: Added kokkos submdspan example (did not make it to c++23). --- misc/examples/spans/submdspan.c | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 misc/examples/spans/submdspan.c (limited to 'misc/examples/spans') diff --git a/misc/examples/spans/submdspan.c b/misc/examples/spans/submdspan.c new file mode 100644 index 00000000..fa0d5762 --- /dev/null +++ b/misc/examples/spans/submdspan.c @@ -0,0 +1,44 @@ +// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2630r0.html +// C99: +#include +#include + +using_cspan3(span, double); // define span, span2, span3 + +// Set all elements of a rank-2 mdspan to zero. +void zero_2d(span2 grid2d) { + c_static_assert(cspan_rank(&grid2d) == 2); + for (int i = 0; i < grid2d.shape[0]; ++i) { + for (int j = 0; j < grid2d.shape[1]; ++j) { + *cspan_at(&grid2d, i,j) = 0; + } + } +} + +void zero_surface(span3 grid3d) { + c_static_assert(cspan_rank(&grid3d) == 3); + zero_2d(cspan_slice(span2, &grid3d, {0}, {c_ALL}, {c_ALL})); + zero_2d(cspan_slice(span2, &grid3d, {c_ALL}, {0}, {c_ALL})); + zero_2d(cspan_slice(span2, &grid3d, {c_ALL}, {c_ALL}, {0})); + zero_2d(cspan_slice(span2, &grid3d, {grid3d.shape[0]-1}, {c_ALL}, {c_ALL})); + zero_2d(cspan_slice(span2, &grid3d, {c_ALL}, {grid3d.shape[1]-1}, {c_ALL})); + zero_2d(cspan_slice(span2, &grid3d, {c_ALL}, {c_ALL}, {grid3d.shape[2]-1})); +} + +int main() { + double arr[3*4*5]; + for (int i=0; i Date: Mon, 21 Aug 2023 18:26:59 +0200 Subject: Some cleanups. 
--- include/stc/forward.h | 20 ++++++++-------- misc/benchmarks/various/cspan_bench.c | 43 ++++++++--------------------------- misc/examples/spans/submdspan.c | 8 +++---- 3 files changed, 24 insertions(+), 47 deletions(-) (limited to 'misc/examples/spans') diff --git a/include/stc/forward.h b/include/stc/forward.h index 2372a618..2fbff034 100644 --- a/include/stc/forward.h +++ b/include/stc/forward.h @@ -41,13 +41,13 @@ // csview : non-null terminated string view typedef const char csview_value; -typedef struct csview { - csview_value* buf; +typedef struct csview { + csview_value* buf; intptr_t size; } csview; -typedef union { - csview_value* ref; +typedef union { + csview_value* ref; csview chr; struct { csview chr; csview_value* end; } u8; } csview_iter; @@ -55,13 +55,13 @@ typedef union { // crawstr : null-terminated string view typedef csview_value crawstr_value; -typedef struct crawstr { - crawstr_value* str; +typedef struct crawstr { + crawstr_value* str; intptr_t size; } crawstr; -typedef union { - crawstr_value* ref; +typedef union { + crawstr_value* ref; csview chr; struct { csview chr; } u8; // [deprecated] } crawstr_iter; @@ -75,8 +75,8 @@ typedef union cstr { struct { cstr_value* data; size_t size, ncap; } lon; } cstr; -typedef union { - cstr_value* ref; +typedef union { + cstr_value* ref; csview chr; struct { csview chr; } u8; // [deprecated] } cstr_iter; diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c index b5caca83..3b1c3132 100644 --- a/misc/benchmarks/various/cspan_bench.c +++ b/misc/benchmarks/various/cspan_bench.c @@ -1,3 +1,4 @@ +// ref: https://stackoverflow.com/questions/74382366/why-is-iterating-over-stdrangesviewsjoin-so-slow #define NDEBUG #include #include @@ -11,6 +12,7 @@ enum { ny = 64, nz = 64 }; +// subspan 15x5x10: int lx = 15, ly = 10, lz = 5; int hx = 30, hy = 15, hz = 15; @@ -20,27 +22,7 @@ double Vin[nx * ny * nz]; //, 1.23; // define some slice indices for each dimension -static void 
MDRanges_setup(intptr_t n) -{ - double sum = 0; - clock_t t = clock(); - - for (intptr_t s = 0; s < n; ++s) - { - MD3 r_in = cspan_md(Vin, nx, ny, nz); - MD3 r_out = cspan_md(Vout, nx, ny, nz); - - r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); - r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - MD3_iter i = MD3_begin(&r_in); // can be iterated "flat". - MD3_iter o = MD3_begin(&r_out); - sum += Vin[s % nx]; - } - t = clock() - t; - printf("setup: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); -} - -static void TraditionalForLoop(intptr_t n) +static void Traditional_for_loop(intptr_t n) { clock_t t = clock(); double sum = 0; @@ -57,7 +39,7 @@ static void TraditionalForLoop(intptr_t n) } } t = clock() - t; - printf("forloop: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("forloop : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } static void MDRanges_nested_loop(intptr_t n) @@ -67,8 +49,6 @@ static void MDRanges_nested_loop(intptr_t n) MD3 r_out = cspan_md(Vout, nx, ny, nz); r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - - // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; } double sum = 0; for (intptr_t s = 0; s < n; ++s) { @@ -76,27 +56,25 @@ static void MDRanges_nested_loop(intptr_t n) for (int y = 0; y < r_in.shape[1]; ++y) { for (int z = 0; z < r_in.shape[2]; ++z) { - double d = *cspan_at(&r_in, x, y, z); - *cspan_at(&r_out, x, y, z) += d; + double d = *cspan_at(&r_in, x,y,z); + *cspan_at(&r_out, x,y,z) += d; sum += d; } } } } t = clock() - t; - printf("nested: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } static void MDRanges_loop_over_joined(intptr_t n) { + clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); MD3 r_out = cspan_md(Vout, nx, ny, nz); r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); r_out = 
cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - - // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; } double sum = 0; - clock_t t = clock(); for (intptr_t s = 0; s < n; ++s) { MD3_iter i = MD3_begin(&r_in); @@ -109,7 +87,7 @@ static void MDRanges_loop_over_joined(intptr_t n) } } t = clock() - t; - printf("joined: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } int main(void) @@ -118,8 +96,7 @@ int main(void) for (int i = 0; i < nx * ny * nz; ++i) Vin[i] = i + 1.23; - MDRanges_setup(n); - TraditionalForLoop(n); + Traditional_for_loop(n); MDRanges_nested_loop(n); MDRanges_loop_over_joined(n); } diff --git a/misc/examples/spans/submdspan.c b/misc/examples/spans/submdspan.c index fa0d5762..0752dfa1 100644 --- a/misc/examples/spans/submdspan.c +++ b/misc/examples/spans/submdspan.c @@ -3,11 +3,11 @@ #include #include -using_cspan3(span, double); // define span, span2, span3 +using_cspan3(span, double); // shorthand for defining span, span2, span3 // Set all elements of a rank-2 mdspan to zero. void zero_2d(span2 grid2d) { - c_static_assert(cspan_rank(&grid2d) == 2); + (void)c_static_assert(cspan_rank(&grid2d) == 2); for (int i = 0; i < grid2d.shape[0]; ++i) { for (int j = 0; j < grid2d.shape[1]; ++j) { *cspan_at(&grid2d, i,j) = 0; @@ -16,7 +16,7 @@ void zero_2d(span2 grid2d) { } void zero_surface(span3 grid3d) { - c_static_assert(cspan_rank(&grid3d) == 3); + (void)c_static_assert(cspan_rank(&grid3d) == 3); zero_2d(cspan_slice(span2, &grid3d, {0}, {c_ALL}, {c_ALL})); zero_2d(cspan_slice(span2, &grid3d, {c_ALL}, {0}, {c_ALL})); zero_2d(cspan_slice(span2, &grid3d, {c_ALL}, {c_ALL}, {0})); @@ -41,4 +41,4 @@ int main() { } puts(""); } -} \ No newline at end of file +} -- cgit v1.2.3 From 2d33308d36063f3726f3652b0b0cbe3668b8bc68 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Mon, 21 Aug 2023 18:31:49 +0200 Subject: Changed 'order' to 'layout' in cspan md. 
Neither the 'C' / 'F' convention from python, nor left / right from std::mdspan are great names => changed to c_ROWMAJOR / c_COLMAJOR like in matlab. --- docs/cspan_api.md | 18 +++++++++++------- include/stc/cspan.h | 23 ++++++++++++++--------- misc/examples/spans/matmult.c | 4 ++-- misc/examples/spans/mdspan.c | 2 +- 4 files changed, 28 insertions(+), 19 deletions(-) (limited to 'misc/examples/spans') diff --git a/docs/cspan_api.md b/docs/cspan_api.md index 39b97473..c3556dc3 100644 --- a/docs/cspan_api.md +++ b/docs/cspan_api.md @@ -44,12 +44,15 @@ SpanTypeN_iter SpanType_begin(const SpanTypeN* self); SpanTypeN_iter SpanType_end(const SpanTypeN* self); void SpanType_next(SpanTypeN_iter* it); -SpanTypeN cspan_md(ValueType* data, d1, d2, ...); // make a multi-dim cspan, row-major order. -SpanTypeN cspan_md_order(char order, ValueType* data, d1, d2, ...); // order='C': row-major, 'F': column-major (FORTRAN). + // make a multi-dim cspan +SpanTypeN cspan_md(ValueType* data, d1, d2, ...); // row-major +SpanTypeN cspan_md_layout(cspan_layout layout, ValueType* data, d1, d2, ...); - // transpose a md span (inverse axes). No changes to the underlying array. + // transpose a md span. Inverses layout and axes only. void cspan_transpose(const SpanTypeN* self); -bool cspan_is_order_F(const SpanTypeN* self); +cspan_layout cspan_get_layout(const SpanTypeN* self); +bool cspan_is_rowmajor(const SpanTypeN* self); +bool cspan_is_colmajor(const SpanTypeN* self); // create a subspan of input span rank. Like e.g. cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL}); SpanType cspan_subspan(const SpanType* span, intptr_t offset, intptr_t count); @@ -70,8 +73,9 @@ OutSpanN cspan_slice(TYPE OutSpanN, const SpanTypeM* parent, {x0,x1}, {y0 |:------------------|:----------------------------------------------------|:---------------------| | SpanTypeN | `struct { ValueType *data; uint32_t shape[N]; ..
}` | SpanType with rank N | | SpanTypeN`_value` | `ValueType` | The ValueType | -| `c_ALL` | Use with `cspan_slice()`. | Full extent | -| `c_END` | " | End of extent | +| `c_ALL` | `cspan_slice(&md, {1,3}, {c_ALL})` | Full extent | +| `c_END` | `cspan_slice(&md, {1,c_END}, {2,c_END})` | End of extent | +| `cspan_layout` | `enum { c_ROWMAJOR, c_COLMAJOR }` | Multi-dim layout | ## Example 1 @@ -182,7 +186,7 @@ void print_span(myspan2 ms) { int main(void) { int arr[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24}; - myspan3 ms3 = cspan_md_order('C', arr, 2, 3, 4); // row-major ('F' column-major) + myspan3 ms3 = cspan_md(arr, 2, 3, 4); // row-major layout myspan3 ss3 = cspan_slice(myspan3, &ms3, {c_ALL}, {0,3}, {2,c_END}); myspan2 a = cspan_submd3(&ss3, 1); myspan2 b = a; diff --git a/include/stc/cspan.h b/include/stc/cspan.h index 6f8de8ec..f9f3b02a 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -98,7 +98,7 @@ int demo2() { } \ STC_INLINE void Self##_next(Self##_iter* it) { \ int i, inc, done; \ - if (it->_s->stride.d[0] < it->_s->stride.d[RANK - 1]) i=0, inc=1; else i=RANK-1, inc=-1; \ + if (cspan_is_colmajor(it->_s)) i=0, inc=1; else i=RANK-1, inc=-1; \ it->ref += _cspan_next##RANK(it->pos, it->_s->shape, it->_s->stride.d, RANK, i, inc, &done); \ if (done) it->ref = NULL; \ } \ @@ -115,6 +115,7 @@ using_cspan_tuple(7); using_cspan_tuple(8); #define c_END -1 #define c_ALL 0,c_END +typedef enum {c_ROWMAJOR, c_COLMAJOR} cspan_layout; /* Use cspan_init() for static initialization only. c_init() for non-static init. */ #define cspan_init(SpanType, ...) 
\ @@ -132,7 +133,9 @@ using_cspan_tuple(7); using_cspan_tuple(8); #define cspan_size(self) _cspan_size((self)->shape, cspan_rank(self)) #define cspan_rank(self) c_arraylen((self)->shape) -#define cspan_is_order_F(self) ((self)->stride.d[0] < (self)->stride.d[cspan_rank(self) - 1]) +#define cspan_is_colmajor(self) ((self)->stride.d[0] < (self)->stride.d[cspan_rank(self) - 1]) +#define cspan_is_rowmajor(self) (!cspan_is_colmajor(self)) +#define cspan_get_layout(self) (cspan_is_colmajor(self) ? c_COLMAJOR : c_ROWMAJOR) #define cspan_index(self, ...) c_PASTE(cspan_idx_, c_NUMARGS(__VA_ARGS__))(self, __VA_ARGS__) #define cspan_at(self, ...) ((self)->data + cspan_index(self, __VA_ARGS__)) #define cspan_front(self) ((self)->data) @@ -165,10 +168,10 @@ using_cspan_tuple(7); using_cspan_tuple(8); #define cspan_submd4_4(self, x, y, z) \ {.data=cspan_at(self, x, y, z, 0), .shape={(self)->shape[3]}, .stride=(cspan_tuple1){.d={(self)->stride.d[3]}}} -#define cspan_md(array, ...) cspan_md_order('C', array, __VA_ARGS__) -#define cspan_md_order(order, array, ...) /* order='C' or 'F' */ \ +#define cspan_md(array, ...) cspan_md_layout(c_ROWMAJOR, array, __VA_ARGS__) +#define cspan_md_layout(layout, array, ...) 
\ {.data=array, .shape={__VA_ARGS__}, \ - .stride=*(c_PASTE(cspan_tuple, c_NUMARGS(__VA_ARGS__))*)_cspan_shape2stride(order, ((int32_t[]){__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))} + .stride=*(c_PASTE(cspan_tuple, c_NUMARGS(__VA_ARGS__))*)_cspan_shape2stride(layout, ((int32_t[]){__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))} #define cspan_transpose(self) \ _cspan_transpose((self)->shape, (self)->stride.d, cspan_rank(self)) @@ -225,7 +228,7 @@ STC_INLINE intptr_t _cspan_idxN(int rank, const int32_t shape[], const int32_t s } STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc, int* done); -#define _cspan_next1(pos, shape, stride, rank, i, inc, done) (*done = ++pos[0]==shape[0], stride[0]) +#define _cspan_next1(pos, shape, stride, rank, i, inc, done) (*done = ++pos[0]==shape[0], (void)(i|inc), stride[0]) #define _cspan_next3 _cspan_next2 #define _cspan_next4 _cspan_next2 #define _cspan_next5 _cspan_next2 @@ -237,7 +240,7 @@ STC_API intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, const int32_t shape[], const int32_t stride[], int rank, const int32_t a[][2]); -STC_API int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank); +STC_API int32_t* _cspan_shape2stride(cspan_layout layout, int32_t shape[], int rank); #endif // STC_CSPAN_H_INCLUDED /* --------------------- IMPLEMENTATION --------------------- */ @@ -246,6 +249,7 @@ STC_API int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank); STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc, int* done) { intptr_t off = stride[i]; ++pos[i]; + while (--rank && pos[i] == shape[i]) { pos[i] = 0; ++pos[i + inc]; off += stride[i + inc] - stride[i]*shape[i]; @@ -255,9 +259,9 @@ STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_ return off; } -STC_DEF int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank) { +STC_DEF 
int32_t* _cspan_shape2stride(cspan_layout layout, int32_t shape[], int rank) { int i, inc; - if (order == 'F') i = 0, inc = 1; + if (layout == c_COLMAJOR) i = 0, inc = 1; else i = rank - 1, inc = -1; int32_t k = 1, s1 = shape[i], s2; @@ -277,6 +281,7 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, intptr_t off = 0; int i = 0, oi = 0; int32_t end; + for (; i < rank; ++i) { off += stride[i]*a[i][0]; switch (a[i][1]) { diff --git a/misc/examples/spans/matmult.c b/misc/examples/spans/matmult.c index 35dad7a9..266fa121 100644 --- a/misc/examples/spans/matmult.c +++ b/misc/examples/spans/matmult.c @@ -70,8 +70,8 @@ int main(void) Values_push(&values, (crandf() - 0.5)*4.0); double out[D1*D2]; - Mat3 data = cspan_md_order('C', values.data, N, D1, D2); - OutMat c = cspan_md_order('C', out, D1, D2); + Mat3 data = cspan_md_layout(c_ROWMAJOR, values.data, N, D1, D2); + OutMat c = cspan_md_layout(c_ROWMAJOR, out, D1, D2); Mat2 a = cspan_submd3(&data, 0); double sum = 0.0; clock_t t = clock(); diff --git a/misc/examples/spans/mdspan.c b/misc/examples/spans/mdspan.c index db601850..630ffddb 100644 --- a/misc/examples/spans/mdspan.c +++ b/misc/examples/spans/mdspan.c @@ -9,7 +9,7 @@ int main(void) { double* data = c_new_n(double, nx*ny*nz); printf("\nMultidim span ms[5, 4, 3], fortran ordered"); - DSpan3 ms = cspan_md_order('F', data, nx, ny, nz); // Fortran, not 'C' + DSpan3 ms = cspan_md_layout(c_COLMAJOR, data, nx, ny, nz); int idx = 0; for (int i = 0; i < ms.shape[0]; ++i) -- cgit v1.2.3 From 263dbab626e6a99f7959a4c1716f0496906ee638 Mon Sep 17 00:00:00 2001 From: Tyge Lovset Date: Wed, 30 Aug 2023 08:24:54 +0200 Subject: Some simplifications. Added i_ndebug macro flag to disable assertions in cspan. 
--- include/stc/ccommon.h | 6 ++---- include/stc/cspan.h | 42 +++++++++++++++--------------------------- misc/examples/spans/multidim.c | 4 ++++ 3 files changed, 21 insertions(+), 31 deletions(-) (limited to 'misc/examples/spans') diff --git a/include/stc/ccommon.h b/include/stc/ccommon.h index 6e90f75a..ad92212a 100644 --- a/include/stc/ccommon.h +++ b/include/stc/ccommon.h @@ -69,16 +69,14 @@ typedef long long _llong; #define c_new(T, ...) ((T*)memcpy(malloc(sizeof(T)), ((T[]){__VA_ARGS__}), sizeof(T))) #define c_LITERAL(T) (T) #endif -#define c_new_n(T, n) ((T*)malloc(sizeof(T)*(size_t)(n))) +#define c_new_n(T, n) ((T*)malloc(sizeof(T)*c_i2u(n))) #define c_malloc(sz) malloc(c_i2u(sz)) #define c_calloc(n, sz) calloc(c_i2u(n), c_i2u(sz)) #define c_realloc(p, sz) realloc(p, c_i2u(sz)) #define c_free(p) free(p) #define c_delete(T, ptr) do { T *_tp = ptr; T##_drop(_tp); free(_tp); } while (0) -#define c_static_assert(...) c_MACRO_OVERLOAD(c_static_assert, __VA_ARGS__) -#define c_static_assert_1(b) ((int)(0*sizeof(int[(b) ? 1 : -1]))) -#define c_static_assert_2(b, m) c_static_assert_1(b) +#define c_static_assert(expr) (1 ? 0 : (int)sizeof(int[(expr) ? 1 : -1])) #if defined STC_NDEBUG || defined NDEBUG #define c_assert(expr) ((void)0) #else diff --git a/include/stc/cspan.h b/include/stc/cspan.h index 8a422dad..3f2b300f 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -63,6 +63,12 @@ int demo2() { #include "priv/linkage.h" #include "ccommon.h" +#ifdef i_ndebug + #define cspan_assert(x) ((void)0) +#else + #define cspan_assert(x) c_assert(x) +#endif + #define using_cspan(...) 
c_MACRO_OVERLOAD(using_cspan, __VA_ARGS__) #define using_cspan_2(Self, T) \ using_cspan_3(Self, T, 1); \ @@ -85,7 +91,7 @@ int demo2() { const int rank, const int32_t a[][2]) { \ Self s; int outrank; \ s.data = d + _cspan_slice(s.shape, s.stride.d, &outrank, shape, stri, rank, a); \ - c_assert(outrank == RANK); \ + cspan_assert(outrank == RANK); \ return s; \ } \ STC_INLINE Self##_iter Self##_begin(const Self* self) { \ @@ -135,10 +141,12 @@ typedef enum {c_ROWMAJOR, c_COLMAJOR} cspan_layout; #define cspan_is_colmajor(self) ((self)->stride.d[0] < (self)->stride.d[cspan_rank(self) - 1]) #define cspan_is_rowmajor(self) (!cspan_is_colmajor(self)) #define cspan_get_layout(self) (cspan_is_colmajor(self) ? c_COLMAJOR : c_ROWMAJOR) -#define cspan_index(self, ...) c_PASTE(cspan_idx_, c_NUMARGS(__VA_ARGS__))(self, __VA_ARGS__) #define cspan_at(self, ...) ((self)->data + cspan_index(self, __VA_ARGS__)) #define cspan_front(self) ((self)->data) #define cspan_back(self) ((self)->data + cspan_size(self) - 1) +#define cspan_index(self, ...) \ + (_cspan_index(c_NUMARGS(__VA_ARGS__), (self)->shape, (self)->stride.d, (int32_t[]){__VA_ARGS__}) + \ + c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__))) // general // cspan_subspanX: (X <= 3) optimized. Similar to cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL}); #define cspan_subspan(self, offset, count) \ @@ -183,17 +191,6 @@ typedef enum {c_ROWMAJOR, c_COLMAJOR} cspan_layout; /* ------------------- PRIVAT DEFINITIONS ------------------- */ -// cspan_index() helpers: -#define cspan_idx_1 cspan_idx_3 -#define cspan_idx_2 cspan_idx_3 -#define cspan_idx_3(self, ...) \ - c_PASTE(_cspan_idx, c_NUMARGS(__VA_ARGS__))((self)->shape, (self)->stride, __VA_ARGS__) // small/fast -#define cspan_idx_4(self, ...) 
\ - (_cspan_idxN(c_NUMARGS(__VA_ARGS__), (self)->shape, (self)->stride.d, (int32_t[]){__VA_ARGS__}) + \ - c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__))) // general -#define cspan_idx_5 cspan_idx_4 -#define cspan_idx_6 cspan_idx_4 - STC_INLINE intptr_t _cspan_size(const int32_t shape[], int rank) { intptr_t sz = shape[0]; while (--rank > 0) sz *= shape[rank]; @@ -207,20 +204,10 @@ STC_INLINE void _cspan_transpose(int32_t shape[], int32_t stride[], int rank) { } } -STC_INLINE intptr_t _cspan_idx1(const int32_t shape[1], const cspan_tuple1 stri, int32_t x) - { c_assert(c_LTu(x, shape[0])); return (intptr_t)stri.d[0]*x; } - -STC_INLINE intptr_t _cspan_idx2(const int32_t shape[2], const cspan_tuple2 stri, int32_t x, int32_t y) - { c_assert(c_LTu(x, shape[0]) && c_LTu(y, shape[1])); return (intptr_t)stri.d[0]*x + stri.d[1]*y; } - -STC_INLINE intptr_t _cspan_idx3(const int32_t shape[3], const cspan_tuple3 stri, int32_t x, int32_t y, int32_t z) { - c_assert(c_LTu(x, shape[0]) && c_LTu(y, shape[1]) && c_LTu(z, shape[2])); - return (intptr_t)stri.d[0]*x + stri.d[1]*y + stri.d[2]*z; -} -STC_INLINE intptr_t _cspan_idxN(int rank, const int32_t shape[], const int32_t stride[], const int32_t a[]) { +STC_INLINE intptr_t _cspan_index(int rank, const int32_t shape[], const int32_t stride[], const int32_t a[]) { intptr_t off = 0; while (rank--) { - c_assert(c_LTu(a[rank], shape[rank])); + cspan_assert(c_LTu(a[rank], shape[rank])); off += stride[rank]*a[rank]; } return off; @@ -283,13 +270,13 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, for (; i < rank; ++i) { off += stride[i]*a[i][0]; switch (a[i][1]) { - case 0: c_assert(c_LTu(a[i][0], shape[i])); continue; + case 0: cspan_assert(c_LTu(a[i][0], shape[i])); continue; case -1: end = shape[i]; break; default: end = a[i][1]; } oshape[oi] = end - a[i][0]; ostride[oi] = stride[i]; - c_assert(c_LTu(0, oshape[oi]) & !c_LTu(shape[i], end)); + cspan_assert(c_LTu(0, oshape[oi]) & 
!c_LTu(shape[i], end)); ++oi; } *orank = oi; @@ -297,6 +284,7 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, } #endif +#undef i_ndebug #undef i_opt #undef i_header #undef i_implement diff --git a/misc/examples/spans/multidim.c b/misc/examples/spans/multidim.c index ebc05a70..70fda7e2 100644 --- a/misc/examples/spans/multidim.c +++ b/misc/examples/spans/multidim.c @@ -66,6 +66,10 @@ int main(void) puts("\nOriginal ms3 span with updated data:"); print3d(ms3); + + puts("col = ms3[1, :, 2]"); + ispan col = cspan_slice(ispan, &ms3, {1}, {c_ALL}, {2}); + c_foreach (i, ispan, col) printf(" %d", *i.ref); puts(""); cstack_int_drop(&v); -- cgit v1.2.3 From 80cd2adc2cd008aeee9f799f2dd5042f42b4ec82 Mon Sep 17 00:00:00 2001 From: Tyge Løvset Date: Wed, 30 Aug 2023 17:16:03 +0200 Subject: Smaller updates. --- include/stc/cspan.h | 20 ++++++---------- misc/benchmarks/various/cspan_bench.c | 43 ++++++++++++++++++----------------- misc/examples/spans/matmult.c | 22 +++++++++--------- src/libstc.c | 5 ++-- 4 files changed, 42 insertions(+), 48 deletions(-) (limited to 'misc/examples/spans') diff --git a/include/stc/cspan.h b/include/stc/cspan.h index 3f2b300f..e72bb97a 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -63,12 +63,6 @@ int demo2() { #include "priv/linkage.h" #include "ccommon.h" -#ifdef i_ndebug - #define cspan_assert(x) ((void)0) -#else - #define cspan_assert(x) c_assert(x) -#endif - #define using_cspan(...) 
c_MACRO_OVERLOAD(using_cspan, __VA_ARGS__) #define using_cspan_2(Self, T) \ using_cspan_3(Self, T, 1); \ @@ -91,7 +85,7 @@ int demo2() { const int rank, const int32_t a[][2]) { \ Self s; int outrank; \ s.data = d + _cspan_slice(s.shape, s.stride.d, &outrank, shape, stri, rank, a); \ - cspan_assert(outrank == RANK); \ + c_assert(outrank == RANK); \ return s; \ } \ STC_INLINE Self##_iter Self##_begin(const Self* self) { \ @@ -193,7 +187,7 @@ typedef enum {c_ROWMAJOR, c_COLMAJOR} cspan_layout; STC_INLINE intptr_t _cspan_size(const int32_t shape[], int rank) { intptr_t sz = shape[0]; - while (--rank > 0) sz *= shape[rank]; + while (--rank) sz *= shape[rank]; return sz; } @@ -207,14 +201,15 @@ STC_INLINE void _cspan_transpose(int32_t shape[], int32_t stride[], int rank) { STC_INLINE intptr_t _cspan_index(int rank, const int32_t shape[], const int32_t stride[], const int32_t a[]) { intptr_t off = 0; while (rank--) { - cspan_assert(c_LTu(a[rank], shape[rank])); + c_assert(c_LTu(a[rank], shape[rank])); off += stride[rank]*a[rank]; } return off; } -STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done); #define _cspan_next1(pos, shape, stride, rank, done) (*done = ++pos[0]==shape[0], stride[0]) +STC_API intptr_t + _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done); #define _cspan_next3 _cspan_next2 #define _cspan_next4 _cspan_next2 #define _cspan_next5 _cspan_next2 @@ -270,13 +265,13 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, for (; i < rank; ++i) { off += stride[i]*a[i][0]; switch (a[i][1]) { - case 0: cspan_assert(c_LTu(a[i][0], shape[i])); continue; + case 0: c_assert(c_LTu(a[i][0], shape[i])); continue; case -1: end = shape[i]; break; default: end = a[i][1]; } oshape[oi] = end - a[i][0]; ostride[oi] = stride[i]; - cspan_assert(c_LTu(0, oshape[oi]) & !c_LTu(shape[i], end)); + c_assert((oshape[oi] > 0) & !c_LTu(shape[i], end)); 
++oi; } *orank = oi; @@ -284,7 +279,6 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, } #endif -#undef i_ndebug #undef i_opt #undef i_header #undef i_implement diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c index 3b1c3132..bfc0ead3 100644 --- a/misc/benchmarks/various/cspan_bench.c +++ b/misc/benchmarks/various/cspan_bench.c @@ -42,7 +42,7 @@ static void Traditional_for_loop(intptr_t n) printf("forloop : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -static void MDRanges_nested_loop(intptr_t n) +static void MDRanges_loop_over_joined(intptr_t n) { clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); @@ -52,22 +52,20 @@ static void MDRanges_nested_loop(intptr_t n) double sum = 0; for (intptr_t s = 0; s < n; ++s) { - for (int x = 0; x < r_in.shape[0]; ++x) { - for (int y = 0; y < r_in.shape[1]; ++y) { - for (int z = 0; z < r_in.shape[2]; ++z) - { - double d = *cspan_at(&r_in, x,y,z); - *cspan_at(&r_out, x,y,z) += d; - sum += d; - } - } + MD3_iter i = MD3_begin(&r_in); + MD3_iter o = MD3_begin(&r_out); + + for (; i.ref; MD3_next(&i), MD3_next(&o)) + { + *o.ref += *i.ref; + sum += *i.ref; } } t = clock() - t; - printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); + printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -static void MDRanges_loop_over_joined(intptr_t n) +static void MDRanges_nested_loop(intptr_t n) { clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); @@ -77,19 +75,22 @@ static void MDRanges_loop_over_joined(intptr_t n) double sum = 0; for (intptr_t s = 0; s < n; ++s) { - MD3_iter i = MD3_begin(&r_in); - MD3_iter o = MD3_begin(&r_out); - - for (; i.ref; MD3_next(&i), MD3_next(&o)) - { - *o.ref += *i.ref; - sum += *i.ref; + for (int x = 0; x < r_in.shape[0]; ++x) { + for (int y = 0; y < r_in.shape[1]; ++y) { + for (int z = 0; z < r_in.shape[2]; ++z) + { + double d = *cspan_at(&r_in, x,y,z); + *cspan_at(&r_out, x,y,z) += d; + sum += 
d; + } + } } } t = clock() - t; - printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); + printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } + int main(void) { intptr_t n = 100000; @@ -97,6 +98,6 @@ int main(void) Vin[i] = i + 1.23; Traditional_for_loop(n); - MDRanges_nested_loop(n); MDRanges_loop_over_joined(n); + MDRanges_nested_loop(n); } diff --git a/misc/examples/spans/matmult.c b/misc/examples/spans/matmult.c index 266fa121..ec992ff9 100644 --- a/misc/examples/spans/matmult.c +++ b/misc/examples/spans/matmult.c @@ -37,7 +37,7 @@ void base_case_matrix_product(Mat2 A, Mat2 B, OutMat C) void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C) { // Some hardware-dependent constant - enum {recursion_threshold = 16}; + enum {recursion_threshold = 32}; if (C.shape[0] <= recursion_threshold || C.shape[1] <= recursion_threshold) { base_case_matrix_product(A, B, C); } else { @@ -63,28 +63,28 @@ void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C) int main(void) { - enum {N = 10, D1 = 256, D2 = D1}; + enum {N = 10, D = 256}; Values values = {0}; - for (int i=0; i < N*D1*D2; ++i) + for (int i=0; i < N*D*D; ++i) Values_push(&values, (crandf() - 0.5)*4.0); - double out[D1*D2]; - Mat3 data = cspan_md_layout(c_ROWMAJOR, values.data, N, D1, D2); - OutMat c = cspan_md_layout(c_ROWMAJOR, out, D1, D2); + double out[D*D]; + Mat3 data = cspan_md_layout(c_ROWMAJOR, values.data, N, D, D); + OutMat c = cspan_md_layout(c_COLMAJOR, out, D, D); Mat2 a = cspan_submd3(&data, 0); - double sum = 0.0; - clock_t t = clock(); + clock_t t = clock(); for (int i=1; i= 201112L -# define i_implement # include "../include/c11/fmt.h" #endif -- cgit v1.2.3