diff options
| author | tylov <[email protected]> | 2023-07-11 16:36:55 +0200 |
|---|---|---|
| committer | tylov <[email protected]> | 2023-07-11 16:36:55 +0200 |
| commit | afc968975a057f5b2653e3cfa51ef2eff83a8d5b (patch) | |
| tree | f0100b3ae35cd2a77a9648812e6e2ce198676f47 | |
| parent | 8debe47bc014c41b6cf8082dcef4b87e4ef29cfa (diff) | |
| download | STC-modified-afc968975a057f5b2653e3cfa51ef2eff83a8d5b.tar.gz STC-modified-afc968975a057f5b2653e3cfa51ef2eff83a8d5b.zip | |
Internal updates and doc reorg.
| -rw-r--r-- | docs/cspan_api.md | 41 | ||||
| -rw-r--r-- | include/stc/cspan.h | 72 | ||||
| -rw-r--r-- | misc/benchmarks/various/cspan_bench.c | 14 | ||||
| -rw-r--r-- | misc/benchmarks/various/string_bench_STD.cpp | 1 | ||||
| -rw-r--r-- | misc/examples/multidim.c | 1 |
5 files changed, 67 insertions, 62 deletions
diff --git a/docs/cspan_api.md b/docs/cspan_api.md index 58b06af0..e2636086 100644 --- a/docs/cspan_api.md +++ b/docs/cspan_api.md @@ -29,7 +29,11 @@ a compile error is issued. Runtime bounds checks are enabled by default (define SpanType cspan_init(T SpanType, {v1, v2, ...}); // make a 1-d cspan from values SpanType cspan_from(STCContainer* cnt); // make a 1-d cspan from compatible STC container SpanType cspan_from_array(ValueType array[]); // make a 1-d cspan from C array -SpanTypeN cspan_md(char order, ValueType* data, d1, d2, ...); // make a multi-dim cspan. order: 'C' or 'F' (Fortran) + + // make a subspan of input span rank. Like e.g. cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL}); +SpanType cspan_subspan(const SpanType* span, intptr_t offset, intptr_t count); +SpanType2 cspan_subspan2(const SpanType2* span, intptr_t offset, intptr_t count); +SpanType3 cspan_subspan3(const SpanType3* span, intptr_t offset, intptr_t count); intptr_t cspan_size(const SpanTypeN* self); // return number of elements intptr_t cspan_rank(const SpanTypeN* self); // dimensions; compile time constant @@ -39,35 +43,30 @@ ValueType* cspan_at(const SpanTypeN* self, intptr_t x, ...); // #args mus ValueType* cspan_front(const SpanTypeN* self); ValueType* cspan_back(const SpanTypeN* self); - // general index slicing to create a subspan. - // {i} reduces rank. {i,c_END} slice to end. {c_ALL} use the full extent. -SpanTypeR cspan_slice(T SpanTypeR, const SpanTypeN* self, {x0,x1}, {y0,y1}.., {N0,N1}); +SpanTypeN_iter SpanType_begin(const SpanTypeN* self); +SpanTypeN_iter SpanType_end(const SpanTypeN* self); +void SpanType_next(SpanTypeN_iter* it); +SpanTypeN cspan_md(char order, ValueType* data, d1, d2, ...); // make a multi-dim cspan. order: 'C' or 'F' (Fortran) // transpose the md span (inverse axes). no changes to the underlying array. void cspan_transpose(const SpanTypeN* self); - - // create a subspan of lower rank. Like e.g. cspan_slice(Span2, &ms4, {x}, {y}, {c_ALL}, {c_ALL}); -SpanType cspan_submd2(const SpanType2* self, intptr_t x); // return a 1d subspan from a 2d span. -SpanTypeN cspan_submd3(const SpanType3* self, intptr_t x, ...); // return a 1d or 2d subspan from a 3d span. -SpanTypeN cspan_submd4(const SpanType4* self, intptr_t x, ...); // number of args determines rank of output span. - // create a subspan of same rank. Like e.g. cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL}); -SpanType cspan_subspan(const SpanType* self, intptr_t offset, intptr_t count); -SpanType2 cspan_subspan2(const SpanType2* self, intptr_t offset, intptr_t count); -SpanType3 cspan_subspan3(const SpanType3* self, intptr_t offset, intptr_t count); + // create a sub md span of lower rank. Like e.g. cspan_slice(Span2, &ms4, {x}, {y}, {c_ALL}, {c_ALL}); +OutSpan1 cspan_submd2(const SpanType2* parent, intptr_t x); // return a 1d subspan from a 2d span. +OutSpanN cspan_submd3(const SpanType3* parent, intptr_t x, ...); // return a 1d or 2d subspan from a 3d span. +OutSpanN cspan_submd4(const SpanType4* parent, intptr_t x, ...); // number of args decides rank of output span. -SpanTypeN_iter SpanType_begin(const SpanTypeN* self); -SpanTypeN_iter SpanType_end(const SpanTypeN* self); -void SpanType_next(SpanTypeN_iter* it); + // general slicing of an md span. + // {i}: reduce rank. {i,c_END}: slice to end. {c_ALL}: use full extent. +OutSpanN cspan_slice(TYPE OutSpanN, const SpanTypeN* parent, {x0,x1}, {y0,y1}.., {N0,N1}); ``` -## Types - -| Type name | Type definition | Used to represent... | +## TypesPd +| Type name | Type definition / usage | Used to represent... | |:------------------|:----------------------------------------------------|:---------------------| | SpanTypeN | `struct { ValueType *data; uint32_t shape[N]; .. }` | SpanType with rank N | | SpanTypeN`_value` | `ValueType` | The ValueType | -| `c_ALL` | | Full extent | -| `c_END` | | End of extent | +| `c_ALL` | Use with `cspan_slice()`. | Full extent | +| `c_END` | " | End of extent | ## Example 1 diff --git a/include/stc/cspan.h b/include/stc/cspan.h index 7a0e9c8d..027b5275 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -107,58 +107,37 @@ int demo2() { #define using_cspan2(Self, T) using_cspan_3(Self, T, 1); using_cspan_3(Self##2, T, 2) #define using_cspan3(Self, T) using_cspan2(Self, T); using_cspan_3(Self##3, T, 3) #define using_cspan4(Self, T) using_cspan3(Self, T); using_cspan_3(Self##4, T, 4) -typedef struct { int32_t d[1]; } cspan_tuple1; -typedef struct { int32_t d[2]; } cspan_tuple2; -typedef struct { int32_t d[3]; } cspan_tuple3; -typedef struct { int32_t d[4]; } cspan_tuple4; -typedef struct { int32_t d[5]; } cspan_tuple5; -typedef struct { int32_t d[6]; } cspan_tuple6; +#define using_cspan_tuple(N) typedef struct { int32_t d[N]; } cspan_tuple##N +using_cspan_tuple(1); using_cspan_tuple(2); +using_cspan_tuple(3); using_cspan_tuple(4); +using_cspan_tuple(5); using_cspan_tuple(6); +using_cspan_tuple(7); using_cspan_tuple(8); + #define c_END -1 #define c_ALL 0,-1 -#define cspan_md(order, array, ...) \ - {.data=array, .shape={__VA_ARGS__}, \ - .stride=*(c_PASTE(cspan_tuple, c_NUMARGS(__VA_ARGS__))*)_cspan_shape2stride(order, ((int32_t[]){__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))} - -#define cspan_transpose(self) \ - _cspan_transpose((self)->shape, (self)->stride.d, cspan_rank(self)) - /* Use cspan_init() for static initialization only. c_init() for non-static init. */ #define cspan_init(SpanType, ...) \ {.data=(SpanType##_value[])__VA_ARGS__, .shape={sizeof((SpanType##_value[])__VA_ARGS__)/sizeof(SpanType##_value)}, .stride={.d={1}}} -#define cspan_slice(OutSpan, parent, ...) \ - OutSpan##_slice_((parent)->data, (parent)->shape, (parent)->stride.d, cspan_rank(parent) + \ - c_static_assert(cspan_rank(parent) == sizeof((int32_t[][2]){__VA_ARGS__})/sizeof(int32_t[2])), \ - (const int32_t[][2]){__VA_ARGS__}) - /* create a cspan from a cvec, cstack, cdeq, cqueue, or cpque (heap) */ #define cspan_from(container) \ {.data=(container)->data, .shape={(int32_t)(container)->_len}, .stride={.d={1}}} #define cspan_from_array(array) \ - {.data=(array) + c_static_assert(sizeof(array) != sizeof(void*)), .shape={c_arraylen(array)}, .stride={.d={1}}} + {.data=(array), .shape={c_arraylen(array)}, .stride={.d={1}}} #define cspan_size(self) _cspan_size((self)->shape, cspan_rank(self)) #define cspan_rank(self) c_arraylen((self)->shape) #define cspan_is_order_F(self) ((self)->stride.d[0] < (self)->stride.d[cspan_rank(self) - 1]) #define cspan_index(self, ...) c_PASTE(cspan_idx_, c_NUMARGS(__VA_ARGS__))(self, __VA_ARGS__) -#define cspan_idx_1 cspan_idx_3 -#define cspan_idx_2 cspan_idx_3 -#define cspan_idx_3(self, ...) \ - c_PASTE(_cspan_idx, c_NUMARGS(__VA_ARGS__))((self)->shape, (self)->stride, __VA_ARGS__) // small/fast -#define cspan_idx_4(self, ...) \ - (_cspan_idxN(c_NUMARGS(__VA_ARGS__), (self)->shape, (self)->stride.d, (int32_t[]){__VA_ARGS__}) + \ - c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__))) // general -#define cspan_idx_5 cspan_idx_4 -#define cspan_idx_6 cspan_idx_4 #define cspan_at(self, ...) ((self)->data + cspan_index(self, __VA_ARGS__)) #define cspan_front(self) ((self)->data) #define cspan_back(self) ((self)->data + cspan_size(self) - 1) -// cspan_subspanN. (N<=3) Optimized, same as e.g. cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL}); +// cspan_subspanX: (X <= 3) optimized. Similar to cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL}); #define cspan_subspan(self, offset, count) \ {.data=cspan_at(self, offset), .shape={count}, .stride=(self)->stride} #define cspan_subspan2(self, offset, count) \ @@ -166,16 +145,17 @@ typedef struct { int32_t d[6]; } cspan_tuple6; #define cspan_subspan3(self, offset, count) \ {.data=cspan_at(self, offset, 0, 0), .shape={count, (self)->shape[1], (self)->shape[2]}, .stride=(self)->stride} -// cspan_submdN: reduce rank (N<=4) Optimized, same as e.g. cspan_slice(Span2, &ms4, {x}, {y}, {c_ALL}, {c_ALL}); -#define cspan_submd4(...) c_MACRO_OVERLOAD(cspan_submd4, __VA_ARGS__) -#define cspan_submd3(...) c_MACRO_OVERLOAD(cspan_submd3, __VA_ARGS__) + +// cspan_submd(): Reduce rank (N <= 4) Optimized, same as e.g. cspan_slice(Span2, &ms4, {x}, {y}, {c_ALL}, {c_ALL}); #define cspan_submd2(self, x) \ {.data=cspan_at(self, x, 0), .shape={(self)->shape[1]}, .stride={.d={(self)->stride.d[1]}}} +#define cspan_submd3(...) c_MACRO_OVERLOAD(cspan_submd3, __VA_ARGS__) #define cspan_submd3_2(self, x) \ {.data=cspan_at(self, x, 0, 0), .shape={(self)->shape[1], (self)->shape[2]}, \ .stride={.d={(self)->stride.d[1], (self)->stride.d[2]}}} #define cspan_submd3_3(self, x, y) \ {.data=cspan_at(self, x, y, 0), .shape={(self)->shape[2]}, .stride={.d={(self)->stride.d[2]}}} +#define cspan_submd4(...) c_MACRO_OVERLOAD(cspan_submd4, __VA_ARGS__) #define cspan_submd4_2(self, x) \ {.data=cspan_at(self, x, 0, 0, 0), .shape={(self)->shape[1], (self)->shape[2], (self)->shape[3]}, \ .stride={.d={(self)->stride.d[1], (self)->stride.d[2], (self)->stride.d[3]}}} @@ -185,7 +165,33 @@ typedef struct { int32_t d[6]; } cspan_tuple6; #define cspan_submd4_4(self, x, y, z) \ {.data=cspan_at(self, x, y, z, 0), .shape={(self)->shape[3]}, .stride={.d={(self)->stride.d[3]}}} -// private definitions: + +#define cspan_md(order, array, ...) \ + {.data=array, .shape={__VA_ARGS__}, \ + .stride=*(c_PASTE(cspan_tuple, c_NUMARGS(__VA_ARGS__))*)_cspan_shape2stride(order, ((int32_t[]){__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))} + +#define cspan_transpose(self) \ + _cspan_transpose((self)->shape, (self)->stride.d, cspan_rank(self)) + + +// General slicing function; +#define cspan_slice(OutSpan, parent, ...) \ + OutSpan##_slice_((parent)->data, (parent)->shape, (parent)->stride.d, cspan_rank(parent) + \ + c_static_assert(cspan_rank(parent) == sizeof((int32_t[][2]){__VA_ARGS__})/sizeof(int32_t[2])), \ + (const int32_t[][2]){__VA_ARGS__}) + +// ----------- private definitions ------------ + +// cspan_index() helpers: +#define cspan_idx_1 cspan_idx_3 +#define cspan_idx_2 cspan_idx_3 +#define cspan_idx_3(self, ...) \ + c_PASTE(_cspan_idx, c_NUMARGS(__VA_ARGS__))((self)->shape, (self)->stride, __VA_ARGS__) // small/fast +#define cspan_idx_4(self, ...) \ + (_cspan_idxN(c_NUMARGS(__VA_ARGS__), (self)->shape, (self)->stride.d, (int32_t[]){__VA_ARGS__}) + \ + c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__))) // general +#define cspan_idx_5 cspan_idx_4 +#define cspan_idx_6 cspan_idx_4 STC_INLINE intptr_t _cspan_size(const int32_t shape[], int rank) { intptr_t sz = shape[0]; diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c index 6ca7425d..e724bdbd 100644 --- a/misc/benchmarks/various/cspan_bench.c +++ b/misc/benchmarks/various/cspan_bench.c @@ -12,8 +12,8 @@ enum { nz = 64 }; int lx = 15, ly = 10, lz = 5; -int hx = 20, hy = 15, hz = 15; -intptr_t n = 1000000; +int hx = 30, hy = 15, hz = 15; +intptr_t n = 100000; // define the contents of two nx x ny x nz arrays in and out double Vout[nx * ny * nz]; @@ -49,10 +49,10 @@ static void TraditionalForLoop(intptr_t state) for (int s = 0; s < state; ++s) { for (int x = lx; x < hx; ++x) { for (int y = ly; y < hy; ++y) { - for (int z = lz; z < hz; ++z) - { - double d = Vin[nz*(ny*x + y) + z]; - Vout[nz*(ny*x + y) + z] += d; + for (int z = lz; z < hz; ++z) { + int i = nz*(ny*x + y) + z; + double d = Vin[i]; + Vout[i] += d; sum += d; } } @@ -64,13 +64,13 @@ static void TraditionalForLoop(intptr_t state) static void MDRanges_nested_loop(intptr_t state) { + clock_t t = clock(); MD3 r_in = cspan_md('C', Vin, nx, ny, nz); MD3 r_out = cspan_md('C', Vout, nx, ny, nz); r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; } - clock_t t = clock(); double sum = 0; for (intptr_t s = 0; s < state; ++s) { diff --git a/misc/benchmarks/various/string_bench_STD.cpp b/misc/benchmarks/various/string_bench_STD.cpp index 8bb87937..07934948 100644 --- a/misc/benchmarks/various/string_bench_STD.cpp +++ b/misc/benchmarks/various/string_bench_STD.cpp @@ -12,6 +12,7 @@ #include <unordered_map> #define i_static #include <stc/cstr.h> +#include <stc/algo/raii.h> std::vector<std::string> read_file(const char* name) { diff --git a/misc/examples/multidim.c b/misc/examples/multidim.c index df8f485d..43c21443 100644 --- a/misc/examples/multidim.c +++ b/misc/examples/multidim.c @@ -28,7 +28,6 @@ int main() } puts("ss3 = ms3[:, 1:3, 1:3]"); ispan3 ss3 = ms3; - //cspan_slice(&ss3, {c_ALL}, {1,3}, {1,3}); ss3 = cspan_slice(ispan3, &ms3, {c_ALL}, {1,3}, {1,3}); for (int i=0; i != ss3.shape[0]; i++) { |
