summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author tylov <[email protected]> 2023-07-11 16:36:55 +0200
committer tylov <[email protected]> 2023-07-11 16:36:55 +0200
commit afc968975a057f5b2653e3cfa51ef2eff83a8d5b (patch)
tree f0100b3ae35cd2a77a9648812e6e2ce198676f47
parent 8debe47bc014c41b6cf8082dcef4b87e4ef29cfa (diff)
download STC-modified-afc968975a057f5b2653e3cfa51ef2eff83a8d5b.tar.gz
STC-modified-afc968975a057f5b2653e3cfa51ef2eff83a8d5b.zip
Internal updates and doc reorg.
-rw-r--r-- docs/cspan_api.md 41
-rw-r--r-- include/stc/cspan.h 72
-rw-r--r-- misc/benchmarks/various/cspan_bench.c 14
-rw-r--r-- misc/benchmarks/various/string_bench_STD.cpp 1
-rw-r--r-- misc/examples/multidim.c 1
5 files changed, 67 insertions, 62 deletions
diff --git a/docs/cspan_api.md b/docs/cspan_api.md
index 58b06af0..e2636086 100644
--- a/docs/cspan_api.md
+++ b/docs/cspan_api.md
@@ -29,7 +29,11 @@ a compile error is issued. Runtime bounds checks are enabled by default (define
SpanType cspan_init(T SpanType, {v1, v2, ...}); // make a 1-d cspan from values
SpanType cspan_from(STCContainer* cnt); // make a 1-d cspan from compatible STC container
SpanType cspan_from_array(ValueType array[]); // make a 1-d cspan from C array
-SpanTypeN cspan_md(char order, ValueType* data, d1, d2, ...); // make a multi-dim cspan. order: 'C' or 'F' (Fortran)
+
+ // make a subspan of the same rank as the input span. Like e.g. cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL});
+SpanType cspan_subspan(const SpanType* span, intptr_t offset, intptr_t count);
+SpanType2 cspan_subspan2(const SpanType2* span, intptr_t offset, intptr_t count);
+SpanType3 cspan_subspan3(const SpanType3* span, intptr_t offset, intptr_t count);
intptr_t cspan_size(const SpanTypeN* self); // return number of elements
intptr_t cspan_rank(const SpanTypeN* self); // dimensions; compile time constant
@@ -39,35 +43,30 @@ ValueType* cspan_at(const SpanTypeN* self, intptr_t x, ...); // #args mus
ValueType* cspan_front(const SpanTypeN* self);
ValueType* cspan_back(const SpanTypeN* self);
- // general index slicing to create a subspan.
- // {i} reduces rank. {i,c_END} slice to end. {c_ALL} use the full extent.
-SpanTypeR cspan_slice(T SpanTypeR, const SpanTypeN* self, {x0,x1}, {y0,y1}.., {N0,N1});
+SpanTypeN_iter SpanType_begin(const SpanTypeN* self);
+SpanTypeN_iter SpanType_end(const SpanTypeN* self);
+void SpanType_next(SpanTypeN_iter* it);
+SpanTypeN cspan_md(char order, ValueType* data, d1, d2, ...); // make a multi-dim cspan. order: 'C' or 'F' (Fortran)
// transpose the md span (inverse axes). no changes to the underlying array.
void cspan_transpose(const SpanTypeN* self);
-
- // create a subspan of lower rank. Like e.g. cspan_slice(Span2, &ms4, {x}, {y}, {c_ALL}, {c_ALL});
-SpanType cspan_submd2(const SpanType2* self, intptr_t x); // return a 1d subspan from a 2d span.
-SpanTypeN cspan_submd3(const SpanType3* self, intptr_t x, ...); // return a 1d or 2d subspan from a 3d span.
-SpanTypeN cspan_submd4(const SpanType4* self, intptr_t x, ...); // number of args determines rank of output span.
- // create a subspan of same rank. Like e.g. cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL});
-SpanType cspan_subspan(const SpanType* self, intptr_t offset, intptr_t count);
-SpanType2 cspan_subspan2(const SpanType2* self, intptr_t offset, intptr_t count);
-SpanType3 cspan_subspan3(const SpanType3* self, intptr_t offset, intptr_t count);
+ // create a sub md span of lower rank. Like e.g. cspan_slice(Span2, &ms4, {x}, {y}, {c_ALL}, {c_ALL});
+OutSpan1 cspan_submd2(const SpanType2* parent, intptr_t x); // return a 1d subspan from a 2d span.
+OutSpanN cspan_submd3(const SpanType3* parent, intptr_t x, ...); // return a 1d or 2d subspan from a 3d span.
+OutSpanN cspan_submd4(const SpanType4* parent, intptr_t x, ...); // number of args decides rank of output span.
-SpanTypeN_iter SpanType_begin(const SpanTypeN* self);
-SpanTypeN_iter SpanType_end(const SpanTypeN* self);
-void SpanType_next(SpanTypeN_iter* it);
+ // general slicing of an md span.
+ // {i}: reduce rank. {i,c_END}: slice to end. {c_ALL}: use full extent.
+OutSpanN cspan_slice(TYPE OutSpanN, const SpanTypeN* parent, {x0,x1}, {y0,y1}.., {N0,N1});
```
-## Types
-
-| Type name | Type definition | Used to represent... |
+## Types
+| Type name | Type definition / usage | Used to represent... |
|:------------------|:----------------------------------------------------|:---------------------|
| SpanTypeN | `struct { ValueType *data; uint32_t shape[N]; .. }` | SpanType with rank N |
| SpanTypeN`_value` | `ValueType` | The ValueType |
-| `c_ALL` | | Full extent |
-| `c_END` | | End of extent |
+| `c_ALL` | Use with `cspan_slice()`. | Full extent |
+| `c_END` | Use with `cspan_slice()`. | End of extent |
## Example 1
diff --git a/include/stc/cspan.h b/include/stc/cspan.h
index 7a0e9c8d..027b5275 100644
--- a/include/stc/cspan.h
+++ b/include/stc/cspan.h
@@ -107,58 +107,37 @@ int demo2() {
#define using_cspan2(Self, T) using_cspan_3(Self, T, 1); using_cspan_3(Self##2, T, 2)
#define using_cspan3(Self, T) using_cspan2(Self, T); using_cspan_3(Self##3, T, 3)
#define using_cspan4(Self, T) using_cspan3(Self, T); using_cspan_3(Self##4, T, 4)
-typedef struct { int32_t d[1]; } cspan_tuple1;
-typedef struct { int32_t d[2]; } cspan_tuple2;
-typedef struct { int32_t d[3]; } cspan_tuple3;
-typedef struct { int32_t d[4]; } cspan_tuple4;
-typedef struct { int32_t d[5]; } cspan_tuple5;
-typedef struct { int32_t d[6]; } cspan_tuple6;
+#define using_cspan_tuple(N) typedef struct { int32_t d[N]; } cspan_tuple##N
+using_cspan_tuple(1); using_cspan_tuple(2);
+using_cspan_tuple(3); using_cspan_tuple(4);
+using_cspan_tuple(5); using_cspan_tuple(6);
+using_cspan_tuple(7); using_cspan_tuple(8);
+
#define c_END -1
#define c_ALL 0,-1
-#define cspan_md(order, array, ...) \
- {.data=array, .shape={__VA_ARGS__}, \
- .stride=*(c_PASTE(cspan_tuple, c_NUMARGS(__VA_ARGS__))*)_cspan_shape2stride(order, ((int32_t[]){__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))}
-
-#define cspan_transpose(self) \
- _cspan_transpose((self)->shape, (self)->stride.d, cspan_rank(self))
-
/* Use cspan_init() for static initialization only. c_init() for non-static init. */
#define cspan_init(SpanType, ...) \
{.data=(SpanType##_value[])__VA_ARGS__, .shape={sizeof((SpanType##_value[])__VA_ARGS__)/sizeof(SpanType##_value)}, .stride={.d={1}}}
-#define cspan_slice(OutSpan, parent, ...) \
- OutSpan##_slice_((parent)->data, (parent)->shape, (parent)->stride.d, cspan_rank(parent) + \
- c_static_assert(cspan_rank(parent) == sizeof((int32_t[][2]){__VA_ARGS__})/sizeof(int32_t[2])), \
- (const int32_t[][2]){__VA_ARGS__})
-
/* create a cspan from a cvec, cstack, cdeq, cqueue, or cpque (heap) */
#define cspan_from(container) \
{.data=(container)->data, .shape={(int32_t)(container)->_len}, .stride={.d={1}}}
#define cspan_from_array(array) \
- {.data=(array) + c_static_assert(sizeof(array) != sizeof(void*)), .shape={c_arraylen(array)}, .stride={.d={1}}}
+ {.data=(array), .shape={c_arraylen(array)}, .stride={.d={1}}}
#define cspan_size(self) _cspan_size((self)->shape, cspan_rank(self))
#define cspan_rank(self) c_arraylen((self)->shape)
#define cspan_is_order_F(self) ((self)->stride.d[0] < (self)->stride.d[cspan_rank(self) - 1])
#define cspan_index(self, ...) c_PASTE(cspan_idx_, c_NUMARGS(__VA_ARGS__))(self, __VA_ARGS__)
-#define cspan_idx_1 cspan_idx_3
-#define cspan_idx_2 cspan_idx_3
-#define cspan_idx_3(self, ...) \
- c_PASTE(_cspan_idx, c_NUMARGS(__VA_ARGS__))((self)->shape, (self)->stride, __VA_ARGS__) // small/fast
-#define cspan_idx_4(self, ...) \
- (_cspan_idxN(c_NUMARGS(__VA_ARGS__), (self)->shape, (self)->stride.d, (int32_t[]){__VA_ARGS__}) + \
- c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__))) // general
-#define cspan_idx_5 cspan_idx_4
-#define cspan_idx_6 cspan_idx_4
#define cspan_at(self, ...) ((self)->data + cspan_index(self, __VA_ARGS__))
#define cspan_front(self) ((self)->data)
#define cspan_back(self) ((self)->data + cspan_size(self) - 1)
-// cspan_subspanN. (N<=3) Optimized, same as e.g. cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL});
+// cspan_subspanX: (X <= 3) optimized. Similar to cspan_slice(Span3, &ms3, {off,off+count}, {c_ALL}, {c_ALL});
#define cspan_subspan(self, offset, count) \
{.data=cspan_at(self, offset), .shape={count}, .stride=(self)->stride}
#define cspan_subspan2(self, offset, count) \
@@ -166,16 +145,17 @@ typedef struct { int32_t d[6]; } cspan_tuple6;
#define cspan_subspan3(self, offset, count) \
{.data=cspan_at(self, offset, 0, 0), .shape={count, (self)->shape[1], (self)->shape[2]}, .stride=(self)->stride}
-// cspan_submdN: reduce rank (N<=4) Optimized, same as e.g. cspan_slice(Span2, &ms4, {x}, {y}, {c_ALL}, {c_ALL});
-#define cspan_submd4(...) c_MACRO_OVERLOAD(cspan_submd4, __VA_ARGS__)
-#define cspan_submd3(...) c_MACRO_OVERLOAD(cspan_submd3, __VA_ARGS__)
+
+// cspan_submdN(): reduce rank (N <= 4). Optimized; same as e.g. cspan_slice(Span2, &ms4, {x}, {y}, {c_ALL}, {c_ALL});
#define cspan_submd2(self, x) \
{.data=cspan_at(self, x, 0), .shape={(self)->shape[1]}, .stride={.d={(self)->stride.d[1]}}}
+#define cspan_submd3(...) c_MACRO_OVERLOAD(cspan_submd3, __VA_ARGS__)
#define cspan_submd3_2(self, x) \
{.data=cspan_at(self, x, 0, 0), .shape={(self)->shape[1], (self)->shape[2]}, \
.stride={.d={(self)->stride.d[1], (self)->stride.d[2]}}}
#define cspan_submd3_3(self, x, y) \
{.data=cspan_at(self, x, y, 0), .shape={(self)->shape[2]}, .stride={.d={(self)->stride.d[2]}}}
+#define cspan_submd4(...) c_MACRO_OVERLOAD(cspan_submd4, __VA_ARGS__)
#define cspan_submd4_2(self, x) \
{.data=cspan_at(self, x, 0, 0, 0), .shape={(self)->shape[1], (self)->shape[2], (self)->shape[3]}, \
.stride={.d={(self)->stride.d[1], (self)->stride.d[2], (self)->stride.d[3]}}}
@@ -185,7 +165,33 @@ typedef struct { int32_t d[6]; } cspan_tuple6;
#define cspan_submd4_4(self, x, y, z) \
{.data=cspan_at(self, x, y, z, 0), .shape={(self)->shape[3]}, .stride={.d={(self)->stride.d[3]}}}
-// private definitions:
+
+#define cspan_md(order, array, ...) \
+ {.data=array, .shape={__VA_ARGS__}, \
+ .stride=*(c_PASTE(cspan_tuple, c_NUMARGS(__VA_ARGS__))*)_cspan_shape2stride(order, ((int32_t[]){__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))}
+
+#define cspan_transpose(self) \
+ _cspan_transpose((self)->shape, (self)->stride.d, cspan_rank(self))
+
+
+// General slicing macro.
+#define cspan_slice(OutSpan, parent, ...) \
+ OutSpan##_slice_((parent)->data, (parent)->shape, (parent)->stride.d, cspan_rank(parent) + \
+ c_static_assert(cspan_rank(parent) == sizeof((int32_t[][2]){__VA_ARGS__})/sizeof(int32_t[2])), \
+ (const int32_t[][2]){__VA_ARGS__})
+
+// ----------- private definitions ------------
+
+// cspan_index() helpers:
+#define cspan_idx_1 cspan_idx_3
+#define cspan_idx_2 cspan_idx_3
+#define cspan_idx_3(self, ...) \
+ c_PASTE(_cspan_idx, c_NUMARGS(__VA_ARGS__))((self)->shape, (self)->stride, __VA_ARGS__) // small/fast
+#define cspan_idx_4(self, ...) \
+ (_cspan_idxN(c_NUMARGS(__VA_ARGS__), (self)->shape, (self)->stride.d, (int32_t[]){__VA_ARGS__}) + \
+ c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__))) // general
+#define cspan_idx_5 cspan_idx_4
+#define cspan_idx_6 cspan_idx_4
STC_INLINE intptr_t _cspan_size(const int32_t shape[], int rank) {
intptr_t sz = shape[0];
diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c
index 6ca7425d..e724bdbd 100644
--- a/misc/benchmarks/various/cspan_bench.c
+++ b/misc/benchmarks/various/cspan_bench.c
@@ -12,8 +12,8 @@ enum {
nz = 64
};
int lx = 15, ly = 10, lz = 5;
-int hx = 20, hy = 15, hz = 15;
-intptr_t n = 1000000;
+int hx = 30, hy = 15, hz = 15;
+intptr_t n = 100000;
// define the contents of two nx x ny x nz arrays in and out
double Vout[nx * ny * nz];
@@ -49,10 +49,10 @@ static void TraditionalForLoop(intptr_t state)
for (int s = 0; s < state; ++s) {
for (int x = lx; x < hx; ++x) {
for (int y = ly; y < hy; ++y) {
- for (int z = lz; z < hz; ++z)
- {
- double d = Vin[nz*(ny*x + y) + z];
- Vout[nz*(ny*x + y) + z] += d;
+ for (int z = lz; z < hz; ++z) {
+ int i = nz*(ny*x + y) + z;
+ double d = Vin[i];
+ Vout[i] += d;
sum += d;
}
}
@@ -64,13 +64,13 @@ static void TraditionalForLoop(intptr_t state)
static void MDRanges_nested_loop(intptr_t state)
{
+ clock_t t = clock();
MD3 r_in = cspan_md('C', Vin, nx, ny, nz);
MD3 r_out = cspan_md('C', Vout, nx, ny, nz);
r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});
// C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; }
- clock_t t = clock();
double sum = 0;
for (intptr_t s = 0; s < state; ++s) {
diff --git a/misc/benchmarks/various/string_bench_STD.cpp b/misc/benchmarks/various/string_bench_STD.cpp
index 8bb87937..07934948 100644
--- a/misc/benchmarks/various/string_bench_STD.cpp
+++ b/misc/benchmarks/various/string_bench_STD.cpp
@@ -12,6 +12,7 @@
#include <unordered_map>
#define i_static
#include <stc/cstr.h>
+#include <stc/algo/raii.h>
std::vector<std::string> read_file(const char* name)
{
diff --git a/misc/examples/multidim.c b/misc/examples/multidim.c
index df8f485d..43c21443 100644
--- a/misc/examples/multidim.c
+++ b/misc/examples/multidim.c
@@ -28,7 +28,6 @@ int main()
}
puts("ss3 = ms3[:, 1:3, 1:3]");
ispan3 ss3 = ms3;
- //cspan_slice(&ss3, {c_ALL}, {1,3}, {1,3});
ss3 = cspan_slice(ispan3, &ms3, {c_ALL}, {1,3}, {1,3});
for (int i=0; i != ss3.shape[0]; i++) {