diff options
| author | tylov <[email protected]> | 2023-07-16 09:07:01 +0200 |
|---|---|---|
| committer | tylov <[email protected]> | 2023-07-16 11:15:24 +0200 |
| commit | 23eeedb3fc298602732f394adba6a43c876ca7d8 (patch) | |
| tree | 5f19fbe97631868f8eec22f73ec8899c252efe9d | |
| parent | e9121702a5d69624ef1e782e85a8f032e4f4e875 (diff) | |
| download | STC-modified-23eeedb3fc298602732f394adba6a43c876ca7d8.tar.gz STC-modified-23eeedb3fc298602732f394adba6a43c876ca7d8.zip | |
Moved _cspan_next2() from the implementation (STC_DEF) section to the header section of cspan.h as an STC_INLINE function, so the compiler can inline and optimize it.
| -rw-r--r-- | include/stc/cspan.h | 22 |
| -rw-r--r-- | misc/benchmarks/various/cspan_bench.c | 18 |
2 files changed, 19 insertions, 21 deletions
diff --git a/include/stc/cspan.h b/include/stc/cspan.h index 582e1004..dcb02961 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -114,7 +114,7 @@ using_cspan_tuple(5); using_cspan_tuple(6); using_cspan_tuple(7); using_cspan_tuple(8); #define c_END -1 -#define c_ALL 0,-1 +#define c_ALL 0,c_END /* Use cspan_init() for static initialization only. c_init() for non-static init. */ #define cspan_init(SpanType, ...) \ @@ -221,8 +221,16 @@ STC_INLINE intptr_t _cspan_idxN(int rank, const int32_t shape[], const int32_t s return off; } +STC_INLINE intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc) { + intptr_t off = stride[i]; + ++pos[i]; + for (; --rank && pos[i] == shape[i]; i += inc) { + pos[i] = 0; ++pos[i + inc]; + off += stride[i + inc] - stride[i]*shape[i]; + } + return off; +} #define _cspan_next1(pos, shape, stride, rank, i, inc) (++pos[0], stride[0]) -STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc); #define _cspan_next3 _cspan_next2 #define _cspan_next4 _cspan_next2 #define _cspan_next5 _cspan_next2 @@ -254,16 +262,6 @@ STC_DEF int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank) { return shape; } -STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc) { - intptr_t off = stride[i]; - ++pos[i]; - for (; --rank && pos[i] == shape[i]; i += inc) { - pos[i] = 0; ++pos[i + inc]; - off += stride[i + inc] - stride[i]*shape[i]; - } - return off; -} - STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, const int32_t shape[], const int32_t stride[], int rank, const int32_t a[][2]) { diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c index e3997ff0..f4b067f8 100644 --- a/misc/benchmarks/various/cspan_bench.c +++ b/misc/benchmarks/various/cspan_bench.c @@ -13,7 +13,6 @@ enum { }; int lx = 15, ly = 
10, lz = 5; int hx = 30, hy = 15, hz = 15; -intptr_t n = 100000; // define the contents of two nx x ny x nz arrays in and out double Vout[nx * ny * nz]; @@ -21,12 +20,12 @@ double Vin[nx * ny * nz]; //, 1.23; // define some slice indices for each dimension -static void MDRanges_setup(intptr_t state) +static void MDRanges_setup(intptr_t n) { double sum = 0; clock_t t = clock(); - for (intptr_t s = 0; s < state; ++s) + for (intptr_t s = 0; s < n; ++s) { MD3 r_in = cspan_md(Vin, nx, ny, nz); MD3 r_out = cspan_md(Vout, nx, ny, nz); @@ -41,12 +40,12 @@ static void MDRanges_setup(intptr_t state) printf("setup: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); } -static void TraditionalForLoop(intptr_t state) +static void TraditionalForLoop(intptr_t n) { clock_t t = clock(); double sum = 0; - for (int s = 0; s < state; ++s) { + for (int s = 0; s < n; ++s) { for (int x = lx; x < hx; ++x) { for (int y = ly; y < hy; ++y) { for (int z = lz; z < hz; ++z) { @@ -62,7 +61,7 @@ static void TraditionalForLoop(intptr_t state) printf("forloop: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); } -static void MDRanges_nested_loop(intptr_t state) +static void MDRanges_nested_loop(intptr_t n) { clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); @@ -73,7 +72,7 @@ static void MDRanges_nested_loop(intptr_t state) // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; } double sum = 0; - for (intptr_t s = 0; s < state; ++s) { + for (intptr_t s = 0; s < n; ++s) { for (int x = 0; x < r_in.shape[0]; ++x) { for (int y = 0; y < r_in.shape[1]; ++y) { for (int z = 0; z < r_in.shape[2]; ++z) @@ -89,7 +88,7 @@ static void MDRanges_nested_loop(intptr_t state) printf("nested: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); } -static void MDRanges_loop_over_joined(intptr_t state) +static void MDRanges_loop_over_joined(intptr_t n) { MD3 r_in = cspan_md(Vin, nx, ny, nz); MD3 r_out = cspan_md(Vout, nx, ny, nz); @@ -100,7 +99,7 @@ static void 
MDRanges_loop_over_joined(intptr_t state) double sum = 0; clock_t t = clock(); - for (intptr_t s = 0; s < state; ++s) { + for (intptr_t s = 0; s < n; ++s) { MD3_iter i = MD3_begin(&r_in); MD3_iter o = MD3_begin(&r_out); @@ -116,6 +115,7 @@ static void MDRanges_loop_over_joined(intptr_t state) int main(void) { + intptr_t n = 100000; for (int i = 0; i < nx * ny * nz; ++i) Vin[i] = i + 1.23; |
