diff options
| author | _Tradam <[email protected]> | 2023-09-08 01:29:47 +0000 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-09-08 01:29:47 +0000 |
| commit | 3c76c7f3d5db3f9586a90d03f8fbb02d79de9acd (patch) | |
| tree | afbe4b540967223911f7c5de36559b82154f02f3 /misc/benchmarks/various/cspan_bench.c | |
| parent | 0841165881871ee01b782129be681209aeed2423 (diff) | |
| parent | 1a72205fe05c2375cfd380dd8381a8460d9ed8d1 (diff) | |
| download | STC-modified-modified.tar.gz STC-modified-modified.zip | |
Diffstat (limited to 'misc/benchmarks/various/cspan_bench.c')
| -rw-r--r-- | misc/benchmarks/various/cspan_bench.c | 97 |
1 file changed, 37 insertions, 60 deletions
diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c index 589df13a..bfc0ead3 100644 --- a/misc/benchmarks/various/cspan_bench.c +++ b/misc/benchmarks/various/cspan_bench.c @@ -1,4 +1,5 @@ -#define STC_NDEBUG +// ref: https://stackoverflow.com/questions/74382366/why-is-iterating-over-stdrangesviewsjoin-so-slow +#define NDEBUG #include <stc/cspan.h> #include <stdio.h> #include <time.h> @@ -11,9 +12,9 @@ enum { ny = 64, nz = 64 }; +// subspan 15x5x10: int lx = 15, ly = 10, lz = 5; -int hx = 20, hy = 15, hz = 15; -intptr_t n = 1000000; +int hx = 30, hy = 15, hz = 15; // define the contents of two nx x ny x nz arrays in and out double Vout[nx * ny * nz]; @@ -21,36 +22,15 @@ double Vin[nx * ny * nz]; //, 1.23; // define some slice indices for each dimension -static void MDRanges_setup(intptr_t state) -{ - double sum = 0; - clock_t t = clock(); - - for (intptr_t s = 0; s < state; ++s) - { - MD3 r_in = cspan_md(Vin, nx, ny, nz); - MD3 r_out = cspan_md(Vout, nx, ny, nz); - - r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); - r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - MD3_iter i = MD3_begin(&r_in); // can be iterated "flat". 
- MD3_iter o = MD3_begin(&r_out); - sum += Vin[s % nx]; - } - t = clock() - t; - printf("setup: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); -} - -static void TraditionalForLoop(intptr_t state) +static void Traditional_for_loop(intptr_t n) { clock_t t = clock(); double sum = 0; - for (int s = 0; s < state; ++s) { + for (int s = 0; s < n; ++s) { for (int x = lx; x < hx; ++x) { for (int y = ly; y < hy; ++y) { - for (int z = lz; z < hz; ++z) - { + for (int z = lz; z < hz; ++z) { double d = Vin[nz*(ny*x + y) + z]; Vout[nz*(ny*x + y) + z] += d; sum += d; @@ -59,68 +39,65 @@ static void TraditionalForLoop(intptr_t state) } } t = clock() - t; - printf("forloop: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("forloop : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -static void MDRanges_nested_loop(intptr_t state) +static void MDRanges_loop_over_joined(intptr_t n) { + clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); MD3 r_out = cspan_md(Vout, nx, ny, nz); r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - - // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; } - clock_t t = clock(); double sum = 0; - for (intptr_t s = 0; s < state; ++s) { - for (int x = 0; x < r_in.shape[0]; ++x) { - for (int y = 0; y < r_in.shape[1]; ++y) { - for (int z = 0; z < r_in.shape[2]; ++z) - { - double d = *cspan_at(&r_in, x, y, z); - *cspan_at(&r_out, x, y, z) += d; - sum += d; - } - } + for (intptr_t s = 0; s < n; ++s) { + MD3_iter i = MD3_begin(&r_in); + MD3_iter o = MD3_begin(&r_out); + + for (; i.ref; MD3_next(&i), MD3_next(&o)) + { + *o.ref += *i.ref; + sum += *i.ref; } } t = clock() - t; - printf("nested: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -static void MDRanges_loop_over_joined(intptr_t state) +static void MDRanges_nested_loop(intptr_t n) { + clock_t t = clock(); 
MD3 r_in = cspan_md(Vin, nx, ny, nz); MD3 r_out = cspan_md(Vout, nx, ny, nz); r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - - // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; } double sum = 0; - clock_t t = clock(); - - for (intptr_t s = 0; s < state; ++s) { - MD3_iter i = MD3_begin(&r_in); - MD3_iter o = MD3_begin(&r_out); - for (; i.ref; MD3_next(&i), MD3_next(&o)) - { - *o.ref += *i.ref; - sum += *i.ref; + for (intptr_t s = 0; s < n; ++s) { + for (int x = 0; x < r_in.shape[0]; ++x) { + for (int y = 0; y < r_in.shape[1]; ++y) { + for (int z = 0; z < r_in.shape[2]; ++z) + { + double d = *cspan_at(&r_in, x,y,z); + *cspan_at(&r_out, x,y,z) += d; + sum += d; + } + } } } t = clock() - t; - printf("joined: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -int main() + +int main(void) { + intptr_t n = 100000; for (int i = 0; i < nx * ny * nz; ++i) Vin[i] = i + 1.23; - MDRanges_setup(n); - TraditionalForLoop(n); - MDRanges_nested_loop(n); + Traditional_for_loop(n); MDRanges_loop_over_joined(n); + MDRanges_nested_loop(n); } |
