summary | refs | log | tree | commit | diff | homepage
path: root/misc/benchmarks/various/cspan_bench.c
diff options
context:
space:
mode:
author: _Tradam <[email protected]> 2023-09-08 01:29:47 +0000
committer: GitHub <[email protected]> 2023-09-08 01:29:47 +0000
commit: 3c76c7f3d5db3f9586a90d03f8fbb02d79de9acd (patch)
tree: afbe4b540967223911f7c5de36559b82154f02f3 /misc/benchmarks/various/cspan_bench.c
parent: 0841165881871ee01b782129be681209aeed2423 (diff)
parent: 1a72205fe05c2375cfd380dd8381a8460d9ed8d1 (diff)
download: STC-modified-modified.tar.gz
          STC-modified-modified.zip
Merge branch 'stclib:master' into modified (refs: HEAD, modified)
Diffstat (limited to 'misc/benchmarks/various/cspan_bench.c')
-rw-r--r-- misc/benchmarks/various/cspan_bench.c 97
1 files changed, 37 insertions, 60 deletions
diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c
index 589df13a..bfc0ead3 100644
--- a/misc/benchmarks/various/cspan_bench.c
+++ b/misc/benchmarks/various/cspan_bench.c
@@ -1,4 +1,5 @@
-#define STC_NDEBUG
+// ref: https://stackoverflow.com/questions/74382366/why-is-iterating-over-stdrangesviewsjoin-so-slow
+#define NDEBUG
#include <stc/cspan.h>
#include <stdio.h>
#include <time.h>
@@ -11,9 +12,9 @@ enum {
ny = 64,
nz = 64
};
+// subspan 15x5x10:
int lx = 15, ly = 10, lz = 5;
-int hx = 20, hy = 15, hz = 15;
-intptr_t n = 1000000;
+int hx = 30, hy = 15, hz = 15;
// define the contents of two nx x ny x nz arrays in and out
double Vout[nx * ny * nz];
@@ -21,36 +22,15 @@ double Vin[nx * ny * nz]; //, 1.23;
// define some slice indices for each dimension
-static void MDRanges_setup(intptr_t state)
-{
- double sum = 0;
- clock_t t = clock();
-
- for (intptr_t s = 0; s < state; ++s)
- {
- MD3 r_in = cspan_md(Vin, nx, ny, nz);
- MD3 r_out = cspan_md(Vout, nx, ny, nz);
-
- r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
- r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});
- MD3_iter i = MD3_begin(&r_in); // can be iterated "flat".
- MD3_iter o = MD3_begin(&r_out);
- sum += Vin[s % nx];
- }
- t = clock() - t;
- printf("setup: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
-}
-
-static void TraditionalForLoop(intptr_t state)
+static void Traditional_for_loop(intptr_t n)
{
clock_t t = clock();
double sum = 0;
- for (int s = 0; s < state; ++s) {
+ for (int s = 0; s < n; ++s) {
for (int x = lx; x < hx; ++x) {
for (int y = ly; y < hy; ++y) {
- for (int z = lz; z < hz; ++z)
- {
+ for (int z = lz; z < hz; ++z) {
double d = Vin[nz*(ny*x + y) + z];
Vout[nz*(ny*x + y) + z] += d;
sum += d;
@@ -59,68 +39,65 @@ static void TraditionalForLoop(intptr_t state)
}
}
t = clock() - t;
- printf("forloop: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
+ printf("forloop : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
}
-static void MDRanges_nested_loop(intptr_t state)
+static void MDRanges_loop_over_joined(intptr_t n)
{
+ clock_t t = clock();
MD3 r_in = cspan_md(Vin, nx, ny, nz);
MD3 r_out = cspan_md(Vout, nx, ny, nz);
r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});
-
- // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; }
- clock_t t = clock();
double sum = 0;
- for (intptr_t s = 0; s < state; ++s) {
- for (int x = 0; x < r_in.shape[0]; ++x) {
- for (int y = 0; y < r_in.shape[1]; ++y) {
- for (int z = 0; z < r_in.shape[2]; ++z)
- {
- double d = *cspan_at(&r_in, x, y, z);
- *cspan_at(&r_out, x, y, z) += d;
- sum += d;
- }
- }
+ for (intptr_t s = 0; s < n; ++s) {
+ MD3_iter i = MD3_begin(&r_in);
+ MD3_iter o = MD3_begin(&r_out);
+
+ for (; i.ref; MD3_next(&i), MD3_next(&o))
+ {
+ *o.ref += *i.ref;
+ sum += *i.ref;
}
}
t = clock() - t;
- printf("nested: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
+ printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
}
-static void MDRanges_loop_over_joined(intptr_t state)
+static void MDRanges_nested_loop(intptr_t n)
{
+ clock_t t = clock();
MD3 r_in = cspan_md(Vin, nx, ny, nz);
MD3 r_out = cspan_md(Vout, nx, ny, nz);
r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});
-
- // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; }
double sum = 0;
- clock_t t = clock();
-
- for (intptr_t s = 0; s < state; ++s) {
- MD3_iter i = MD3_begin(&r_in);
- MD3_iter o = MD3_begin(&r_out);
- for (; i.ref; MD3_next(&i), MD3_next(&o))
- {
- *o.ref += *i.ref;
- sum += *i.ref;
+ for (intptr_t s = 0; s < n; ++s) {
+ for (int x = 0; x < r_in.shape[0]; ++x) {
+ for (int y = 0; y < r_in.shape[1]; ++y) {
+ for (int z = 0; z < r_in.shape[2]; ++z)
+ {
+ double d = *cspan_at(&r_in, x,y,z);
+ *cspan_at(&r_out, x,y,z) += d;
+ sum += d;
+ }
+ }
}
}
t = clock() - t;
- printf("joined: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
+ printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
}
-int main()
+
+int main(void)
{
+ intptr_t n = 100000;
for (int i = 0; i < nx * ny * nz; ++i)
Vin[i] = i + 1.23;
- MDRanges_setup(n);
- TraditionalForLoop(n);
- MDRanges_nested_loop(n);
+ Traditional_for_loop(n);
MDRanges_loop_over_joined(n);
+ MDRanges_nested_loop(n);
}