1 files changed, 10 insertions, 33 deletions
diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c
index b5caca83..3b1c3132 100644
--- a/misc/benchmarks/various/cspan_bench.c
+++ b/misc/benchmarks/various/cspan_bench.c
@@ -1,3 +1,4 @@
+// ref: https://stackoverflow.com/questions/74382366/why-is-iterating-over-stdrangesviewsjoin-so-slow
 #define NDEBUG
 #include <stc/cspan.h>
 #include <stdio.h>
@@ -11,6 +12,7 @@ enum {
     ny = 64,
     nz = 64
 };
+// subspan extents 15x5x10, i.e. (hx-lx) x (hy-ly) x (hz-lz):
 int lx = 15, ly = 10, lz = 5;
 int hx = 30, hy = 15, hz = 15;
 
@@ -20,27 +22,7 @@ double Vin[nx * ny * nz]; //, 1.23;
 
 // define some slice indices for each dimension
 
-static void MDRanges_setup(intptr_t n)
-{
-    double sum = 0;
-    clock_t t = clock();
-
-    for (intptr_t s = 0; s < n; ++s)
-    {
-        MD3 r_in = cspan_md(Vin, nx, ny, nz);
-        MD3 r_out = cspan_md(Vout, nx, ny, nz);
-
-        r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
-        r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});
-        MD3_iter i = MD3_begin(&r_in); // can be iterated "flat".
-        MD3_iter o = MD3_begin(&r_out);
-        sum += Vin[s % nx];
-    }
-    t = clock() - t;
-    printf("setup: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
-}
-
-static void TraditionalForLoop(intptr_t n)
+static void Traditional_for_loop(intptr_t n)
 {
     clock_t t = clock();
     double sum = 0;
@@ -57,7 +39,7 @@ static void TraditionalForLoop(intptr_t n)
         }
     }
     t = clock() - t;
-    printf("forloop: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
+    printf("forloop : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
 }
 
 static void MDRanges_nested_loop(intptr_t n)
@@ -67,8 +49,6 @@ static void MDRanges_nested_loop(intptr_t n)
     MD3 r_out = cspan_md(Vout, nx, ny, nz);
     r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
     r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});
-
-    // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; }
     double sum = 0;
 
     for (intptr_t s = 0; s < n; ++s) {
@@ -76,27 +56,25 @@ static void MDRanges_nested_loop(intptr_t n)
             for (int y = 0; y < r_in.shape[1]; ++y) {
                 for (int z = 0; z < r_in.shape[2]; ++z)
                 {
-                    double d = *cspan_at(&r_in, x, y, z);
-                    *cspan_at(&r_out, x, y, z) += d;
+                    double d = *cspan_at(&r_in, x,y,z);
+                    *cspan_at(&r_out, x,y,z) += d;
                     sum += d;
                 }
             }
         }
     }
     t = clock() - t;
-    printf("nested: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
+    printf("nested  : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
 }
 
 static void MDRanges_loop_over_joined(intptr_t n)
 {
+    clock_t t = clock();
     MD3 r_in = cspan_md(Vin, nx, ny, nz);
     MD3 r_out = cspan_md(Vout, nx, ny, nz);
     r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
     r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});
-
-    // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; }
     double sum = 0;
-    clock_t t = clock();
 
     for (intptr_t s = 0; s < n; ++s) {
         MD3_iter i = MD3_begin(&r_in);
@@ -109,7 +87,7 @@ static void MDRanges_loop_over_joined(intptr_t n)
         }
     }
     t = clock() - t;
-    printf("joined: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
+    printf("joined  : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
 }
 
 int main(void)
@@ -118,8 +96,7 @@ int main(void)
     for (int i = 0; i < nx * ny * nz; ++i)
         Vin[i] = i + 1.23;
 
-    MDRanges_setup(n);
-    TraditionalForLoop(n);
+    Traditional_for_loop(n);
     MDRanges_nested_loop(n);
     MDRanges_loop_over_joined(n);
 }