summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- include/stc/cspan.h 22
-rw-r--r-- misc/benchmarks/various/cspan_bench.c 18
2 files changed, 19 insertions, 21 deletions
diff --git a/include/stc/cspan.h b/include/stc/cspan.h
index 582e1004..dcb02961 100644
--- a/include/stc/cspan.h
+++ b/include/stc/cspan.h
@@ -114,7 +114,7 @@ using_cspan_tuple(5); using_cspan_tuple(6);
using_cspan_tuple(7); using_cspan_tuple(8);
#define c_END -1
-#define c_ALL 0,-1
+#define c_ALL 0,c_END
/* Use cspan_init() for static initialization only. c_init() for non-static init. */
#define cspan_init(SpanType, ...) \
@@ -221,8 +221,16 @@ STC_INLINE intptr_t _cspan_idxN(int rank, const int32_t shape[], const int32_t s
return off;
}
+STC_INLINE intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc) { /* Advance the index tuple pos[] by one step in dimension i, carrying into dimension i + inc on overflow; returns the accumulated stride delta (presumably added to the element pointer by the caller - confirm at call sites). */
+ intptr_t off = stride[i]; /* a plain step in dimension i moves by stride[i] */
+ ++pos[i];
+ for (; --rank && pos[i] == shape[i]; i += inc) { /* carry while dimension i has reached its extent; for positive rank this runs at most rank-1 times */
+ pos[i] = 0; ++pos[i + inc]; /* wrap dimension i to zero and bump its neighbour at i + inc */
+ off += stride[i + inc] - stride[i]*shape[i]; /* add one neighbour step and take back the shape[i] steps made in dimension i */
+ }
+ return off; /* the last dimension reached is not wrapped, so pos may equal shape there on return */
+}
#define _cspan_next1(pos, shape, stride, rank, i, inc) (++pos[0], stride[0])
-STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc);
#define _cspan_next3 _cspan_next2
#define _cspan_next4 _cspan_next2
#define _cspan_next5 _cspan_next2
@@ -254,16 +262,6 @@ STC_DEF int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank) {
return shape;
}
-STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc) {
- intptr_t off = stride[i];
- ++pos[i];
- for (; --rank && pos[i] == shape[i]; i += inc) {
- pos[i] = 0; ++pos[i + inc];
- off += stride[i + inc] - stride[i]*shape[i];
- }
- return off;
-}
-
STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank,
const int32_t shape[], const int32_t stride[],
int rank, const int32_t a[][2]) {
diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c
index e3997ff0..f4b067f8 100644
--- a/misc/benchmarks/various/cspan_bench.c
+++ b/misc/benchmarks/various/cspan_bench.c
@@ -13,7 +13,6 @@ enum {
};
int lx = 15, ly = 10, lz = 5;
int hx = 30, hy = 15, hz = 15;
-intptr_t n = 100000;
// define the contents of two nx x ny x nz arrays in and out
double Vout[nx * ny * nz];
@@ -21,12 +20,12 @@ double Vin[nx * ny * nz]; //, 1.23;
// define some slice indices for each dimension
-static void MDRanges_setup(intptr_t state)
+static void MDRanges_setup(intptr_t n)
{
double sum = 0;
clock_t t = clock();
- for (intptr_t s = 0; s < state; ++s)
+ for (intptr_t s = 0; s < n; ++s)
{
MD3 r_in = cspan_md(Vin, nx, ny, nz);
MD3 r_out = cspan_md(Vout, nx, ny, nz);
@@ -41,12 +40,12 @@ static void MDRanges_setup(intptr_t state)
printf("setup: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
}
-static void TraditionalForLoop(intptr_t state)
+static void TraditionalForLoop(intptr_t n)
{
clock_t t = clock();
double sum = 0;
- for (int s = 0; s < state; ++s) {
+ for (int s = 0; s < n; ++s) {
for (int x = lx; x < hx; ++x) {
for (int y = ly; y < hy; ++y) {
for (int z = lz; z < hz; ++z) {
@@ -62,7 +61,7 @@ static void TraditionalForLoop(intptr_t state)
printf("forloop: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
}
-static void MDRanges_nested_loop(intptr_t state)
+static void MDRanges_nested_loop(intptr_t n)
{
clock_t t = clock();
MD3 r_in = cspan_md(Vin, nx, ny, nz);
@@ -73,7 +72,7 @@ static void MDRanges_nested_loop(intptr_t state)
// C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; }
double sum = 0;
- for (intptr_t s = 0; s < state; ++s) {
+ for (intptr_t s = 0; s < n; ++s) {
for (int x = 0; x < r_in.shape[0]; ++x) {
for (int y = 0; y < r_in.shape[1]; ++y) {
for (int z = 0; z < r_in.shape[2]; ++z)
@@ -89,7 +88,7 @@ static void MDRanges_nested_loop(intptr_t state)
printf("nested: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
}
-static void MDRanges_loop_over_joined(intptr_t state)
+static void MDRanges_loop_over_joined(intptr_t n)
{
MD3 r_in = cspan_md(Vin, nx, ny, nz);
MD3 r_out = cspan_md(Vout, nx, ny, nz);
@@ -100,7 +99,7 @@ static void MDRanges_loop_over_joined(intptr_t state)
double sum = 0;
clock_t t = clock();
- for (intptr_t s = 0; s < state; ++s) {
+ for (intptr_t s = 0; s < n; ++s) {
MD3_iter i = MD3_begin(&r_in);
MD3_iter o = MD3_begin(&r_out);
@@ -116,6 +115,7 @@ static void MDRanges_loop_over_joined(intptr_t state)
int main(void)
{
+ intptr_t n = 100000;
for (int i = 0; i < nx * ny * nz; ++i)
Vin[i] = i + 1.23;