summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: tylov <[email protected]> 2023-08-19 21:46:52 +0200
committer: tylov <[email protected]> 2023-08-19 21:46:52 +0200
commit: 7ae6e4d155e9c4835d2dbf80f6e27873b7c7439a (patch)
tree: 2a8659151be779dfee268b3b99b12f23f4e647b2
parent: ac7afe963527eb66a12962c638242f0426b39575 (diff)
download: STC-modified-7ae6e4d155e9c4835d2dbf80f6e27873b7c7439a.tar.gz
download: STC-modified-7ae6e4d155e9c4835d2dbf80f6e27873b7c7439a.zip
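Optimized cspan_next(): awesome speedup on gcc. The start index and step direction (i, inc) are now selected in the inlined Self##_next() and passed to _cspan_next2() as arguments, instead of being computed inside _cspan_next2() on every call. Note that the branch mapping is also swapped: for the same test `stride[0] < stride[rank - 1]`, the removed code chose i = rank - 1, inc = -1, whereas the new code chooses i = 0, inc = 1.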
-rw-r--r-- include/stc/cspan.h | 15
-rw-r--r-- misc/benchmarks/various/cspan_bench.c | 5
2 files changed, 8 insertions, 12 deletions
diff --git a/include/stc/cspan.h b/include/stc/cspan.h
index 32921390..6f8de8ec 100644
--- a/include/stc/cspan.h
+++ b/include/stc/cspan.h
@@ -97,8 +97,9 @@ int demo2() {
return it; \
} \
STC_INLINE void Self##_next(Self##_iter* it) { \
- int done; \
- it->ref += _cspan_next##RANK(it->pos, it->_s->shape, it->_s->stride.d, RANK, &done); \
+ int i, inc, done; \
+ if (it->_s->stride.d[0] < it->_s->stride.d[RANK - 1]) i=0, inc=1; else i=RANK-1, inc=-1; \
+ it->ref += _cspan_next##RANK(it->pos, it->_s->shape, it->_s->stride.d, RANK, i, inc, &done); \
if (done) it->ref = NULL; \
} \
struct stc_nostruct
@@ -223,8 +224,8 @@ STC_INLINE intptr_t _cspan_idxN(int rank, const int32_t shape[], const int32_t s
return off;
}
-STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done);
-#define _cspan_next1(pos, shape, stride, rank, done) (*done = ++pos[0]==shape[0], stride[0])
+STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc, int* done);
+#define _cspan_next1(pos, shape, stride, rank, i, inc, done) (*done = ++pos[0]==shape[0], stride[0])
#define _cspan_next3 _cspan_next2
#define _cspan_next4 _cspan_next2
#define _cspan_next5 _cspan_next2
@@ -242,11 +243,7 @@ STC_API int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank);
/* --------------------- IMPLEMENTATION --------------------- */
#if defined(i_implement) || defined(i_static)
-STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done) {
- int i, inc;
- if (stride[0] < stride[rank - 1]) i = rank - 1, inc = -1;
- else /* order 'C' */ i = 0, inc = 1;
-
+STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc, int* done) {
intptr_t off = stride[i];
++pos[i];
while (--rank && pos[i] == shape[i]) {
diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c
index f4b067f8..b5caca83 100644
--- a/misc/benchmarks/various/cspan_bench.c
+++ b/misc/benchmarks/various/cspan_bench.c
@@ -49,9 +49,8 @@ static void TraditionalForLoop(intptr_t n)
for (int x = lx; x < hx; ++x) {
for (int y = ly; y < hy; ++y) {
for (int z = lz; z < hz; ++z) {
- int i = nz*(ny*x + y) + z;
- double d = Vin[i];
- Vout[i] += d;
+ double d = Vin[nz*(ny*x + y) + z];
+ Vout[nz*(ny*x + y) + z] += d;
sum += d;
}
}