diff options
| author | tylov <[email protected]> | 2023-08-19 21:46:52 +0200 |
|---|---|---|
| committer | tylov <[email protected]> | 2023-08-19 21:46:52 +0200 |
| commit | 7ae6e4d155e9c4835d2dbf80f6e27873b7c7439a (patch) | |
| tree | 2a8659151be779dfee268b3b99b12f23f4e647b2 | |
| parent | ac7afe963527eb66a12962c638242f0426b39575 (diff) | |
| download | STC-modified-7ae6e4d155e9c4835d2dbf80f6e27873b7c7439a.tar.gz STC-modified-7ae6e4d155e9c4835d2dbf80f6e27873b7c7439a.zip | |
Optimized cspan_next(): awesome speedup on gcc.
| -rw-r--r-- | include/stc/cspan.h | 15 | ||||
| -rw-r--r-- | misc/benchmarks/various/cspan_bench.c | 5 |
2 files changed, 8 insertions, 12 deletions
```diff
diff --git a/include/stc/cspan.h b/include/stc/cspan.h
index 32921390..6f8de8ec 100644
--- a/include/stc/cspan.h
+++ b/include/stc/cspan.h
@@ -97,8 +97,9 @@ int demo2() {
         return it; \
     } \
     STC_INLINE void Self##_next(Self##_iter* it) { \
-        int done; \
-        it->ref += _cspan_next##RANK(it->pos, it->_s->shape, it->_s->stride.d, RANK, &done); \
+        int i, inc, done; \
+        if (it->_s->stride.d[0] < it->_s->stride.d[RANK - 1]) i=0, inc=1; else i=RANK-1, inc=-1; \
+        it->ref += _cspan_next##RANK(it->pos, it->_s->shape, it->_s->stride.d, RANK, i, inc, &done); \
         if (done) it->ref = NULL; \
     } \
     struct stc_nostruct
@@ -223,8 +224,8 @@ STC_INLINE intptr_t _cspan_idxN(int rank, const int32_t shape[], const int32_t s
     return off;
 }
 
-STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done);
-#define _cspan_next1(pos, shape, stride, rank, done) (*done = ++pos[0]==shape[0], stride[0])
+STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc, int* done);
+#define _cspan_next1(pos, shape, stride, rank, i, inc, done) (*done = ++pos[0]==shape[0], stride[0])
 #define _cspan_next3 _cspan_next2
 #define _cspan_next4 _cspan_next2
 #define _cspan_next5 _cspan_next2
@@ -242,11 +243,7 @@ STC_API int32_t* _cspan_shape2stride(char order, int32_t shape[], int rank);
 /* --------------------- IMPLEMENTATION --------------------- */
 #if defined(i_implement) || defined(i_static)
 
-STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done) {
-    int i, inc;
-    if (stride[0] < stride[rank - 1]) i = rank - 1, inc = -1;
-    else /* order 'C' */ i = 0, inc = 1;
-
+STC_DEF intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int i, int inc, int* done) {
     intptr_t off = stride[i];
     ++pos[i];
     while (--rank && pos[i] == shape[i]) {
diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c
index f4b067f8..b5caca83 100644
--- a/misc/benchmarks/various/cspan_bench.c
+++ b/misc/benchmarks/various/cspan_bench.c
@@ -49,9 +49,8 @@ static void TraditionalForLoop(intptr_t n)
     for (int x = lx; x < hx; ++x) {
         for (int y = ly; y < hy; ++y) {
             for (int z = lz; z < hz; ++z) {
-                int i = nz*(ny*x + y) + z;
-                double d = Vin[i];
-                Vout[i] += d;
+                double d = Vin[nz*(ny*x + y) + z];
+                Vout[nz*(ny*x + y) + z] += d;
                 sum += d;
             }
         }
```
