diff options
| author | Tyge Løvset <[email protected]> | 2023-08-30 17:16:03 +0200 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2023-08-30 17:16:03 +0200 |
| commit | 80cd2adc2cd008aeee9f799f2dd5042f42b4ec82 (patch) | |
| tree | be2da7806abb6fe3e4fe33c1d2ae54e108d8efe5 | |
| parent | 263dbab626e6a99f7959a4c1716f0496906ee638 (diff) | |
| download | STC-modified-80cd2adc2cd008aeee9f799f2dd5042f42b4ec82.tar.gz STC-modified-80cd2adc2cd008aeee9f799f2dd5042f42b4ec82.zip | |
Smaller updates.
| -rw-r--r-- | include/stc/cspan.h | 20 |
| -rw-r--r-- | misc/benchmarks/various/cspan_bench.c | 43 |
| -rw-r--r-- | misc/examples/spans/matmult.c | 22 |
| -rw-r--r-- | src/libstc.c | 5 |
4 files changed, 42 insertions, 48 deletions
diff --git a/include/stc/cspan.h b/include/stc/cspan.h index 3f2b300f..e72bb97a 100644 --- a/include/stc/cspan.h +++ b/include/stc/cspan.h @@ -63,12 +63,6 @@ int demo2() { #include "priv/linkage.h" #include "ccommon.h" -#ifdef i_ndebug - #define cspan_assert(x) ((void)0) -#else - #define cspan_assert(x) c_assert(x) -#endif - #define using_cspan(...) c_MACRO_OVERLOAD(using_cspan, __VA_ARGS__) #define using_cspan_2(Self, T) \ using_cspan_3(Self, T, 1); \ @@ -91,7 +85,7 @@ int demo2() { const int rank, const int32_t a[][2]) { \ Self s; int outrank; \ s.data = d + _cspan_slice(s.shape, s.stride.d, &outrank, shape, stri, rank, a); \ - cspan_assert(outrank == RANK); \ + c_assert(outrank == RANK); \ return s; \ } \ STC_INLINE Self##_iter Self##_begin(const Self* self) { \ @@ -193,7 +187,7 @@ typedef enum {c_ROWMAJOR, c_COLMAJOR} cspan_layout; STC_INLINE intptr_t _cspan_size(const int32_t shape[], int rank) { intptr_t sz = shape[0]; - while (--rank > 0) sz *= shape[rank]; + while (--rank) sz *= shape[rank]; return sz; } @@ -207,14 +201,15 @@ STC_INLINE void _cspan_transpose(int32_t shape[], int32_t stride[], int rank) { STC_INLINE intptr_t _cspan_index(int rank, const int32_t shape[], const int32_t stride[], const int32_t a[]) { intptr_t off = 0; while (rank--) { - cspan_assert(c_LTu(a[rank], shape[rank])); + c_assert(c_LTu(a[rank], shape[rank])); off += stride[rank]*a[rank]; } return off; } -STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done); #define _cspan_next1(pos, shape, stride, rank, done) (*done = ++pos[0]==shape[0], stride[0]) +STC_API intptr_t + _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done); #define _cspan_next3 _cspan_next2 #define _cspan_next4 _cspan_next2 #define _cspan_next5 _cspan_next2 @@ -270,13 +265,13 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, for (; i < rank; ++i) { off += stride[i]*a[i][0]; switch 
(a[i][1]) { - case 0: cspan_assert(c_LTu(a[i][0], shape[i])); continue; + case 0: c_assert(c_LTu(a[i][0], shape[i])); continue; case -1: end = shape[i]; break; default: end = a[i][1]; } oshape[oi] = end - a[i][0]; ostride[oi] = stride[i]; - cspan_assert(c_LTu(0, oshape[oi]) & !c_LTu(shape[i], end)); + c_assert((oshape[oi] > 0) & !c_LTu(shape[i], end)); ++oi; } *orank = oi; @@ -284,7 +279,6 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank, } #endif -#undef i_ndebug #undef i_opt #undef i_header #undef i_implement diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c index 3b1c3132..bfc0ead3 100644 --- a/misc/benchmarks/various/cspan_bench.c +++ b/misc/benchmarks/various/cspan_bench.c @@ -42,7 +42,7 @@ static void Traditional_for_loop(intptr_t n) printf("forloop : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -static void MDRanges_nested_loop(intptr_t n) +static void MDRanges_loop_over_joined(intptr_t n) { clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); @@ -52,22 +52,20 @@ static void MDRanges_nested_loop(intptr_t n) double sum = 0; for (intptr_t s = 0; s < n; ++s) { - for (int x = 0; x < r_in.shape[0]; ++x) { - for (int y = 0; y < r_in.shape[1]; ++y) { - for (int z = 0; z < r_in.shape[2]; ++z) - { - double d = *cspan_at(&r_in, x,y,z); - *cspan_at(&r_out, x,y,z) += d; - sum += d; - } - } + MD3_iter i = MD3_begin(&r_in); + MD3_iter o = MD3_begin(&r_out); + + for (; i.ref; MD3_next(&i), MD3_next(&o)) + { + *o.ref += *i.ref; + sum += *i.ref; } } t = clock() - t; - printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); + printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -static void MDRanges_loop_over_joined(intptr_t n) +static void MDRanges_nested_loop(intptr_t n) { clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); @@ -77,19 +75,22 @@ static void MDRanges_loop_over_joined(intptr_t n) double sum = 0; for (intptr_t s = 0; s < n; ++s) { - MD3_iter i 
= MD3_begin(&r_in); - MD3_iter o = MD3_begin(&r_out); - - for (; i.ref; MD3_next(&i), MD3_next(&o)) - { - *o.ref += *i.ref; - sum += *i.ref; + for (int x = 0; x < r_in.shape[0]; ++x) { + for (int y = 0; y < r_in.shape[1]; ++y) { + for (int z = 0; z < r_in.shape[2]; ++z) + { + double d = *cspan_at(&r_in, x,y,z); + *cspan_at(&r_out, x,y,z) += d; + sum += d; + } + } } } t = clock() - t; - printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); + printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } + int main(void) { intptr_t n = 100000; @@ -97,6 +98,6 @@ int main(void) Vin[i] = i + 1.23; Traditional_for_loop(n); - MDRanges_nested_loop(n); MDRanges_loop_over_joined(n); + MDRanges_nested_loop(n); } diff --git a/misc/examples/spans/matmult.c b/misc/examples/spans/matmult.c index 266fa121..ec992ff9 100644 --- a/misc/examples/spans/matmult.c +++ b/misc/examples/spans/matmult.c @@ -37,7 +37,7 @@ void base_case_matrix_product(Mat2 A, Mat2 B, OutMat C) void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C) { // Some hardware-dependent constant - enum {recursion_threshold = 16}; + enum {recursion_threshold = 32}; if (C.shape[0] <= recursion_threshold || C.shape[1] <= recursion_threshold) { base_case_matrix_product(A, B, C); } else { @@ -63,28 +63,28 @@ void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C) int main(void) { - enum {N = 10, D1 = 256, D2 = D1}; + enum {N = 10, D = 256}; Values values = {0}; - for (int i=0; i < N*D1*D2; ++i) + for (int i=0; i < N*D*D; ++i) Values_push(&values, (crandf() - 0.5)*4.0); - double out[D1*D2]; - Mat3 data = cspan_md_layout(c_ROWMAJOR, values.data, N, D1, D2); - OutMat c = cspan_md_layout(c_ROWMAJOR, out, D1, D2); + double out[D*D]; + Mat3 data = cspan_md_layout(c_ROWMAJOR, values.data, N, D, D); + OutMat c = cspan_md_layout(c_COLMAJOR, out, D, D); Mat2 a = cspan_submd3(&data, 0); - double sum = 0.0; - clock_t t = clock(); + clock_t t = clock(); for (int i=1; i<N; ++i) { Mat2 b = cspan_submd3(&data, i); 
memset(out, 0, sizeof out); recursive_matrix_product(a, b, c); //base_case_matrix_product(a, b, c); - sum += *cspan_at(&c, 0, 1); } - t = clock() - t; - printf("%.16g: %f\n", sum, (double)t*1000.0/CLOCKS_PER_SEC); + + double sum = 0.0; + c_foreach (i, Mat2, c) sum += *i.ref; + printf("sum=%.16g, %f ms\n", sum, (double)t*1000.0/CLOCKS_PER_SEC); Values_drop(&values); } diff --git a/src/libstc.c b/src/libstc.c index 462c97c4..1305abef 100644 --- a/src/libstc.c +++ b/src/libstc.c @@ -1,10 +1,9 @@ +#define STC_IMPLEMENT
#define i_import
#include "../include/stc/cregex.h" /* cstr. utf8, and cregex */
-#define i_implement
#include "../include/stc/csview.h"
-#define i_implement
#include "../include/stc/crand.h"
+#include "../include/stc/cspan.h"
#if __STDC_VERSION__ >= 201112L
-# define i_implement
# include "../include/c11/fmt.h"
#endif
|
