summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Tyge Løvset <[email protected]> 2023-08-30 17:16:03 +0200
committer: Tyge Løvset <[email protected]> 2023-08-30 17:16:03 +0200
commit: 80cd2adc2cd008aeee9f799f2dd5042f42b4ec82 (patch)
tree: be2da7806abb6fe3e4fe33c1d2ae54e108d8efe5
parent: 263dbab626e6a99f7959a4c1716f0496906ee638 (diff)
download: STC-modified-80cd2adc2cd008aeee9f799f2dd5042f42b4ec82.tar.gz
download: STC-modified-80cd2adc2cd008aeee9f799f2dd5042f42b4ec82.zip
Smaller updates.
-rw-r--r-- include/stc/cspan.h 20
-rw-r--r-- misc/benchmarks/various/cspan_bench.c 43
-rw-r--r-- misc/examples/spans/matmult.c 22
-rw-r--r-- src/libstc.c 5
4 files changed, 42 insertions, 48 deletions
diff --git a/include/stc/cspan.h b/include/stc/cspan.h
index 3f2b300f..e72bb97a 100644
--- a/include/stc/cspan.h
+++ b/include/stc/cspan.h
@@ -63,12 +63,6 @@ int demo2() {
#include "priv/linkage.h"
#include "ccommon.h"
-#ifdef i_ndebug
- #define cspan_assert(x) ((void)0)
-#else
- #define cspan_assert(x) c_assert(x)
-#endif
-
#define using_cspan(...) c_MACRO_OVERLOAD(using_cspan, __VA_ARGS__)
#define using_cspan_2(Self, T) \
using_cspan_3(Self, T, 1); \
@@ -91,7 +85,7 @@ int demo2() {
const int rank, const int32_t a[][2]) { \
Self s; int outrank; \
s.data = d + _cspan_slice(s.shape, s.stride.d, &outrank, shape, stri, rank, a); \
- cspan_assert(outrank == RANK); \
+ c_assert(outrank == RANK); \
return s; \
} \
STC_INLINE Self##_iter Self##_begin(const Self* self) { \
@@ -193,7 +187,7 @@ typedef enum {c_ROWMAJOR, c_COLMAJOR} cspan_layout;
STC_INLINE intptr_t _cspan_size(const int32_t shape[], int rank) {
intptr_t sz = shape[0];
- while (--rank > 0) sz *= shape[rank];
+ while (--rank) sz *= shape[rank];
return sz;
}
@@ -207,14 +201,15 @@ STC_INLINE void _cspan_transpose(int32_t shape[], int32_t stride[], int rank) {
STC_INLINE intptr_t _cspan_index(int rank, const int32_t shape[], const int32_t stride[], const int32_t a[]) {
intptr_t off = 0;
while (rank--) {
- cspan_assert(c_LTu(a[rank], shape[rank]));
+ c_assert(c_LTu(a[rank], shape[rank]));
off += stride[rank]*a[rank];
}
return off;
}
-STC_API intptr_t _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done);
#define _cspan_next1(pos, shape, stride, rank, done) (*done = ++pos[0]==shape[0], stride[0])
+STC_API intptr_t
+ _cspan_next2(int32_t pos[], const int32_t shape[], const int32_t stride[], int rank, int* done);
#define _cspan_next3 _cspan_next2
#define _cspan_next4 _cspan_next2
#define _cspan_next5 _cspan_next2
@@ -270,13 +265,13 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank,
for (; i < rank; ++i) {
off += stride[i]*a[i][0];
switch (a[i][1]) {
- case 0: cspan_assert(c_LTu(a[i][0], shape[i])); continue;
+ case 0: c_assert(c_LTu(a[i][0], shape[i])); continue;
case -1: end = shape[i]; break;
default: end = a[i][1];
}
oshape[oi] = end - a[i][0];
ostride[oi] = stride[i];
- cspan_assert(c_LTu(0, oshape[oi]) & !c_LTu(shape[i], end));
+ c_assert((oshape[oi] > 0) & !c_LTu(shape[i], end));
++oi;
}
*orank = oi;
@@ -284,7 +279,6 @@ STC_DEF intptr_t _cspan_slice(int32_t oshape[], int32_t ostride[], int* orank,
}
#endif
-#undef i_ndebug
#undef i_opt
#undef i_header
#undef i_implement
diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c
index 3b1c3132..bfc0ead3 100644
--- a/misc/benchmarks/various/cspan_bench.c
+++ b/misc/benchmarks/various/cspan_bench.c
@@ -42,7 +42,7 @@ static void Traditional_for_loop(intptr_t n)
printf("forloop : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
}
-static void MDRanges_nested_loop(intptr_t n)
+static void MDRanges_loop_over_joined(intptr_t n)
{
clock_t t = clock();
MD3 r_in = cspan_md(Vin, nx, ny, nz);
@@ -52,22 +52,20 @@ static void MDRanges_nested_loop(intptr_t n)
double sum = 0;
for (intptr_t s = 0; s < n; ++s) {
- for (int x = 0; x < r_in.shape[0]; ++x) {
- for (int y = 0; y < r_in.shape[1]; ++y) {
- for (int z = 0; z < r_in.shape[2]; ++z)
- {
- double d = *cspan_at(&r_in, x,y,z);
- *cspan_at(&r_out, x,y,z) += d;
- sum += d;
- }
- }
+ MD3_iter i = MD3_begin(&r_in);
+ MD3_iter o = MD3_begin(&r_out);
+
+ for (; i.ref; MD3_next(&i), MD3_next(&o))
+ {
+ *o.ref += *i.ref;
+ sum += *i.ref;
}
}
t = clock() - t;
- printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
+ printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
}
-static void MDRanges_loop_over_joined(intptr_t n)
+static void MDRanges_nested_loop(intptr_t n)
{
clock_t t = clock();
MD3 r_in = cspan_md(Vin, nx, ny, nz);
@@ -77,19 +75,22 @@ static void MDRanges_loop_over_joined(intptr_t n)
double sum = 0;
for (intptr_t s = 0; s < n; ++s) {
- MD3_iter i = MD3_begin(&r_in);
- MD3_iter o = MD3_begin(&r_out);
-
- for (; i.ref; MD3_next(&i), MD3_next(&o))
- {
- *o.ref += *i.ref;
- sum += *i.ref;
+ for (int x = 0; x < r_in.shape[0]; ++x) {
+ for (int y = 0; y < r_in.shape[1]; ++y) {
+ for (int z = 0; z < r_in.shape[2]; ++z)
+ {
+ double d = *cspan_at(&r_in, x,y,z);
+ *cspan_at(&r_out, x,y,z) += d;
+ sum += d;
+ }
+ }
}
}
t = clock() - t;
- printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
+ printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum);
}
+
int main(void)
{
intptr_t n = 100000;
@@ -97,6 +98,6 @@ int main(void)
Vin[i] = i + 1.23;
Traditional_for_loop(n);
- MDRanges_nested_loop(n);
MDRanges_loop_over_joined(n);
+ MDRanges_nested_loop(n);
}
diff --git a/misc/examples/spans/matmult.c b/misc/examples/spans/matmult.c
index 266fa121..ec992ff9 100644
--- a/misc/examples/spans/matmult.c
+++ b/misc/examples/spans/matmult.c
@@ -37,7 +37,7 @@ void base_case_matrix_product(Mat2 A, Mat2 B, OutMat C)
void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C)
{
// Some hardware-dependent constant
- enum {recursion_threshold = 16};
+ enum {recursion_threshold = 32};
if (C.shape[0] <= recursion_threshold || C.shape[1] <= recursion_threshold) {
base_case_matrix_product(A, B, C);
} else {
@@ -63,28 +63,28 @@ void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C)
int main(void)
{
- enum {N = 10, D1 = 256, D2 = D1};
+ enum {N = 10, D = 256};
Values values = {0};
- for (int i=0; i < N*D1*D2; ++i)
+ for (int i=0; i < N*D*D; ++i)
Values_push(&values, (crandf() - 0.5)*4.0);
- double out[D1*D2];
- Mat3 data = cspan_md_layout(c_ROWMAJOR, values.data, N, D1, D2);
- OutMat c = cspan_md_layout(c_ROWMAJOR, out, D1, D2);
+ double out[D*D];
+ Mat3 data = cspan_md_layout(c_ROWMAJOR, values.data, N, D, D);
+ OutMat c = cspan_md_layout(c_COLMAJOR, out, D, D);
Mat2 a = cspan_submd3(&data, 0);
- double sum = 0.0;
- clock_t t = clock();
+ clock_t t = clock();
for (int i=1; i<N; ++i) {
Mat2 b = cspan_submd3(&data, i);
memset(out, 0, sizeof out);
recursive_matrix_product(a, b, c);
//base_case_matrix_product(a, b, c);
- sum += *cspan_at(&c, 0, 1);
}
-
t = clock() - t;
- printf("%.16g: %f\n", sum, (double)t*1000.0/CLOCKS_PER_SEC);
+
+ double sum = 0.0;
+ c_foreach (i, Mat2, c) sum += *i.ref;
+ printf("sum=%.16g, %f ms\n", sum, (double)t*1000.0/CLOCKS_PER_SEC);
Values_drop(&values);
}
diff --git a/src/libstc.c b/src/libstc.c
index 462c97c4..1305abef 100644
--- a/src/libstc.c
+++ b/src/libstc.c
@@ -1,10 +1,9 @@
+#define STC_IMPLEMENT
#define i_import
#include "../include/stc/cregex.h" /* cstr. utf8, and cregex */
-#define i_implement
#include "../include/stc/csview.h"
-#define i_implement
#include "../include/stc/crand.h"
+#include "../include/stc/cspan.h"
#if __STDC_VERSION__ >= 201112L
-# define i_implement
# include "../include/c11/fmt.h"
#endif