summaryrefslogtreecommitdiffhomepage
path: root/misc/examples/spans/matmult.c
diff options
context:
space:
mode:
author_Tradam <[email protected]>2023-09-08 01:29:47 +0000
committerGitHub <[email protected]>2023-09-08 01:29:47 +0000
commit3c76c7f3d5db3f9586a90d03f8fbb02d79de9acd (patch)
treeafbe4b540967223911f7c5de36559b82154f02f3 /misc/examples/spans/matmult.c
parent0841165881871ee01b782129be681209aeed2423 (diff)
parent1a72205fe05c2375cfd380dd8381a8460d9ed8d1 (diff)
downloadSTC-modified-modified.tar.gz
STC-modified-modified.zip
Merge branch 'stclib:master' into modifiedHEADmodified
Diffstat (limited to 'misc/examples/spans/matmult.c')
-rw-r--r--misc/examples/spans/matmult.c90
1 files changed, 90 insertions, 0 deletions
diff --git a/misc/examples/spans/matmult.c b/misc/examples/spans/matmult.c
new file mode 100644
index 00000000..ec992ff9
--- /dev/null
+++ b/misc/examples/spans/matmult.c
@@ -0,0 +1,90 @@
+// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p2642r2.html
+// C99:
+#include <stdio.h>
+#include <time.h>
+#include <stc/cspan.h>
+
+using_cspan3(Mat, double);
+typedef Mat2 OutMat;
+typedef struct { Mat2 m00, m01, m10, m11; } Partition;
+
+Partition partition(Mat2 A)
+{
+ int32_t M = A.shape[0];
+ int32_t N = A.shape[1];
+ return (Partition){
+ .m00 = cspan_slice(Mat2, &A, {0, M/2}, {0, N/2}),
+ .m01 = cspan_slice(Mat2, &A, {0, M/2}, {N/2, N}),
+ .m10 = cspan_slice(Mat2, &A, {M/2, M}, {0, N/2}),
+ .m11 = cspan_slice(Mat2, &A, {M/2, M}, {N/2, N}),
+ };
+}
+
+// Slow generic implementation
+void base_case_matrix_product(Mat2 A, Mat2 B, OutMat C)
+{
+ for (int j = 0; j < C.shape[1]; ++j) {
+ for (int i = 0; i < C.shape[0]; ++i) {
+ Mat2_value C_ij = 0;
+ for (int k = 0; k < A.shape[1]; ++k) {
+ C_ij += *cspan_at(&A, i,k) * *cspan_at(&B, k,j);
+ }
+ *cspan_at(&C, i,j) += C_ij;
+ }
+ }
+}
+
+void recursive_matrix_product(Mat2 A, Mat2 B, OutMat C)
+{
+ // Some hardware-dependent constant
+ enum {recursion_threshold = 32};
+ if (C.shape[0] <= recursion_threshold || C.shape[1] <= recursion_threshold) {
+ base_case_matrix_product(A, B, C);
+ } else {
+ Partition c = partition(C),
+ a = partition(A),
+ b = partition(B);
+ recursive_matrix_product(a.m00, b.m00, c.m00);
+ recursive_matrix_product(a.m01, b.m10, c.m00);
+ recursive_matrix_product(a.m10, b.m00, c.m10);
+ recursive_matrix_product(a.m11, b.m10, c.m10);
+ recursive_matrix_product(a.m00, b.m01, c.m01);
+ recursive_matrix_product(a.m01, b.m11, c.m01);
+ recursive_matrix_product(a.m10, b.m01, c.m11);
+ recursive_matrix_product(a.m11, b.m11, c.m11);
+ }
+}
+
+
+#define i_type Values
+#define i_val double
+#include <stc/cstack.h>
+#include <stc/crand.h>
+
+int main(void)
+{
+ enum {N = 10, D = 256};
+
+ Values values = {0};
+ for (int i=0; i < N*D*D; ++i)
+ Values_push(&values, (crandf() - 0.5)*4.0);
+
+ double out[D*D];
+ Mat3 data = cspan_md_layout(c_ROWMAJOR, values.data, N, D, D);
+ OutMat c = cspan_md_layout(c_COLMAJOR, out, D, D);
+ Mat2 a = cspan_submd3(&data, 0);
+
+ clock_t t = clock();
+ for (int i=1; i<N; ++i) {
+ Mat2 b = cspan_submd3(&data, i);
+ memset(out, 0, sizeof out);
+ recursive_matrix_product(a, b, c);
+ //base_case_matrix_product(a, b, c);
+ }
+ t = clock() - t;
+
+ double sum = 0.0;
+ c_foreach (i, Mat2, c) sum += *i.ref;
+ printf("sum=%.16g, %f ms\n", sum, (double)t*1000.0/CLOCKS_PER_SEC);
+ Values_drop(&values);
+}