summaryrefslogtreecommitdiffhomepage
path: root/misc/benchmarks/various/cspan_bench.c
blob: b5caca83e890c4c3c4aa3629078ec902c84db15c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#define NDEBUG
#include <stc/cspan.h>
#include <stdio.h>
#include <time.h>

using_cspan3(MD, double);

// define the dimensions of a 3d-array
enum {
    nx = 64,
    ny = 64,
    nz = 64
};
int lx = 15, ly = 10, lz = 5;
int hx = 30, hy = 15, hz = 15;

// define the contents of two nx x ny x nz arrays in and out
double Vout[nx * ny * nz];
double Vin[nx * ny * nz]; //, 1.23;

// define some slice indices for each dimension

static void MDRanges_setup(intptr_t n)
{
    double sum = 0;
    clock_t t = clock();

    for (intptr_t s = 0; s < n; ++s)
    {
        MD3 r_in = cspan_md(Vin, nx, ny, nz);
        MD3 r_out = cspan_md(Vout, nx, ny, nz);

        r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
        r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});
        MD3_iter i = MD3_begin(&r_in); // can be iterated "flat".
        MD3_iter o = MD3_begin(&r_out);
        sum += Vin[s % nx];
    }
    t = clock() - t;
    printf("setup: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
}

static void TraditionalForLoop(intptr_t n)
{
    clock_t t = clock();
    double sum = 0;

    for (int s = 0; s < n; ++s) {
        for (int x = lx; x < hx; ++x) {
            for (int y = ly; y < hy; ++y) {
                for (int z = lz; z < hz; ++z) {
                    double d = Vin[nz*(ny*x + y) + z];
                    Vout[nz*(ny*x + y) + z] += d;
                    sum += d;
                }
            }
        }
    }
    t = clock() - t;
    printf("forloop: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
}

static void MDRanges_nested_loop(intptr_t n)
{
    clock_t t = clock();
    MD3 r_in = cspan_md(Vin, nx, ny, nz);
    MD3 r_out = cspan_md(Vout, nx, ny, nz);
    r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
    r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});

    // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; }
    double sum = 0;

    for (intptr_t s = 0; s < n; ++s) {
        for (int x = 0; x < r_in.shape[0]; ++x) {
            for (int y = 0; y < r_in.shape[1]; ++y) {
                for (int z = 0; z < r_in.shape[2]; ++z)
                {
                    double d = *cspan_at(&r_in, x, y, z);
                    *cspan_at(&r_out, x, y, z) += d;
                    sum += d;
                }
            }
        }
    }
    t = clock() - t;
    printf("nested: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
}

static void MDRanges_loop_over_joined(intptr_t n)
{
    MD3 r_in = cspan_md(Vin, nx, ny, nz);
    MD3 r_out = cspan_md(Vout, nx, ny, nz);
    r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz});
    r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz});

    // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; }
    double sum = 0;
    clock_t t = clock();

    for (intptr_t s = 0; s < n; ++s) {
        MD3_iter i = MD3_begin(&r_in);
        MD3_iter o = MD3_begin(&r_out);

        for (; i.ref; MD3_next(&i), MD3_next(&o))
        {
            *o.ref += *i.ref;
            sum += *i.ref;
        }
    }
    t = clock() - t;
    printf("joined: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum);
}

int main(void)
{
    intptr_t n = 100000;
    for (int i = 0; i < nx * ny * nz; ++i)
        Vin[i] = i + 1.23;

    MDRanges_setup(n);
    TraditionalForLoop(n);
    MDRanges_nested_loop(n);
    MDRanges_loop_over_joined(n);
}