Updates on benchmark.

author: Tyge Løvset <[email protected]> 2021-01-20 09:12:37 +0100
committer: Tyge Løvset <[email protected]> 2021-01-20 09:12:37 +0100
commit: 616f51309113f41418166c030111914391670264 (patch)
tree: 6844364c12864b9f6b4ece73ea9809f08ca44b84 /benchmarks
parent: 7fc666a831a617b206d58e503b5cf5c68e1a3e24 (diff)
download: STC-modified-616f51309113f41418166c030111914391670264.tar.gz STC-modified-616f51309113f41418166c030111914391670264.zip
2 files changed, 89 insertions, 83 deletions
diff --git a/benchmarks/cmap_benchmark2.cpp b/benchmarks/cmap_benchmark2.cpp
index 6eedeb9f..3eaff7a2 100644
--- a/benchmarks/cmap_benchmark2.cpp
+++ b/benchmarks/cmap_benchmark2.cpp
@@ -13,7 +13,7 @@
 #include "picobench.hpp"
 
 PICOBENCH_SUITE("Map");
-enum {N1 = 5000000, S1 = 1, MaxLoadFactor100 = 80};
+enum {N1 = 10000000, S1 = 1, MaxLoadFactor100 = 80};
 uint64_t seed = time(NULL);
 
 static inline uint32_t hash32(const void* data, size_t len) {
@@ -139,7 +139,7 @@ PICOBENCH(ins_and_erase_cmap_i).P;
 template <class MapInt>
 static void ins_and_access_i(picobench::state& s)
 {
-    uint64_t mask = (1ull << s.iter_data()) - 1;
+    uint64_t mask = (1ull << s.arg()) - 1;
     size_t result = 0;
     MapInt map;
     map.max_load_factor(MaxLoadFactor100 / 100.0);
@@ -153,7 +153,7 @@ static void ins_and_access_i(picobench::state& s)
 
 static void ins_and_access_cmap_i(picobench::state& s)
 {
-    uint64_t mask = (1ull << s.iter_data()) - 1;
+    uint64_t mask = (1ull << s.arg()) - 1;
     size_t result = 0;
     cmap_i map = cmap_inits;
     cmap_i_set_load_factors(&map, 0.0, MaxLoadFactor100 / 100.0);
@@ -166,7 +166,7 @@ static void ins_and_access_cmap_i(picobench::state& s)
     cmap_i_del(&map);
 }
 
-#define P samples(S1).iterations({N1, N1, N1, N1}).iter_data({18, 23, 25, 31})
+#define P samples(S1).iterations({N1, N1, N1, N1}).args({18, 23, 25, 31})
 PICOBENCH(ins_and_access_i<umap_i>).P.baseline();
 PICOBENCH(ins_and_access_i<bmap_i>).P;
 PICOBENCH(ins_and_access_i<fmap_i>).P;
@@ -186,7 +186,7 @@ static void randomize(char* str, size_t len) {
 template <class MapStr>
 static void ins_and_access_s(picobench::state& s)
 {
-    std::string str(s.iter_data(), 'x');
+    std::string str(s.arg(), 'x');
     size_t result = 0;
     MapStr map;
     map.max_load_factor(MaxLoadFactor100 / 100.0);
@@ -208,7 +208,7 @@ static void ins_and_access_s(picobench::state& s)
 
 static void ins_and_access_cmap_s(picobench::state& s)
 {
-    cstr str = cstr_with_size(s.iter_data(), 'x');
+    cstr str = cstr_with_size(s.arg(), 'x');
     size_t result = 0;
     cmap_s map = cmap_inits;
     cmap_s_set_load_factors(&map, 0.0, MaxLoadFactor100 / 100.0);
@@ -230,7 +230,7 @@ static void ins_and_access_cmap_s(picobench::state& s)
     cmap_s_del(&map);
 }
 
-#define P samples(S1).iterations({N1/5, N1/5, N1/5, N1/10, N1/40}).iter_data({13, 7, 8, 100, 1000})
+#define P samples(S1).iterations({N1/5, N1/5, N1/5, N1/10, N1/40}).args({13, 7, 8, 100, 1000})
 PICOBENCH(ins_and_access_s<umap_s>).P.baseline();
 PICOBENCH(ins_and_access_s<bmap_s>).P;
 PICOBENCH(ins_and_access_s<fmap_s>).P;
@@ -246,7 +246,7 @@ static void iterate_x(picobench::state& s)
 {
     MapX map;
     map.max_load_factor(MaxLoadFactor100 / 100.0);
-    uint64_t K = (1ull << s.iter_data()) - 1;
+    uint64_t K = (1ull << s.arg()) - 1;
 
     picobench::scope scope(s);
     stc64_srandom(seed);
@@ -275,7 +275,7 @@ static void iterate_cmap_x(picobench::state& s)
 {
     cmap_x map = cmap_inits;
     cmap_x_set_load_factors(&map, 0.3, MaxLoadFactor100 / 100.0);
-    uint64_t K = (1ull << s.iter_data()) - 1;
+    uint64_t K = (1ull << s.arg()) - 1;
 
     picobench::scope scope(s);
     stc64_srandom(seed);
@@ -302,7 +302,7 @@ static void iterate_cmap_x(picobench::state& s)
 }
 
 
-#define P samples(S1).iterations({N1/20}).iter_data({12})
+#define P samples(S1).iterations({N1/20}).args({12})
 PICOBENCH(iterate_x<umap_x>).P.baseline();
 PICOBENCH(iterate_x<bmap_x>).P;
 PICOBENCH(iterate_x<fmap_x>).P;
diff --git a/benchmarks/picobench.hpp b/benchmarks/picobench.hpp
index 221a9609..271a4412 100644
--- a/benchmarks/picobench.hpp
+++ b/benchmarks/picobench.hpp
@@ -1,4 +1,4 @@
-// picobench v2
+// picobench v2.00
 // https://github.com/iboB/picobench
 //
 // A micro microbenchmarking library in a single header file
@@ -28,7 +28,7 @@
 //
 //                  VERSION HISTORY
 //
-//  2.02 (2021-01-09) * Added user data per iter, wider text output.
+//  2.xx (2021-01-19) * Added user data per iter, changed text output.
 //  2.01 (2019-03-03) * Fixed android build when binding to a signle core
 //  2.00 (2018-10-30) * Breaking change! runner::run_benchmarks doesn't return
 //                      a report anymore. The report is generated by
@@ -113,8 +113,8 @@
 #   include <functional>
 #endif
 
-#define PICOBENCH_VERSION 2.02
-#define PICOBENCH_VERSION_STR "2.02"
+#define PICOBENCH_VERSION 2.01
+#define PICOBENCH_VERSION_STR "2.01"
 
 #if defined(PICOBENCH_DEBUG)
 #   include <cassert>
@@ -149,15 +149,15 @@ struct high_res_clock
 using high_res_clock = std::chrono::high_resolution_clock;
 #endif
 
-using result_t = int64_t;
-using udata_t = uint64_t;
+using result_t = intptr_t;
+using udata_t = uintptr_t;
 
 class state
 {
 public:
-    explicit state(size_t num_iterations, udata_t user_data = 0, udata_t iter_data = 0)
+    explicit state(size_t num_iterations, udata_t user_data = 0, udata_t arg = 0)
         : _user_data(user_data)
-        , _iter_data(iter_data)
+        , _arg(arg)
         , _iterations(num_iterations)
     {
         I_PICOBENCH_ASSERT(_iterations > 0);
@@ -165,16 +165,16 @@ public:
 
     size_t iterations() const { return _iterations; }
 
-    uint64_t duration_ns() const { return _duration_ns; }
-    void add_custom_duration(uint64_t duration_ns) { _duration_ns += duration_ns; }
+    int64_t duration_ns() const { return _duration_ns; }
+    void add_custom_duration(int64_t duration_ns) { _duration_ns += duration_ns; }
 
     udata_t user_data() const { return _user_data; }
-    udata_t iter_data() const { return _iter_data; }
+    udata_t arg() const { return _arg; }
 
     // optionally set result of benchmark
     // this can be used as a value sync to prevent optimizations
     // or a way to check whether benchmarks produce the same results
-    void set_result(udata_t data) { _result = data; }
+    void set_result(uintptr_t data) { _result = data; }
     result_t result() const { return _result; }
 
     PICOBENCH_INLINE
@@ -225,14 +225,14 @@ public:
         }
 
         PICOBENCH_INLINE
-        int operator*() const
+        size_t operator*() const
         {
             return _counter;
         }
 
     private:
-        int _counter;
-        const int _lim;
+        size_t _counter;
+        const size_t _lim;
         state* _state;
     };
 
@@ -251,9 +251,9 @@ public:
 
 private:
     high_res_clock::time_point _start;
-    uint64_t _duration_ns = 0;
+    int64_t _duration_ns = 0;
     udata_t _user_data;
-    udata_t _iter_data;
+    udata_t _arg;
     size_t _iterations;
     result_t _result = 0;
 };
@@ -294,7 +294,7 @@ public:
     benchmark& label(const char* label) { _name = label; return *this; }
     benchmark& baseline(bool b = true) { _baseline = b; return *this; }
     benchmark& user_data(udata_t data) { _user_data = data; return *this; }
-    benchmark& iter_data(std::vector<udata_t> data) { _iter_data = std::move(data); return *this; }
+    benchmark& args(std::vector<udata_t> data) { _args = std::move(data); return *this; }
 
 protected:
     friend class runner;
@@ -305,9 +305,9 @@ protected:
     const benchmark_proc _proc;
     bool _baseline = false;
 
-    udata_t _user_data;
-    std::vector<udata_t> _iter_data;
+    udata_t _user_data = 0;
     std::vector<size_t> _state_iterations;
+    std::vector<udata_t> _args;
     int _samples = 0;
 };
 
@@ -383,10 +383,10 @@ public:
     struct benchmark_problem_space
     {
         size_t dimension; // number of iterations for the problem space
-        udata_t user_data; // additional user data.
-        udata_t iter_data; // additional user data.
+        udata_t user_data;
+        udata_t arg;
         int samples; // number of samples taken
-        uint64_t total_time_ns; // fastest sample!!!
+        int64_t total_time_ns; // fastest sample!!!
         result_t result; // result of fastest sample
     };
     struct benchmark
@@ -450,7 +450,7 @@ public:
             }
             line(out, width);
             out <<
-                "  Name (* = baseline)        |Iterations | User data |  Total ms |     ns/op |Baseline |  Ops/second\n";
+                "  Name (* = baseline)        |Iterations |       Arg |Baseline |     ns/op |  Total ms |  Ops/second\n";
             line(out, width);
 
             auto problem_space_view = get_problem_space_view(suite);
@@ -472,8 +472,24 @@ public:
 
                     out << " |"
                         << setw(10) << ps.first.first << " |"
-                        << setw(10) << bm.iter_data << " |"
-                        << setw(10) << fixed << setprecision(2) << double(bm.total_time_ns) / 1000000.0 << " |";
+                        << setw(10) << bm.arg << " |";
+
+                    if (bm.is_baseline) //(baseline == &bm)
+                    {
+                        baseline = &bm;
+                        out << "       - |";
+                    }
+                    else if (baseline)
+                    {
+                        out << setw(8) << fixed << setprecision(3)
+                            << double(bm.total_time_ns) / double(baseline->total_time_ns) << " |";
+                    }
+                    else
+                    {
+                        // no baseline to compare to
+                        out << "       ? |";
+                    }
+
                     auto ns_op = (bm.total_time_ns / ps.first.first);
                     if (ns_op > 99999999)
                     {
@@ -489,23 +505,8 @@ public:
                     {
                         out << setw(10) << ns_op;
                     }
-
                     out << " |";
-
-                    if (baseline == &bm)
-                    {
-                        out << "       - |";
-                    }
-                    else if (baseline)
-                    {
-                        out << setw(8) << fixed << setprecision(3)
-                            << double(bm.total_time_ns) / double(baseline->total_time_ns) << " |";
-                    }
-                    else
-                    {
-                        // no baseline to compare to
-                        out << "       ? |";
-                    }
+                    out << setw(10) << fixed << setprecision(2) << double(bm.total_time_ns) / 1000000.0 << " |";
 
                     auto ops_per_sec = ps.first.first * (1000000000.0 / double(bm.total_time_ns));
                     out << setw(12) << fixed << setprecision(1) << ops_per_sec << "\n";
@@ -529,7 +530,7 @@ public:
             line(out, width);
 
             out <<
-                "  Name (* = baseline)        |     ns/op | Baseline |  Ops/second\n";
+                "  Name (* = baseline)        | Baseline |     ns/op |    Total ms\n"; // |  Ops/second\n";
 
             line(out, width);
 
@@ -550,11 +551,11 @@ public:
                 baseline_total_time += d.total_time_ns;
                 baseline_total_iterations += d.dimension;
             }
-            uint64_t baseline_ns_per_op = baseline_total_time / baseline_total_iterations;
 
             for (auto& bm : suite.benchmarks)
             {
-                out << (bm.is_baseline ? "* " : "  ") << left << setw(26) << bm.name << right;
+                out << (bm.is_baseline ? "* " : "  ") << left << setw(26) << bm.name << right
+                    << " |";
 
                 uint64_t total_time = 0;
                 size_t total_iterations = 0;
@@ -565,22 +566,24 @@ public:
                 }
                 uint64_t ns_per_op = total_time / total_iterations;
 
-                out << " |" << setw(10) << ns_per_op << " |";
-
                 if (bm.is_baseline)
                 {
-                    out << "        - |";
+                    out << "        -";
+                    baseline = &bm;
                     baseline_total_time = total_time;
                     baseline_total_iterations = total_iterations;
-                    baseline_ns_per_op = ns_per_op;
                 }
                 else
                 {
                     out << setw(9) << fixed << setprecision(3)
-                        << double(ns_per_op) / baseline_ns_per_op << " |";
+                        << double(total_time) / baseline_total_time;
                 }
-                auto ops_per_sec = total_iterations * (1000000000.0 / total_time);
-                out << setw(12) << fixed << setprecision(1) << ops_per_sec << "\n";
+
+                out << " |" << setw(10) << ns_per_op << " |";
+                out << setw(12) << fixed << setprecision(2) << double(total_time) / 1000000.0 << "\n";
+
+                //auto ops_per_sec = total_iterations * (1000000000.0 / total_time);
+                //out << setw(12) << fixed << setprecision(1) << ops_per_sec << "\n";
             }
 
             line(out, width);
@@ -594,7 +597,7 @@ public:
 
         for (auto& suite : suites)
         {
-            out << "Suite, Baseline, Benchmark, Iterations, User data, Total ms, ns/op, Ratio, Ops/second\n";
+            out << "Suite, Baseline, Benchmark, Iterations, Arg, Ratio, Total ms, ns/op, Ops/second\n";
 
             auto problem_space_view = get_problem_space_view(suite);
             for (auto& ps : problem_space_view)
@@ -615,11 +618,8 @@ public:
                     out << sep << (bm.is_baseline ? "true" : "false");
                     out << sep << '"' << bm.name << '"';
                     out << sep << ps.first.first
-                        << sep << bm.iter_data
-                        << sep << fixed << setprecision(3) << bm.total_time_ns / 1000000.0;
+                        << sep << bm.arg << sep;
 
-                    auto ns_op = (bm.total_time_ns / ps.first.first);
-                    out << sep << ns_op << sep;
                     if (baseline == &bm)
                     {
                         out << 1.0;
@@ -633,6 +633,11 @@ public:
                         out << -1.0; // no baseline to compare to
                     }
 
+                    out << sep << fixed << setprecision(3) << bm.total_time_ns / 1000000.0;
+
+                    auto ns_op = (bm.total_time_ns / ps.first.first);
+                    out << sep << ns_op;
+
                     auto ops_per_sec = ps.first.first * (1000000000.0 / bm.total_time_ns);
                     out << sep << fixed << setprecision(1) << ops_per_sec << "\n";
                 }
@@ -641,16 +646,16 @@ public:
     }
 
 
+
     struct problem_space_benchmark
     {
         const char* name;
         bool is_baseline;
         udata_t user_data;
-        udata_t iter_data;
-        uint64_t total_time_ns; // fastest sample!!!
+        udata_t arg;
+        int64_t total_time_ns; // fastest sample!!!
         result_t result; // result of fastest sample
     };
-    // Use a pair as key in a map to sort views along user_data when iterations are equal.
     using problem_space_view_map = std::map<std::pair<size_t, udata_t>,
                                             std::vector<problem_space_benchmark>>;
     static problem_space_view_map get_problem_space_view(const suite& s)
@@ -660,8 +665,8 @@ public:
         {
             for (auto& d : bm.data)
             {
-                auto& pvbs = res[{d.dimension, d.iter_data}];
-                pvbs.push_back({ bm.name, bm.is_baseline, d.user_data, d.iter_data, d.total_time_ns, d.result });
+                auto& pvbs = res[{d.dimension, d.arg}];
+                pvbs.push_back({ bm.name, bm.is_baseline, d.user_data, d.arg, d.total_time_ns, d.result });
             }
         }
         return res;
@@ -669,7 +674,7 @@ public:
 
 private:
 
-    static void line(std::ostream& out, int width)
+    static void line(std::ostream& out, int width = 79)
     {
         for (int i = 0; i < width; ++i) out.put('=');
         out.put('\n');
@@ -914,8 +919,8 @@ public:
             if (b->_state_iterations.empty())
                 b->_state_iterations = _default_state_iterations;
 
-            udata_t idata = b->_iter_data.empty() ? udata_t() : b->_iter_data.back();
-            b->_iter_data.resize(b->_state_iterations.size(), idata);
+            udata_t arg = b->_args.empty() ? udata_t() : b->_args.back();
+            b->_args.resize(b->_state_iterations.size(), arg);
 
             if (b->_samples == 0)
                 b->_samples = _default_samples;
@@ -929,7 +934,7 @@ public:
                 {
                     auto index = rnd() % (b->_states.size() + 1);
                     auto pos = b->_states.begin() + long(index);
-                    b->_states.emplace(pos, b->_state_iterations[iter], b->_user_data, b->_iter_data[iter]);
+                    b->_states.emplace(pos, b->_state_iterations[iter], b->_user_data, b->_args[iter]);
                 }
             }
 
@@ -965,8 +970,9 @@ public:
         {
             auto i = benchmarks.begin() + long(rnd() % benchmarks.size());
             auto& b = *i;
-            std::cerr << "run: " << b->_name << ": " << b->_istate->iterations()
-                                             << " (" << b->_istate->iter_data() << ")\n";
+            //std::cerr << "run: " << b->_name << ": " << b->_istate->iterations()
+            //                                 << " (" << b->_istate->arg() << ")\n";
+            std::cerr << '.';
             b->_proc(*b->_istate);
 
             ++b->_istate;
@@ -976,6 +982,7 @@ public:
                 benchmarks.erase(i);
             }
         }
+        std::cerr << '\n';
     }
 
     // function to compare results
@@ -1003,14 +1010,14 @@ public:
                 rpt_benchmark->data.reserve(b->_state_iterations.size());
                 for (size_t i = 0; i < b->_state_iterations.size(); ++i)
                 {
-                    rpt_benchmark->data.push_back({ b->_state_iterations[i], b->_user_data, b->_iter_data[i], 0, 0ll });
+                    rpt_benchmark->data.push_back({ b->_state_iterations[i], b->_user_data, b->_args[i], 0, 0ll });
                 }
 
                 for (auto& state : b->_states)
                 {
                     for (auto& d : rpt_benchmark->data)
                     {
-                        if (state.iterations() == d.dimension && state.iter_data() == d.iter_data)
+                        if (state.iterations() == d.dimension && state.arg() == d.arg)
                         {
                             if (d.total_time_ns == 0 || d.total_time_ns > state.duration_ns())
                             {
@@ -1104,7 +1111,7 @@ public:
         return _default_samples;
     }
 
-    void add_cmd_opt(const char* cmd, const char* arg_desc, const char* cmd_desc, bool(*handler)(udata_t, const char*), udata_t user_data = udata_t())
+    void add_cmd_opt(const char* cmd, const char* arg_desc, const char* cmd_desc, bool(*handler)(uintptr_t, const char*), udata_t user_data = 0)
     {
         cmd_line_option opt;
         opt.cmd = picostring(cmd);
@@ -1340,14 +1347,13 @@ private:
         _output_format = report_output_format::all;
         return true;
     }
-
     bool cmd_out_fmt(const char* line)
     {
         if (strcmp(line, "txt") == 0)
         {
             _output_format = report_output_format::text;
         }
-        else if (strcmp(line, "lst") == 0)
+        else if (strcmp(line, "con") == 0)
         {
             _output_format = report_output_format::concise_text;
         }
author	Tyge Løvset <[email protected]>	2021-01-20 09:12:37 +0100
committer	Tyge Løvset <[email protected]>	2021-01-20 09:12:37 +0100
commit	616f51309113f41418166c030111914391670264 (patch)
tree	6844364c12864b9f6b4ece73ea9809f08ca44b84 /benchmarks
parent	7fc666a831a617b206d58e503b5cf5c68e1a3e24 (diff)
download	STC-modified-616f51309113f41418166c030111914391670264.tar.gz STC-modified-616f51309113f41418166c030111914391670264.zip