summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorTyge Løvset <[email protected]>2021-01-11 22:15:24 +0100
committerTyge Løvset <[email protected]>2021-01-11 22:15:24 +0100
commita43d9beef9e02c5fee177089219cdb34722cde74 (patch)
tree6a0408061d10539496e00d683dff5051840ab1dd
parentaa235b0080c563786e46fff1961277e2f84b379c (diff)
downloadSTC-modified-a43d9beef9e02c5fee177089219cdb34722cde74.tar.gz
STC-modified-a43d9beef9e02c5fee177089219cdb34722cde74.zip
Added output all in picobenchmark + allow to have multiple baselines in one suite.
Updated latest robin_hood.hpp
-rw-r--r--benchmarks/cmap_benchmark2.cpp163
-rw-r--r--benchmarks/others/robin_hood.hpp263
-rw-r--r--benchmarks/picobench.hpp151
3 files changed, 357 insertions, 220 deletions
diff --git a/benchmarks/cmap_benchmark2.cpp b/benchmarks/cmap_benchmark2.cpp
index 7904effa..39dd61b8 100644
--- a/benchmarks/cmap_benchmark2.cpp
+++ b/benchmarks/cmap_benchmark2.cpp
@@ -13,39 +13,44 @@
#include "picobench.hpp"
PICOBENCH_SUITE("Map");
-enum {N1 = 5000000};
-uint64_t seed = 123456;
+enum {N1 = 5000000, S1 = 1, MaxLoadFactor100 = 80};
+uint64_t seed = time(NULL);
-static inline uint32_t fibonacci_hash(const void* data, size_t len) {
- return (uint32_t) (((*(const uint32_t *) data) * 11400714819323198485llu) >> 24);
+static inline uint32_t hash32(const void* data, size_t len) {
+ return (uint32_t) ((*(const uint32_t *) data) * 2654435769u);
+}
+static inline uint32_t hash64(const void* data, size_t len) {
+ return (uint32_t) (((*(const uint64_t *) data) * 11400714819323198485ull) >> 24);
}
template <class K, class V> using umap = std::unordered_map<K, V>;
template <class K, class V> using bmap = ska::bytell_hash_map<K, V>;
template <class K, class V> using fmap = ska::flat_hash_map<K, V>;
template <class K, class V> using hmap = tsl::hopscotch_map<K, V>;
template <class K, class V> using smap = spp::sparse_hash_map<K, V>;
-template <class K, class V> using rmap = robin_hood::unordered_flat_map<K, V>;
-
-using_cmap(i, int, int, c_default_del, c_default_clone, c_default_equals, fibonacci_hash);
+template <class K, class V> using rmap = robin_hood::unordered_flat_map<K, V, robin_hood::hash<K>,
+ std::equal_to<K>, MaxLoadFactor100>;
+#define DEFMAP(map, ...) \
+ using u##map = umap __VA_ARGS__; \
+ using b##map = bmap __VA_ARGS__; \
+ using f##map = fmap __VA_ARGS__; \
+ using h##map = hmap __VA_ARGS__; \
+ using s##map = smap __VA_ARGS__; \
+ using r##map = rmap __VA_ARGS__
+
+DEFMAP(map_i, <int, int>);
+DEFMAP(map_x, <uint64_t, uint64_t>);
+DEFMAP(map_s, <std::string, std::string>);
+
+using_cmap(i, int, int, c_default_del, c_default_clone, c_default_equals, hash32);
+using_cmap(x, uint64_t, uint64_t, c_default_del, c_default_clone, c_default_equals, hash64);
using_cmap_strkey(s, cstr, cstr_del, cstr_clone);
-using umap_i = umap<int, int>;
-using umap_s = umap<std::string, std::string>;
-using bmap_i = bmap<int, int>;
-using bmap_s = bmap<std::string, std::string>;
-using fmap_i = fmap<int, int>;
-using fmap_s = fmap<std::string, std::string>;
-using hmap_i = hmap<int, int>;
-using hmap_s = hmap<std::string, std::string>;
-using smap_i = smap<int, int>;
-using smap_s = smap<std::string, std::string>;
-using rmap_i = rmap<int, int>;
-using rmap_s = rmap<std::string, std::string>;
template <class MapInt>
-static void ctor_and_ins_one(picobench::state& s)
+static void ctor_and_ins_one_i(picobench::state& s)
{
- picobench::scope scope(s);
size_t result = 0;
+
+ picobench::scope scope(s);
c_forrange (n, s.iterations()) {
MapInt map;
map[n];
@@ -54,10 +59,11 @@ static void ctor_and_ins_one(picobench::state& s)
s.set_result(result);
}
-static void ctor_and_ins_one_cmap_i_(picobench::state& s)
+static void ctor_and_ins_one_cmap_i(picobench::state& s)
{
- picobench::scope scope(s);
size_t result = 0;
+
+ picobench::scope scope(s);
c_forrange (n, s.iterations()) {
cmap_i map = cmap_inits;
cmap_i_emplace(&map, n, 0);
@@ -67,24 +73,26 @@ static void ctor_and_ins_one_cmap_i_(picobench::state& s)
s.set_result(result);
}
-#define P samples(2).iterations({N1})
-PICOBENCH(ctor_and_ins_one<umap_i>).P.baseline();
-PICOBENCH(ctor_and_ins_one<bmap_i>).P;
-PICOBENCH(ctor_and_ins_one<fmap_i>).P;
-PICOBENCH(ctor_and_ins_one<hmap_i>).P;
-PICOBENCH(ctor_and_ins_one<smap_i>).P;
-PICOBENCH(ctor_and_ins_one<rmap_i>).P;
-PICOBENCH(ctor_and_ins_one_cmap_i_).P;
+#define P samples(S1).iterations({N1})
+PICOBENCH(ctor_and_ins_one_i<umap_i>).P.baseline();
+PICOBENCH(ctor_and_ins_one_i<bmap_i>).P;
+PICOBENCH(ctor_and_ins_one_i<fmap_i>).P;
+PICOBENCH(ctor_and_ins_one_i<hmap_i>).P;
+PICOBENCH(ctor_and_ins_one_i<smap_i>).P;
+PICOBENCH(ctor_and_ins_one_i<rmap_i>).P;
+PICOBENCH(ctor_and_ins_one_cmap_i).P;
#undef P
template <class MapInt>
-static void ins_and_erase(picobench::state& s)
+static void ins_and_erase_i(picobench::state& s)
{
- stc64_srandom(seed);
- picobench::scope scope(s);
size_t result = 0;
MapInt map;
+ map.max_load_factor(MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (s.iterations())
map[stc64_random()];
map.clear();
@@ -97,12 +105,13 @@ static void ins_and_erase(picobench::state& s)
s.set_result(map.size());
}
-static void ins_and_erase_cmap_i_(picobench::state& s)
+static void ins_and_erase_cmap_i(picobench::state& s)
{
+ cmap_i map = cmap_inits;
+ cmap_i_set_load_factors(&map, 0.0, MaxLoadFactor100 / 100.0);
stc64_srandom(seed);
+
picobench::scope scope(s);
- cmap_i map = cmap_inits;
- cmap_i_set_load_factors(&map, 0.0, 0.80);
c_forrange (s.iterations())
cmap_i_emplace(&map, stc64_random(), 0);
cmap_i_clear(&map);
@@ -116,52 +125,55 @@ static void ins_and_erase_cmap_i_(picobench::state& s)
s.set_result(cmap_i_size(map));
}
-#define P samples(2).iterations({N1/4})
-PICOBENCH(ins_and_erase<umap_i>).P.baseline();
-PICOBENCH(ins_and_erase<bmap_i>).P;
-PICOBENCH(ins_and_erase<fmap_i>).P;
-PICOBENCH(ins_and_erase<hmap_i>).P;
-PICOBENCH(ins_and_erase<smap_i>).P;
-PICOBENCH(ins_and_erase<rmap_i>).P;
-PICOBENCH(ins_and_erase_cmap_i_).P;
+#define P samples(S1).iterations({N1/4})
+PICOBENCH(ins_and_erase_i<umap_i>).P.baseline();
+PICOBENCH(ins_and_erase_i<bmap_i>).P;
+PICOBENCH(ins_and_erase_i<fmap_i>).P;
+PICOBENCH(ins_and_erase_i<hmap_i>).P;
+PICOBENCH(ins_and_erase_i<smap_i>).P;
+PICOBENCH(ins_and_erase_i<rmap_i>).P;
+PICOBENCH(ins_and_erase_cmap_i).P;
#undef P
template <class MapInt>
-static void ins_and_access(picobench::state& s)
+static void ins_and_access_i(picobench::state& s)
{
- stc64_srandom(seed);
- uint64_t filter = (1ull << s.user_data()) - 1;
- picobench::scope scope(s);
+ uint64_t mask = (1ull << s.user_data()) - 1;
size_t result = 0;
MapInt map;
+ map.max_load_factor(MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (N1)
- result += ++map[stc64_random() & filter];
+ result += ++map[stc64_random() & mask];
s.set_result(result);
}
-static void ins_and_access_cmap_i_(picobench::state& s)
+static void ins_and_access_cmap_i(picobench::state& s)
{
- stc64_srandom(seed);
- uint64_t filter = (1ull << s.user_data()) - 1;
- picobench::scope scope(s);
+ uint64_t mask = (1ull << s.user_data()) - 1;
size_t result = 0;
cmap_i map = cmap_inits;
- cmap_i_set_load_factors(&map, 0.0, 0.80);
+ cmap_i_set_load_factors(&map, 0.0, MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (N1)
- result += ++cmap_i_emplace(&map, stc64_random() & filter, 0).first->second;
+ result += ++cmap_i_emplace(&map, stc64_random() & mask, 0).first->second;
s.set_result(result);
cmap_i_del(&map);
}
-#define P samples(2).iterations({N1, N1, N1, N1}).user_data({18, 23, 25, 31})
-PICOBENCH(ins_and_access<umap_i>).P.baseline();
-PICOBENCH(ins_and_access<bmap_i>).P;
-PICOBENCH(ins_and_access<fmap_i>).P;
-PICOBENCH(ins_and_access<hmap_i>).P;
-PICOBENCH(ins_and_access<smap_i>).P;
-PICOBENCH(ins_and_access<rmap_i>).P;
-PICOBENCH(ins_and_access_cmap_i_).P;
+#define P samples(S1).iterations({N1, N1, N1, N1}).user_data({18, 23, 25, 31})
+PICOBENCH(ins_and_access_i<umap_i>).P.baseline();
+PICOBENCH(ins_and_access_i<bmap_i>).P;
+PICOBENCH(ins_and_access_i<fmap_i>).P;
+PICOBENCH(ins_and_access_i<hmap_i>).P;
+PICOBENCH(ins_and_access_i<smap_i>).P;
+PICOBENCH(ins_and_access_i<rmap_i>).P;
+PICOBENCH(ins_and_access_cmap_i).P;
#undef P
@@ -174,12 +186,13 @@ static void randomize(char* str, size_t len) {
template <class MapStr>
static void ins_and_access_s(picobench::state& s)
{
- stc64_srandom(seed);
std::string str(s.user_data(), 'x');
- randomize(&str[0], str.size());
- picobench::scope scope(s);
size_t result = 0;
MapStr map;
+ map.max_load_factor(MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (s.iterations()) {
randomize(&str[0], str.size());
map[str] = str;
@@ -193,15 +206,15 @@ static void ins_and_access_s(picobench::state& s)
s.set_result(result);
}
-static void ins_and_access_s_cmap_s_(picobench::state& s)
+static void ins_and_access_cmap_s(picobench::state& s)
{
- stc64_srandom(seed);
cstr str = cstr_with_size(s.user_data(), 'x');
- randomize(str.str, cstr_size(str));
- picobench::scope scope(s);
size_t result = 0;
cmap_s map = cmap_inits;
- cmap_s_set_load_factors(&map, 0.0, 0.80);
+ cmap_s_set_load_factors(&map, 0.0, MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (s.iterations()) {
randomize(str.str, cstr_size(str));
cmap_s_put(&map, str.str, cstr_clone(str));
@@ -217,12 +230,12 @@ static void ins_and_access_s_cmap_s_(picobench::state& s)
cmap_s_del(&map);
}
-#define P samples(2).iterations({N1/5, N1/5, N1/5, N1/10, N1/40}).user_data({7, 8, 13, 100, 1000})
+#define P samples(S1).iterations({N1/5, N1/5, N1/5, N1/10, N1/40}).user_data({13, 7, 8, 100, 1000})
PICOBENCH(ins_and_access_s<umap_s>).P.baseline();
PICOBENCH(ins_and_access_s<bmap_s>).P;
PICOBENCH(ins_and_access_s<fmap_s>).P;
PICOBENCH(ins_and_access_s<hmap_s>).P;
PICOBENCH(ins_and_access_s<smap_s>).P;
PICOBENCH(ins_and_access_s<rmap_s>).P;
-PICOBENCH(ins_and_access_s_cmap_s_).P;
-#undef P \ No newline at end of file
+PICOBENCH(ins_and_access_cmap_s).P;
+#undef P
diff --git a/benchmarks/others/robin_hood.hpp b/benchmarks/others/robin_hood.hpp
index 2cf9e029..a602dac9 100644
--- a/benchmarks/others/robin_hood.hpp
+++ b/benchmarks/others/robin_hood.hpp
@@ -6,7 +6,6 @@
// _/_____/
//
// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20
-// version 3.8.1
// https://github.com/martinus/robin-hood-hashing
//
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
@@ -35,9 +34,9 @@
#define ROBIN_HOOD_H_INCLUDED
// see https://semver.org/
-#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes
-#define ROBIN_HOOD_VERSION_MINOR 8 // for adding functionality in a backwards-compatible manner
-#define ROBIN_HOOD_VERSION_PATCH 1 // for backwards-compatible bug fixes
+#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes
+#define ROBIN_HOOD_VERSION_MINOR 10 // for adding functionality in a backwards-compatible manner
+#define ROBIN_HOOD_VERSION_PATCH 0 // for backwards-compatible bug fixes
#include <algorithm>
#include <cstdlib>
@@ -55,7 +54,8 @@
// #define ROBIN_HOOD_LOG_ENABLED
#ifdef ROBIN_HOOD_LOG_ENABLED
# include <iostream>
-# define ROBIN_HOOD_LOG(x) std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl
+# define ROBIN_HOOD_LOG(...) \
+ std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl;
#else
# define ROBIN_HOOD_LOG(x)
#endif
@@ -63,8 +63,8 @@
// #define ROBIN_HOOD_TRACE_ENABLED
#ifdef ROBIN_HOOD_TRACE_ENABLED
# include <iostream>
-# define ROBIN_HOOD_TRACE(x) \
- std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl
+# define ROBIN_HOOD_TRACE(...) \
+ std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl;
#else
# define ROBIN_HOOD_TRACE(x)
#endif
@@ -132,42 +132,32 @@ static Counts& counts() {
#endif
// count leading/trailing bits
-#if ((defined __i386 || defined __x86_64__) && defined __BMI__) || defined _M_IX86 || defined _M_X64
+#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS)
# ifdef _MSC_VER
+# if ROBIN_HOOD(BITNESS) == 32
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward
+# else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64
+# endif
# include <intrin.h>
+# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD))
+# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \
+ [](size_t mask) noexcept -> int { \
+ unsigned long index; \
+ return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast<int>(index) \
+ : ROBIN_HOOD(BITNESS); \
+ }(x)
# else
-# include <x86intrin.h>
+# if ROBIN_HOOD(BITNESS) == 32
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl
+# else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll
+# endif
+# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS))
+# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS))
# endif
-# if ROBIN_HOOD(BITNESS) == 32
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u32
-# else
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u64
-# endif
-# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ROBIN_HOOD(CTZ)(x)
-#elif defined _MSC_VER
-# if ROBIN_HOOD(BITNESS) == 32
-# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward
-# else
-# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64
-# endif
-# include <intrin.h>
-# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD))
-# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \
- [](size_t mask) noexcept -> int { \
- unsigned long index; \
- return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast<int>(index) \
- : ROBIN_HOOD(BITNESS); \
- }(x)
-#else
-# if ROBIN_HOOD(BITNESS) == 32
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl
-# else
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll
-# endif
-# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS))
-# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS))
#endif
// fallthrough
@@ -301,6 +291,13 @@ using index_sequence_for = make_index_sequence<sizeof...(T)>;
namespace detail {
+// make sure we static_cast to the correct type for hash_int
+#if ROBIN_HOOD(BITNESS) == 64
+using SizeT = uint64_t;
+#else
+using SizeT = uint32_t;
+#endif
+
template <typename T>
T rotr(T x, unsigned k) {
return (x >> k) | (x << (8U * sizeof(T) - k));
@@ -322,14 +319,14 @@ inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept {
// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other
// inlinings more difficult. Throws are also generally the slow path.
template <typename E, typename... Args>
-ROBIN_HOOD(NOINLINE)
+[[noreturn]] ROBIN_HOOD(NOINLINE)
#if ROBIN_HOOD(HAS_EXCEPTIONS)
-void doThrow(Args&&... args) {
+ void doThrow(Args&&... args) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
throw E(std::forward<Args>(args)...);
}
#else
-void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) {
+ void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) {
abort();
}
#endif
@@ -395,7 +392,8 @@ public:
void reset() noexcept {
while (mListForFree) {
T* tmp = *mListForFree;
- free(mListForFree);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(mListForFree);
mListForFree = reinterpret_cast_no_cast_align_warning<T**>(tmp);
}
mHead = nullptr;
@@ -430,8 +428,10 @@ public:
// calculate number of available elements in ptr
if (numBytes < ALIGNMENT + ALIGNED_SIZE) {
// not enough data for at least one element. Free and return.
- free(ptr);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(ptr);
} else {
+ ROBIN_HOOD_LOG("add to buffer")
add(ptr, numBytes);
}
}
@@ -495,9 +495,10 @@ private:
size_t const numElementsToAlloc = calcNumElementsToAlloc();
// alloc new memory: [prev |T, T, ... T]
- // std::cout << (sizeof(T*) + ALIGNED_SIZE * numElementsToAlloc) << " bytes" << std::endl;
size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc;
- add(assertNotNull<std::bad_alloc>(malloc(bytes)), bytes);
+ ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE
+ << " * " << numElementsToAlloc)
+ add(assertNotNull<std::bad_alloc>(std::malloc(bytes)), bytes);
return mHead;
}
@@ -533,7 +534,8 @@ struct NodeAllocator<T, MinSize, MaxSize, true> {
// we are not using the data, so just free it.
void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept {
- free(ptr);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(ptr);
}
};
@@ -677,7 +679,7 @@ inline constexpr bool operator>=(pair<A, B> const& x, pair<A, B> const& y) {
return !(x < y);
}
-inline size_t hash_bytes(void const* ptr, size_t const len) noexcept {
+inline size_t hash_bytes(void const* ptr, size_t len) noexcept {
static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
static constexpr uint64_t seed = UINT64_C(0xe17a1465);
static constexpr unsigned int r = 47;
@@ -737,8 +739,8 @@ inline size_t hash_int(uint64_t x) noexcept {
//
// Instead of shifts, we use rotations so we don't lose any bits.
//
- // Added a final multiplcation with a constant for more mixing. It is most important that the
- // lower bits are well mixed.
+ // Added a final multiplcation with a constant for more mixing. It is most important that
+ // the lower bits are well mixed.
auto h1 = x * UINT64_C(0xA24BAED4963EE407);
auto h2 = detail::rotr(x, 32U) * UINT64_C(0x9FB21C651E98DF25);
auto h = detail::rotr(h1 + h2, 32U);
@@ -746,14 +748,14 @@ inline size_t hash_int(uint64_t x) noexcept {
}
// A thin wrapper around std::hash, performing an additional simple mixing step of the result.
-template <typename T>
+template <typename T, typename Enable = void>
struct hash : public std::hash<T> {
size_t operator()(T const& obj) const
noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>()))) {
// call base hash
auto result = std::hash<T>::operator()(obj);
// return mixed of that, to be save against identity has
- return hash_int(static_cast<uint64_t>(result));
+ return hash_int(static_cast<detail::SizeT>(result));
}
};
@@ -776,21 +778,29 @@ struct hash<std::basic_string_view<CharT>> {
template <class T>
struct hash<T*> {
size_t operator()(T* ptr) const noexcept {
- return hash_int(reinterpret_cast<size_t>(ptr));
+ return hash_int(reinterpret_cast<detail::SizeT>(ptr));
}
};
template <class T>
struct hash<std::unique_ptr<T>> {
size_t operator()(std::unique_ptr<T> const& ptr) const noexcept {
- return hash_int(reinterpret_cast<size_t>(ptr.get()));
+ return hash_int(reinterpret_cast<detail::SizeT>(ptr.get()));
}
};
template <class T>
struct hash<std::shared_ptr<T>> {
size_t operator()(std::shared_ptr<T> const& ptr) const noexcept {
- return hash_int(reinterpret_cast<size_t>(ptr.get()));
+ return hash_int(reinterpret_cast<detail::SizeT>(ptr.get()));
+ }
+};
+
+template <typename Enum>
+struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
+ size_t operator()(Enum e) const noexcept {
+ using Underlying = typename std::underlying_type<Enum>::type;
+ return hash<Underlying>{}(static_cast<Underlying>(e));
}
};
@@ -923,7 +933,8 @@ private:
static constexpr size_t InitialNumElements = sizeof(uint64_t);
static constexpr uint32_t InitialInfoNumBits = 5;
static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits;
- static constexpr uint8_t InitialInfoHashShift = sizeof(size_t) * 8 - InitialInfoNumBits;
+ static constexpr size_t InfoMask = InitialInfoInc - 1U;
+ static constexpr uint8_t InitialInfoHashShift = 0;
using DataPool = detail::NodeAllocator<value_type, 4, 16384, IsFlat>;
// type needs to be wider than uint8_t.
@@ -1287,13 +1298,29 @@ private:
mInfo += sizeof(size_t);
mKeyVals += sizeof(size_t);
}
-#if ROBIN_HOOD(LITTLE_ENDIAN)
- auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8;
+#if defined(ROBIN_HOOD_DISABLE_INTRINSICS)
+ // we know for certain that within the next 8 bytes we'll find a non-zero one.
+ if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load<uint32_t>(mInfo))) {
+ mInfo += 4;
+ mKeyVals += 4;
+ }
+ if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load<uint16_t>(mInfo))) {
+ mInfo += 2;
+ mKeyVals += 2;
+ }
+ if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) {
+ mInfo += 1;
+ mKeyVals += 1;
+ }
#else
+# if ROBIN_HOOD(LITTLE_ENDIAN)
+ auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8;
+# else
auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8;
-#endif
+# endif
mInfo += inc;
mKeyVals += inc;
+#endif
}
friend class Table<IsFlat, MaxLoadFactor100, key_type, mapped_type, hasher, key_equal>;
@@ -1315,10 +1342,11 @@ private:
typename std::conditional<std::is_same<::robin_hood::hash<key_type>, hasher>::value,
::robin_hood::detail::identity_hash<size_t>,
::robin_hood::hash<size_t>>::type;
- *idx = Mix{}(WHash::operator()(key));
- *info = mInfoInc + static_cast<InfoType>(*idx >> mInfoHashShift);
- *idx &= mMask;
+ // the lower InitialInfoNumBits are reserved for info.
+ auto h = Mix{}(WHash::operator()(key));
+ *info = mInfoInc + static_cast<InfoType>((h & InfoMask) >> mInfoHashShift);
+ *idx = (h >> InitialInfoNumBits) & mMask;
}
// forwards the index by one, wrapping around at the end
@@ -1457,13 +1485,19 @@ public:
using iterator = Iter<false>;
using const_iterator = Iter<true>;
+ Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual()))
+ : WHash()
+ , WKeyEqual() {
+ ROBIN_HOOD_TRACE(this)
+ }
+
// Creates an empty hash map. Nothing is allocated yet, this happens at the first insert.
// This tremendously speeds up ctor & dtor of a map that never receives an element. The
// penalty is payed at the first insert, and not before. Lookup of this empty map works
// because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the
// standard, but we can ignore it.
explicit Table(
- size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{},
+ size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{},
const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal)))
: WHash(h)
, WKeyEqual(equal) {
@@ -1543,8 +1577,12 @@ public:
// elements and insert them, but copying is probably faster.
auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1);
- mKeyVals = static_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
- malloc(calcNumBytesTotal(numElementsWithBuffer))));
+ auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+
+ ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal("
+ << numElementsWithBuffer << ")")
+ mKeyVals = static_cast<Node*>(
+ detail::assertNotNull<std::bad_alloc>(std::malloc(numBytesTotal)));
// no need for calloc because clonData does memcpy
mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
mNumElements = o.mNumElements;
@@ -1592,12 +1630,16 @@ public:
// no luck: we don't have the same array size allocated, so we need to realloc.
if (0 != mMask) {
// only deallocate if we actually have data!
- free(mKeyVals);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(mKeyVals);
}
auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1);
- mKeyVals = static_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
- malloc(calcNumBytesTotal(numElementsWithBuffer))));
+ auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+ ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal("
+ << numElementsWithBuffer << ")")
+ mKeyVals = static_cast<Node*>(
+ detail::assertNotNull<std::bad_alloc>(std::malloc(numBytesTotal)));
// no need for calloc here because cloneData performs a memcpy.
mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
@@ -1939,23 +1981,36 @@ public:
// reserves space for the specified number of elements. Makes sure the old data fits.
// exactly the same as reserve(c).
void rehash(size_t c) {
- reserve(c);
+ // forces a reserve
+ reserve(c, true);
}
// reserves space for the specified number of elements. Makes sure the old data fits.
- // Exactly the same as resize(c). Use resize(0) to shrink to fit.
+ // Exactly the same as rehash(c). Use rehash(0) to shrink to fit.
void reserve(size_t c) {
+ // reserve, but don't force rehash
+ reserve(c, false);
+ }
+
+ // If possible reallocates the map to a smaller one. This frees the underlying table.
+ // Does not do anything if load_factor is too large for decreasing the table's size.
+ void compact() {
ROBIN_HOOD_TRACE(this)
- auto const minElementsAllowed = (std::max)(c, mNumElements);
auto newSize = InitialNumElements;
- while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) {
+ while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) {
newSize *= 2;
}
if (ROBIN_HOOD_UNLIKELY(newSize == 0)) {
throwOverflowError();
}
- rehashPowerOfTwo(newSize);
+ ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1")
+
+ // only actually do anything when the new size is bigger than the old one. This prevents to
+ // continuously allocate for each reserve() call.
+ if (newSize < mMask + 1) {
+ rehashPowerOfTwo(newSize, true);
+ }
}
size_type size() const noexcept { // NOLINT(modernize-use-nodiscard)
@@ -1989,6 +2044,17 @@ public:
return mMask;
}
+#ifndef IGNORE_STANDARD_METHODS
+ ROBIN_HOOD(NODISCARD) size_t bucket_count() const noexcept {
+ ROBIN_HOOD_TRACE(this)
+ return mMask;
+ }
+ void max_load_factor(float) {
+ ROBIN_HOOD_TRACE(this)
+ // warning, not used
+ }
+#endif
+
ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept {
if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits<size_t>::max)() / 100)) {
return maxElements * MaxLoadFactor100 / 100;
@@ -2046,9 +2112,29 @@ private:
return find(e) != end();
}
+ void reserve(size_t c, bool forceRehash) {
+ ROBIN_HOOD_TRACE(this)
+ auto const minElementsAllowed = (std::max)(c, mNumElements);
+ auto newSize = InitialNumElements;
+ while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) {
+ newSize *= 2;
+ }
+ if (ROBIN_HOOD_UNLIKELY(newSize == 0)) {
+ throwOverflowError();
+ }
+
+ ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1")
+
+ // only actually do anything when the new size is bigger than the old one. This prevents to
+ // continuously allocate for each reserve() call.
+ if (forceRehash || newSize > mMask + 1) {
+ rehashPowerOfTwo(newSize, false);
+ }
+ }
+
// reserves space for at least the specified number of elements.
// only works if numBuckets if power of two
- void rehashPowerOfTwo(size_t numBuckets) {
+ void rehashPowerOfTwo(size_t numBuckets, bool forceFree) {
ROBIN_HOOD_TRACE(this)
Node* const oldKeyVals = mKeyVals;
@@ -2067,8 +2153,17 @@ private:
}
}
- // don't destroy old data: put it into the pool instead
- DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer));
+ // this check is not necessary as it's guarded by the previous if, but it helps silence
+ // g++'s overeager "attempt to free a non-heap object 'map'
+ // [-Werror=free-nonheap-object]" warning.
+ if (oldKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
+ // don't destroy old data: put it into the pool instead
+ if (forceFree) {
+ std::free(oldKeyVals);
+ } else {
+ DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer));
+ }
+ }
}
}
@@ -2111,8 +2206,11 @@ private:
auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements);
// calloc also zeroes everything
- mKeyVals = reinterpret_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
- calloc(1, calcNumBytesTotal(numElementsWithBuffer))));
+ auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+ ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal("
+ << numElementsWithBuffer << ")")
+ mKeyVals = reinterpret_cast<Node*>(
+ detail::assertNotNull<std::bad_alloc>(std::calloc(1, numBytesTotal)));
mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
// set sentinel
@@ -2282,7 +2380,7 @@ private:
throwOverflowError();
}
- rehashPowerOfTwo((mMask + 1) * 2);
+ rehashPowerOfTwo((mMask + 1) * 2, false);
}
void destroy() {
@@ -2296,10 +2394,11 @@ private:
// This protection against not deleting mMask shouldn't be needed as it's sufficiently
// protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise
- // reports a compile error: attempt to free a non-heap object ‘fm’
+ // reports a compile error: attempt to free a non-heap object 'fm'
// [-Werror=free-nonheap-object]
if (mKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
- free(mKeyVals);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(mKeyVals);
}
}
@@ -2363,4 +2462,4 @@ using unordered_set = detail::Table<sizeof(Key) <= sizeof(size_t) * 6 &&
} // namespace robin_hood
-#endif
+#endif \ No newline at end of file
diff --git a/benchmarks/picobench.hpp b/benchmarks/picobench.hpp
index fc06709c..bf6204f4 100644
--- a/benchmarks/picobench.hpp
+++ b/benchmarks/picobench.hpp
@@ -444,7 +444,7 @@ public:
}
line(out, width);
out <<
- " Name (* = baseline) | Dim | User data | Total ms | ns/op | Baseline| Ops/second\n";
+ " Name (* = baseline) |Iterations | User data | Total ms | ns/op |Baseline | Ops/second\n";
line(out, width);
auto problem_space_view = get_problem_space_view(suite);
@@ -467,7 +467,7 @@ public:
out << " |"
<< setw(10) << ps.first.first << " |"
<< setw(10) << bm.user_data << " |"
- << setw(10) << fixed << setprecision(3) << double(bm.total_time_ns) / 1000000.0 << " |";
+ << setw(10) << fixed << setprecision(2) << double(bm.total_time_ns) / 1000000.0 << " |";
auto ns_op = (bm.total_time_ns / ps.first.first);
if (ns_op > 99999999)
{
@@ -561,16 +561,18 @@ public:
out << " |" << setw(10) << ns_per_op << " |";
- if (&bm == baseline)
+ if (bm.is_baseline)
{
out << " - |";
+ baseline_total_time = total_time;
+ baseline_total_iterations = total_iterations;
+ baseline_ns_per_op = ns_per_op;
}
else
{
out << setw(9) << fixed << setprecision(3)
<< double(ns_per_op) / double(baseline_ns_per_op) << " |";
}
-
auto ops_per_sec = total_iterations * (1000000000.0 / double(total_time));
out << setw(12) << fixed << setprecision(1) << ops_per_sec << "\n";
}
@@ -579,61 +581,60 @@ public:
}
}
- void to_csv(std::ostream& out, bool header = true, bool dec = '.') const
+ void to_csv(std::ostream& out) const
{
using namespace std;
-
- if (header)
- {
- out << "Suite, Benchmark, Baseline, Dimension, User data, Samples, Total ns, Result, ns/op, Baseline ratio\n";
- }
+ const char* sep = ",";
for (auto& suite : suites)
{
- const benchmark* baseline = nullptr;
- for (auto& bm : suite.benchmarks)
+ out << "Suite, Baseline, Benchmark, Iterations, User data, Total ms, ns/op, Ratio, Ops/second\n";
+
+ auto problem_space_view = get_problem_space_view(suite);
+ for (auto& ps : problem_space_view)
{
- if (bm.is_baseline)
+ const problem_space_benchmark* baseline = nullptr;
+ for (auto& bm : ps.second)
{
- baseline = &bm;
- break;
+ if (bm.is_baseline)
+ {
+ baseline = &bm;
+ break;
+ }
}
- }
- I_PICOBENCH_ASSERT(baseline);
- for (auto& bm : suite.benchmarks)
- {
- for (auto& d : bm.data)
+ for (auto& bm : ps.second)
{
out << '"' << (suite.name ? suite.name : "") << '"';
- out << ",\"" << bm.name << "\", ";
- out << (&bm == baseline ? "true" : "false");
- out << ", "
- << d.dimension << ", "
- << d.user_data << ", "
- << d.samples << ", "
- << d.total_time_ns << ", "
- << d.result << ", "
- << (d.total_time_ns / d.dimension) << ", ";
-
- if (baseline)
+ out << sep << (bm.is_baseline ? "true" : "false");
+ out << sep << '"' << bm.name << '"';
+ out << sep << ps.first.first
+ << sep << bm.user_data
+ << sep << fixed << setprecision(3) << bm.total_time_ns / 1000000.0;
+
+ auto ns_op = (bm.total_time_ns / ps.first.first);
+ out << sep << ns_op << sep;
+ if (baseline == &bm)
{
- for (auto& bd : baseline->data)
- {
- if (bd.dimension == d.dimension && bd.user_data == d.user_data)
- {
- out << fixed << setprecision(3) << (double(d.total_time_ns) / double(bd.total_time_ns));
- }
- }
+ out << 1.0;
+ }
+ else if (baseline)
+ {
+ out << fixed << setprecision(3) << bm.total_time_ns / double(baseline->total_time_ns);
}
else
- out << -999;
- out << '\n';
+ {
+ out << -1.0; // no baseline to compare to
+ }
+
+ auto ops_per_sec = ps.first.first * (1000000000.0 / bm.total_time_ns);
+ out << sep << fixed << setprecision(1) << ops_per_sec << "\n";
}
}
}
}
+
struct problem_space_benchmark
{
const char* name;
@@ -720,7 +721,8 @@ enum class report_output_format
{
text,
concise_text,
- csv
+ csv,
+ all,
};
#if !defined(PICOBENCH_DEFAULT_ITERATIONS)
@@ -805,28 +807,47 @@ public:
auto report = generate_report();
std::ostream* out = _stdout;
std::ofstream fout;
- if (preferred_output_filename())
+ report_output_format fmt[] = {report_output_format::text,
+ report_output_format::concise_text,
+ report_output_format::csv};
+ const char *ext[] = {".txt", ".lst", ".csv"}, *fn = preferred_output_filename();
+ bool all = preferred_output_format() == report_output_format::all;
+ for (int i = 0; i < 3; ++i)
{
- fout.open(preferred_output_filename());
- if (!fout.is_open())
+ if (all || preferred_output_format() == fmt[i])
{
- std::cerr << "Error: Could not open output file `" << preferred_output_filename() << "`\n";
- return 1;
- }
- out = &fout;
- }
+ if (fn)
+ {
+ std::string name(fn);
- switch (preferred_output_format())
- {
- case picobench::report_output_format::text:
- report.to_text(*out);
- break;
- case picobench::report_output_format::concise_text:
- report.to_text_concise(*out);
- break;
- case picobench::report_output_format::csv:
- report.to_csv(*out);
- break;
+ if (all || name.find(".") == std::string::npos)
+ {
+ name += ext[i];
+ }
+ fout.close();
+ fout.open(name.c_str());
+ if (!fout.is_open())
+ {
+ std::cerr << "Error: Could not open output file `" << fn << "`\n";
+ return 1;
+ }
+ out = &fout;
+ }
+
+ switch (fmt[i])
+ {
+ case report_output_format::text:
+ report.to_text(*out);
+ break;
+ case report_output_format::concise_text:
+ report.to_text_concise(*out);
+ break;
+ case report_output_format::csv:
+ report.to_csv(*out);
+ break;
+ default: break;
+ }
+ }
}
}
return error();
@@ -1103,8 +1124,8 @@ public:
_opts.emplace_back("-samples=", "<n>",
"Sets default number of samples for benchmarks",
&runner::cmd_samples);
- _opts.emplace_back("-out-fmt=", "<txt|con|csv>",
- "Outputs text or concise or csv",
+ _opts.emplace_back("-out-fmt=", "<txt|lst|csv|all>",
+ "Outputs text, concise, csv or all",
&runner::cmd_out_fmt);
_opts.emplace_back("-output=", "<filename>",
"Sets output filename or `stdout`",
@@ -1310,7 +1331,7 @@ private:
{
_output_format = report_output_format::text;
}
- else if (strcmp(line, "con") == 0)
+ else if (strcmp(line, "lst") == 0)
{
_output_format = report_output_format::concise_text;
}
@@ -1318,6 +1339,10 @@ private:
{
_output_format = report_output_format::csv;
}
+ else if (strcmp(line, "all") == 0)
+ {
+ _output_format = report_output_format::all;
+ }
else
{
return false;