diff options
| author | Tyge Løvset <[email protected]> | 2021-01-11 22:15:24 +0100 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2021-01-11 22:15:24 +0100 |
| commit | a43d9beef9e02c5fee177089219cdb34722cde74 (patch) | |
| tree | 6a0408061d10539496e00d683dff5051840ab1dd | |
| parent | aa235b0080c563786e46fff1961277e2f84b379c (diff) | |
| download | STC-modified-a43d9beef9e02c5fee177089219cdb34722cde74.tar.gz STC-modified-a43d9beef9e02c5fee177089219cdb34722cde74.zip | |
Added output all in picobenchmark + allow to have multiple baselines in one suite.
Updated latest robin_hood.hpp
| -rw-r--r-- | benchmarks/cmap_benchmark2.cpp | 163 | ||||
| -rw-r--r-- | benchmarks/others/robin_hood.hpp | 263 | ||||
| -rw-r--r-- | benchmarks/picobench.hpp | 151 |
3 files changed, 357 insertions, 220 deletions
diff --git a/benchmarks/cmap_benchmark2.cpp b/benchmarks/cmap_benchmark2.cpp index 7904effa..39dd61b8 100644 --- a/benchmarks/cmap_benchmark2.cpp +++ b/benchmarks/cmap_benchmark2.cpp @@ -13,39 +13,44 @@ #include "picobench.hpp"
PICOBENCH_SUITE("Map");
-enum {N1 = 5000000};
-uint64_t seed = 123456;
+enum {N1 = 5000000, S1 = 1, MaxLoadFactor100 = 80};
+uint64_t seed = time(NULL);
-static inline uint32_t fibonacci_hash(const void* data, size_t len) {
- return (uint32_t) (((*(const uint32_t *) data) * 11400714819323198485llu) >> 24);
+static inline uint32_t hash32(const void* data, size_t len) {
+ return (uint32_t) ((*(const uint32_t *) data) * 2654435769u);
+}
+static inline uint32_t hash64(const void* data, size_t len) {
+ return (uint32_t) (((*(const uint64_t *) data) * 11400714819323198485ull) >> 24);
}
template <class K, class V> using umap = std::unordered_map<K, V>;
template <class K, class V> using bmap = ska::bytell_hash_map<K, V>;
template <class K, class V> using fmap = ska::flat_hash_map<K, V>;
template <class K, class V> using hmap = tsl::hopscotch_map<K, V>;
template <class K, class V> using smap = spp::sparse_hash_map<K, V>;
-template <class K, class V> using rmap = robin_hood::unordered_flat_map<K, V>;
-
-using_cmap(i, int, int, c_default_del, c_default_clone, c_default_equals, fibonacci_hash);
+template <class K, class V> using rmap = robin_hood::unordered_flat_map<K, V, robin_hood::hash<K>,
+ std::equal_to<K>, MaxLoadFactor100>;
+#define DEFMAP(map, ...) \
+ using u##map = umap __VA_ARGS__; \
+ using b##map = bmap __VA_ARGS__; \
+ using f##map = fmap __VA_ARGS__; \
+ using h##map = hmap __VA_ARGS__; \
+ using s##map = smap __VA_ARGS__; \
+ using r##map = rmap __VA_ARGS__
+
+DEFMAP(map_i, <int, int>);
+DEFMAP(map_x, <uint64_t, uint64_t>);
+DEFMAP(map_s, <std::string, std::string>);
+
+using_cmap(i, int, int, c_default_del, c_default_clone, c_default_equals, hash32);
+using_cmap(x, uint64_t, uint64_t, c_default_del, c_default_clone, c_default_equals, hash64);
using_cmap_strkey(s, cstr, cstr_del, cstr_clone);
-using umap_i = umap<int, int>;
-using umap_s = umap<std::string, std::string>;
-using bmap_i = bmap<int, int>;
-using bmap_s = bmap<std::string, std::string>;
-using fmap_i = fmap<int, int>;
-using fmap_s = fmap<std::string, std::string>;
-using hmap_i = hmap<int, int>;
-using hmap_s = hmap<std::string, std::string>;
-using smap_i = smap<int, int>;
-using smap_s = smap<std::string, std::string>;
-using rmap_i = rmap<int, int>;
-using rmap_s = rmap<std::string, std::string>;
template <class MapInt>
-static void ctor_and_ins_one(picobench::state& s)
+static void ctor_and_ins_one_i(picobench::state& s)
{
- picobench::scope scope(s);
size_t result = 0;
+
+ picobench::scope scope(s);
c_forrange (n, s.iterations()) {
MapInt map;
map[n];
@@ -54,10 +59,11 @@ static void ctor_and_ins_one(picobench::state& s) s.set_result(result);
}
-static void ctor_and_ins_one_cmap_i_(picobench::state& s)
+static void ctor_and_ins_one_cmap_i(picobench::state& s)
{
- picobench::scope scope(s);
size_t result = 0;
+
+ picobench::scope scope(s);
c_forrange (n, s.iterations()) {
cmap_i map = cmap_inits;
cmap_i_emplace(&map, n, 0);
@@ -67,24 +73,26 @@ static void ctor_and_ins_one_cmap_i_(picobench::state& s) s.set_result(result);
}
-#define P samples(2).iterations({N1})
-PICOBENCH(ctor_and_ins_one<umap_i>).P.baseline();
-PICOBENCH(ctor_and_ins_one<bmap_i>).P;
-PICOBENCH(ctor_and_ins_one<fmap_i>).P;
-PICOBENCH(ctor_and_ins_one<hmap_i>).P;
-PICOBENCH(ctor_and_ins_one<smap_i>).P;
-PICOBENCH(ctor_and_ins_one<rmap_i>).P;
-PICOBENCH(ctor_and_ins_one_cmap_i_).P;
+#define P samples(S1).iterations({N1})
+PICOBENCH(ctor_and_ins_one_i<umap_i>).P.baseline();
+PICOBENCH(ctor_and_ins_one_i<bmap_i>).P;
+PICOBENCH(ctor_and_ins_one_i<fmap_i>).P;
+PICOBENCH(ctor_and_ins_one_i<hmap_i>).P;
+PICOBENCH(ctor_and_ins_one_i<smap_i>).P;
+PICOBENCH(ctor_and_ins_one_i<rmap_i>).P;
+PICOBENCH(ctor_and_ins_one_cmap_i).P;
#undef P
template <class MapInt>
-static void ins_and_erase(picobench::state& s)
+static void ins_and_erase_i(picobench::state& s)
{
- stc64_srandom(seed);
- picobench::scope scope(s);
size_t result = 0;
MapInt map;
+ map.max_load_factor(MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (s.iterations())
map[stc64_random()];
map.clear();
@@ -97,12 +105,13 @@ static void ins_and_erase(picobench::state& s) s.set_result(map.size());
}
-static void ins_and_erase_cmap_i_(picobench::state& s)
+static void ins_and_erase_cmap_i(picobench::state& s)
{
+ cmap_i map = cmap_inits;
+ cmap_i_set_load_factors(&map, 0.0, MaxLoadFactor100 / 100.0);
stc64_srandom(seed);
+
picobench::scope scope(s);
- cmap_i map = cmap_inits;
- cmap_i_set_load_factors(&map, 0.0, 0.80);
c_forrange (s.iterations())
cmap_i_emplace(&map, stc64_random(), 0);
cmap_i_clear(&map);
@@ -116,52 +125,55 @@ static void ins_and_erase_cmap_i_(picobench::state& s) s.set_result(cmap_i_size(map));
}
-#define P samples(2).iterations({N1/4})
-PICOBENCH(ins_and_erase<umap_i>).P.baseline();
-PICOBENCH(ins_and_erase<bmap_i>).P;
-PICOBENCH(ins_and_erase<fmap_i>).P;
-PICOBENCH(ins_and_erase<hmap_i>).P;
-PICOBENCH(ins_and_erase<smap_i>).P;
-PICOBENCH(ins_and_erase<rmap_i>).P;
-PICOBENCH(ins_and_erase_cmap_i_).P;
+#define P samples(S1).iterations({N1/4})
+PICOBENCH(ins_and_erase_i<umap_i>).P.baseline();
+PICOBENCH(ins_and_erase_i<bmap_i>).P;
+PICOBENCH(ins_and_erase_i<fmap_i>).P;
+PICOBENCH(ins_and_erase_i<hmap_i>).P;
+PICOBENCH(ins_and_erase_i<smap_i>).P;
+PICOBENCH(ins_and_erase_i<rmap_i>).P;
+PICOBENCH(ins_and_erase_cmap_i).P;
#undef P
template <class MapInt>
-static void ins_and_access(picobench::state& s)
+static void ins_and_access_i(picobench::state& s)
{
- stc64_srandom(seed);
- uint64_t filter = (1ull << s.user_data()) - 1;
- picobench::scope scope(s);
+ uint64_t mask = (1ull << s.user_data()) - 1;
size_t result = 0;
MapInt map;
+ map.max_load_factor(MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (N1)
- result += ++map[stc64_random() & filter];
+ result += ++map[stc64_random() & mask];
s.set_result(result);
}
-static void ins_and_access_cmap_i_(picobench::state& s)
+static void ins_and_access_cmap_i(picobench::state& s)
{
- stc64_srandom(seed);
- uint64_t filter = (1ull << s.user_data()) - 1;
- picobench::scope scope(s);
+ uint64_t mask = (1ull << s.user_data()) - 1;
size_t result = 0;
cmap_i map = cmap_inits;
- cmap_i_set_load_factors(&map, 0.0, 0.80);
+ cmap_i_set_load_factors(&map, 0.0, MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (N1)
- result += ++cmap_i_emplace(&map, stc64_random() & filter, 0).first->second;
+ result += ++cmap_i_emplace(&map, stc64_random() & mask, 0).first->second;
s.set_result(result);
cmap_i_del(&map);
}
-#define P samples(2).iterations({N1, N1, N1, N1}).user_data({18, 23, 25, 31})
-PICOBENCH(ins_and_access<umap_i>).P.baseline();
-PICOBENCH(ins_and_access<bmap_i>).P;
-PICOBENCH(ins_and_access<fmap_i>).P;
-PICOBENCH(ins_and_access<hmap_i>).P;
-PICOBENCH(ins_and_access<smap_i>).P;
-PICOBENCH(ins_and_access<rmap_i>).P;
-PICOBENCH(ins_and_access_cmap_i_).P;
+#define P samples(S1).iterations({N1, N1, N1, N1}).user_data({18, 23, 25, 31})
+PICOBENCH(ins_and_access_i<umap_i>).P.baseline();
+PICOBENCH(ins_and_access_i<bmap_i>).P;
+PICOBENCH(ins_and_access_i<fmap_i>).P;
+PICOBENCH(ins_and_access_i<hmap_i>).P;
+PICOBENCH(ins_and_access_i<smap_i>).P;
+PICOBENCH(ins_and_access_i<rmap_i>).P;
+PICOBENCH(ins_and_access_cmap_i).P;
#undef P
@@ -174,12 +186,13 @@ static void randomize(char* str, size_t len) { template <class MapStr>
static void ins_and_access_s(picobench::state& s)
{
- stc64_srandom(seed);
std::string str(s.user_data(), 'x');
- randomize(&str[0], str.size());
- picobench::scope scope(s);
size_t result = 0;
MapStr map;
+ map.max_load_factor(MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (s.iterations()) {
randomize(&str[0], str.size());
map[str] = str;
@@ -193,15 +206,15 @@ static void ins_and_access_s(picobench::state& s) s.set_result(result);
}
-static void ins_and_access_s_cmap_s_(picobench::state& s)
+static void ins_and_access_cmap_s(picobench::state& s)
{
- stc64_srandom(seed);
cstr str = cstr_with_size(s.user_data(), 'x');
- randomize(str.str, cstr_size(str));
- picobench::scope scope(s);
size_t result = 0;
cmap_s map = cmap_inits;
- cmap_s_set_load_factors(&map, 0.0, 0.80);
+ cmap_s_set_load_factors(&map, 0.0, MaxLoadFactor100 / 100.0);
+ stc64_srandom(seed);
+
+ picobench::scope scope(s);
c_forrange (s.iterations()) {
randomize(str.str, cstr_size(str));
cmap_s_put(&map, str.str, cstr_clone(str));
@@ -217,12 +230,12 @@ static void ins_and_access_s_cmap_s_(picobench::state& s) cmap_s_del(&map);
}
-#define P samples(2).iterations({N1/5, N1/5, N1/5, N1/10, N1/40}).user_data({7, 8, 13, 100, 1000})
+#define P samples(S1).iterations({N1/5, N1/5, N1/5, N1/10, N1/40}).user_data({13, 7, 8, 100, 1000})
PICOBENCH(ins_and_access_s<umap_s>).P.baseline();
PICOBENCH(ins_and_access_s<bmap_s>).P;
PICOBENCH(ins_and_access_s<fmap_s>).P;
PICOBENCH(ins_and_access_s<hmap_s>).P;
PICOBENCH(ins_and_access_s<smap_s>).P;
PICOBENCH(ins_and_access_s<rmap_s>).P;
-PICOBENCH(ins_and_access_s_cmap_s_).P;
-#undef P
\ No newline at end of file +PICOBENCH(ins_and_access_cmap_s).P;
+#undef P
diff --git a/benchmarks/others/robin_hood.hpp b/benchmarks/others/robin_hood.hpp index 2cf9e029..a602dac9 100644 --- a/benchmarks/others/robin_hood.hpp +++ b/benchmarks/others/robin_hood.hpp @@ -6,7 +6,6 @@ // _/_____/
//
// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20
-// version 3.8.1
// https://github.com/martinus/robin-hood-hashing
//
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
@@ -35,9 +34,9 @@ #define ROBIN_HOOD_H_INCLUDED
// see https://semver.org/
-#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes
-#define ROBIN_HOOD_VERSION_MINOR 8 // for adding functionality in a backwards-compatible manner
-#define ROBIN_HOOD_VERSION_PATCH 1 // for backwards-compatible bug fixes
+#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes
+#define ROBIN_HOOD_VERSION_MINOR 10 // for adding functionality in a backwards-compatible manner
+#define ROBIN_HOOD_VERSION_PATCH 0 // for backwards-compatible bug fixes
#include <algorithm>
#include <cstdlib>
@@ -55,7 +54,8 @@ // #define ROBIN_HOOD_LOG_ENABLED
#ifdef ROBIN_HOOD_LOG_ENABLED
# include <iostream>
-# define ROBIN_HOOD_LOG(x) std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl
+# define ROBIN_HOOD_LOG(...) \
+ std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl;
#else
# define ROBIN_HOOD_LOG(x)
#endif
@@ -63,8 +63,8 @@ // #define ROBIN_HOOD_TRACE_ENABLED
#ifdef ROBIN_HOOD_TRACE_ENABLED
# include <iostream>
-# define ROBIN_HOOD_TRACE(x) \
- std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl
+# define ROBIN_HOOD_TRACE(...) \
+ std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl;
#else
# define ROBIN_HOOD_TRACE(x)
#endif
@@ -132,42 +132,32 @@ static Counts& counts() { #endif
// count leading/trailing bits
-#if ((defined __i386 || defined __x86_64__) && defined __BMI__) || defined _M_IX86 || defined _M_X64
+#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS)
# ifdef _MSC_VER
+# if ROBIN_HOOD(BITNESS) == 32
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward
+# else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64
+# endif
# include <intrin.h>
+# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD))
+# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \
+ [](size_t mask) noexcept -> int { \
+ unsigned long index; \
+ return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast<int>(index) \
+ : ROBIN_HOOD(BITNESS); \
+ }(x)
# else
-# include <x86intrin.h>
+# if ROBIN_HOOD(BITNESS) == 32
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl
+# else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll
+# endif
+# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS))
+# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS))
# endif
-# if ROBIN_HOOD(BITNESS) == 32
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u32
-# else
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u64
-# endif
-# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ROBIN_HOOD(CTZ)(x)
-#elif defined _MSC_VER
-# if ROBIN_HOOD(BITNESS) == 32
-# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward
-# else
-# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64
-# endif
-# include <intrin.h>
-# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD))
-# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \
- [](size_t mask) noexcept -> int { \
- unsigned long index; \
- return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast<int>(index) \
- : ROBIN_HOOD(BITNESS); \
- }(x)
-#else
-# if ROBIN_HOOD(BITNESS) == 32
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl
-# else
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll
-# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll
-# endif
-# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS))
-# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS))
#endif
// fallthrough
@@ -301,6 +291,13 @@ using index_sequence_for = make_index_sequence<sizeof...(T)>; namespace detail {
+// make sure we static_cast to the correct type for hash_int
+#if ROBIN_HOOD(BITNESS) == 64
+using SizeT = uint64_t;
+#else
+using SizeT = uint32_t;
+#endif
+
template <typename T>
T rotr(T x, unsigned k) {
return (x >> k) | (x << (8U * sizeof(T) - k));
@@ -322,14 +319,14 @@ inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { // make sure this is not inlined as it is slow and dramatically enlarges code, thus making other
// inlinings more difficult. Throws are also generally the slow path.
template <typename E, typename... Args>
-ROBIN_HOOD(NOINLINE)
+[[noreturn]] ROBIN_HOOD(NOINLINE)
#if ROBIN_HOOD(HAS_EXCEPTIONS)
-void doThrow(Args&&... args) {
+ void doThrow(Args&&... args) {
// NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
throw E(std::forward<Args>(args)...);
}
#else
-void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) {
+ void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) {
abort();
}
#endif
@@ -395,7 +392,8 @@ public: void reset() noexcept {
while (mListForFree) {
T* tmp = *mListForFree;
- free(mListForFree);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(mListForFree);
mListForFree = reinterpret_cast_no_cast_align_warning<T**>(tmp);
}
mHead = nullptr;
@@ -430,8 +428,10 @@ public: // calculate number of available elements in ptr
if (numBytes < ALIGNMENT + ALIGNED_SIZE) {
// not enough data for at least one element. Free and return.
- free(ptr);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(ptr);
} else {
+ ROBIN_HOOD_LOG("add to buffer")
add(ptr, numBytes);
}
}
@@ -495,9 +495,10 @@ private: size_t const numElementsToAlloc = calcNumElementsToAlloc();
// alloc new memory: [prev |T, T, ... T]
- // std::cout << (sizeof(T*) + ALIGNED_SIZE * numElementsToAlloc) << " bytes" << std::endl;
size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc;
- add(assertNotNull<std::bad_alloc>(malloc(bytes)), bytes);
+ ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE
+ << " * " << numElementsToAlloc)
+ add(assertNotNull<std::bad_alloc>(std::malloc(bytes)), bytes);
return mHead;
}
@@ -533,7 +534,8 @@ struct NodeAllocator<T, MinSize, MaxSize, true> { // we are not using the data, so just free it.
void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept {
- free(ptr);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(ptr);
}
};
@@ -677,7 +679,7 @@ inline constexpr bool operator>=(pair<A, B> const& x, pair<A, B> const& y) { return !(x < y);
}
-inline size_t hash_bytes(void const* ptr, size_t const len) noexcept {
+inline size_t hash_bytes(void const* ptr, size_t len) noexcept {
static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
static constexpr uint64_t seed = UINT64_C(0xe17a1465);
static constexpr unsigned int r = 47;
@@ -737,8 +739,8 @@ inline size_t hash_int(uint64_t x) noexcept { //
// Instead of shifts, we use rotations so we don't lose any bits.
//
- // Added a final multiplcation with a constant for more mixing. It is most important that the
- // lower bits are well mixed.
+ // Added a final multiplcation with a constant for more mixing. It is most important that
+ // the lower bits are well mixed.
auto h1 = x * UINT64_C(0xA24BAED4963EE407);
auto h2 = detail::rotr(x, 32U) * UINT64_C(0x9FB21C651E98DF25);
auto h = detail::rotr(h1 + h2, 32U);
@@ -746,14 +748,14 @@ inline size_t hash_int(uint64_t x) noexcept { }
// A thin wrapper around std::hash, performing an additional simple mixing step of the result.
-template <typename T>
+template <typename T, typename Enable = void>
struct hash : public std::hash<T> {
size_t operator()(T const& obj) const
noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>()))) {
// call base hash
auto result = std::hash<T>::operator()(obj);
// return mixed of that, to be save against identity has
- return hash_int(static_cast<uint64_t>(result));
+ return hash_int(static_cast<detail::SizeT>(result));
}
};
@@ -776,21 +778,29 @@ struct hash<std::basic_string_view<CharT>> { template <class T>
struct hash<T*> {
size_t operator()(T* ptr) const noexcept {
- return hash_int(reinterpret_cast<size_t>(ptr));
+ return hash_int(reinterpret_cast<detail::SizeT>(ptr));
}
};
template <class T>
struct hash<std::unique_ptr<T>> {
size_t operator()(std::unique_ptr<T> const& ptr) const noexcept {
- return hash_int(reinterpret_cast<size_t>(ptr.get()));
+ return hash_int(reinterpret_cast<detail::SizeT>(ptr.get()));
}
};
template <class T>
struct hash<std::shared_ptr<T>> {
size_t operator()(std::shared_ptr<T> const& ptr) const noexcept {
- return hash_int(reinterpret_cast<size_t>(ptr.get()));
+ return hash_int(reinterpret_cast<detail::SizeT>(ptr.get()));
+ }
+};
+
+template <typename Enum>
+struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
+ size_t operator()(Enum e) const noexcept {
+ using Underlying = typename std::underlying_type<Enum>::type;
+ return hash<Underlying>{}(static_cast<Underlying>(e));
}
};
@@ -923,7 +933,8 @@ private: static constexpr size_t InitialNumElements = sizeof(uint64_t);
static constexpr uint32_t InitialInfoNumBits = 5;
static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits;
- static constexpr uint8_t InitialInfoHashShift = sizeof(size_t) * 8 - InitialInfoNumBits;
+ static constexpr size_t InfoMask = InitialInfoInc - 1U;
+ static constexpr uint8_t InitialInfoHashShift = 0;
using DataPool = detail::NodeAllocator<value_type, 4, 16384, IsFlat>;
// type needs to be wider than uint8_t.
@@ -1287,13 +1298,29 @@ private: mInfo += sizeof(size_t);
mKeyVals += sizeof(size_t);
}
-#if ROBIN_HOOD(LITTLE_ENDIAN)
- auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8;
+#if defined(ROBIN_HOOD_DISABLE_INTRINSICS)
+ // we know for certain that within the next 8 bytes we'll find a non-zero one.
+ if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load<uint32_t>(mInfo))) {
+ mInfo += 4;
+ mKeyVals += 4;
+ }
+ if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load<uint16_t>(mInfo))) {
+ mInfo += 2;
+ mKeyVals += 2;
+ }
+ if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) {
+ mInfo += 1;
+ mKeyVals += 1;
+ }
#else
+# if ROBIN_HOOD(LITTLE_ENDIAN)
+ auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8;
+# else
auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8;
-#endif
+# endif
mInfo += inc;
mKeyVals += inc;
+#endif
}
friend class Table<IsFlat, MaxLoadFactor100, key_type, mapped_type, hasher, key_equal>;
@@ -1315,10 +1342,11 @@ private: typename std::conditional<std::is_same<::robin_hood::hash<key_type>, hasher>::value,
::robin_hood::detail::identity_hash<size_t>,
::robin_hood::hash<size_t>>::type;
- *idx = Mix{}(WHash::operator()(key));
- *info = mInfoInc + static_cast<InfoType>(*idx >> mInfoHashShift);
- *idx &= mMask;
+ // the lower InitialInfoNumBits are reserved for info.
+ auto h = Mix{}(WHash::operator()(key));
+ *info = mInfoInc + static_cast<InfoType>((h & InfoMask) >> mInfoHashShift);
+ *idx = (h >> InitialInfoNumBits) & mMask;
}
// forwards the index by one, wrapping around at the end
@@ -1457,13 +1485,19 @@ public: using iterator = Iter<false>;
using const_iterator = Iter<true>;
+ Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual()))
+ : WHash()
+ , WKeyEqual() {
+ ROBIN_HOOD_TRACE(this)
+ }
+
// Creates an empty hash map. Nothing is allocated yet, this happens at the first insert.
// This tremendously speeds up ctor & dtor of a map that never receives an element. The
// penalty is payed at the first insert, and not before. Lookup of this empty map works
// because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the
// standard, but we can ignore it.
explicit Table(
- size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{},
+ size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{},
const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal)))
: WHash(h)
, WKeyEqual(equal) {
@@ -1543,8 +1577,12 @@ public: // elements and insert them, but copying is probably faster.
auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1);
- mKeyVals = static_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
- malloc(calcNumBytesTotal(numElementsWithBuffer))));
+ auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+
+ ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal("
+ << numElementsWithBuffer << ")")
+ mKeyVals = static_cast<Node*>(
+ detail::assertNotNull<std::bad_alloc>(std::malloc(numBytesTotal)));
// no need for calloc because clonData does memcpy
mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
mNumElements = o.mNumElements;
@@ -1592,12 +1630,16 @@ public: // no luck: we don't have the same array size allocated, so we need to realloc.
if (0 != mMask) {
// only deallocate if we actually have data!
- free(mKeyVals);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(mKeyVals);
}
auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1);
- mKeyVals = static_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
- malloc(calcNumBytesTotal(numElementsWithBuffer))));
+ auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+ ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal("
+ << numElementsWithBuffer << ")")
+ mKeyVals = static_cast<Node*>(
+ detail::assertNotNull<std::bad_alloc>(std::malloc(numBytesTotal)));
// no need for calloc here because cloneData performs a memcpy.
mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
@@ -1939,23 +1981,36 @@ public: // reserves space for the specified number of elements. Makes sure the old data fits.
// exactly the same as reserve(c).
void rehash(size_t c) {
- reserve(c);
+ // forces a reserve
+ reserve(c, true);
}
// reserves space for the specified number of elements. Makes sure the old data fits.
- // Exactly the same as resize(c). Use resize(0) to shrink to fit.
+ // Exactly the same as rehash(c). Use rehash(0) to shrink to fit.
void reserve(size_t c) {
+ // reserve, but don't force rehash
+ reserve(c, false);
+ }
+
+ // If possible reallocates the map to a smaller one. This frees the underlying table.
+ // Does not do anything if load_factor is too large for decreasing the table's size.
+ void compact() {
ROBIN_HOOD_TRACE(this)
- auto const minElementsAllowed = (std::max)(c, mNumElements);
auto newSize = InitialNumElements;
- while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) {
+ while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) {
newSize *= 2;
}
if (ROBIN_HOOD_UNLIKELY(newSize == 0)) {
throwOverflowError();
}
- rehashPowerOfTwo(newSize);
+ ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1")
+
+ // only actually do anything when the new size is bigger than the old one. This prevents to
+ // continuously allocate for each reserve() call.
+ if (newSize < mMask + 1) {
+ rehashPowerOfTwo(newSize, true);
+ }
}
size_type size() const noexcept { // NOLINT(modernize-use-nodiscard)
@@ -1989,6 +2044,17 @@ public: return mMask;
}
+#ifndef IGNORE_STANDARD_METHODS
+ ROBIN_HOOD(NODISCARD) size_t bucket_count() const noexcept {
+ ROBIN_HOOD_TRACE(this)
+ return mMask;
+ }
+ void max_load_factor(float) {
+ ROBIN_HOOD_TRACE(this)
+ // warning, not used
+ }
+#endif
+
ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept {
if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits<size_t>::max)() / 100)) {
return maxElements * MaxLoadFactor100 / 100;
@@ -2046,9 +2112,29 @@ private: return find(e) != end();
}
+ void reserve(size_t c, bool forceRehash) {
+ ROBIN_HOOD_TRACE(this)
+ auto const minElementsAllowed = (std::max)(c, mNumElements);
+ auto newSize = InitialNumElements;
+ while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) {
+ newSize *= 2;
+ }
+ if (ROBIN_HOOD_UNLIKELY(newSize == 0)) {
+ throwOverflowError();
+ }
+
+ ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1")
+
+ // only actually do anything when the new size is bigger than the old one. This prevents to
+ // continuously allocate for each reserve() call.
+ if (forceRehash || newSize > mMask + 1) {
+ rehashPowerOfTwo(newSize, false);
+ }
+ }
+
// reserves space for at least the specified number of elements.
// only works if numBuckets if power of two
- void rehashPowerOfTwo(size_t numBuckets) {
+ void rehashPowerOfTwo(size_t numBuckets, bool forceFree) {
ROBIN_HOOD_TRACE(this)
Node* const oldKeyVals = mKeyVals;
@@ -2067,8 +2153,17 @@ private: }
}
- // don't destroy old data: put it into the pool instead
- DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer));
+ // this check is not necessary as it's guarded by the previous if, but it helps silence
+ // g++'s overeager "attempt to free a non-heap object 'map'
+ // [-Werror=free-nonheap-object]" warning.
+ if (oldKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
+ // don't destroy old data: put it into the pool instead
+ if (forceFree) {
+ std::free(oldKeyVals);
+ } else {
+ DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer));
+ }
+ }
}
}
@@ -2111,8 +2206,11 @@ private: auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements);
// calloc also zeroes everything
- mKeyVals = reinterpret_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
- calloc(1, calcNumBytesTotal(numElementsWithBuffer))));
+ auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer);
+ ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal("
+ << numElementsWithBuffer << ")")
+ mKeyVals = reinterpret_cast<Node*>(
+ detail::assertNotNull<std::bad_alloc>(std::calloc(1, numBytesTotal)));
mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
// set sentinel
@@ -2282,7 +2380,7 @@ private: throwOverflowError();
}
- rehashPowerOfTwo((mMask + 1) * 2);
+ rehashPowerOfTwo((mMask + 1) * 2, false);
}
void destroy() {
@@ -2296,10 +2394,11 @@ private: // This protection against not deleting mMask shouldn't be needed as it's sufficiently
// protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise
- // reports a compile error: attempt to free a non-heap object ‘fm’
+ // reports a compile error: attempt to free a non-heap object 'fm'
// [-Werror=free-nonheap-object]
if (mKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
- free(mKeyVals);
+ ROBIN_HOOD_LOG("std::free")
+ std::free(mKeyVals);
}
}
@@ -2363,4 +2462,4 @@ using unordered_set = detail::Table<sizeof(Key) <= sizeof(size_t) * 6 && } // namespace robin_hood
-#endif
+#endif
\ No newline at end of file diff --git a/benchmarks/picobench.hpp b/benchmarks/picobench.hpp index fc06709c..bf6204f4 100644 --- a/benchmarks/picobench.hpp +++ b/benchmarks/picobench.hpp @@ -444,7 +444,7 @@ public: } line(out, width); out << - " Name (* = baseline) | Dim | User data | Total ms | ns/op | Baseline| Ops/second\n"; + " Name (* = baseline) |Iterations | User data | Total ms | ns/op |Baseline | Ops/second\n"; line(out, width); auto problem_space_view = get_problem_space_view(suite); @@ -467,7 +467,7 @@ public: out << " |" << setw(10) << ps.first.first << " |" << setw(10) << bm.user_data << " |" - << setw(10) << fixed << setprecision(3) << double(bm.total_time_ns) / 1000000.0 << " |"; + << setw(10) << fixed << setprecision(2) << double(bm.total_time_ns) / 1000000.0 << " |"; auto ns_op = (bm.total_time_ns / ps.first.first); if (ns_op > 99999999) { @@ -561,16 +561,18 @@ public: out << " |" << setw(10) << ns_per_op << " |"; - if (&bm == baseline) + if (bm.is_baseline) { out << " - |"; + baseline_total_time = total_time; + baseline_total_iterations = total_iterations; + baseline_ns_per_op = ns_per_op; } else { out << setw(9) << fixed << setprecision(3) << double(ns_per_op) / double(baseline_ns_per_op) << " |"; } - auto ops_per_sec = total_iterations * (1000000000.0 / double(total_time)); out << setw(12) << fixed << setprecision(1) << ops_per_sec << "\n"; } @@ -579,61 +581,60 @@ public: } } - void to_csv(std::ostream& out, bool header = true, bool dec = '.') const + void to_csv(std::ostream& out) const { using namespace std; - - if (header) - { - out << "Suite, Benchmark, Baseline, Dimension, User data, Samples, Total ns, Result, ns/op, Baseline ratio\n"; - } + const char* sep = ","; for (auto& suite : suites) { - const benchmark* baseline = nullptr; - for (auto& bm : suite.benchmarks) + out << "Suite, Baseline, Benchmark, Iterations, User data, Total ms, ns/op, Ratio, Ops/second\n"; + + auto problem_space_view = get_problem_space_view(suite); + for (auto& ps : problem_space_view) { - if (bm.is_baseline) + const problem_space_benchmark* baseline = nullptr; + for (auto& bm : ps.second) { - baseline = &bm; - break; + if (bm.is_baseline) + { + baseline = &bm; + break; + } } - } - I_PICOBENCH_ASSERT(baseline); - for (auto& bm : suite.benchmarks) - { - for (auto& d : bm.data) + for (auto& bm : ps.second) { out << '"' << (suite.name ? suite.name : "") << '"'; - out << ",\"" << bm.name << "\", "; - out << (&bm == baseline ? "true" : "false"); - out << ", " - << d.dimension << ", " - << d.user_data << ", " - << d.samples << ", " - << d.total_time_ns << ", " - << d.result << ", " - << (d.total_time_ns / d.dimension) << ", "; - - if (baseline) + out << sep << (bm.is_baseline ? "true" : "false"); + out << sep << '"' << bm.name << '"'; + out << sep << ps.first.first + << sep << bm.user_data + << sep << fixed << setprecision(3) << bm.total_time_ns / 1000000.0; + + auto ns_op = (bm.total_time_ns / ps.first.first); + out << sep << ns_op << sep; + if (baseline == &bm) { - for (auto& bd : baseline->data) - { - if (bd.dimension == d.dimension && bd.user_data == d.user_data) - { - out << fixed << setprecision(3) << (double(d.total_time_ns) / double(bd.total_time_ns)); - } - } + out << 1.0; + } + else if (baseline) + { + out << fixed << setprecision(3) << bm.total_time_ns / double(baseline->total_time_ns); } else - out << -999; - out << '\n'; + { + out << -1.0; // no baseline to compare to + } + + auto ops_per_sec = ps.first.first * (1000000000.0 / bm.total_time_ns); + out << sep << fixed << setprecision(1) << ops_per_sec << "\n"; } } } } + struct problem_space_benchmark { const char* name; @@ -720,7 +721,8 @@ enum class report_output_format { text, concise_text, - csv + csv, + all, }; #if !defined(PICOBENCH_DEFAULT_ITERATIONS) @@ -805,28 +807,47 @@ public: auto report = generate_report(); std::ostream* out = _stdout; std::ofstream fout; - if (preferred_output_filename()) + report_output_format fmt[] = {report_output_format::text, + report_output_format::concise_text, + report_output_format::csv}; + const char *ext[] = {".txt", ".lst", ".csv"}, *fn = preferred_output_filename(); + bool all = preferred_output_format() == report_output_format::all; + for (int i = 0; i < 3; ++i) { - fout.open(preferred_output_filename()); - if (!fout.is_open()) + if (all || preferred_output_format() == fmt[i]) { - std::cerr << "Error: Could not open output file `" << preferred_output_filename() << "`\n"; - return 1; - } - out = &fout; - } + if (fn) + { + std::string name(fn); - switch (preferred_output_format()) - { - case picobench::report_output_format::text: - report.to_text(*out); - break; - case picobench::report_output_format::concise_text: - report.to_text_concise(*out); - break; - case picobench::report_output_format::csv: - report.to_csv(*out); - break; + if (all || name.find(".") == std::string::npos) + { + name += ext[i]; + } + fout.close(); + fout.open(name.c_str()); + if (!fout.is_open()) + { + std::cerr << "Error: Could not open output file `" << fn << "`\n"; + return 1; + } + out = &fout; + } + + switch (fmt[i]) + { + case report_output_format::text: + report.to_text(*out); + break; + case report_output_format::concise_text: + report.to_text_concise(*out); + break; + case report_output_format::csv: + report.to_csv(*out); + break; + default: break; + } + } } } return error(); @@ -1103,8 +1124,8 @@ public: _opts.emplace_back("-samples=", "<n>", "Sets default number of samples for benchmarks", &runner::cmd_samples); - _opts.emplace_back("-out-fmt=", "<txt|con|csv>", - "Outputs text or concise or csv", + _opts.emplace_back("-out-fmt=", "<txt|lst|csv|all>", + "Outputs text, concise, csv or all", &runner::cmd_out_fmt); _opts.emplace_back("-output=", "<filename>", "Sets output filename or `stdout`", @@ -1310,7 +1331,7 @@ private: { _output_format = report_output_format::text; } - else if (strcmp(line, "con") == 0) + else if (strcmp(line, "lst") == 0) { _output_format = report_output_format::concise_text; } @@ -1318,6 +1339,10 @@ private: { _output_format = report_output_format::csv; } + else if (strcmp(line, "all") == 0) + { + _output_format = report_output_format::all; + } else { return false; |
