| author | Tyge Løvset <[email protected]> | 2020-12-27 16:03:58 +0100 |
|---|---|---|
| committer | Tyge Løvset <[email protected]> | 2020-12-27 16:03:58 +0100 |
| commit | 83b7be31a1d0fc0be4e013dbfc97bb6cdc3600db | |
| tree | df69b4e6a7a85b5ed8c8bbd6d1baf52794b44966 /benchmarks | |
| parent | 5a444c90db6372749cbdc629ec999871cd20af72 | |
Removed generic macro functions from the API, such as cvec_size(c) and cvec_empty(c); use the type-suffixed cvec_X_size(c) etc. instead. Restructured benchmarks / examples.
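As a migration hint, here is a minimal before/after sketch of the renamed calls. It is not part of the commit: it assumes a container declared with the `using_cvec` declaration macro, and the container name `cvec_f` and float element type are illustrative.

```cpp
// Hypothetical migration example (illustrative, not part of this commit).
// Assumes: using_cvec(f, float) declares a float vector type named cvec_f.
#include <stdio.h>
#include "stc/cvec.h"
using_cvec(f, float);

int main(void) {
    cvec_f vec = cvec_f_init();
    cvec_f_push_back(&vec, 1.0f);

    // Before this commit, generic macros worked on any container type:
    //   cvec_size(vec); cvec_empty(vec);
    // After it, call the type-suffixed functions instead:
    size_t n = cvec_f_size(vec);
    bool empty = cvec_f_empty(vec);

    printf("size=%zu empty=%d\n", n, (int) empty);
    cvec_f_del(&vec);  // type-suffixed destructor, like cmap_##X##_del below
    return 0;
}
```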
Diffstat (limited to 'benchmarks')

| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | benchmarks/cmap_benchmark.cpp | 2 |
| -rw-r--r-- | benchmarks/crand_benchmark2.cpp | 2 |
| -rw-r--r-- | benchmarks/others/bytell_hash_map.hpp | 1260 |
| -rw-r--r-- | benchmarks/others/flat_hash_map.hpp | 1496 |
| -rw-r--r-- | benchmarks/others/hopscotch_growth_policy.h | 346 |
| -rw-r--r-- | benchmarks/others/hopscotch_hash.h | 1827 |
| -rw-r--r-- | benchmarks/others/hopscotch_map.h | 710 |
| -rw-r--r-- | benchmarks/others/khash.h | 595 |
| -rw-r--r-- | benchmarks/others/khashl.h | 345 |
| -rw-r--r-- | benchmarks/others/robin_hood.hpp | 2366 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp.h | 4358 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp_config.h | 781 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp_dlalloc.h | 4044 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp_memory.h | 190 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp_smartptr.h | 71 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp_stdint.h | 16 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp_timer.h | 58 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp_traits.h | 125 |
| -rw-r--r-- | benchmarks/others/sparsepp/spp_utils.h | 477 |
19 files changed, 19067 insertions(+), 2 deletions(-)
diff --git a/benchmarks/cmap_benchmark.cpp b/benchmarks/cmap_benchmark.cpp
index a159d24b..aaf9fc73 100644
--- a/benchmarks/cmap_benchmark.cpp
+++ b/benchmarks/cmap_benchmark.cpp
@@ -41,7 +41,7 @@ crand_t rng;
 #define CMAP_FIND(X, key) (cmap_##X##_find(map, key) != NULL)
 #define CMAP_FOR(X, i) c_foreach (i, cmap_##X, map)
 #define CMAP_ITEM(X, i) i.ref->second
-#define CMAP_SIZE(X) cmap_size(map)
+#define CMAP_SIZE(X) cmap_##X##_size(map)
 #define CMAP_BUCKETS(X) cmap_##X##_bucket_count(map)
 #define CMAP_CLEAR(X) cmap_##X##_clear(&map)
 #define CMAP_DTOR(X) cmap_##X##_del(&map)
diff --git a/benchmarks/crand_benchmark2.cpp b/benchmarks/crand_benchmark2.cpp
index 3ae6f8ab..ac7296fc 100644
--- a/benchmarks/crand_benchmark2.cpp
+++ b/benchmarks/crand_benchmark2.cpp
@@ -30,7 +30,7 @@ void test1(void)
     diff = clock() - before;
     printf("std::uniform:\t\t%.02f, %zu\n\n", (float) diff / CLOCKS_PER_SEC, sum);
-    c_forrange (30) printf("%02zd ", idist(rng));
+    c_forrange (30) printf("%02d ", idist(rng));
     puts("");
     c_forrange (8) printf("%f ", fdist(rng));
     puts("\n");
diff --git a/benchmarks/others/bytell_hash_map.hpp b/benchmarks/others/bytell_hash_map.hpp
new file mode 100644
index 00000000..2e348cdb
--- /dev/null
+++ b/benchmarks/others/bytell_hash_map.hpp
@@ -0,0 +1,1260 @@
+// Copyright Malte Skarupke 2017.
+// Distributed under the Boost Software License, Version 1.0.
+// (See http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+#include <cstdint>
+#include <cstddef>
+#include <cmath>
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <type_traits>
+#include "flat_hash_map.hpp"
+#include <vector>
+#include <array>
+
+namespace ska
+{
+
+namespace detailv8
+{
+using ska::detailv3::functor_storage;
+using ska::detailv3::KeyOrValueHasher;
+using ska::detailv3::KeyOrValueEquality;
+using ska::detailv3::AssignIfTrue;
+using ska::detailv3::HashPolicySelector;
+
+template<typename = void>
+struct sherwood_v8_constants
+{
+ static constexpr int8_t magic_for_empty = int8_t(0b11111111);
+ static constexpr int8_t magic_for_reserved = int8_t(0b11111110);
+ static constexpr int8_t bits_for_direct_hit = int8_t(0b10000000);
+ static constexpr int8_t magic_for_direct_hit = int8_t(0b00000000);
+ static constexpr int8_t magic_for_list_entry = int8_t(0b10000000);
+
+ static constexpr int8_t bits_for_distance = int8_t(0b01111111);
+ inline static int distance_from_metadata(int8_t metadata)
+ {
+ return metadata & bits_for_distance;
+ }
+
+ static constexpr int num_jump_distances = 126;
+ // jump distances chosen like this:
+ // 1. pick the first 16 integers to promote staying in the same block
+ // 2. add the next 66 triangular numbers to get even jumps when
+ // the hash table is a power of two
+ // 3. add 44 more triangular numbers at a much steeper growth rate
+ // to get a sequence that allows large jumps so that a table
+ // with 10000 sequential numbers doesn't endlessly re-allocate
+ static constexpr size_t jump_distances[num_jump_distances]
+ {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+
+ 21, 28, 36, 45, 55, 66, 78, 91, 105, 120, 136, 153, 171, 190, 210, 231,
+ 253, 276, 300, 325, 351, 378, 406, 435, 465, 496, 528, 561, 595, 630,
+ 666, 703, 741, 780, 820, 861, 903, 946, 990, 1035, 1081, 1128, 1176,
+ 1225, 1275, 1326, 1378, 1431, 1485, 1540, 1596, 1653, 1711, 1770, 1830,
+ 1891, 1953, 2016, 2080, 2145, 2211, 2278, 2346, 2415, 2485, 2556,
+
+ 3741, 8385, 18915, 42486, 95703, 215496, 485605, 1091503, 2456436,
+ 5529475, 12437578, 27986421, 62972253, 141700195, 318819126, 717314626,
+ 1614000520, 3631437253, 8170829695, 18384318876, 41364501751,
+ 93070021080, 209407709220, 471167588430, 1060127437995, 2385287281530,
+ 5366895564381, 12075513791265, 27169907873235, 61132301007778,
+ 137547673121001, 309482258302503, 696335090510256, 1566753939653640,
+ 3525196427195653, 7931691866727775, 17846306747368716,
+ 40154190394120111, 90346928493040500, 203280588949935750,
+ 457381324898247375, 1029107980662394500, 2315492957028380766,
+ 5209859150892887590,
+ };
+};
+template<typename T>
+constexpr int8_t sherwood_v8_constants<T>::magic_for_empty;
+template<typename T>
+constexpr int8_t sherwood_v8_constants<T>::magic_for_reserved;
+template<typename T>
+constexpr int8_t sherwood_v8_constants<T>::bits_for_direct_hit;
+template<typename T>
+constexpr int8_t sherwood_v8_constants<T>::magic_for_direct_hit;
+template<typename T>
+constexpr int8_t sherwood_v8_constants<T>::magic_for_list_entry;
+
+template<typename T>
+constexpr int8_t sherwood_v8_constants<T>::bits_for_distance;
+
+template<typename T>
+constexpr int sherwood_v8_constants<T>::num_jump_distances;
+template<typename T>
+constexpr size_t sherwood_v8_constants<T>::jump_distances[num_jump_distances];
+
+template<typename T, uint8_t BlockSize>
+struct sherwood_v8_block
+{
+ sherwood_v8_block()
+ {
+ }
+ ~sherwood_v8_block()
+ {
+ }
+ int8_t control_bytes[BlockSize];
+ union
+ {
+ T data[BlockSize];
+ };
+
+ static sherwood_v8_block * empty_block()
+ {
+ static std::array<int8_t, BlockSize> empty_bytes = []
+ {
+ std::array<int8_t, BlockSize> result;
+ result.fill(sherwood_v8_constants<>::magic_for_empty);
+ return result;
+ }();
+ return reinterpret_cast<sherwood_v8_block *>(&empty_bytes);
+ }
+
+ int first_empty_index() const
+ {
+ for (int i = 0; i < BlockSize; ++i)
+ {
+ if (control_bytes[i] == sherwood_v8_constants<>::magic_for_empty)
+ return i;
+ }
+ return -1;
+ }
+
+ void fill_control_bytes(int8_t value)
+ {
+ std::fill(std::begin(control_bytes), std::end(control_bytes), value);
+ }
+};
+
+template<typename T, typename FindKey, typename ArgumentHash, typename Hasher, typename ArgumentEqual, typename Equal, typename ArgumentAlloc, typename ByteAlloc, uint8_t BlockSize>
+class sherwood_v8_table : private ByteAlloc, private Hasher, private Equal
+{
+ using AllocatorTraits = std::allocator_traits<ByteAlloc>;
+ using BlockType = sherwood_v8_block<T, BlockSize>;
+ using BlockPointer = BlockType *;
+ using BytePointer = typename AllocatorTraits::pointer;
+ struct convertible_to_iterator;
+ using Constants = sherwood_v8_constants<>;
+
+public:
+
+ using value_type = T;
+ using size_type = size_t;
+ using difference_type = std::ptrdiff_t;
+ using hasher = ArgumentHash;
+ using key_equal = ArgumentEqual;
+ using allocator_type = ByteAlloc;
+ using reference = value_type &;
+ using const_reference = const value_type &;
+ using pointer = value_type *;
+ using const_pointer = const value_type *;
+
+ sherwood_v8_table()
+ {
+ }
+ explicit sherwood_v8_table(size_type bucket_count, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc())
+ : ByteAlloc(alloc), Hasher(hash), Equal(equal)
+ {
+ if (bucket_count)
+ rehash(bucket_count);
+ }
+ sherwood_v8_table(size_type bucket_count, const ArgumentAlloc & alloc)
+ : sherwood_v8_table(bucket_count, ArgumentHash(), ArgumentEqual(), alloc)
+ {
+ }
+ sherwood_v8_table(size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc)
+ : sherwood_v8_table(bucket_count, hash, ArgumentEqual(), alloc)
+ {
+ }
+ explicit sherwood_v8_table(const ArgumentAlloc & alloc)
+ : ByteAlloc(alloc)
+ {
+ }
+ template<typename It>
+ sherwood_v8_table(It first, It last, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc())
+ : sherwood_v8_table(bucket_count, hash, equal, alloc)
+ {
+ insert(first, last);
+ }
+ template<typename It>
+ sherwood_v8_table(It first, It last, size_type bucket_count, const ArgumentAlloc & alloc)
+ : sherwood_v8_table(first, last, bucket_count, ArgumentHash(), ArgumentEqual(), alloc)
+ {
+ }
+ template<typename It>
+ sherwood_v8_table(It first, It last, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc)
+ : sherwood_v8_table(first, last, bucket_count, hash, ArgumentEqual(), alloc)
+ {
+ }
+ sherwood_v8_table(std::initializer_list<T> il, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc())
+ : sherwood_v8_table(bucket_count, hash, equal, alloc)
+ {
+ if (bucket_count == 0)
+ rehash(il.size());
+ insert(il.begin(), il.end());
+ }
+ sherwood_v8_table(std::initializer_list<T> il, size_type bucket_count, const ArgumentAlloc & alloc)
+ : sherwood_v8_table(il, bucket_count, ArgumentHash(), ArgumentEqual(), alloc)
+ {
+ }
+ sherwood_v8_table(std::initializer_list<T> il, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc)
+ : sherwood_v8_table(il, bucket_count, hash, ArgumentEqual(), alloc)
+ {
+ }
+ sherwood_v8_table(const sherwood_v8_table & other)
+ : sherwood_v8_table(other, AllocatorTraits::select_on_container_copy_construction(other.get_allocator()))
+ {
+ }
+ sherwood_v8_table(const sherwood_v8_table & other, const ArgumentAlloc & alloc)
+ : ByteAlloc(alloc), Hasher(other), Equal(other), _max_load_factor(other._max_load_factor)
+ {
+ rehash_for_other_container(other);
+ try
+ {
+ insert(other.begin(), other.end());
+ }
+ catch(...)
+ {
+ clear();
+ deallocate_data(entries, num_slots_minus_one);
+ throw;
+ }
+ }
+ sherwood_v8_table(sherwood_v8_table && other) noexcept
+ : ByteAlloc(std::move(other)), Hasher(std::move(other)), Equal(std::move(other))
+ , _max_load_factor(other._max_load_factor)
+ {
+ swap_pointers(other);
+ }
+ sherwood_v8_table(sherwood_v8_table && other, const ArgumentAlloc & alloc) noexcept
+ : ByteAlloc(alloc), Hasher(std::move(other)), Equal(std::move(other))
+ , _max_load_factor(other._max_load_factor)
+ {
+ swap_pointers(other);
+ }
+ sherwood_v8_table & operator=(const sherwood_v8_table & other)
+ {
+ if (this == std::addressof(other))
+ return *this;
+
+ clear();
+ if (AllocatorTraits::propagate_on_container_copy_assignment::value)
+ {
+ if (static_cast<ByteAlloc &>(*this) != static_cast<const ByteAlloc &>(other))
+ {
+ reset_to_empty_state();
+ }
+ AssignIfTrue<ByteAlloc, AllocatorTraits::propagate_on_container_copy_assignment::value>()(*this, other);
+ }
+ _max_load_factor = other._max_load_factor;
+ static_cast<Hasher &>(*this) = other;
+ static_cast<Equal &>(*this) = other;
+ rehash_for_other_container(other);
+ insert(other.begin(), other.end());
+ return *this;
+ }
+ sherwood_v8_table & operator=(sherwood_v8_table && other) noexcept
+ {
+ if (this == std::addressof(other))
+ return *this;
+ else if (AllocatorTraits::propagate_on_container_move_assignment::value)
+ {
+ clear();
+ reset_to_empty_state();
+ AssignIfTrue<ByteAlloc, AllocatorTraits::propagate_on_container_move_assignment::value>()(*this, std::move(other));
+ swap_pointers(other);
+ }
+ else if (static_cast<ByteAlloc &>(*this) == static_cast<ByteAlloc &>(other))
+ {
+ swap_pointers(other);
+ }
+ else
+ {
+ clear();
+ _max_load_factor = other._max_load_factor;
+ rehash_for_other_container(other);
+ for (T & elem : other)
+ emplace(std::move(elem));
+ other.clear();
+ }
+ static_cast<Hasher &>(*this) = std::move(other);
+ static_cast<Equal &>(*this) = std::move(other);
+ return *this;
+ }
+ ~sherwood_v8_table()
+ {
+ clear();
+ deallocate_data(entries, num_slots_minus_one);
+ }
+
+ const allocator_type & get_allocator() const
+ {
+ return static_cast<const allocator_type &>(*this);
+ }
+ const ArgumentEqual & key_eq() const
+ {
+ return static_cast<const ArgumentEqual &>(*this);
+ }
+ const ArgumentHash & hash_function() const
+ {
+ return static_cast<const ArgumentHash &>(*this);
+ }
+
+ template<typename ValueType>
+ struct templated_iterator
+ {
+ private:
+ friend class sherwood_v8_table;
+ BlockPointer current = BlockPointer();
+ size_t index = 0;
+
+ public:
+ templated_iterator()
+ {
+ }
+ templated_iterator(BlockPointer entries, size_t index)
+ : current(entries)
+ , index(index)
+ {
+ }
+
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = ValueType;
+ using difference_type = ptrdiff_t;
+ using pointer = ValueType *;
+ using reference = ValueType &;
+
+ friend bool operator==(const templated_iterator & lhs, const templated_iterator & rhs)
+ {
+ return lhs.index == rhs.index;
+ }
+ friend bool operator!=(const templated_iterator & lhs, const templated_iterator & rhs)
+ {
+ return !(lhs == rhs);
+ }
+
+ templated_iterator & operator++()
+ {
+ do
+ {
+ if (index % BlockSize == 0)
+ --current;
+ if (index-- == 0)
+ break;
+ }
+ while(current->control_bytes[index % BlockSize] == Constants::magic_for_empty);
+ return *this;
+ }
+ templated_iterator operator++(int)
+ {
+ templated_iterator copy(*this);
+ ++*this;
+ return copy;
+ }
+
+ ValueType & operator*() const
+ {
+ return current->data[index % BlockSize];
+ }
+ ValueType * operator->() const
+ {
+ return current->data + index % BlockSize;
+ }
+
+ operator templated_iterator<const value_type>() const
+ {
+ return { current, index };
+ }
+ };
+ using iterator = templated_iterator<value_type>;
+ using const_iterator = templated_iterator<const value_type>;
+
+ iterator begin()
+ {
+ size_t num_slots = num_slots_minus_one ? num_slots_minus_one + 1 : 0;
+ return ++iterator{ entries + num_slots / BlockSize, num_slots };
+ }
+ const_iterator begin() const
+ {
+ size_t num_slots = num_slots_minus_one ? num_slots_minus_one + 1 : 0;
+ return ++iterator{ entries + num_slots / BlockSize, num_slots };
+ }
+ const_iterator cbegin() const
+ {
+ return begin();
+ }
+ iterator end()
+ {
+ return { entries - 1, std::numeric_limits<size_t>::max() };
+ }
+ const_iterator end() const
+ {
+ return { entries - 1, std::numeric_limits<size_t>::max() };
+ }
+ const_iterator cend() const
+ {
+ return end();
+ }
+
+ inline iterator find(const FindKey & key)
+ {
+ size_t index = hash_object(key);
+ size_t num_slots_minus_one = this->num_slots_minus_one;
+ BlockPointer entries = this->entries;
+ index = hash_policy.index_for_hash(index, num_slots_minus_one);
+ bool first = true;
+ for (;;)
+ {
+ size_t block_index = index / BlockSize;
+ int index_in_block = index % BlockSize;
+ BlockPointer block = entries + block_index;
+ int8_t metadata = block->control_bytes[index_in_block];
+ if (first)
+ {
+ if ((metadata & Constants::bits_for_direct_hit) != Constants::magic_for_direct_hit)
+ return end();
+ first = false;
+ }
+ if (compares_equal(key, block->data[index_in_block]))
+ return { block, index };
+ int8_t to_next_index = metadata & Constants::bits_for_distance;
+ if (to_next_index == 0)
+ return end();
+ index += Constants::jump_distances[to_next_index];
+ index = hash_policy.keep_in_range(index, num_slots_minus_one);
+ }
+ }
+ inline const_iterator find(const FindKey & key) const
+ {
+ return const_cast<sherwood_v8_table *>(this)->find(key);
+ }
+ size_t count(const FindKey & key) const
+ {
+ return find(key) == end() ? 0 : 1;
+ }
+ std::pair<iterator, iterator> equal_range(const FindKey & key)
+ {
+ iterator found = find(key);
+ if (found == end())
+ return { found, found };
+ else
+ return { found, std::next(found) };
+ }
+ std::pair<const_iterator, const_iterator> equal_range(const FindKey & key) const
+ {
+ const_iterator found = find(key);
+ if (found == end())
+ return { found, found };
+ else
+ return { found, std::next(found) };
+ }
+
+
+ template<typename Key, typename... Args>
+ inline std::pair<iterator, bool> emplace(Key && key, Args &&... args)
+ {
+ size_t index = hash_object(key);
+ size_t num_slots_minus_one = this->num_slots_minus_one;
+ BlockPointer entries = this->entries;
+ index = hash_policy.index_for_hash(index, num_slots_minus_one);
+ bool first = true;
+ for (;;)
+ {
+ size_t block_index = index / BlockSize;
+ int index_in_block = index % BlockSize;
+ BlockPointer block = entries + block_index;
+ int8_t metadata = block->control_bytes[index_in_block];
+ if (first)
+ {
+ if ((metadata & Constants::bits_for_direct_hit) != Constants::magic_for_direct_hit)
+ return emplace_direct_hit({ index, block }, std::forward<Key>(key), std::forward<Args>(args)...);
+ first = false;
+ }
+ if (compares_equal(key, block->data[index_in_block]))
+ return { { block, index }, false };
+ int8_t to_next_index = metadata & Constants::bits_for_distance;
+ if (to_next_index == 0)
+ return emplace_new_key({ index, block }, std::forward<Key>(key), std::forward<Args>(args)...);
+ index += Constants::jump_distances[to_next_index];
+ index = hash_policy.keep_in_range(index, num_slots_minus_one);
+ }
+ }
+
+ std::pair<iterator, bool> insert(const value_type & value)
+ {
+ return emplace(value);
+ }
+ std::pair<iterator, bool> insert(value_type && value)
+ {
+ return emplace(std::move(value));
+ }
+ template<typename... Args>
+ iterator emplace_hint(const_iterator, Args &&... args)
+ {
+ return emplace(std::forward<Args>(args)...).first;
+ }
+ iterator insert(const_iterator, const value_type & value)
+ {
+ return emplace(value).first;
+ }
+ iterator insert(const_iterator, value_type && value)
+ {
+ return emplace(std::move(value)).first;
+ }
+
+ template<typename It>
+ void insert(It begin, It end)
+ {
+ for (; begin != end; ++begin)
+ {
+ emplace(*begin);
+ }
+ }
+ void insert(std::initializer_list<value_type> il)
+ {
+ insert(il.begin(), il.end());
+ }
+
+ void rehash(size_t num_items)
+ {
+ num_items = std::max(num_items, static_cast<size_t>(std::ceil(num_elements / static_cast<double>(_max_load_factor))));
+ if (num_items == 0)
+ {
+ reset_to_empty_state();
+ return;
+ }
+ auto new_prime_index = hash_policy.next_size_over(num_items);
+ if (num_items == num_slots_minus_one + 1)
+ return;
+ size_t num_blocks = num_items / BlockSize;
+ if (num_items % BlockSize)
+ ++num_blocks;
+ size_t memory_requirement = calculate_memory_requirement(num_blocks);
+ unsigned char * new_memory = &*AllocatorTraits::allocate(*this, memory_requirement);
+
+ BlockPointer new_buckets = reinterpret_cast<BlockPointer>(new_memory);
+
+ BlockPointer special_end_item = new_buckets + num_blocks;
+ for (BlockPointer it = new_buckets; it <= special_end_item; ++it)
+ it->fill_control_bytes(Constants::magic_for_empty);
+ using std::swap;
+ swap(entries, new_buckets);
+ swap(num_slots_minus_one, num_items);
+ --num_slots_minus_one;
+ hash_policy.commit(new_prime_index);
+ num_elements = 0;
+ if (num_items)
+ ++num_items;
+ size_t old_num_blocks = num_items / BlockSize;
+ if (num_items % BlockSize)
+ ++old_num_blocks;
+ for (BlockPointer it = new_buckets, end = new_buckets + old_num_blocks; it != end; ++it)
+ {
+ for (int i = 0; i < BlockSize; ++i)
+ {
+ int8_t metadata = it->control_bytes[i];
+ if (metadata != Constants::magic_for_empty && metadata != Constants::magic_for_reserved)
+ {
+ emplace(std::move(it->data[i]));
+ AllocatorTraits::destroy(*this, it->data + i);
+ }
+ }
+ }
+ deallocate_data(new_buckets, num_items - 1);
+ }
+
+ void reserve(size_t num_elements)
+ {
+ size_t required_buckets = num_buckets_for_reserve(num_elements);
+ if (required_buckets > bucket_count())
+ rehash(required_buckets);
+ }
+
+ // the return value is a type that can be converted to an iterator
+ // the reason for doing this is that it's not free to find the
+ // iterator pointing at the next element. if you care about the
+ // next iterator, turn the return value into an iterator
+ convertible_to_iterator erase(const_iterator to_erase)
+ {
+ LinkedListIt current = { to_erase.index, to_erase.current };
+ if (current.has_next())
+ {
+ LinkedListIt previous = current;
+ LinkedListIt next = current.next(*this);
+ while (next.has_next())
+ {
+ previous = next;
+ next = next.next(*this);
+ }
+ AllocatorTraits::destroy(*this, std::addressof(*current));
+ AllocatorTraits::construct(*this, std::addressof(*current), std::move(*next));
+ AllocatorTraits::destroy(*this, std::addressof(*next));
+ next.set_metadata(Constants::magic_for_empty);
+ previous.clear_next();
+ }
+ else
+ {
+ if (!current.is_direct_hit())
+ find_parent_block(current).clear_next();
+ AllocatorTraits::destroy(*this, std::addressof(*current));
+ current.set_metadata(Constants::magic_for_empty);
+ }
+ --num_elements;
+ return { to_erase.current, to_erase.index };
+ }
+
+ iterator erase(const_iterator begin_it, const_iterator end_it)
+ {
+ if (begin_it == end_it)
+ return { begin_it.current, begin_it.index };
+ if (std::next(begin_it) == end_it)
+ return erase(begin_it);
+ if (begin_it == begin() && end_it == end())
+ {
+ clear();
+ return { end_it.current, end_it.index };
+ }
+ std::vector<std::pair<int, LinkedListIt>> depth_in_chain;
+ for (const_iterator it = begin_it; it != end_it; ++it)
+ {
+ LinkedListIt list_it(it.index, it.current);
+ if (list_it.is_direct_hit())
+ depth_in_chain.emplace_back(0, list_it);
+ else
+ {
+ LinkedListIt root = find_direct_hit(list_it);
+ int distance = 1;
+ for (;;)
+ {
+ LinkedListIt next = root.next(*this);
+ if (next == list_it)
+ break;
+ ++distance;
+ root = next;
+ }
+ depth_in_chain.emplace_back(distance, list_it);
+ }
+ }
+ std::sort(depth_in_chain.begin(), depth_in_chain.end(), [](const auto & a, const auto & b) { return a.first < b.first; });
+ for (auto it = depth_in_chain.rbegin(), end = depth_in_chain.rend(); it != end; ++it)
+ {
+ erase(it->second.it());
+ }
+
+ if (begin_it.current->control_bytes[begin_it.index % BlockSize] == Constants::magic_for_empty)
+ return ++iterator{ begin_it.current, begin_it.index };
+ else
+ return { begin_it.current, begin_it.index };
+ }
+
+ size_t erase(const FindKey & key)
+ {
+ auto found = find(key);
+ if (found == end())
+ return 0;
+ else
+ {
+ erase(found);
+ return 1;
+ }
+ }
+
+ void clear()
+ {
+ if (!num_slots_minus_one)
+ return;
+ size_t num_slots = num_slots_minus_one + 1;
+ size_t num_blocks = num_slots / BlockSize;
+ if (num_slots % BlockSize)
+ ++num_blocks;
+ for (BlockPointer it = entries, end = it + num_blocks; it != end; ++it)
+ {
+ for (int i = 0; i < BlockSize; ++i)
+ {
+ if (it->control_bytes[i] != Constants::magic_for_empty)
+ {
+ AllocatorTraits::destroy(*this, std::addressof(it->data[i]));
+ it->control_bytes[i] = Constants::magic_for_empty;
+ }
+ }
+ }
+ num_elements = 0;
+ }
+
+ void shrink_to_fit()
+ {
+ rehash_for_other_container(*this);
+ }
+
+ void swap(sherwood_v8_table & other)
+ {
+ using std::swap;
+ swap_pointers(other);
+ swap(static_cast<ArgumentHash &>(*this), static_cast<ArgumentHash &>(other));
+ swap(static_cast<ArgumentEqual &>(*this), static_cast<ArgumentEqual &>(other));
+ if (AllocatorTraits::propagate_on_container_swap::value)
+ swap(static_cast<ByteAlloc &>(*this), static_cast<ByteAlloc &>(other));
+ }
+
+ size_t size() const
+ {
+ return num_elements;
+ }
+ size_t max_size() const
+ {
+ return (AllocatorTraits::max_size(*this)) / sizeof(T);
+ }
+ size_t bucket_count() const
+ {
+ return num_slots_minus_one ? num_slots_minus_one + 1 : 0;
+ }
+ size_type max_bucket_count() const
+ {
+ return (AllocatorTraits::max_size(*this)) / sizeof(T);
+ }
+ size_t bucket(const FindKey & key) const
+ {
+ return hash_policy.index_for_hash(hash_object(key), num_slots_minus_one);
+ }
+ float load_factor() const
+ {
+ return static_cast<double>(num_elements) / (num_slots_minus_one + 1);
+ }
+ void max_load_factor(float value)
+ {
+ _max_load_factor = value;
+ }
+ float max_load_factor() const
+ {
+ return _max_load_factor;
+ }
+
+ bool empty() const
+ {
+ return num_elements == 0;
+ }
+
+private:
+ BlockPointer entries = BlockType::empty_block();
+ size_t num_slots_minus_one = 0;
+ typename HashPolicySelector<ArgumentHash>::type hash_policy;
+ float _max_load_factor = 0.9375f;
+ size_t num_elements = 0;
+
+ size_t num_buckets_for_reserve(size_t num_elements) const
+ {
+ return static_cast<size_t>(std::ceil(num_elements / static_cast<double>(_max_load_factor)));
+ }
+ void rehash_for_other_container(const sherwood_v8_table & other)
+ {
+ rehash(std::min(num_buckets_for_reserve(other.size()), other.bucket_count()));
+ }
+ bool is_full() const
+ {
+ if (!num_slots_minus_one)
+ return true;
+ else
+ return num_elements + 1 > (num_slots_minus_one + 1) * static_cast<double>(_max_load_factor);
+ }
+
+ void swap_pointers(sherwood_v8_table & other)
+ {
+ using std::swap;
+ swap(hash_policy, other.hash_policy);
+ swap(entries, other.entries);
+ swap(num_slots_minus_one, other.num_slots_minus_one);
+ swap(num_elements, other.num_elements);
+ swap(_max_load_factor, other._max_load_factor);
+ }
+
+ struct LinkedListIt
+ {
+ size_t index = 0;
+ BlockPointer block = nullptr;
+
+ LinkedListIt()
+ {
+ }
+ LinkedListIt(size_t index, BlockPointer block)
+ : index(index), block(block)
+ {
+ }
+
+ iterator it() const
+ {
+ return { block, index };
+ }
+ int index_in_block() const
+ {
+ return index % BlockSize;
+ }
+ bool is_direct_hit() const
+ {
+ return (metadata() & Constants::bits_for_direct_hit) == Constants::magic_for_direct_hit;
+ }
+ bool is_empty() const
+ {
+ return metadata() == Constants::magic_for_empty;
+ }
+ bool has_next() const
+ {
+ return jump_index() != 0;
+ }
+ int8_t jump_index() const
+ {
+ return Constants::distance_from_metadata(metadata());
+ }
+ int8_t metadata() const
+ {
+ return block->control_bytes[index_in_block()];
+ }
+ void set_metadata(int8_t metadata)
+ {
+ block->control_bytes[index_in_block()] = metadata;
+ }
+
+ LinkedListIt next(sherwood_v8_table & table) const
+ {
+ int8_t distance = jump_index();
+ size_t next_index = table.hash_policy.keep_in_range(index + Constants::jump_distances[distance], table.num_slots_minus_one);
+ return { next_index, table.entries + next_index / BlockSize };
+ }
+ void set_next(int8_t jump_index)
+ {
+ int8_t & metadata = block->control_bytes[index_in_block()];
+ metadata = (metadata & ~Constants::bits_for_distance) | jump_index;
+ }
+ void clear_next()
+ {
+ set_next(0);
+ }
+
+ value_type & operator*() const
+ {
+ return block->data[index_in_block()];
+ }
+ bool operator!() const
+ {
+ return !block;
+ }
+ explicit operator bool() const
+ {
+ return block != nullptr;
+ }
+ bool operator==(const LinkedListIt & other) const
+ {
+ return index == other.index;
+ }
+ bool operator!=(const LinkedListIt & other) const
+ {
+ return !(*this == other);
+ }
+ };
+
+ template<typename... Args>
+ SKA_NOINLINE(std::pair<iterator, bool>) emplace_direct_hit(LinkedListIt block, Args &&... args)
+ {
+ using std::swap;
+ if (is_full())
+ {
+ grow();
+ return emplace(std::forward<Args>(args)...);
+ }
+ if (block.metadata() == Constants::magic_for_empty)
+ {
+ AllocatorTraits::construct(*this, std::addressof(*block), std::forward<Args>(args)...);
+ block.set_metadata(Constants::magic_for_direct_hit);
+ ++num_elements;
+ return { block.it(), true };
+ }
+ else
+ {
+ LinkedListIt parent_block = find_parent_block(block);
+ std::pair<int8_t, LinkedListIt> free_block = find_free_index(parent_block);
+ if (!free_block.first)
+ {
+ grow();
+ return emplace(std::forward<Args>(args)...);
+ }
+ value_type new_value(std::forward<Args>(args)...);
+ for (LinkedListIt it = block;;)
+ {
+ AllocatorTraits::construct(*this, std::addressof(*free_block.second), std::move(*it));
+ AllocatorTraits::destroy(*this, std::addressof(*it));
+ parent_block.set_next(free_block.first);
+ free_block.second.set_metadata(Constants::magic_for_list_entry);
+ if (!it.has_next())
+ {
+ it.set_metadata(Constants::magic_for_empty);
+ break;
+ }
+ LinkedListIt next = it.next(*this);
+ it.set_metadata(Constants::magic_for_empty);
+ block.set_metadata(Constants::magic_for_reserved);
+ it = next;
+ parent_block = free_block.second;
+ free_block = find_free_index(free_block.second);
+ if (!free_block.first)
+ {
+ grow();
+ return emplace(std::move(new_value));
+ }
+ }
+ AllocatorTraits::construct(*this, std::addressof(*block), std::move(new_value));
+ block.set_metadata(Constants::magic_for_direct_hit);
+ ++num_elements;
+ return { block.it(), true };
+ }
+ }
+
+ template<typename... Args>
+ SKA_NOINLINE(std::pair<iterator, bool>) emplace_new_key(LinkedListIt parent, Args &&... args)
+ {
+ if (is_full())
+ {
+ grow();
+ return emplace(std::forward<Args>(args)...);
+ }
+ std::pair<int8_t, LinkedListIt> free_block = find_free_index(parent);
+ if (!free_block.first)
+ {
+ grow();
+ return emplace(std::forward<Args>(args)...);
+ }
+ AllocatorTraits::construct(*this, std::addressof(*free_block.second), std::forward<Args>(args)...);
+ free_block.second.set_metadata(Constants::magic_for_list_entry);
+ parent.set_next(free_block.first);
+ ++num_elements;
+ return { free_block.second.it(), true };
+ }
+
+ LinkedListIt find_direct_hit(LinkedListIt child) const
+ {
+ size_t to_move_hash = hash_object(*child);
+ size_t to_move_index = hash_policy.index_for_hash(to_move_hash, num_slots_minus_one);
+ return { to_move_index, entries + to_move_index / BlockSize };
+ }
+ LinkedListIt find_parent_block(LinkedListIt child)
+ {
+ LinkedListIt parent_block = find_direct_hit(child);
+ for (;;)
+ {
+ LinkedListIt next = parent_block.next(*this);
+ if (next == child)
+ return parent_block;
+ parent_block = next;
+ }
+ }
+
+ std::pair<int8_t, LinkedListIt> find_free_index(LinkedListIt parent) const
+ {
+ for (int8_t jump_index = 1; jump_index < Constants::num_jump_distances; ++jump_index)
+ {
+ size_t index = hash_policy.keep_in_range(parent.index + Constants::jump_distances[jump_index], num_slots_minus_one);
+ BlockPointer block = entries + index / BlockSize;
+ if (block->control_bytes[index % BlockSize] == Constants::magic_for_empty)
+ return { jump_index, { index, block } };
+ }
+ return { 0, {} };
+ }
+
+ void grow()
+ {
+ rehash(std::max(size_t(10), 2 * bucket_count()));
+ }
+
+ size_t calculate_memory_requirement(size_t num_blocks)
+ {
+ size_t memory_required = sizeof(BlockType) * num_blocks;
+ memory_required += BlockSize; // for metadata of past-the-end pointer
+ return memory_required;
+ }
+
+ void deallocate_data(BlockPointer begin, size_t num_slots_minus_one)
+ {
+ if (begin == BlockType::empty_block())
+ return;
+
+ ++num_slots_minus_one;
+ size_t num_blocks = num_slots_minus_one / BlockSize;
+ if (num_slots_minus_one % BlockSize)
+ ++num_blocks;
+ size_t memory = calculate_memory_requirement(num_blocks);
+ unsigned char * as_byte_pointer = reinterpret_cast<unsigned char *>(begin);
+ AllocatorTraits::deallocate(*this, typename AllocatorTraits::pointer(as_byte_pointer), memory);
+ }
+
+ void reset_to_empty_state()
+ {
+ deallocate_data(entries, num_slots_minus_one);
+ entries = BlockType::empty_block();
+ num_slots_minus_one = 0;
+ hash_policy.reset();
+ }
+
+ template<typename U>
+ size_t hash_object(const U & key)
+ {
+ return static_cast<Hasher &>(*this)(key);
+ }
+ template<typename U>
+ size_t hash_object(const U & key) const
+ {
+ return static_cast<const Hasher &>(*this)(key);
+ }
+ template<typename L, typename R>
+ bool compares_equal(const L & lhs, const R & rhs)
+ {
+ return static_cast<Equal &>(*this)(lhs, rhs);
+ }
+
+ struct convertible_to_iterator
+ {
+ BlockPointer it;
+ size_t index;
+
+ operator iterator()
+ {
+ if (it->control_bytes[index % BlockSize] == Constants::magic_for_empty)
+ return ++iterator{it, index};
+ else
+ return { it, index };
+ }
+ operator const_iterator()
+ {
+ if (it->control_bytes[index % BlockSize] == Constants::magic_for_empty)
+ return ++iterator{it, index};
+ else
+ return { it, index };
+ }
+ };
+};
+template<typename T, typename Enable = void>
+struct AlignmentOr8Bytes
+{
+ static constexpr size_t value = 8;
+};
+template<typename T>
+struct AlignmentOr8Bytes<T, typename std::enable_if<alignof(T) >= 1>::type>
+{
+ static constexpr size_t value = alignof(T);
+};
+template<typename... Args>
+struct CalculateBytellBlockSize;
+template<typename First, typename... More>
+struct CalculateBytellBlockSize<First, More...>
+{
+ static constexpr size_t this_value = AlignmentOr8Bytes<First>::value;
+ static constexpr size_t base_value = CalculateBytellBlockSize<More...>::value;
+ static constexpr size_t value = this_value > base_value ? this_value : base_value;
+};
+template<>
+struct CalculateBytellBlockSize<>
+{
+ static constexpr size_t value = 8;
+};
+}
+
+template<typename K, typename V, typename H = std::hash<K>, typename E = std::equal_to<K>, typename A = std::allocator<std::pair<K, V> > >
+class bytell_hash_map
+ : public detailv8::sherwood_v8_table
+ <
+ std::pair<K, V>,
+ K,
+ H,
+ detailv8::KeyOrValueHasher<K, std::pair<K, V>, H>,
+ E,
+ detailv8::KeyOrValueEquality<K, std::pair<K, V>, E>,
+ A,
+ typename std::allocator_traits<A>::template rebind_alloc<unsigned char>,
+ detailv8::CalculateBytellBlockSize<K, V>::value
+ >
+{
+ using Table = detailv8::sherwood_v8_table
+ <
+ std::pair<K, V>,
+ K,
+ H,
+ detailv8::KeyOrValueHasher<K, std::pair<K, V>, H>,
+ E,
+ detailv8::KeyOrValueEquality<K, std::pair<K, V>, E>,
+ A,
+ typename std::allocator_traits<A>::template rebind_alloc<unsigned char>,
+ detailv8::CalculateBytellBlockSize<K, V>::value
+ >;
+public:
+
+ using key_type = K;
+ using mapped_type = V;
+
+ using Table::Table;
+ bytell_hash_map()
+ {
+ }
+
+ inline V & operator[](const K & key)
+ {
+ return emplace(key, convertible_to_value()).first->second;
+ }
+ inline V & operator[](K && key)
+ {
+ return emplace(std::move(key), convertible_to_value()).first->second;
+ }
+ V & at(const K & key)
+ {
+ auto found = this->find(key);
+ if (found == this->end())
+ throw std::out_of_range("Argument passed to at() was not in the map.");
+ return found->second;
+ }
+ const V & at(const K & key) const
+ {
+ auto found = this->find(key);
+ if (found == this->end())
+ throw std::out_of_range("Argument passed to at() was not in the map.");
+ return found->second;
+ }
+
+ using Table::emplace;
+ std::pair<typename Table::iterator, bool> emplace()
+ {
+ return emplace(key_type(), convertible_to_value());
+ }
+ template<typename M>
+ std::pair<typename Table::iterator, bool> insert_or_assign(const key_type & key, M && m)
+ {
+ auto emplace_result = emplace(key, std::forward<M>(m));
+ if (!emplace_result.second)
+ emplace_result.first->second = std::forward<M>(m);
+ return emplace_result;
+ }
+ template<typename M>
+ std::pair<typename Table::iterator, bool> insert_or_assign(key_type && key, M && m)
+ {
+ auto emplace_result = emplace(std::move(key), std::forward<M>(m));
+ if (!emplace_result.second)
+ emplace_result.first->second = std::forward<M>(m);
+ return emplace_result;
+ }
+ template<typename M>
+ typename Table::iterator insert_or_assign(typename Table::const_iterator, const key_type & key, M && m)
+ {
+ return insert_or_assign(key, std::forward<M>(m)).first;
+ }
+ template<typename M>
+ typename Table::iterator insert_or_assign(typename Table::const_iterator, key_type && key, M && m)
+ {
+ return insert_or_assign(std::move(key), std::forward<M>(m)).first;
+ }
+
+ friend bool operator==(const bytell_hash_map & lhs, const bytell_hash_map & rhs)
+ {
+ if (lhs.size() != rhs.size())
+ return false;
+ for (const typename Table::value_type & value : lhs)
+ {
+ auto found = rhs.find(value.first);
+ if (found == rhs.end())
+ return false;
+ else if (value.second != found->second)
+ return false;
+ }
+ return true;
+ }
+ friend bool operator!=(const bytell_hash_map & lhs, const bytell_hash_map & rhs)
+ {
+ return !(lhs == rhs);
+ }
+
+private:
+ struct convertible_to_value
+ {
+ operator V() const
+ {
+ return V();
+ }
+ };
+};
+
+template<typename T, typename H = std::hash<T>, typename E = std::equal_to<T>, typename A = std::allocator<T> >
+class bytell_hash_set
+ : public detailv8::sherwood_v8_table
+ <
+ T,
+ T,
+ H,
+ detailv8::functor_storage<size_t, H>,
+ E,
+ detailv8::functor_storage<bool, E>,
+ A,
+ typename std::allocator_traits<A>::template rebind_alloc<unsigned char>,
+ detailv8::CalculateBytellBlockSize<T>::value
+ >
+{
+ using Table = detailv8::sherwood_v8_table
+ <
+ T,
+ T,
+ H,
+ detailv8::functor_storage<size_t, H>,
+ E,
+ detailv8::functor_storage<bool, E>,
+ A,
+ typename std::allocator_traits<A>::template rebind_alloc<unsigned char>,
+ detailv8::CalculateBytellBlockSize<T>::value
+ >;
+public:
+
+ using key_type = T;
+
+ using Table::Table;
+ bytell_hash_set()
+ {
+ }
+
+ template<typename... Args>
+ std::pair<typename Table::iterator, bool> emplace(Args &&... args)
+ {
+ return Table::emplace(T(std::forward<Args>(args)...));
+ }
+ std::pair<typename Table::iterator, bool> emplace(const key_type & arg)
+ {
+ return Table::emplace(arg);
+ }
+ std::pair<typename Table::iterator, bool> emplace(key_type & arg)
+ {
+ return Table::emplace(arg);
+ }
+ std::pair<typename Table::iterator, bool> emplace(const key_type && arg)
+ {
+ return Table::emplace(std::move(arg));
+ }
+ std::pair<typename Table::iterator, bool> emplace(key_type && arg)
+ {
+ return Table::emplace(std::move(arg));
+ }
+
+ friend bool operator==(const bytell_hash_set & lhs, const bytell_hash_set & rhs)
+ {
+ if (lhs.size() != rhs.size())
+ return false;
+ for (const T & value : lhs)
+ {
+ if (rhs.find(value) == rhs.end())
+ return false;
+ }
+ return true;
+ }
+ friend bool operator!=(const bytell_hash_set & lhs, const bytell_hash_set & rhs)
+ {
+ return !(lhs == rhs);
+ }
+};
+
+} // end namespace ska
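Since this header is vendored only as a comparison baseline for the STC benchmarks, a short usage sketch of the map it defines may help orient readers. The calls below (operator[], find, erase by key, size) are taken directly from the bytell_hash_map class above; note the header also pulls in flat_hash_map.hpp via its own #include.

```cpp
// Usage sketch for the vendored ska::bytell_hash_map (illustrative only).
#include <cstdio>
#include <string>
#include "bytell_hash_map.hpp"  // needs flat_hash_map.hpp on the include path too

int main() {
    // The class mirrors the std::unordered_map interface, per the
    // declarations above (default hasher is std::hash<K>).
    ska::bytell_hash_map<std::string, int> counts;
    counts["apple"] = 3;   // operator[] default-constructs missing values
    counts["pear"] += 1;

    auto it = counts.find("apple");
    if (it != counts.end())
        std::printf("%s -> %d\n", it->first.c_str(), it->second);

    counts.erase("pear");  // erase by key returns the number of entries removed
    std::printf("size=%zu\n", counts.size());
}
```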
diff --git a/benchmarks/others/flat_hash_map.hpp b/benchmarks/others/flat_hash_map.hpp
new file mode 100644
index 00000000..ea20af93
--- /dev/null
+++ b/benchmarks/others/flat_hash_map.hpp
@@ -0,0 +1,1496 @@
+// Copyright Malte Skarupke 2017.
+// Distributed under the Boost Software License, Version 1.0.
+// (See http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+#include <cstdint>
+#include <cstddef>
+#include <functional>
+#include <cmath>
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <type_traits>
+
+#ifdef _MSC_VER
+#define SKA_NOINLINE(...) __declspec(noinline) __VA_ARGS__
+#else
+#define SKA_NOINLINE(...) __VA_ARGS__ __attribute__((noinline))
+#endif
+
+namespace ska
+{
+struct prime_number_hash_policy;
+struct power_of_two_hash_policy;
+struct fibonacci_hash_policy;
+
+namespace detailv3
+{
+template<typename Result, typename Functor>
+struct functor_storage : Functor
+{
+ functor_storage() = default;
+ functor_storage(const Functor & functor)
+ : Functor(functor)
+ {
+ }
+ template<typename... Args>
+ Result operator()(Args &&... args)
+ {
+ return static_cast<Functor &>(*this)(std::forward<Args>(args)...);
+ }
+ template<typename... Args>
+ Result operator()(Args &&... args) const
+ {
+ return static_cast<const Functor &>(*this)(std::forward<Args>(args)...);
+ }
+};
+template<typename Result, typename... Args>
+struct functor_storage<Result, Result (*)(Args...)>
+{
+ typedef Result (*function_ptr)(Args...);
+ function_ptr function;
+ functor_storage(function_ptr function)
+ : function(function)
+ {
+ }
+ Result operator()(Args... args) const
+ {
+ return function(std::forward<Args>(args)...);
+ }
+ operator function_ptr &()
+ {
+ return function;
+ }
+ operator const function_ptr &()
+ {
+ return function;
+ }
+};
+template<typename key_type, typename value_type, typename hasher>
+struct KeyOrValueHasher : functor_storage<size_t, hasher>
+{
+ typedef functor_storage<size_t, hasher> hasher_storage;
+ KeyOrValueHasher() = default;
+ KeyOrValueHasher(const hasher & hash)
+ : hasher_storage(hash)
+ {
+ }
+ size_t operator()(const key_type & key)
+ {
+ return static_cast<hasher_storage &>(*this)(key);
+ }
+ size_t operator()(const key_type & key) const
+ {
+ return static_cast<const hasher_storage &>(*this)(key);
+ }
+ size_t operator()(const value_type & value)
+ {
+ return static_cast<hasher_storage &>(*this)(value.first);
+ }
+ size_t operator()(const value_type & value) const
+ {
+ return static_cast<const hasher_storage &>(*this)(value.first);
+ }
+ template<typename F, typename S>
+ size_t operator()(const std::pair<F, S> & value)
+ {
+ return static_cast<hasher_storage &>(*this)(value.first);
+ }
+ template<typename F, typename S>
+ size_t operator()(const std::pair<F, S> & value) const
+ {
+ return static_cast<const hasher_storage &>(*this)(value.first);
+ }
+};
+template<typename key_type, typename value_type, typename key_equal>
+struct KeyOrValueEquality : functor_storage<bool, key_equal>
+{
+ typedef functor_storage<bool, key_equal> equality_storage;
+ KeyOrValueEquality() = default;
+ KeyOrValueEquality(const key_equal & equality)
+ : equality_storage(equality)
+ {
+ }
+ bool operator()(const key_type & lhs, const key_type & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs, rhs);
+ }
+ bool operator()(const key_type & lhs, const value_type & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs, rhs.first);
+ }
+ bool operator()(const value_type & lhs, const key_type & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs.first, rhs);
+ }
+ bool operator()(const value_type & lhs, const value_type & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs.first, rhs.first);
+ }
+ template<typename F, typename S>
+ bool operator()(const key_type & lhs, const std::pair<F, S> & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs, rhs.first);
+ }
+ template<typename F, typename S>
+ bool operator()(const std::pair<F, S> & lhs, const key_type & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs.first, rhs);
+ }
+ template<typename F, typename S>
+ bool operator()(const value_type & lhs, const std::pair<F, S> & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs.first, rhs.first);
+ }
+ template<typename F, typename S>
+ bool operator()(const std::pair<F, S> & lhs, const value_type & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs.first, rhs.first);
+ }
+ template<typename FL, typename SL, typename FR, typename SR>
+ bool operator()(const std::pair<FL, SL> & lhs, const std::pair<FR, SR> & rhs)
+ {
+ return static_cast<equality_storage &>(*this)(lhs.first, rhs.first);
+ }
+};
+static constexpr int8_t min_lookups = 4;
+template<typename T>
+struct sherwood_v3_entry
+{
+ sherwood_v3_entry()
+ {
+ }
+ sherwood_v3_entry(int8_t distance_from_desired)
+ : distance_from_desired(distance_from_desired)
+ {
+ }
+ ~sherwood_v3_entry()
+ {
+ }
+ static sherwood_v3_entry * empty_default_table()
+ {
+ static sherwood_v3_entry result[min_lookups] = { {}, {}, {}, {special_end_value} };
+ return result;
+ }
+
+ bool has_value() const
+ {
+ return distance_from_desired >= 0;
+ }
+ bool is_empty() const
+ {
+ return distance_from_desired < 0;
+ }
+ bool is_at_desired_position() const
+ {
+ return distance_from_desired <= 0;
+ }
+ template<typename... Args>
+ void emplace(int8_t distance, Args &&... args)
+ {
+ new (std::addressof(value)) T(std::forward<Args>(args)...);
+ distance_from_desired = distance;
+ }
+
+ void destroy_value()
+ {
+ value.~T();
+ distance_from_desired = -1;
+ }
+
+ int8_t distance_from_desired = -1;
+ static constexpr int8_t special_end_value = 0;
+ union { T value; };
+};
+
+inline int8_t log2(size_t value)
+{
+ static constexpr int8_t table[64] =
+ {
+ 63, 0, 58, 1, 59, 47, 53, 2,
+ 60, 39, 48, 27, 54, 33, 42, 3,
+ 61, 51, 37, 40, 49, 18, 28, 20,
+ 55, 30, 34, 11, 43, 14, 22, 4,
+ 62, 57, 46, 52, 38, 26, 32, 41,
+ 50, 36, 17, 19, 29, 10, 13, 21,
+ 56, 45, 25, 31, 35, 16, 9, 12,
+ 44, 24, 15, 8, 23, 7, 6, 5
+ };
+ value |= value >> 1;
+ value |= value >> 2;
+ value |= value >> 4;
+ value |= value >> 8;
+ value |= value >> 16;
+ value |= value >> 32;
+ return table[((value - (value >> 1)) * 0x07EDD5E59A4E28C2) >> 58];
+}
+
+template<typename T, bool>
+struct AssignIfTrue
+{
+ void operator()(T & lhs, const T & rhs)
+ {
+ lhs = rhs;
+ }
+ void operator()(T & lhs, T && rhs)
+ {
+ lhs = std::move(rhs);
+ }
+};
+template<typename T>
+struct AssignIfTrue<T, false>
+{
+ void operator()(T &, const T &)
+ {
+ }
+ void operator()(T &, T &&)
+ {
+ }
+};
+
+inline size_t next_power_of_two(size_t i)
+{
+ --i;
+ i |= i >> 1;
+ i |= i >> 2;
+ i |= i >> 4;
+ i |= i >> 8;
+ i |= i >> 16;
+ i |= i >> 32;
+ ++i;
+ return i;
+}
+
+template<typename...> using void_t = void;
+
+template<typename T, typename = void>
+struct HashPolicySelector
+{
+ typedef fibonacci_hash_policy type;
+};
+template<typename T>
+struct HashPolicySelector<T, void_t<typename T::hash_policy>>
+{
+ typedef typename T::hash_policy type;
+};
+
+template<typename T, typename FindKey, typename ArgumentHash, typename Hasher, typename ArgumentEqual, typename Equal, typename ArgumentAlloc, typename EntryAlloc>
+class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal
+{
+ using Entry = detailv3::sherwood_v3_entry<T>;
+ using AllocatorTraits = std::allocator_traits<EntryAlloc>;
+ using EntryPointer = typename AllocatorTraits::pointer;
+ struct convertible_to_iterator;
+
+public:
+
+ using value_type = T;
+ using size_type = size_t;
+ using difference_type = std::ptrdiff_t;
+ using hasher = ArgumentHash;
+ using key_equal = ArgumentEqual;
+ using allocator_type = EntryAlloc;
+ using reference = value_type &;
+ using const_reference = const value_type &;
+ using pointer = value_type *;
+ using const_pointer = const value_type *;
+
+ sherwood_v3_table()
+ {
+ }
+ explicit sherwood_v3_table(size_type bucket_count, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc())
+ : EntryAlloc(alloc), Hasher(hash), Equal(equal)
+ {
+ rehash(bucket_count);
+ }
+ sherwood_v3_table(size_type bucket_count, const ArgumentAlloc & alloc)
+ : sherwood_v3_table(bucket_count, ArgumentHash(), ArgumentEqual(), alloc)
+ {
+ }
+ sherwood_v3_table(size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc)
+ : sherwood_v3_table(bucket_count, hash, ArgumentEqual(), alloc)
+ {
+ }
+ explicit sherwood_v3_table(const ArgumentAlloc & alloc)
+ : EntryAlloc(alloc)
+ {
+ }
+ template<typename It>
+ sherwood_v3_table(It first, It last, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc())
+ : sherwood_v3_table(bucket_count, hash, equal, alloc)
+ {
+ insert(first, last);
+ }
+ template<typename It>
+ sherwood_v3_table(It first, It last, size_type bucket_count, const ArgumentAlloc & alloc)
+ : sherwood_v3_table(first, last, bucket_count, ArgumentHash(), ArgumentEqual(), alloc)
+ {
+ }
+ template<typename It>
+ sherwood_v3_table(It first, It last, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc)
+ : sherwood_v3_table(first, last, bucket_count, hash, ArgumentEqual(), alloc)
+ {
+ }
+ sherwood_v3_table(std::initializer_list<T> il, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc())
+ : sherwood_v3_table(bucket_count, hash, equal, alloc)
+ {
+ if (bucket_count == 0)
+ rehash(il.size());
+ insert(il.begin(), il.end());
+ }
+ sherwood_v3_table(std::initializer_list<T> il, size_type bucket_count, const ArgumentAlloc & alloc)
+ : sherwood_v3_table(il, bucket_count, ArgumentHash(), ArgumentEqual(), alloc)
+ {
+ }
+ sherwood_v3_table(std::initializer_list<T> il, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc)
+ : sherwood_v3_table(il, bucket_count, hash, ArgumentEqual(), alloc)
+ {
+ }
+ sherwood_v3_table(const sherwood_v3_table & other)
+ : sherwood_v3_table(other, AllocatorTraits::select_on_container_copy_construction(other.get_allocator()))
+ {
+ }
+ sherwood_v3_table(const sherwood_v3_table & other, const ArgumentAlloc & alloc)
+ : EntryAlloc(alloc), Hasher(other), Equal(other), _max_load_factor(other._max_load_factor)
+ {
+ rehash_for_other_container(other);
+ try
+ {
+ insert(other.begin(), other.end());
+ }
+ catch(...)
+ {
+ clear();
+ deallocate_data(entries, num_slots_minus_one, max_lookups);
+ throw;
+ }
+ }
+ sherwood_v3_table(sherwood_v3_table && other) noexcept
+ : EntryAlloc(std::move(other)), Hasher(std::move(other)), Equal(std::move(other))
+ {
+ swap_pointers(other);
+ }
+ sherwood_v3_table(sherwood_v3_table && other, const ArgumentAlloc & alloc) noexcept
+ : EntryAlloc(alloc), Hasher(std::move(other)), Equal(std::move(other))
+ {
+ swap_pointers(other);
+ }
+ sherwood_v3_table & operator=(const sherwood_v3_table & other)
+ {
+ if (this == std::addressof(other))
+ return *this;
+
+ clear();
+ if (AllocatorTraits::propagate_on_container_copy_assignment::value)
+ {
+ if (static_cast<EntryAlloc &>(*this) != static_cast<const EntryAlloc &>(other))
+ {
+ reset_to_empty_state();
+ }
+ AssignIfTrue<EntryAlloc, AllocatorTraits::propagate_on_container_copy_assignment::value>()(*this, other);
+ }
+ _max_load_factor = other._max_load_factor;
+ static_cast<Hasher &>(*this) = other;
+ static_cast<Equal &>(*this) = other;
+ rehash_for_other_container(other);
+ insert(other.begin(), other.end());
+ return *this;
+ }
+ sherwood_v3_table & operator=(sherwood_v3_table && other) noexcept
+ {
+ if (this == std::addressof(other))
+ return *this;
+ else if (AllocatorTraits::propagate_on_container_move_assignment::value)
+ {
+ clear();
+ reset_to_empty_state();
+ AssignIfTrue<EntryAlloc, AllocatorTraits::propagate_on_container_move_assignment::value>()(*this, std::move(other));
+ swap_pointers(other);
+ }
+ else if (static_cast<EntryAlloc &>(*this) == static_cast<EntryAlloc &>(other))
+ {
+ swap_pointers(other);
+ }
+ else
+ {
+ clear();
+ _max_load_factor = other._max_load_factor;
+ rehash_for_other_container(other);
+ for (T & elem : other)
+ emplace(std::move(elem));
+ other.clear();
+ }
+ static_cast<Hasher &>(*this) = std::move(other);
+ static_cast<Equal &>(*this) = std::move(other);
+ return *this;
+ }
+ ~sherwood_v3_table()
+ {
+ clear();
+ deallocate_data(entries, num_slots_minus_one, max_lookups);
+ }
+
+ const allocator_type & get_allocator() const
+ {
+ return static_cast<const allocator_type &>(*this);
+ }
+ const ArgumentEqual & key_eq() const
+ {
+ return static_cast<const ArgumentEqual &>(*this);
+ }
+ const ArgumentHash & hash_function() const
+ {
+ return static_cast<const ArgumentHash &>(*this);
+ }
+
+ template<typename ValueType>
+ struct templated_iterator
+ {
+ templated_iterator() = default;
+ templated_iterator(EntryPointer current)
+ : current(current)
+ {
+ }
+ EntryPointer current = EntryPointer();
+
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = ValueType;
+ using difference_type = ptrdiff_t;
+ using pointer = ValueType *;
+ using reference = ValueType &;
+
+ friend bool operator==(const templated_iterator & lhs, const templated_iterator & rhs)
+ {
+ return lhs.current == rhs.current;
+ }
+ friend bool operator!=(const templated_iterator & lhs, const templated_iterator & rhs)
+ {
+ return !(lhs == rhs);
+ }
+
+ templated_iterator & operator++()
+ {
+ do
+ {
+ ++current;
+ }
+ while(current->is_empty());
+ return *this;
+ }
+ templated_iterator operator++(int)
+ {
+ templated_iterator copy(*this);
+ ++*this;
+ return copy;
+ }
+
+ ValueType & operator*() const
+ {
+ return current->value;
+ }
+ ValueType * operator->() const
+ {
+ return std::addressof(current->value);
+ }
+
+ operator templated_iterator<const value_type>() const
+ {
+ return { current };
+ }
+ };
+ using iterator = templated_iterator<value_type>;
+ using const_iterator = templated_iterator<const value_type>;
+
+ iterator begin()
+ {
+ for (EntryPointer it = entries;; ++it)
+ {
+ if (it->has_value())
+ return { it };
+ }
+ }
+ const_iterator begin() const
+ {
+ for (EntryPointer it = entries;; ++it)
+ {
+ if (it->has_value())
+ return { it };
+ }
+ }
+ const_iterator cbegin() const
+ {
+ return begin();
+ }
+ iterator end()
+ {
+ return { entries + static_cast<ptrdiff_t>(num_slots_minus_one + max_lookups) };
+ }
+ const_iterator end() const
+ {
+ return { entries + static_cast<ptrdiff_t>(num_slots_minus_one + max_lookups) };
+ }
+ const_iterator cend() const
+ {
+ return end();
+ }
+
+ iterator find(const FindKey & key)
+ {
+ size_t index = hash_policy.index_for_hash(hash_object(key), num_slots_minus_one);
+ EntryPointer it = entries + ptrdiff_t(index);
+ for (int8_t distance = 0; it->distance_from_desired >= distance; ++distance, ++it)
+ {
+ if (compares_equal(key, it->value))
+ return { it };
+ }
+ return end();
+ }
+ const_iterator find(const FindKey & key) const
+ {
+ return const_cast<sherwood_v3_table *>(this)->find(key);
+ }
+ size_t count(const FindKey & key) const
+ {
+ return find(key) == end() ? 0 : 1;
+ }
+ std::pair<iterator, iterator> equal_range(const FindKey & key)
+ {
+ iterator found = find(key);
+ if (found == end())
+ return { found, found };
+ else
+ return { found, std::next(found) };
+ }
+ std::pair<const_iterator, const_iterator> equal_range(const FindKey & key) const
+ {
+ const_iterator found = find(key);
+ if (found == end())
+ return { found, found };
+ else
+ return { found, std::next(found) };
+ }
+
+ template<typename Key, typename... Args>
+ std::pair<iterator, bool> emplace(Key && key, Args &&... args)
+ {
+ size_t index = hash_policy.index_for_hash(hash_object(key), num_slots_minus_one);
+ EntryPointer current_entry = entries + ptrdiff_t(index);
+ int8_t distance_from_desired = 0;
+ for (; current_entry->distance_from_desired >= distance_from_desired; ++current_entry, ++distance_from_desired)
+ {
+ if (compares_equal(key, current_entry->value))
+ return { { current_entry }, false };
+ }
+ return emplace_new_key(distance_from_desired, current_entry, std::forward<Key>(key), std::forward<Args>(args)...);
+ }
+
+ std::pair<iterator, bool> insert(const value_type & value)
+ {
+ return emplace(value);
+ }
+ std::pair<iterator, bool> insert(value_type && value)
+ {
+ return emplace(std::move(value));
+ }
+ template<typename... Args>
+ iterator emplace_hint(const_iterator, Args &&... args)
+ {
+ return emplace(std::forward<Args>(args)...).first;
+ }
+ iterator insert(const_iterator, const value_type & value)
+ {
+ return emplace(value).first;
+ }
+ iterator insert(const_iterator, value_type && value)
+ {
+ return emplace(std::move(value)).first;
+ }
+
+ template<typename It>
+ void insert(It begin, It end)
+ {
+ for (; begin != end; ++begin)
+ {
+ emplace(*begin);
+ }
+ }
+ void insert(std::initializer_list<value_type> il)
+ {
+ insert(il.begin(), il.end());
+ }
+
+ void rehash(size_t num_buckets)
+ {
+ num_buckets = std::max(num_buckets, static_cast<size_t>(std::ceil(num_elements / static_cast<double>(_max_load_factor))));
+ if (num_buckets == 0)
+ {
+ reset_to_empty_state();
+ return;
+ }
+ auto new_prime_index = hash_policy.next_size_over(num_buckets);
+ if (num_buckets == bucket_count())
+ return;
+ int8_t new_max_lookups = compute_max_lookups(num_buckets);
+ EntryPointer new_buckets(AllocatorTraits::allocate(*this, num_buckets + new_max_lookups));
+ EntryPointer special_end_item = new_buckets + static_cast<ptrdiff_t>(num_buckets + new_max_lookups - 1);
+ for (EntryPointer it = new_buckets; it != special_end_item; ++it)
+ it->distance_from_desired = -1;
+ special_end_item->distance_from_desired = Entry::special_end_value;
+ std::swap(entries, new_buckets);
+ std::swap(num_slots_minus_one, num_buckets);
+ --num_slots_minus_one;
+ hash_policy.commit(new_prime_index);
+ int8_t old_max_lookups = max_lookups;
+ max_lookups = new_max_lookups;
+ num_elements = 0;
+ for (EntryPointer it = new_buckets, end = it + static_cast<ptrdiff_t>(num_buckets + old_max_lookups); it != end; ++it)
+ {
+ if (it->has_value())
+ {
+ emplace(std::move(it->value));
+ it->destroy_value();
+ }
+ }
+ deallocate_data(new_buckets, num_buckets, old_max_lookups);
+ }
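Note the swap trick in rehash: after the two std::swap calls, new_buckets refers to the old entry array and num_buckets holds the old num_slots_minus_one, so the final loop walks the old array, re-inserts every live element into the freshly allocated one, and deallocate_data then releases the old storage.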
+
+ void reserve(size_t num_elements)
+ {
+ size_t required_buckets = num_buckets_for_reserve(num_elements);
+ if (required_buckets > bucket_count())
+ rehash(required_buckets);
+ }
+
+    // The return value is a type that can be converted to an iterator.
+    // The reason for doing this is that it's not free to find the
+    // iterator pointing at the next element. If you care about the
+    // next iterator, turn the return value into an iterator.
+ convertible_to_iterator erase(const_iterator to_erase)
+ {
+ EntryPointer current = to_erase.current;
+ current->destroy_value();
+ --num_elements;
+ for (EntryPointer next = current + ptrdiff_t(1); !next->is_at_desired_position(); ++current, ++next)
+ {
+ current->emplace(next->distance_from_desired - 1, std::move(next->value));
+ next->destroy_value();
+ }
+ return { to_erase.current };
+ }
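In practice the deferred conversion matters in erase loops. A minimal sketch (a hypothetical helper, assuming this header is available as "flat_hash_map.hpp" and using the ska::flat_hash_map defined later in this file): the scan for the next occupied slot is only paid when the return value is assigned back to an iterator:

    #include "flat_hash_map.hpp"

    void erase_even_keys(ska::flat_hash_map<int, int>& map)
    {
        for (auto it = map.begin(); it != map.end();)
        {
            if (it->first % 2 == 0)
                it = map.erase(it); // the conversion to iterator happens here, on assignment
            else
                ++it;
        }
    }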
+
+ iterator erase(const_iterator begin_it, const_iterator end_it)
+ {
+ if (begin_it == end_it)
+ return { begin_it.current };
+ for (EntryPointer it = begin_it.current, end = end_it.current; it != end; ++it)
+ {
+ if (it->has_value())
+ {
+ it->destroy_value();
+ --num_elements;
+ }
+ }
+ if (end_it == this->end())
+ return this->end();
+ ptrdiff_t num_to_move = std::min(static_cast<ptrdiff_t>(end_it.current->distance_from_desired), end_it.current - begin_it.current);
+ EntryPointer to_return = end_it.current - num_to_move;
+ for (EntryPointer it = end_it.current; !it->is_at_desired_position();)
+ {
+ EntryPointer target = it - num_to_move;
+ target->emplace(it->distance_from_desired - num_to_move, std::move(it->value));
+ it->destroy_value();
+ ++it;
+ num_to_move = std::min(static_cast<ptrdiff_t>(it->distance_from_desired), num_to_move);
+ }
+ return { to_return };
+ }
+
+ size_t erase(const FindKey & key)
+ {
+ auto found = find(key);
+ if (found == end())
+ return 0;
+ else
+ {
+ erase(found);
+ return 1;
+ }
+ }
+
+ void clear()
+ {
+ for (EntryPointer it = entries, end = it + static_cast<ptrdiff_t>(num_slots_minus_one + max_lookups); it != end; ++it)
+ {
+ if (it->has_value())
+ it->destroy_value();
+ }
+ num_elements = 0;
+ }
+
+ void shrink_to_fit()
+ {
+ rehash_for_other_container(*this);
+ }
+
+ void swap(sherwood_v3_table & other)
+ {
+ using std::swap;
+ swap_pointers(other);
+ swap(static_cast<ArgumentHash &>(*this), static_cast<ArgumentHash &>(other));
+ swap(static_cast<ArgumentEqual &>(*this), static_cast<ArgumentEqual &>(other));
+ if (AllocatorTraits::propagate_on_container_swap::value)
+ swap(static_cast<EntryAlloc &>(*this), static_cast<EntryAlloc &>(other));
+ }
+
+ size_t size() const
+ {
+ return num_elements;
+ }
+ size_t max_size() const
+ {
+ return (AllocatorTraits::max_size(*this)) / sizeof(Entry);
+ }
+ size_t bucket_count() const
+ {
+ return num_slots_minus_one ? num_slots_minus_one + 1 : 0;
+ }
+ size_type max_bucket_count() const
+ {
+ return (AllocatorTraits::max_size(*this) - min_lookups) / sizeof(Entry);
+ }
+ size_t bucket(const FindKey & key) const
+ {
+ return hash_policy.index_for_hash(hash_object(key), num_slots_minus_one);
+ }
+    float load_factor() const
+    {
+        size_t buckets = bucket_count();
+        if (buckets)
+            return static_cast<float>(num_elements) / buckets;
+        else
+            return 0;
+    }
+ void max_load_factor(float value)
+ {
+ _max_load_factor = value;
+ }
+ float max_load_factor() const
+ {
+ return _max_load_factor;
+ }
+
+ bool empty() const
+ {
+ return num_elements == 0;
+ }
+
+private:
+ EntryPointer entries = Entry::empty_default_table();
+ size_t num_slots_minus_one = 0;
+ typename HashPolicySelector<ArgumentHash>::type hash_policy;
+ int8_t max_lookups = detailv3::min_lookups - 1;
+ float _max_load_factor = 0.5f;
+ size_t num_elements = 0;
+
+ static int8_t compute_max_lookups(size_t num_buckets)
+ {
+ int8_t desired = detailv3::log2(num_buckets);
+ return std::max(detailv3::min_lookups, desired);
+ }
+
+ size_t num_buckets_for_reserve(size_t num_elements) const
+ {
+ return static_cast<size_t>(std::ceil(num_elements / std::min(0.5, static_cast<double>(_max_load_factor))));
+ }
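In other words, the divisor is clamped to at most 0.5: with the default max load factor of 0.5, reserve(100) requests ceil(100 / 0.5) = 200 buckets, and raising the load factor above 0.5 does not reduce that, while lowering it to 0.25 would request 400.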
+ void rehash_for_other_container(const sherwood_v3_table & other)
+ {
+ rehash(std::min(num_buckets_for_reserve(other.size()), other.bucket_count()));
+ }
+
+ void swap_pointers(sherwood_v3_table & other)
+ {
+ using std::swap;
+ swap(hash_policy, other.hash_policy);
+ swap(entries, other.entries);
+ swap(num_slots_minus_one, other.num_slots_minus_one);
+ swap(num_elements, other.num_elements);
+ swap(max_lookups, other.max_lookups);
+ swap(_max_load_factor, other._max_load_factor);
+ }
+
+ template<typename Key, typename... Args>
+ SKA_NOINLINE(std::pair<iterator, bool>) emplace_new_key(int8_t distance_from_desired, EntryPointer current_entry, Key && key, Args &&... args)
+ {
+ using std::swap;
+ if (num_slots_minus_one == 0 || distance_from_desired == max_lookups || num_elements + 1 > (num_slots_minus_one + 1) * static_cast<double>(_max_load_factor))
+ {
+ grow();
+ return emplace(std::forward<Key>(key), std::forward<Args>(args)...);
+ }
+ else if (current_entry->is_empty())
+ {
+ current_entry->emplace(distance_from_desired, std::forward<Key>(key), std::forward<Args>(args)...);
+ ++num_elements;
+ return { { current_entry }, true };
+ }
+ value_type to_insert(std::forward<Key>(key), std::forward<Args>(args)...);
+ swap(distance_from_desired, current_entry->distance_from_desired);
+ swap(to_insert, current_entry->value);
+ iterator result = { current_entry };
+ for (++distance_from_desired, ++current_entry;; ++current_entry)
+ {
+ if (current_entry->is_empty())
+ {
+ current_entry->emplace(distance_from_desired, std::move(to_insert));
+ ++num_elements;
+ return { result, true };
+ }
+ else if (current_entry->distance_from_desired < distance_from_desired)
+ {
+ swap(distance_from_desired, current_entry->distance_from_desired);
+ swap(to_insert, current_entry->value);
+ ++distance_from_desired;
+ }
+ else
+ {
+ ++distance_from_desired;
+ if (distance_from_desired == max_lookups)
+ {
+ swap(to_insert, result.current->value);
+ grow();
+ return emplace(std::move(to_insert));
+ }
+ }
+ }
+ }
+
+ void grow()
+ {
+ rehash(std::max(size_t(4), 2 * bucket_count()));
+ }
+
+ void deallocate_data(EntryPointer begin, size_t num_slots_minus_one, int8_t max_lookups)
+ {
+ if (begin != Entry::empty_default_table())
+ {
+ AllocatorTraits::deallocate(*this, begin, num_slots_minus_one + max_lookups + 1);
+ }
+ }
+
+ void reset_to_empty_state()
+ {
+ deallocate_data(entries, num_slots_minus_one, max_lookups);
+ entries = Entry::empty_default_table();
+ num_slots_minus_one = 0;
+ hash_policy.reset();
+ max_lookups = detailv3::min_lookups - 1;
+ }
+
+ template<typename U>
+ size_t hash_object(const U & key)
+ {
+ return static_cast<Hasher &>(*this)(key);
+ }
+ template<typename U>
+ size_t hash_object(const U & key) const
+ {
+ return static_cast<const Hasher &>(*this)(key);
+ }
+ template<typename L, typename R>
+ bool compares_equal(const L & lhs, const R & rhs)
+ {
+ return static_cast<Equal &>(*this)(lhs, rhs);
+ }
+
+ struct convertible_to_iterator
+ {
+ EntryPointer it;
+
+ operator iterator()
+ {
+ if (it->has_value())
+ return { it };
+ else
+ return ++iterator{it};
+ }
+ operator const_iterator()
+ {
+ if (it->has_value())
+ return { it };
+ else
+ return ++const_iterator{it};
+ }
+ };
+
+};
+}
+
+struct prime_number_hash_policy
+{
+ static size_t mod0(size_t) { return 0llu; }
+ static size_t mod2(size_t hash) { return hash % 2llu; }
+ static size_t mod3(size_t hash) { return hash % 3llu; }
+ static size_t mod5(size_t hash) { return hash % 5llu; }
+ static size_t mod7(size_t hash) { return hash % 7llu; }
+ static size_t mod11(size_t hash) { return hash % 11llu; }
+ static size_t mod13(size_t hash) { return hash % 13llu; }
+ static size_t mod17(size_t hash) { return hash % 17llu; }
+ static size_t mod23(size_t hash) { return hash % 23llu; }
+ static size_t mod29(size_t hash) { return hash % 29llu; }
+ static size_t mod37(size_t hash) { return hash % 37llu; }
+ static size_t mod47(size_t hash) { return hash % 47llu; }
+ static size_t mod59(size_t hash) { return hash % 59llu; }
+ static size_t mod73(size_t hash) { return hash % 73llu; }
+ static size_t mod97(size_t hash) { return hash % 97llu; }
+ static size_t mod127(size_t hash) { return hash % 127llu; }
+ static size_t mod151(size_t hash) { return hash % 151llu; }
+ static size_t mod197(size_t hash) { return hash % 197llu; }
+ static size_t mod251(size_t hash) { return hash % 251llu; }
+ static size_t mod313(size_t hash) { return hash % 313llu; }
+ static size_t mod397(size_t hash) { return hash % 397llu; }
+ static size_t mod499(size_t hash) { return hash % 499llu; }
+ static size_t mod631(size_t hash) { return hash % 631llu; }
+ static size_t mod797(size_t hash) { return hash % 797llu; }
+ static size_t mod1009(size_t hash) { return hash % 1009llu; }
+ static size_t mod1259(size_t hash) { return hash % 1259llu; }
+ static size_t mod1597(size_t hash) { return hash % 1597llu; }
+ static size_t mod2011(size_t hash) { return hash % 2011llu; }
+ static size_t mod2539(size_t hash) { return hash % 2539llu; }
+ static size_t mod3203(size_t hash) { return hash % 3203llu; }
+ static size_t mod4027(size_t hash) { return hash % 4027llu; }
+ static size_t mod5087(size_t hash) { return hash % 5087llu; }
+ static size_t mod6421(size_t hash) { return hash % 6421llu; }
+ static size_t mod8089(size_t hash) { return hash % 8089llu; }
+ static size_t mod10193(size_t hash) { return hash % 10193llu; }
+ static size_t mod12853(size_t hash) { return hash % 12853llu; }
+ static size_t mod16193(size_t hash) { return hash % 16193llu; }
+ static size_t mod20399(size_t hash) { return hash % 20399llu; }
+ static size_t mod25717(size_t hash) { return hash % 25717llu; }
+ static size_t mod32401(size_t hash) { return hash % 32401llu; }
+ static size_t mod40823(size_t hash) { return hash % 40823llu; }
+ static size_t mod51437(size_t hash) { return hash % 51437llu; }
+ static size_t mod64811(size_t hash) { return hash % 64811llu; }
+ static size_t mod81649(size_t hash) { return hash % 81649llu; }
+ static size_t mod102877(size_t hash) { return hash % 102877llu; }
+ static size_t mod129607(size_t hash) { return hash % 129607llu; }
+ static size_t mod163307(size_t hash) { return hash % 163307llu; }
+ static size_t mod205759(size_t hash) { return hash % 205759llu; }
+ static size_t mod259229(size_t hash) { return hash % 259229llu; }
+ static size_t mod326617(size_t hash) { return hash % 326617llu; }
+ static size_t mod411527(size_t hash) { return hash % 411527llu; }
+ static size_t mod518509(size_t hash) { return hash % 518509llu; }
+ static size_t mod653267(size_t hash) { return hash % 653267llu; }
+ static size_t mod823117(size_t hash) { return hash % 823117llu; }
+ static size_t mod1037059(size_t hash) { return hash % 1037059llu; }
+ static size_t mod1306601(size_t hash) { return hash % 1306601llu; }
+ static size_t mod1646237(size_t hash) { return hash % 1646237llu; }
+ static size_t mod2074129(size_t hash) { return hash % 2074129llu; }
+ static size_t mod2613229(size_t hash) { return hash % 2613229llu; }
+ static size_t mod3292489(size_t hash) { return hash % 3292489llu; }
+ static size_t mod4148279(size_t hash) { return hash % 4148279llu; }
+ static size_t mod5226491(size_t hash) { return hash % 5226491llu; }
+ static size_t mod6584983(size_t hash) { return hash % 6584983llu; }
+ static size_t mod8296553(size_t hash) { return hash % 8296553llu; }
+ static size_t mod10453007(size_t hash) { return hash % 10453007llu; }
+ static size_t mod13169977(size_t hash) { return hash % 13169977llu; }
+ static size_t mod16593127(size_t hash) { return hash % 16593127llu; }
+ static size_t mod20906033(size_t hash) { return hash % 20906033llu; }
+ static size_t mod26339969(size_t hash) { return hash % 26339969llu; }
+ static size_t mod33186281(size_t hash) { return hash % 33186281llu; }
+ static size_t mod41812097(size_t hash) { return hash % 41812097llu; }
+ static size_t mod52679969(size_t hash) { return hash % 52679969llu; }
+ static size_t mod66372617(size_t hash) { return hash % 66372617llu; }
+ static size_t mod83624237(size_t hash) { return hash % 83624237llu; }
+ static size_t mod105359939(size_t hash) { return hash % 105359939llu; }
+ static size_t mod132745199(size_t hash) { return hash % 132745199llu; }
+ static size_t mod167248483(size_t hash) { return hash % 167248483llu; }
+ static size_t mod210719881(size_t hash) { return hash % 210719881llu; }
+ static size_t mod265490441(size_t hash) { return hash % 265490441llu; }
+ static size_t mod334496971(size_t hash) { return hash % 334496971llu; }
+ static size_t mod421439783(size_t hash) { return hash % 421439783llu; }
+ static size_t mod530980861(size_t hash) { return hash % 530980861llu; }
+ static size_t mod668993977(size_t hash) { return hash % 668993977llu; }
+ static size_t mod842879579(size_t hash) { return hash % 842879579llu; }
+ static size_t mod1061961721(size_t hash) { return hash % 1061961721llu; }
+ static size_t mod1337987929(size_t hash) { return hash % 1337987929llu; }
+ static size_t mod1685759167(size_t hash) { return hash % 1685759167llu; }
+ static size_t mod2123923447(size_t hash) { return hash % 2123923447llu; }
+ static size_t mod2675975881(size_t hash) { return hash % 2675975881llu; }
+ static size_t mod3371518343(size_t hash) { return hash % 3371518343llu; }
+ static size_t mod4247846927(size_t hash) { return hash % 4247846927llu; }
+ static size_t mod5351951779(size_t hash) { return hash % 5351951779llu; }
+ static size_t mod6743036717(size_t hash) { return hash % 6743036717llu; }
+ static size_t mod8495693897(size_t hash) { return hash % 8495693897llu; }
+ static size_t mod10703903591(size_t hash) { return hash % 10703903591llu; }
+ static size_t mod13486073473(size_t hash) { return hash % 13486073473llu; }
+ static size_t mod16991387857(size_t hash) { return hash % 16991387857llu; }
+ static size_t mod21407807219(size_t hash) { return hash % 21407807219llu; }
+ static size_t mod26972146961(size_t hash) { return hash % 26972146961llu; }
+ static size_t mod33982775741(size_t hash) { return hash % 33982775741llu; }
+ static size_t mod42815614441(size_t hash) { return hash % 42815614441llu; }
+ static size_t mod53944293929(size_t hash) { return hash % 53944293929llu; }
+ static size_t mod67965551447(size_t hash) { return hash % 67965551447llu; }
+ static size_t mod85631228929(size_t hash) { return hash % 85631228929llu; }
+ static size_t mod107888587883(size_t hash) { return hash % 107888587883llu; }
+ static size_t mod135931102921(size_t hash) { return hash % 135931102921llu; }
+ static size_t mod171262457903(size_t hash) { return hash % 171262457903llu; }
+ static size_t mod215777175787(size_t hash) { return hash % 215777175787llu; }
+ static size_t mod271862205833(size_t hash) { return hash % 271862205833llu; }
+ static size_t mod342524915839(size_t hash) { return hash % 342524915839llu; }
+ static size_t mod431554351609(size_t hash) { return hash % 431554351609llu; }
+ static size_t mod543724411781(size_t hash) { return hash % 543724411781llu; }
+ static size_t mod685049831731(size_t hash) { return hash % 685049831731llu; }
+ static size_t mod863108703229(size_t hash) { return hash % 863108703229llu; }
+ static size_t mod1087448823553(size_t hash) { return hash % 1087448823553llu; }
+ static size_t mod1370099663459(size_t hash) { return hash % 1370099663459llu; }
+ static size_t mod1726217406467(size_t hash) { return hash % 1726217406467llu; }
+ static size_t mod2174897647073(size_t hash) { return hash % 2174897647073llu; }
+ static size_t mod2740199326961(size_t hash) { return hash % 2740199326961llu; }
+ static size_t mod3452434812973(size_t hash) { return hash % 3452434812973llu; }
+ static size_t mod4349795294267(size_t hash) { return hash % 4349795294267llu; }
+ static size_t mod5480398654009(size_t hash) { return hash % 5480398654009llu; }
+ static size_t mod6904869625999(size_t hash) { return hash % 6904869625999llu; }
+ static size_t mod8699590588571(size_t hash) { return hash % 8699590588571llu; }
+ static size_t mod10960797308051(size_t hash) { return hash % 10960797308051llu; }
+ static size_t mod13809739252051(size_t hash) { return hash % 13809739252051llu; }
+ static size_t mod17399181177241(size_t hash) { return hash % 17399181177241llu; }
+ static size_t mod21921594616111(size_t hash) { return hash % 21921594616111llu; }
+ static size_t mod27619478504183(size_t hash) { return hash % 27619478504183llu; }
+ static size_t mod34798362354533(size_t hash) { return hash % 34798362354533llu; }
+ static size_t mod43843189232363(size_t hash) { return hash % 43843189232363llu; }
+ static size_t mod55238957008387(size_t hash) { return hash % 55238957008387llu; }
+ static size_t mod69596724709081(size_t hash) { return hash % 69596724709081llu; }
+ static size_t mod87686378464759(size_t hash) { return hash % 87686378464759llu; }
+ static size_t mod110477914016779(size_t hash) { return hash % 110477914016779llu; }
+ static size_t mod139193449418173(size_t hash) { return hash % 139193449418173llu; }
+ static size_t mod175372756929481(size_t hash) { return hash % 175372756929481llu; }
+ static size_t mod220955828033581(size_t hash) { return hash % 220955828033581llu; }
+ static size_t mod278386898836457(size_t hash) { return hash % 278386898836457llu; }
+ static size_t mod350745513859007(size_t hash) { return hash % 350745513859007llu; }
+ static size_t mod441911656067171(size_t hash) { return hash % 441911656067171llu; }
+ static size_t mod556773797672909(size_t hash) { return hash % 556773797672909llu; }
+ static size_t mod701491027718027(size_t hash) { return hash % 701491027718027llu; }
+ static size_t mod883823312134381(size_t hash) { return hash % 883823312134381llu; }
+ static size_t mod1113547595345903(size_t hash) { return hash % 1113547595345903llu; }
+ static size_t mod1402982055436147(size_t hash) { return hash % 1402982055436147llu; }
+ static size_t mod1767646624268779(size_t hash) { return hash % 1767646624268779llu; }
+ static size_t mod2227095190691797(size_t hash) { return hash % 2227095190691797llu; }
+ static size_t mod2805964110872297(size_t hash) { return hash % 2805964110872297llu; }
+ static size_t mod3535293248537579(size_t hash) { return hash % 3535293248537579llu; }
+ static size_t mod4454190381383713(size_t hash) { return hash % 4454190381383713llu; }
+ static size_t mod5611928221744609(size_t hash) { return hash % 5611928221744609llu; }
+ static size_t mod7070586497075177(size_t hash) { return hash % 7070586497075177llu; }
+ static size_t mod8908380762767489(size_t hash) { return hash % 8908380762767489llu; }
+ static size_t mod11223856443489329(size_t hash) { return hash % 11223856443489329llu; }
+ static size_t mod14141172994150357(size_t hash) { return hash % 14141172994150357llu; }
+ static size_t mod17816761525534927(size_t hash) { return hash % 17816761525534927llu; }
+ static size_t mod22447712886978529(size_t hash) { return hash % 22447712886978529llu; }
+ static size_t mod28282345988300791(size_t hash) { return hash % 28282345988300791llu; }
+ static size_t mod35633523051069991(size_t hash) { return hash % 35633523051069991llu; }
+ static size_t mod44895425773957261(size_t hash) { return hash % 44895425773957261llu; }
+ static size_t mod56564691976601587(size_t hash) { return hash % 56564691976601587llu; }
+ static size_t mod71267046102139967(size_t hash) { return hash % 71267046102139967llu; }
+ static size_t mod89790851547914507(size_t hash) { return hash % 89790851547914507llu; }
+ static size_t mod113129383953203213(size_t hash) { return hash % 113129383953203213llu; }
+ static size_t mod142534092204280003(size_t hash) { return hash % 142534092204280003llu; }
+ static size_t mod179581703095829107(size_t hash) { return hash % 179581703095829107llu; }
+ static size_t mod226258767906406483(size_t hash) { return hash % 226258767906406483llu; }
+ static size_t mod285068184408560057(size_t hash) { return hash % 285068184408560057llu; }
+ static size_t mod359163406191658253(size_t hash) { return hash % 359163406191658253llu; }
+ static size_t mod452517535812813007(size_t hash) { return hash % 452517535812813007llu; }
+ static size_t mod570136368817120201(size_t hash) { return hash % 570136368817120201llu; }
+ static size_t mod718326812383316683(size_t hash) { return hash % 718326812383316683llu; }
+ static size_t mod905035071625626043(size_t hash) { return hash % 905035071625626043llu; }
+ static size_t mod1140272737634240411(size_t hash) { return hash % 1140272737634240411llu; }
+ static size_t mod1436653624766633509(size_t hash) { return hash % 1436653624766633509llu; }
+ static size_t mod1810070143251252131(size_t hash) { return hash % 1810070143251252131llu; }
+ static size_t mod2280545475268481167(size_t hash) { return hash % 2280545475268481167llu; }
+ static size_t mod2873307249533267101(size_t hash) { return hash % 2873307249533267101llu; }
+ static size_t mod3620140286502504283(size_t hash) { return hash % 3620140286502504283llu; }
+ static size_t mod4561090950536962147(size_t hash) { return hash % 4561090950536962147llu; }
+ static size_t mod5746614499066534157(size_t hash) { return hash % 5746614499066534157llu; }
+ static size_t mod7240280573005008577(size_t hash) { return hash % 7240280573005008577llu; }
+ static size_t mod9122181901073924329(size_t hash) { return hash % 9122181901073924329llu; }
+ static size_t mod11493228998133068689(size_t hash) { return hash % 11493228998133068689llu; }
+ static size_t mod14480561146010017169(size_t hash) { return hash % 14480561146010017169llu; }
+ static size_t mod18446744073709551557(size_t hash) { return hash % 18446744073709551557llu; }
+
+ using mod_function = size_t (*)(size_t);
+
+ mod_function next_size_over(size_t & size) const
+ {
+        // Prime numbers generated by the following method:
+        // 1. Start with a prime p = 2.
+        // 2. Go to Wolfram Alpha and get p = NextPrime(2 * p).
+        // 3. Repeat step 2 until you overflow 64 bits.
+        //    You now have large gaps, which you would hit if somebody called reserve() with an unlucky number.
+        // 4. To fill the gaps, for every prime p go to Wolfram Alpha and get ClosestPrime(p * 2^(1/3)) and ClosestPrime(p * 2^(2/3)) and put those in the gaps.
+        // 5. Get PrevPrime(2^64) and put it at the end.
+ static constexpr const size_t prime_list[] =
+ {
+ 2llu, 3llu, 5llu, 7llu, 11llu, 13llu, 17llu, 23llu, 29llu, 37llu, 47llu,
+ 59llu, 73llu, 97llu, 127llu, 151llu, 197llu, 251llu, 313llu, 397llu,
+ 499llu, 631llu, 797llu, 1009llu, 1259llu, 1597llu, 2011llu, 2539llu,
+ 3203llu, 4027llu, 5087llu, 6421llu, 8089llu, 10193llu, 12853llu, 16193llu,
+ 20399llu, 25717llu, 32401llu, 40823llu, 51437llu, 64811llu, 81649llu,
+ 102877llu, 129607llu, 163307llu, 205759llu, 259229llu, 326617llu,
+ 411527llu, 518509llu, 653267llu, 823117llu, 1037059llu, 1306601llu,
+ 1646237llu, 2074129llu, 2613229llu, 3292489llu, 4148279llu, 5226491llu,
+ 6584983llu, 8296553llu, 10453007llu, 13169977llu, 16593127llu, 20906033llu,
+ 26339969llu, 33186281llu, 41812097llu, 52679969llu, 66372617llu,
+ 83624237llu, 105359939llu, 132745199llu, 167248483llu, 210719881llu,
+ 265490441llu, 334496971llu, 421439783llu, 530980861llu, 668993977llu,
+ 842879579llu, 1061961721llu, 1337987929llu, 1685759167llu, 2123923447llu,
+ 2675975881llu, 3371518343llu, 4247846927llu, 5351951779llu, 6743036717llu,
+ 8495693897llu, 10703903591llu, 13486073473llu, 16991387857llu,
+ 21407807219llu, 26972146961llu, 33982775741llu, 42815614441llu,
+ 53944293929llu, 67965551447llu, 85631228929llu, 107888587883llu,
+ 135931102921llu, 171262457903llu, 215777175787llu, 271862205833llu,
+ 342524915839llu, 431554351609llu, 543724411781llu, 685049831731llu,
+ 863108703229llu, 1087448823553llu, 1370099663459llu, 1726217406467llu,
+ 2174897647073llu, 2740199326961llu, 3452434812973llu, 4349795294267llu,
+ 5480398654009llu, 6904869625999llu, 8699590588571llu, 10960797308051llu,
+ 13809739252051llu, 17399181177241llu, 21921594616111llu, 27619478504183llu,
+ 34798362354533llu, 43843189232363llu, 55238957008387llu, 69596724709081llu,
+ 87686378464759llu, 110477914016779llu, 139193449418173llu,
+ 175372756929481llu, 220955828033581llu, 278386898836457llu,
+ 350745513859007llu, 441911656067171llu, 556773797672909llu,
+ 701491027718027llu, 883823312134381llu, 1113547595345903llu,
+ 1402982055436147llu, 1767646624268779llu, 2227095190691797llu,
+ 2805964110872297llu, 3535293248537579llu, 4454190381383713llu,
+ 5611928221744609llu, 7070586497075177llu, 8908380762767489llu,
+ 11223856443489329llu, 14141172994150357llu, 17816761525534927llu,
+ 22447712886978529llu, 28282345988300791llu, 35633523051069991llu,
+ 44895425773957261llu, 56564691976601587llu, 71267046102139967llu,
+ 89790851547914507llu, 113129383953203213llu, 142534092204280003llu,
+ 179581703095829107llu, 226258767906406483llu, 285068184408560057llu,
+ 359163406191658253llu, 452517535812813007llu, 570136368817120201llu,
+ 718326812383316683llu, 905035071625626043llu, 1140272737634240411llu,
+ 1436653624766633509llu, 1810070143251252131llu, 2280545475268481167llu,
+ 2873307249533267101llu, 3620140286502504283llu, 4561090950536962147llu,
+ 5746614499066534157llu, 7240280573005008577llu, 9122181901073924329llu,
+ 11493228998133068689llu, 14480561146010017169llu, 18446744073709551557llu
+ };
+ static constexpr size_t (* const mod_functions[])(size_t) =
+ {
+ &mod0, &mod2, &mod3, &mod5, &mod7, &mod11, &mod13, &mod17, &mod23, &mod29, &mod37,
+ &mod47, &mod59, &mod73, &mod97, &mod127, &mod151, &mod197, &mod251, &mod313, &mod397,
+ &mod499, &mod631, &mod797, &mod1009, &mod1259, &mod1597, &mod2011, &mod2539, &mod3203,
+ &mod4027, &mod5087, &mod6421, &mod8089, &mod10193, &mod12853, &mod16193, &mod20399,
+ &mod25717, &mod32401, &mod40823, &mod51437, &mod64811, &mod81649, &mod102877,
+ &mod129607, &mod163307, &mod205759, &mod259229, &mod326617, &mod411527, &mod518509,
+ &mod653267, &mod823117, &mod1037059, &mod1306601, &mod1646237, &mod2074129,
+ &mod2613229, &mod3292489, &mod4148279, &mod5226491, &mod6584983, &mod8296553,
+ &mod10453007, &mod13169977, &mod16593127, &mod20906033, &mod26339969, &mod33186281,
+ &mod41812097, &mod52679969, &mod66372617, &mod83624237, &mod105359939, &mod132745199,
+ &mod167248483, &mod210719881, &mod265490441, &mod334496971, &mod421439783,
+ &mod530980861, &mod668993977, &mod842879579, &mod1061961721, &mod1337987929,
+ &mod1685759167, &mod2123923447, &mod2675975881, &mod3371518343, &mod4247846927,
+ &mod5351951779, &mod6743036717, &mod8495693897, &mod10703903591, &mod13486073473,
+ &mod16991387857, &mod21407807219, &mod26972146961, &mod33982775741, &mod42815614441,
+ &mod53944293929, &mod67965551447, &mod85631228929, &mod107888587883, &mod135931102921,
+ &mod171262457903, &mod215777175787, &mod271862205833, &mod342524915839,
+ &mod431554351609, &mod543724411781, &mod685049831731, &mod863108703229,
+ &mod1087448823553, &mod1370099663459, &mod1726217406467, &mod2174897647073,
+ &mod2740199326961, &mod3452434812973, &mod4349795294267, &mod5480398654009,
+ &mod6904869625999, &mod8699590588571, &mod10960797308051, &mod13809739252051,
+ &mod17399181177241, &mod21921594616111, &mod27619478504183, &mod34798362354533,
+ &mod43843189232363, &mod55238957008387, &mod69596724709081, &mod87686378464759,
+ &mod110477914016779, &mod139193449418173, &mod175372756929481, &mod220955828033581,
+ &mod278386898836457, &mod350745513859007, &mod441911656067171, &mod556773797672909,
+ &mod701491027718027, &mod883823312134381, &mod1113547595345903, &mod1402982055436147,
+ &mod1767646624268779, &mod2227095190691797, &mod2805964110872297, &mod3535293248537579,
+ &mod4454190381383713, &mod5611928221744609, &mod7070586497075177, &mod8908380762767489,
+ &mod11223856443489329, &mod14141172994150357, &mod17816761525534927,
+ &mod22447712886978529, &mod28282345988300791, &mod35633523051069991,
+ &mod44895425773957261, &mod56564691976601587, &mod71267046102139967,
+ &mod89790851547914507, &mod113129383953203213, &mod142534092204280003,
+ &mod179581703095829107, &mod226258767906406483, &mod285068184408560057,
+ &mod359163406191658253, &mod452517535812813007, &mod570136368817120201,
+ &mod718326812383316683, &mod905035071625626043, &mod1140272737634240411,
+ &mod1436653624766633509, &mod1810070143251252131, &mod2280545475268481167,
+ &mod2873307249533267101, &mod3620140286502504283, &mod4561090950536962147,
+ &mod5746614499066534157, &mod7240280573005008577, &mod9122181901073924329,
+ &mod11493228998133068689, &mod14480561146010017169, &mod18446744073709551557
+ };
+ const size_t * found = std::lower_bound(std::begin(prime_list), std::end(prime_list) - 1, size);
+ size = *found;
+ return mod_functions[1 + found - prime_list];
+ }
+ void commit(mod_function new_mod_function)
+ {
+ current_mod_function = new_mod_function;
+ }
+ void reset()
+ {
+ current_mod_function = &mod0;
+ }
+
+ size_t index_for_hash(size_t hash, size_t /*num_slots_minus_one*/) const
+ {
+ return current_mod_function(hash);
+ }
+ size_t keep_in_range(size_t index, size_t num_slots_minus_one) const
+ {
+ return index > num_slots_minus_one ? current_mod_function(index) : index;
+ }
+
+private:
+ mod_function current_mod_function = &mod0;
+};
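The policy protocol can be exercised directly. A minimal sketch (the helper function and the local names size, mod and slot are illustrative only, assuming this header is included as "flat_hash_map.hpp"): next_size_over rounds the request up to the next listed prime and hands back the matching mod function, which only takes effect once commit is called:

    #include "flat_hash_map.hpp"

    void prime_policy_sketch()
    {
        ska::prime_number_hash_policy policy;
        size_t size = 1000;                     // request at least 1000 slots
        auto mod = policy.next_size_over(size); // size becomes 1009, the next listed prime
        policy.commit(mod);                     // only now does the policy switch to hash % 1009
        size_t slot = policy.index_for_hash(12345, size - 1); // 12345 % 1009 == 237
        (void)slot;
    }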
+
+struct power_of_two_hash_policy
+{
+ size_t index_for_hash(size_t hash, size_t num_slots_minus_one) const
+ {
+ return hash & num_slots_minus_one;
+ }
+ size_t keep_in_range(size_t index, size_t num_slots_minus_one) const
+ {
+ return index_for_hash(index, num_slots_minus_one);
+ }
+ int8_t next_size_over(size_t & size) const
+ {
+ size = detailv3::next_power_of_two(size);
+ return 0;
+ }
+ void commit(int8_t)
+ {
+ }
+ void reset()
+ {
+ }
+
+};
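Here next_size_over rounds the request up to a power of two and index_for_hash degenerates to a single AND: with 1024 slots, num_slots_minus_one is 1023 and hash & 1023 simply keeps the low 10 bits of the hash; commit and reset have nothing to remember.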
+
+struct fibonacci_hash_policy
+{
+ size_t index_for_hash(size_t hash, size_t /*num_slots_minus_one*/) const
+ {
+ return (11400714819323198485ull * hash) >> shift;
+ }
+ size_t keep_in_range(size_t index, size_t num_slots_minus_one) const
+ {
+ return index & num_slots_minus_one;
+ }
+
+ int8_t next_size_over(size_t & size) const
+ {
+ size = std::max(size_t(2), detailv3::next_power_of_two(size));
+ return 64 - detailv3::log2(size);
+ }
+ void commit(int8_t shift)
+ {
+ this->shift = shift;
+ }
+ void reset()
+ {
+ shift = 63;
+ }
+
+private:
+ int8_t shift = 63;
+};
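A short sketch of the fibonacci policy (hypothetical helper, same include assumption as above): 11400714819323198485 is 2^64 divided by the golden ratio, so multiplying by it scrambles the hash and the shift keeps only the top bits needed to index the table:

    #include "flat_hash_map.hpp"

    void fibonacci_policy_sketch()
    {
        ska::fibonacci_hash_policy policy;
        size_t size = 1000;
        int8_t shift = policy.next_size_over(size); // size becomes 1024; shift becomes 64 - 10 = 54
        policy.commit(shift);
        // The top 10 bits of hash * (2^64 / golden_ratio) pick one of the 1024 slots.
        size_t slot = policy.index_for_hash(12345, size - 1);
        (void)slot;
    }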
+
+template<typename K, typename V, typename H = std::hash<K>, typename E = std::equal_to<K>, typename A = std::allocator<std::pair<K, V> > >
+class flat_hash_map
+ : public detailv3::sherwood_v3_table
+ <
+ std::pair<K, V>,
+ K,
+ H,
+ detailv3::KeyOrValueHasher<K, std::pair<K, V>, H>,
+ E,
+ detailv3::KeyOrValueEquality<K, std::pair<K, V>, E>,
+ A,
+ typename std::allocator_traits<A>::template rebind_alloc<detailv3::sherwood_v3_entry<std::pair<K, V>>>
+ >
+{
+ using Table = detailv3::sherwood_v3_table
+ <
+ std::pair<K, V>,
+ K,
+ H,
+ detailv3::KeyOrValueHasher<K, std::pair<K, V>, H>,
+ E,
+ detailv3::KeyOrValueEquality<K, std::pair<K, V>, E>,
+ A,
+ typename std::allocator_traits<A>::template rebind_alloc<detailv3::sherwood_v3_entry<std::pair<K, V>>>
+ >;
+public:
+
+ using key_type = K;
+ using mapped_type = V;
+
+ using Table::Table;
+ flat_hash_map()
+ {
+ }
+
+ inline V & operator[](const K & key)
+ {
+ return emplace(key, convertible_to_value()).first->second;
+ }
+ inline V & operator[](K && key)
+ {
+ return emplace(std::move(key), convertible_to_value()).first->second;
+ }
+ V & at(const K & key)
+ {
+ auto found = this->find(key);
+ if (found == this->end())
+ throw std::out_of_range("Argument passed to at() was not in the map.");
+ return found->second;
+ }
+ const V & at(const K & key) const
+ {
+ auto found = this->find(key);
+ if (found == this->end())
+ throw std::out_of_range("Argument passed to at() was not in the map.");
+ return found->second;
+ }
+
+ using Table::emplace;
+ std::pair<typename Table::iterator, bool> emplace()
+ {
+ return emplace(key_type(), convertible_to_value());
+ }
+ template<typename M>
+ std::pair<typename Table::iterator, bool> insert_or_assign(const key_type & key, M && m)
+ {
+ auto emplace_result = emplace(key, std::forward<M>(m));
+ if (!emplace_result.second)
+ emplace_result.first->second = std::forward<M>(m);
+ return emplace_result;
+ }
+ template<typename M>
+ std::pair<typename Table::iterator, bool> insert_or_assign(key_type && key, M && m)
+ {
+ auto emplace_result = emplace(std::move(key), std::forward<M>(m));
+ if (!emplace_result.second)
+ emplace_result.first->second = std::forward<M>(m);
+ return emplace_result;
+ }
+ template<typename M>
+ typename Table::iterator insert_or_assign(typename Table::const_iterator, const key_type & key, M && m)
+ {
+ return insert_or_assign(key, std::forward<M>(m)).first;
+ }
+ template<typename M>
+ typename Table::iterator insert_or_assign(typename Table::const_iterator, key_type && key, M && m)
+ {
+ return insert_or_assign(std::move(key), std::forward<M>(m)).first;
+ }
+
+ friend bool operator==(const flat_hash_map & lhs, const flat_hash_map & rhs)
+ {
+ if (lhs.size() != rhs.size())
+ return false;
+ for (const typename Table::value_type & value : lhs)
+ {
+ auto found = rhs.find(value.first);
+ if (found == rhs.end())
+ return false;
+ else if (value.second != found->second)
+ return false;
+ }
+ return true;
+ }
+ friend bool operator!=(const flat_hash_map & lhs, const flat_hash_map & rhs)
+ {
+ return !(lhs == rhs);
+ }
+
+private:
+ struct convertible_to_value
+ {
+ operator V() const
+ {
+ return V();
+ }
+ };
+};
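A minimal usage sketch of the map defined above (assuming this header is on the include path as "flat_hash_map.hpp"); all members used here, operator[], insert_or_assign, find, size and end, are defined in this file:

    #include "flat_hash_map.hpp"
    #include <cstdio>
    #include <string>

    int main()
    {
        ska::flat_hash_map<std::string, int> ages;
        ages["alice"] = 31;                 // operator[] default-constructs, then assigns
        ages.insert_or_assign("alice", 32); // overwrites the existing mapped value
        auto it = ages.find("bob");
        std::printf("%zu entries, bob is %s\n", ages.size(),
                    it == ages.end() ? "absent" : "present");
        return 0;
    }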
+
+template<typename T, typename H = std::hash<T>, typename E = std::equal_to<T>, typename A = std::allocator<T> >
+class flat_hash_set
+ : public detailv3::sherwood_v3_table
+ <
+ T,
+ T,
+ H,
+ detailv3::functor_storage<size_t, H>,
+ E,
+ detailv3::functor_storage<bool, E>,
+ A,
+ typename std::allocator_traits<A>::template rebind_alloc<detailv3::sherwood_v3_entry<T>>
+ >
+{
+ using Table = detailv3::sherwood_v3_table
+ <
+ T,
+ T,
+ H,
+ detailv3::functor_storage<size_t, H>,
+ E,
+ detailv3::functor_storage<bool, E>,
+ A,
+ typename std::allocator_traits<A>::template rebind_alloc<detailv3::sherwood_v3_entry<T>>
+ >;
+public:
+
+ using key_type = T;
+
+ using Table::Table;
+ flat_hash_set()
+ {
+ }
+
+ template<typename... Args>
+ std::pair<typename Table::iterator, bool> emplace(Args &&... args)
+ {
+ return Table::emplace(T(std::forward<Args>(args)...));
+ }
+ std::pair<typename Table::iterator, bool> emplace(const key_type & arg)
+ {
+ return Table::emplace(arg);
+ }
+ std::pair<typename Table::iterator, bool> emplace(key_type & arg)
+ {
+ return Table::emplace(arg);
+ }
+ std::pair<typename Table::iterator, bool> emplace(const key_type && arg)
+ {
+ return Table::emplace(std::move(arg));
+ }
+ std::pair<typename Table::iterator, bool> emplace(key_type && arg)
+ {
+ return Table::emplace(std::move(arg));
+ }
+
+ friend bool operator==(const flat_hash_set & lhs, const flat_hash_set & rhs)
+ {
+ if (lhs.size() != rhs.size())
+ return false;
+ for (const T & value : lhs)
+ {
+ if (rhs.find(value) == rhs.end())
+ return false;
+ }
+ return true;
+ }
+ friend bool operator!=(const flat_hash_set & lhs, const flat_hash_set & rhs)
+ {
+ return !(lhs == rhs);
+ }
+};
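The set variant is used the same way; a short sketch (same include assumption as above), where the bool in the pair returned by emplace reports whether a new element was inserted:

    ska::flat_hash_set<int> seen;
    seen.emplace(42);
    bool duplicate = !seen.emplace(42).second; // true: 42 was already present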
+
+
+template<typename T>
+struct power_of_two_std_hash : std::hash<T>
+{
+ typedef ska::power_of_two_hash_policy hash_policy;
+};
+
+} // end namespace ska
\ No newline at end of file diff --git a/benchmarks/others/hopscotch_growth_policy.h b/benchmarks/others/hopscotch_growth_policy.h new file mode 100644 index 00000000..8c9f9694 --- /dev/null +++ b/benchmarks/others/hopscotch_growth_policy.h @@ -0,0 +1,346 @@ +/**
+ * MIT License
+ *
+ * Copyright (c) 2018 Thibaut Goetghebuer-Planchon <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H
+#define TSL_HOPSCOTCH_GROWTH_POLICY_H
+
+
+#include <algorithm>
+#include <array>
+#include <climits>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <ratio>
+#include <stdexcept>
+
+
+/**
+ * Only activate tsl_hh_assert if TSL_DEBUG is defined.
+ * This way we avoid the performance hit when NDEBUG is not defined with assert as tsl_hh_assert is used a lot
+ * (people usually compile with "-O3" and not "-O3 -DNDEBUG").
+ */
+#ifdef TSL_DEBUG
+#    define tsl_hh_assert(expr) assert(expr)
+#else
+#    define tsl_hh_assert(expr) (static_cast<void>(0))
+#endif
+
+
+/**
+ * If exceptions are enabled, throw the exception passed in parameter, otherwise call std::terminate.
+ */
+#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (defined (_MSC_VER) && defined (_CPPUNWIND))) && !defined(TSL_NO_EXCEPTIONS)
+#    define TSL_HH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
+#else
+#    define TSL_HH_NO_EXCEPTIONS
+#    ifdef NDEBUG
+#        define TSL_HH_THROW_OR_TERMINATE(ex, msg) std::terminate()
+#    else
+#        include <iostream>
+#        define TSL_HH_THROW_OR_TERMINATE(ex, msg) do { std::cerr << msg << std::endl; std::terminate(); } while(0)
+#    endif
+#endif
+
+
+namespace tsl {
+namespace hh {
+
+/**
+ * Grow the hash table by a factor of GrowthFactor, keeping the bucket count a power of two. This allows
+ * the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
+ *
+ * GrowthFactor must be a power of two >= 2.
+ */
+template<std::size_t GrowthFactor>
+class power_of_two_growth_policy {
+public:
+    /**
+     * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter.
+     * This number is a minimum, the policy may update this value with a higher value if needed (but not lower).
+     *
+     * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
+     * bucket_for_hash must always return 0 in this case.
+     */
+    explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
+        if(min_bucket_count_in_out > max_bucket_count()) {
+            TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
+        }
+
+        if(min_bucket_count_in_out > 0) {
+            min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
+            m_mask = min_bucket_count_in_out - 1;
+        }
+        else {
+            m_mask = 0;
+        }
+    }
+
+    /**
+     * Return the bucket [0, bucket_count()) to which the hash belongs.
+     * If bucket_count() is 0, it must always return 0.
+     */
+    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
+        return hash & m_mask;
+    }
+
+    /**
+     * Return the bucket count to use when the bucket array grows on rehash.
+     */
+    std::size_t next_bucket_count() const {
+        if((m_mask + 1) > max_bucket_count() / GrowthFactor) {
+            TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
+        }
+
+        return (m_mask + 1) * GrowthFactor;
+    }
+
+    /**
+     * Return the maximum number of buckets supported by the policy.
+     */
+    std::size_t max_bucket_count() const {
+        // Largest power of two.
+        return (std::numeric_limits<std::size_t>::max() / 2) + 1;
+    }
+
+    /**
+     * Reset the growth policy as if it was created with a bucket count of 0.
+     * After a clear, the policy must always return 0 when bucket_for_hash is called.
+     */
+    void clear() noexcept {
+        m_mask = 0;
+    }
+
+private:
+    static std::size_t round_up_to_power_of_two(std::size_t value) {
+        if(is_power_of_two(value)) {
+            return value;
+        }
+
+        if(value == 0) {
+            return 1;
+        }
+
+        --value;
+        for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
+            value |= value >> i;
+        }
+
+        return value + 1;
+    }
+
+    static constexpr bool is_power_of_two(std::size_t value) {
+        return value != 0 && (value & (value - 1)) == 0;
+    }
+
+private:
+    static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");
+
+    std::size_t m_mask;
+};
+
+
+/**
+ * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
+ * to a bucket. Slower, but it can be useful if you want a slower growth.
+ */
+template<class GrowthFactor = std::ratio<3, 2>>
+class mod_growth_policy {
+public:
+    explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
+        if(min_bucket_count_in_out > max_bucket_count()) {
+            TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
+        }
+
+        if(min_bucket_count_in_out > 0) {
+            m_mod = min_bucket_count_in_out;
+        }
+        else {
+            m_mod = 1;
+        }
+    }
+
+    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
+        return hash % m_mod;
+    }
+
+    std::size_t next_bucket_count() const {
+        if(m_mod == max_bucket_count()) {
+            TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
+        }
+
+        const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
+        if(!std::isnormal(next_bucket_count)) {
+            TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
+        }
+
+        if(next_bucket_count > double(max_bucket_count())) {
+            return max_bucket_count();
+        }
+        else {
+            return std::size_t(next_bucket_count);
+        }
+    }
+
+    std::size_t max_bucket_count() const {
+        return MAX_BUCKET_COUNT;
+    }
+
+    void clear() noexcept {
+        m_mod = 1;
+    }
+
+private:
+    static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
+    static const std::size_t MAX_BUCKET_COUNT =
+            std::size_t(double(
+                    std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
+            ));
+
+    static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
+
+    std::size_t m_mod;
+};
+
+
+
+namespace detail {
+
+#if SIZE_MAX >= ULLONG_MAX
+#define TSL_HH_NB_PRIMES 51
+#elif SIZE_MAX >= ULONG_MAX
+#define TSL_HH_NB_PRIMES 40
+#else
+#define TSL_HH_NB_PRIMES 23
+#endif
+
+static constexpr const std::array<std::size_t, TSL_HH_NB_PRIMES> PRIMES = {{
+    1u, 5u, 17u, 29u, 37u, 53u, 67u, 79u, 97u, 131u, 193u, 257u, 389u, 521u, 769u, 1031u,
+    1543u, 2053u, 3079u, 6151u, 12289u, 24593u, 49157u,
+#if SIZE_MAX >= ULONG_MAX
+    98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul,
+    25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
+    3221225473ul, 4294967291ul,
+#endif
+#if SIZE_MAX >= ULLONG_MAX
+    6442450939ull, 12884901893ull, 25769803751ull, 51539607551ull, 103079215111ull, 206158430209ull,
+    412316860441ull, 824633720831ull, 1649267441651ull, 3298534883309ull, 6597069766657ull,
+#endif
+}};
+
+template<unsigned int IPrime>
+static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; }
+
+// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the
+// compiler can optimize the modulo code better with a constant known at the compilation.
+static constexpr const std::array<std::size_t(*)(std::size_t), TSL_HH_NB_PRIMES> MOD_PRIME = {{
+    &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>,
+    &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>,
+    &mod<21>, &mod<22>,
+#if SIZE_MAX >= ULONG_MAX
+    &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, &mod<31>, &mod<32>,
+    &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>,
+#endif
+#if SIZE_MAX >= ULLONG_MAX
+    &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>, &mod<46>, &mod<47>, &mod<48>, &mod<49>,
+    &mod<50>,
+#endif
+}};
+
+}
+
+/**
+ * Grow the hash table by using prime numbers as bucket count. Slower than tsl::hh::power_of_two_growth_policy in
+ * general but will probably distribute the values around better in the buckets with a poor hash function.
+ *
+ * To allow the compiler to optimize the modulo operation, a lookup table is used with constant prime numbers.
+ *
+ * With a switch the code would look like:
+ * \code
+ * switch(iprime) { // iprime is the current prime of the hash table
+ *     case 0: hash % 5ul;
+ *             break;
+ *     case 1: hash % 17ul;
+ *             break;
+ *     case 2: hash % 29ul;
+ *             break;
+ *     ...
+ * }
+ * \endcode
+ *
+ * Due to the constant variable in the modulo the compiler is able to optimize the operation
+ * by a series of multiplications, subtractions and shifts.
+ *
+ * The 'hash % 5' could become something like 'hash - ((hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environment.
+ */
+class prime_growth_policy {
+public:
+    explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
+        auto it_prime = std::lower_bound(detail::PRIMES.begin(),
+                                         detail::PRIMES.end(), min_bucket_count_in_out);
+        if(it_prime == detail::PRIMES.end()) {
+            TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
+        }
+
+        m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
+        if(min_bucket_count_in_out > 0) {
+            min_bucket_count_in_out = *it_prime;
+        }
+        else {
+            min_bucket_count_in_out = 0;
+        }
+    }
+
+    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
+        return detail::MOD_PRIME[m_iprime](hash);
+    }
+
+    std::size_t next_bucket_count() const {
+        if(m_iprime + 1 >= detail::PRIMES.size()) {
+            TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
+        }
+
+        return detail::PRIMES[m_iprime + 1];
+    }
+
+    std::size_t max_bucket_count() const {
+        return detail::PRIMES.back();
+    }
+
+    void clear() noexcept {
+        m_iprime = 0;
+    }
+
+private:
+    unsigned int m_iprime;
+
+    static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
+                  "The type of m_iprime is not big enough.");
+};
+
+}
+}
+
+#endif
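A minimal sketch of how a table drives these policies (the helper function and the local names bucket_count, b and next are illustrative only): the constructor rounds the requested bucket count up in place, bucket_for_hash maps a hash to a slot, and next_bucket_count is consulted on rehash:

    #include "hopscotch_growth_policy.h"

    void growth_policy_sketch()
    {
        std::size_t bucket_count = 1000;                   // in/out: rounded up by the policy
        tsl::hh::prime_growth_policy policy(bucket_count); // bucket_count becomes 1031, the next listed prime
        std::size_t b = policy.bucket_for_hash(12345);     // 12345 % 1031 == 1004
        std::size_t next = policy.next_bucket_count();     // 1543, the next prime in the list
        (void)b; (void)next;
    }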
diff --git a/benchmarks/others/hopscotch_hash.h b/benchmarks/others/hopscotch_hash.h new file mode 100644 index 00000000..a97fa2b4 --- /dev/null +++ b/benchmarks/others/hopscotch_hash.h @@ -0,0 +1,1827 @@ +/**
+ * MIT License
+ *
+ * Copyright (c) 2017 Thibaut Goetghebuer-Planchon <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_HASH_H
+#define TSL_HOPSCOTCH_HASH_H
+
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <functional>
+#include <initializer_list>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+#include "hopscotch_growth_policy.h"
+
+
+#if (defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9))
+#    define TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR
+#endif
+
+
+namespace tsl {
+namespace detail_hopscotch_hash {
+
+
+template<typename T>
+struct make_void {
+    using type = void;
+};
+
+
+template<typename T, typename = void>
+struct has_is_transparent : std::false_type {
+};
+
+template<typename T>
+struct has_is_transparent<T, typename make_void<typename T::is_transparent>::type> : std::true_type {
+};
+
+
+template<typename T, typename = void>
+struct has_key_compare : std::false_type {
+};
+
+template<typename T>
+struct has_key_compare<T, typename make_void<typename T::key_compare>::type> : std::true_type {
+};
+
+
+template<typename U>
+struct is_power_of_two_policy: std::false_type {
+};
+
+template<std::size_t GrowthFactor>
+struct is_power_of_two_policy<tsl::hh::power_of_two_growth_policy<GrowthFactor>>: std::true_type {
+};
+
+
+template<typename T, typename U>
+static T numeric_cast(U value, const char* error_message = "numeric_cast() failed.") {
+    T ret = static_cast<T>(value);
+    if(static_cast<U>(ret) != value) {
+        TSL_HH_THROW_OR_TERMINATE(std::runtime_error, error_message);
+    }
+
+    const bool is_same_signedness = (std::is_unsigned<T>::value && std::is_unsigned<U>::value) ||
+                                    (std::is_signed<T>::value && std::is_signed<U>::value);
+    if(!is_same_signedness && (ret < T{}) != (value < U{})) {
+        TSL_HH_THROW_OR_TERMINATE(std::runtime_error, error_message);
+    }
+
+    return ret;
+}
+
+
+/*
+ * smallest_type_for_min_bits::type returns the smallest type that can fit MinBits.
+ */
+static const std::size_t SMALLEST_TYPE_MAX_BITS_SUPPORTED = 64;
+template<unsigned int MinBits, typename Enable = void>
+class smallest_type_for_min_bits {
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 0) && (MinBits <= 8)>::type> {
+public:
+    using type = std::uint_least8_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 8) && (MinBits <= 16)>::type> {
+public:
+    using type = std::uint_least16_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 16) && (MinBits <= 32)>::type> {
+public:
+    using type = std::uint_least32_t;
+};
+
+template<unsigned int MinBits>
+class smallest_type_for_min_bits<MinBits, typename std::enable_if<(MinBits > 32) && (MinBits <= 64)>::type> {
+public:
+    using type = std::uint_least64_t;
+};
+
+
+
+/*
+ * Each bucket may store up to three elements:
+ * - An aligned storage to store a value_type object with placement-new.
+ * - An (optional) hash of the value in the bucket.
+ * - An unsigned integer of type neighborhood_bitmap used to tell us which buckets in the neighborhood of the
+ *   current bucket contain a value with a hash belonging to the current bucket.
+ *
+ * For a bucket 'bct', a bit 'i' (counting from 0 and from the least significant bit to the most significant)
+ * set to 1 means that the bucket 'bct + i' contains a value with a hash belonging to bucket 'bct'.
+ * The bits used for that start from the third least significant bit.
+ * The two least significant bits are reserved:
+ * - The least significant bit is set to 1 if there is a value in the bucket storage.
+ * - The second least significant bit is set to 1 if there is an overflow. If more than NeighborhoodSize values
+ *   hash to the same bucket, the overflow values are stored in the m_overflow_elements list of the map.
+ *
+ * Details regarding hopscotch hashing and its implementation can be found here:
+ * https://tessil.github.io/2016/08/29/hopscotch-hashing.html
+ */
+static const std::size_t NB_RESERVED_BITS_IN_NEIGHBORHOOD = 2;
+
+
+using truncated_hash_type = std::uint_least32_t;
+
+/**
+ * Helper class that stores a truncated hash if StoreHash is true and nothing otherwise.
+ */
+template<bool StoreHash>
+class hopscotch_bucket_hash {
+public:
+    bool bucket_hash_equal(std::size_t /*hash*/) const noexcept {
+        return true;
+    }
+
+    truncated_hash_type truncated_bucket_hash() const noexcept {
+        return 0;
+    }
+
+protected:
+    void copy_hash(const hopscotch_bucket_hash& ) noexcept {
+    }
+
+    void set_hash(truncated_hash_type /*hash*/) noexcept {
+    }
+};
+
+template<>
+class hopscotch_bucket_hash<true> {
+public:
+    bool bucket_hash_equal(std::size_t hash) const noexcept {
+        return m_hash == truncated_hash_type(hash);
+    }
+
+    truncated_hash_type truncated_bucket_hash() const noexcept {
+        return m_hash;
+    }
+
+protected:
+    void copy_hash(const hopscotch_bucket_hash& bucket) noexcept {
+        m_hash = bucket.m_hash;
+    }
+
+    void set_hash(truncated_hash_type hash) noexcept {
+        m_hash = hash;
+    }
+
+private:
+    truncated_hash_type m_hash;
+};
+
+
+template<typename ValueType, unsigned int NeighborhoodSize, bool StoreHash>
+class hopscotch_bucket: public hopscotch_bucket_hash<StoreHash> {
+private:
+    static const std::size_t MIN_NEIGHBORHOOD_SIZE = 4;
+    static const std::size_t MAX_NEIGHBORHOOD_SIZE = SMALLEST_TYPE_MAX_BITS_SUPPORTED - NB_RESERVED_BITS_IN_NEIGHBORHOOD;
+
+
+    static_assert(NeighborhoodSize >= 4, "NeighborhoodSize should be >= 4.");
+    // We can't put a variable in the message, ensure coherence
+    static_assert(MIN_NEIGHBORHOOD_SIZE == 4, "");
+
+    static_assert(NeighborhoodSize <= 62, "NeighborhoodSize should be <= 62.");
+    // We can't put a variable in the message, ensure coherence
+    static_assert(MAX_NEIGHBORHOOD_SIZE == 62, "");
+
+
+    static_assert(!StoreHash || NeighborhoodSize <= 30,
+                  "NeighborhoodSize should be <= 30 if StoreHash is true.");
+    // We can't put a variable in the message, ensure coherence
+    static_assert(MAX_NEIGHBORHOOD_SIZE - 32 == 30, "");
+
+    using bucket_hash = hopscotch_bucket_hash<StoreHash>;
+
+public:
+    using value_type = ValueType;
+    using neighborhood_bitmap =
+            typename smallest_type_for_min_bits<NeighborhoodSize + NB_RESERVED_BITS_IN_NEIGHBORHOOD>::type;
+
+
+    hopscotch_bucket() noexcept: bucket_hash(), m_neighborhood_infos(0) {
+        tsl_hh_assert(empty());
+    }
+
+
+    hopscotch_bucket(const hopscotch_bucket& bucket)
+            noexcept(std::is_nothrow_copy_constructible<value_type>::value): bucket_hash(bucket),
+                                                                             m_neighborhood_infos(0)
+    {
+        if(!bucket.empty()) {
+            ::new (static_cast<void*>(std::addressof(m_value))) value_type(bucket.value());
+        }
+
+        m_neighborhood_infos = bucket.m_neighborhood_infos;
+    }
+
+    hopscotch_bucket(hopscotch_bucket&& bucket)
+            noexcept(std::is_nothrow_move_constructible<value_type>::value) : bucket_hash(std::move(bucket)),
+                                                                              m_neighborhood_infos(0)
+    {
+        if(!bucket.empty()) {
+            ::new (static_cast<void*>(std::addressof(m_value))) value_type(std::move(bucket.value()));
+        }
+
+        m_neighborhood_infos = bucket.m_neighborhood_infos;
+    }
+
+    hopscotch_bucket& operator=(const hopscotch_bucket& bucket)
+            noexcept(std::is_nothrow_copy_constructible<value_type>::value)
+    {
+        if(this != &bucket) {
+            remove_value();
+
+            bucket_hash::operator=(bucket);
+            if(!bucket.empty()) {
+                ::new (static_cast<void*>(std::addressof(m_value))) value_type(bucket.value());
+            }
+
+            m_neighborhood_infos = bucket.m_neighborhood_infos;
+        }
+
+        return *this;
+    }
+
+    hopscotch_bucket& operator=(hopscotch_bucket&& ) = delete;
+
+    ~hopscotch_bucket() noexcept {
+        if(!empty()) {
+            destroy_value();
+        }
+    }
+
+    neighborhood_bitmap neighborhood_infos() const noexcept {
+        return neighborhood_bitmap(m_neighborhood_infos >> NB_RESERVED_BITS_IN_NEIGHBORHOOD);
+    }
+
+    void set_overflow(bool has_overflow) noexcept {
+        if(has_overflow) {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 2);
+        }
+        else {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~2);
+        }
+    }
+
+    bool has_overflow() const noexcept {
+        return (m_neighborhood_infos & 2) != 0;
+    }
+
+    bool empty() const noexcept {
+        return (m_neighborhood_infos & 1) == 0;
+    }
+
+    void toggle_neighbor_presence(std::size_t ineighbor) noexcept {
+        tsl_hh_assert(ineighbor <= NeighborhoodSize);
+        m_neighborhood_infos = neighborhood_bitmap(
+                m_neighborhood_infos ^ (1ull << (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)));
+    }
+
+    bool check_neighbor_presence(std::size_t ineighbor) const noexcept {
+        tsl_hh_assert(ineighbor <= NeighborhoodSize);
+        if(((m_neighborhood_infos >> (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)) & 1) == 1) {
+            return true;
+        }
+
+        return false;
+    }
+
+    value_type& value() noexcept {
+        tsl_hh_assert(!empty());
+        return *reinterpret_cast<value_type*>(std::addressof(m_value));
+    }
+
+    const value_type& value() const noexcept {
+        tsl_hh_assert(!empty());
+        return *reinterpret_cast<const value_type*>(std::addressof(m_value));
+    }
+
+    template<typename... Args>
+    void set_value_of_empty_bucket(truncated_hash_type hash, Args&&... value_type_args) {
+        tsl_hh_assert(empty());
+
+        ::new (static_cast<void*>(std::addressof(m_value))) value_type(std::forward<Args>(value_type_args)...);
+        set_empty(false);
+        this->set_hash(hash);
+    }
+
+    void swap_value_into_empty_bucket(hopscotch_bucket& empty_bucket) {
+        tsl_hh_assert(empty_bucket.empty());
+        if(!empty()) {
+            ::new (static_cast<void*>(std::addressof(empty_bucket.m_value))) value_type(std::move(value()));
+            empty_bucket.copy_hash(*this);
+            empty_bucket.set_empty(false);
+
+            destroy_value();
+            set_empty(true);
+        }
+    }
+
+    void remove_value() noexcept {
+        if(!empty()) {
+            destroy_value();
+            set_empty(true);
+        }
+    }
+
+    void clear() noexcept {
+        if(!empty()) {
+            destroy_value();
+        }
+
+        m_neighborhood_infos = 0;
+        tsl_hh_assert(empty());
+    }
+
+    static truncated_hash_type truncate_hash(std::size_t hash) noexcept {
+        return truncated_hash_type(hash);
+    }
+
+private:
+    void set_empty(bool is_empty) noexcept {
+        if(is_empty) {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~1);
+        }
+        else {
+            m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 1);
+        }
+    }
+
+    void destroy_value() noexcept {
+        tsl_hh_assert(!empty());
+        value().~value_type();
+    }
+
+private:
+    using storage = typename std::aligned_storage<sizeof(value_type), alignof(value_type)>::type;
+
+    neighborhood_bitmap m_neighborhood_infos;
+    storage m_value;
+};
+
+
+/**
+ * Internal common class used by (b)hopscotch_map and (b)hopscotch_set.
+ *
+ * ValueType is what will be stored by hopscotch_hash (usually std::pair<Key, T> for a map and Key for a set).
+ *
+ * KeySelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the key.
+ *
+ * ValueSelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the value.
+ * ValueSelect should be void if there is no value (in a set for example).
+ *
+ * OverflowContainer will be used as container for overflown elements. Usually it should be a list<ValueType>
+ * or a set<Key>/map<Key, T>.
+ */
+template<class ValueType,
+         class KeySelect,
+         class ValueSelect,
+         class Hash,
+         class KeyEqual,
+         class Allocator,
+         unsigned int NeighborhoodSize,
+         bool StoreHash,
+         class GrowthPolicy,
+         class OverflowContainer>
+class hopscotch_hash: private Hash, private KeyEqual, private GrowthPolicy {
+private:
+    template<typename U>
+    using has_mapped_type = typename std::integral_constant<bool, !std::is_same<U, void>::value>;
+
+    static_assert(noexcept(std::declval<GrowthPolicy>().bucket_for_hash(std::size_t(0))), "GrowthPolicy::bucket_for_hash must be noexcept.");
+    static_assert(noexcept(std::declval<GrowthPolicy>().clear()), "GrowthPolicy::clear must be noexcept.");
+
+public:
+    template<bool IsConst>
+    class hopscotch_iterator;
+
+    using key_type = typename KeySelect::key_type;
+    using value_type = ValueType;
+    using size_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+    using hasher = Hash;
+    using key_equal = KeyEqual;
+    using allocator_type = Allocator;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using iterator = hopscotch_iterator<false>;
+    using const_iterator = hopscotch_iterator<true>;
+
+private:
+    using hopscotch_bucket = tsl::detail_hopscotch_hash::hopscotch_bucket<ValueType, NeighborhoodSize, StoreHash>;
+    using neighborhood_bitmap = typename hopscotch_bucket::neighborhood_bitmap;
+
+    using buckets_allocator = typename std::allocator_traits<allocator_type>::template rebind_alloc<hopscotch_bucket>;
+    using buckets_container_type = std::vector<hopscotch_bucket, buckets_allocator>;
+
+    using overflow_container_type = OverflowContainer;
+
+    static_assert(std::is_same<typename overflow_container_type::value_type, ValueType>::value,
+                  "OverflowContainer should have ValueType as type.");
+
+    static_assert(std::is_same<typename overflow_container_type::allocator_type, Allocator>::value,
+                  "Invalid allocator, not the same type as the value_type.");
+
+
+    using iterator_buckets = typename buckets_container_type::iterator;
+    using const_iterator_buckets = typename buckets_container_type::const_iterator;
+
+    using iterator_overflow = typename overflow_container_type::iterator;
+    using const_iterator_overflow = typename overflow_container_type::const_iterator;
+
+public:
+    /**
+     * The `operator*()` and `operator->()` methods return a const reference and const pointer respectively to the
+     * stored value type.
+     *
+     * In case of a map, to get a modifiable reference to the value associated to a key (the `.second` in the
+     * stored pair), you have to call `value()`.
+ */ + template<bool IsConst> + class hopscotch_iterator { + friend class hopscotch_hash; + private: + using iterator_bucket = typename std::conditional<IsConst, + typename hopscotch_hash::const_iterator_buckets, + typename hopscotch_hash::iterator_buckets>::type; + using iterator_overflow = typename std::conditional<IsConst, + typename hopscotch_hash::const_iterator_overflow, + typename hopscotch_hash::iterator_overflow>::type; + + + hopscotch_iterator(iterator_bucket buckets_iterator, iterator_bucket buckets_end_iterator, + iterator_overflow overflow_iterator) noexcept : + m_buckets_iterator(buckets_iterator), m_buckets_end_iterator(buckets_end_iterator), + m_overflow_iterator(overflow_iterator) + { + } + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = const typename hopscotch_hash::value_type; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using pointer = value_type*; + + + hopscotch_iterator() noexcept { + } + + // Copy constructor from iterator to const_iterator. + template<bool TIsConst = IsConst, typename std::enable_if<TIsConst>::type* = nullptr> + hopscotch_iterator(const hopscotch_iterator<!TIsConst>& other) noexcept : + m_buckets_iterator(other.m_buckets_iterator), m_buckets_end_iterator(other.m_buckets_end_iterator), + m_overflow_iterator(other.m_overflow_iterator) + { + } + + hopscotch_iterator(const hopscotch_iterator& other) = default; + hopscotch_iterator(hopscotch_iterator&& other) = default; + hopscotch_iterator& operator=(const hopscotch_iterator& other) = default; + hopscotch_iterator& operator=(hopscotch_iterator&& other) = default; + + const typename hopscotch_hash::key_type& key() const { + if(m_buckets_iterator != m_buckets_end_iterator) { + return KeySelect()(m_buckets_iterator->value()); + } + + return KeySelect()(*m_overflow_iterator); + } + + template<class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> + typename std::conditional< + IsConst, + const typename U::value_type&, + typename U::value_type&>::type value() const + { + if(m_buckets_iterator != m_buckets_end_iterator) { + return U()(m_buckets_iterator->value()); + } + + return U()(*m_overflow_iterator); + } + + reference operator*() const { + if(m_buckets_iterator != m_buckets_end_iterator) { + return m_buckets_iterator->value(); + } + + return *m_overflow_iterator; + } + + pointer operator->() const { + if(m_buckets_iterator != m_buckets_end_iterator) { + return std::addressof(m_buckets_iterator->value()); + } + + return std::addressof(*m_overflow_iterator); + } + + hopscotch_iterator& operator++() { + if(m_buckets_iterator == m_buckets_end_iterator) { + ++m_overflow_iterator; + return *this; + } + + do { + ++m_buckets_iterator; + } while(m_buckets_iterator != m_buckets_end_iterator && m_buckets_iterator->empty()); + + return *this; + } + + hopscotch_iterator operator++(int) { + hopscotch_iterator tmp(*this); + ++*this; + + return tmp; + } + + friend bool operator==(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { + return lhs.m_buckets_iterator == rhs.m_buckets_iterator && + lhs.m_overflow_iterator == rhs.m_overflow_iterator; + } + + friend bool operator!=(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { + return !(lhs == rhs); + } + + private: + iterator_bucket m_buckets_iterator; + iterator_bucket m_buckets_end_iterator; + iterator_overflow m_overflow_iterator; + }; + +public: + template<class OC = OverflowContainer, typename 
std::enable_if<!has_key_compare<OC>::value>::type* = nullptr> + hopscotch_hash(size_type bucket_count, + const Hash& hash, + const KeyEqual& equal, + const Allocator& alloc, + float max_load_factor) : Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets_data(alloc), + m_overflow_elements(alloc), + m_buckets(static_empty_bucket_ptr()), + m_nb_elements(0) + { + if(bucket_count > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maximum size."); + } + + if(bucket_count > 0) { + static_assert(NeighborhoodSize - 1 > 0, ""); + + // Can't directly construct with the appropriate size in the initializer + // as m_buckets_data(bucket_count, alloc) is not supported by GCC 4.8 + m_buckets_data.resize(bucket_count + NeighborhoodSize - 1); + m_buckets = m_buckets_data.data(); + } + + + this->max_load_factor(max_load_factor); + + + // Check in the constructor instead of outside of a function to avoid compilation issues + // when value_type is not complete. + static_assert(std::is_nothrow_move_constructible<value_type>::value || + std::is_copy_constructible<value_type>::value, + "value_type must be either copy constructible or nothrow move constructible."); + } + + template<class OC = OverflowContainer, typename std::enable_if<has_key_compare<OC>::value>::type* = nullptr> + hopscotch_hash(size_type bucket_count, + const Hash& hash, + const KeyEqual& equal, + const Allocator& alloc, + float max_load_factor, + const typename OC::key_compare& comp) : Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets_data(alloc), + m_overflow_elements(comp, alloc), + m_buckets(static_empty_bucket_ptr()), + m_nb_elements(0) + { + + if(bucket_count > max_bucket_count()) { + TSL_HH_THROW_OR_TERMINATE(std::length_error, "The map exceeds its maximum size."); + } + + if(bucket_count > 0) { + static_assert(NeighborhoodSize - 1 > 0, ""); + + // Can't directly construct with the appropriate size in the initializer + // as m_buckets_data(bucket_count, alloc) is not supported by GCC 4.8 + m_buckets_data.resize(bucket_count + NeighborhoodSize - 1); + m_buckets = m_buckets_data.data(); + } + + + this->max_load_factor(max_load_factor); + + + // Check in the constructor instead of outside of a function to avoid compilation issues + // when value_type is not complete. 
+ static_assert(std::is_nothrow_move_constructible<value_type>::value || + std::is_copy_constructible<value_type>::value, + "value_type must be either copy constructible or nothrow move constructible."); + } + + hopscotch_hash(const hopscotch_hash& other): + Hash(other), + KeyEqual(other), + GrowthPolicy(other), + m_buckets_data(other.m_buckets_data), + m_overflow_elements(other.m_overflow_elements), + m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr(): + m_buckets_data.data()), + m_nb_elements(other.m_nb_elements), + m_min_load_threshold_rehash(other.m_min_load_threshold_rehash), + m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), + m_max_load_factor(other.m_max_load_factor) + { + } + + hopscotch_hash(hopscotch_hash&& other) + noexcept( + std::is_nothrow_move_constructible<Hash>::value && + std::is_nothrow_move_constructible<KeyEqual>::value && + std::is_nothrow_move_constructible<GrowthPolicy>::value && + std::is_nothrow_move_constructible<buckets_container_type>::value && + std::is_nothrow_move_constructible<overflow_container_type>::value + ): + Hash(std::move(static_cast<Hash&>(other))), + KeyEqual(std::move(static_cast<KeyEqual&>(other))), + GrowthPolicy(std::move(static_cast<GrowthPolicy&>(other))), + m_buckets_data(std::move(other.m_buckets_data)), + m_overflow_elements(std::move(other.m_overflow_elements)), + m_buckets(m_buckets_data.empty()?static_empty_bucket_ptr(): + m_buckets_data.data()), + m_nb_elements(other.m_nb_elements), + m_min_load_threshold_rehash(other.m_min_load_threshold_rehash), + m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), + m_max_load_factor(other.m_max_load_factor) + { + other.GrowthPolicy::clear(); + other.m_buckets_data.clear(); + other.m_overflow_elements.clear(); + other.m_buckets = static_empty_bucket_ptr(); + other.m_nb_elements = 0; + other.m_min_load_threshold_rehash = 0; + other.m_max_load_threshold_rehash = 0; + } + + hopscotch_hash& operator=(const hopscotch_hash& other) { + if(&other != this) { + Hash::operator=(other); + KeyEqual::operator=(other); + GrowthPolicy::operator=(other); + + m_buckets_data = other.m_buckets_data; + m_overflow_elements = other.m_overflow_elements; + m_buckets = m_buckets_data.empty()?static_empty_bucket_ptr(): + m_buckets_data.data(); + m_nb_elements = other.m_nb_elements; + + m_min_load_threshold_rehash = other.m_min_load_threshold_rehash; + m_max_load_threshold_rehash = other.m_max_load_threshold_rehash; + m_max_load_factor = other.m_max_load_factor; + } + + return *this; + } + + hopscotch_hash& operator=(hopscotch_hash&& other) { + other.swap(*this); + other.clear(); + + return *this; + } + + allocator_type get_allocator() const { + return m_buckets_data.get_allocator(); + } + + + /* + * Iterators + */ + iterator begin() noexcept { + auto begin = m_buckets_data.begin(); + while(begin != m_buckets_data.end() && begin->empty()) { + ++begin; + } + + return iterator(begin, m_buckets_data.end(), m_overflow_elements.begin()); + } + + const_iterator begin() const noexcept { + return cbegin(); + } + + const_iterator cbegin() const noexcept { + auto begin = m_buckets_data.cbegin(); + while(begin != m_buckets_data.cend() && begin->empty()) { + ++begin; + } + + return const_iterator(begin, m_buckets_data.cend(), m_overflow_elements.cbegin()); + } + + iterator end() noexcept { + return iterator(m_buckets_data.end(), m_buckets_data.end(), m_overflow_elements.end()); + } + + const_iterator end() const noexcept { + return cend(); + } + + const_iterator cend() const noexcept { + return 
const_iterator(m_buckets_data.cend(), m_buckets_data.cend(), m_overflow_elements.cend()); + } + + + /* + * Capacity + */ + bool empty() const noexcept { + return m_nb_elements == 0; + } + + size_type size() const noexcept { + return m_nb_elements; + } + + size_type max_size() const noexcept { + return m_buckets_data.max_size(); + } + + /* + * Modifiers + */ + void clear() noexcept { + for(auto& bucket: m_buckets_data) { + bucket.clear(); + } + + m_overflow_elements.clear(); + m_nb_elements = 0; + } + + + std::pair<iterator, bool> insert(const value_type& value) { + return insert_impl(value); + } + + template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr> + std::pair<iterator, bool> insert(P&& value) { + return insert_impl(value_type(std::forward<P>(value))); + } + + std::pair<iterator, bool> insert(value_type&& value) { + return insert_impl(std::move(value)); + } + + + iterator insert(const_iterator hint, const value_type& value) { + if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(value).first; + } + + template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return emplace_hint(hint, std::forward<P>(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(std::move(value)).first; + } + + + template<class InputIt> + void insert(InputIt first, InputIt last) { + if(std::is_base_of<std::forward_iterator_tag, + typename std::iterator_traits<InputIt>::iterator_category>::value) + { + const auto nb_elements_insert = std::distance(first, last); + const std::size_t nb_elements_in_buckets = m_nb_elements - m_overflow_elements.size(); + const std::size_t nb_free_buckets = m_max_load_threshold_rehash - nb_elements_in_buckets; + tsl_hh_assert(m_nb_elements >= m_overflow_elements.size()); + tsl_hh_assert(m_max_load_threshold_rehash >= nb_elements_in_buckets); + + if(nb_elements_insert > 0 && nb_free_buckets < std::size_t(nb_elements_insert)) { + reserve(nb_elements_in_buckets + std::size_t(nb_elements_insert)); + } + } + + for(; first != last; ++first) { + insert(*first); + } + } + + + template<class M> + std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { + return insert_or_assign_impl(k, std::forward<M>(obj)); + } + + template<class M> + std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { + return insert_or_assign_impl(std::move(k), std::forward<M>(obj)); + } + + + template<class M> + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { + auto it = mutable_iterator(hint); + it.value() = std::forward<M>(obj); + + return it; + } + + return insert_or_assign(k, std::forward<M>(obj)).first; + } + + template<class M> + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { + auto it = mutable_iterator(hint); + it.value() = std::forward<M>(obj); + + return it; + } + + return insert_or_assign(std::move(k), std::forward<M>(obj)).first; + } + + + template<class... Args> + std::pair<iterator, bool> emplace(Args&&... args) { + return insert(value_type(std::forward<Args>(args)...)); + } + + template<class... 
Args> + iterator emplace_hint(const_iterator hint, Args&&... args) { + return insert(hint, value_type(std::forward<Args>(args)...)); + } + + template<class... Args> + std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { + return try_emplace_impl(k, std::forward<Args>(args)...); + } + + template<class... Args> + std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) { + return try_emplace_impl(std::move(k), std::forward<Args>(args)...); + } + + template<class... Args> + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { + return mutable_iterator(hint); + } + + return try_emplace(k, std::forward<Args>(args)...).first; + } + + template<class... Args> + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { + return mutable_iterator(hint); + } + + return try_emplace(std::move(k), std::forward<Args>(args)...).first; + } + + + /** + * Here to avoid `template<class K> size_type erase(const K& key)` being used when + * we use an iterator instead of a const_iterator. + */ + iterator erase(iterator pos) { + return erase(const_iterator(pos)); + } + + iterator erase(const_iterator pos) { + const std::size_t ibucket_for_hash = bucket_for_hash(hash_key(pos.key())); + + if(pos.m_buckets_iterator != pos.m_buckets_end_iterator) { + auto it_bucket = m_buckets_data.begin() + std::distance(m_buckets_data.cbegin(), pos.m_buckets_iterator); + erase_from_bucket(*it_bucket, ibucket_for_hash); + + return ++iterator(it_bucket, m_buckets_data.end(), m_overflow_elements.begin()); + } + else { + auto it_next_overflow = erase_from_overflow(pos.m_overflow_iterator, ibucket_for_hash); + return iterator(m_buckets_data.end(), m_buckets_data.end(), it_next_overflow); + } + } + + iterator erase(const_iterator first, const_iterator last) { + if(first == last) { + return mutable_iterator(first); + } + + auto to_delete = erase(first); + while(to_delete != last) { + to_delete = erase(to_delete); + } + + return to_delete; + } + + template<class K> + size_type erase(const K& key) { + return erase(key, hash_key(key)); + } + + template<class K> + size_type erase(const K& key, std::size_t hash) { + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + hopscotch_bucket* bucket_found = find_in_buckets(key, hash, m_buckets + ibucket_for_hash); + if(bucket_found != nullptr) { + erase_from_bucket(*bucket_found, ibucket_for_hash); + + return 1; + } + + if(m_buckets[ibucket_for_hash].has_overflow()) { + auto it_overflow = find_in_overflow(key); + if(it_overflow != m_overflow_elements.end()) { + erase_from_overflow(it_overflow, ibucket_for_hash); + + return 1; + } + } + + return 0; + } + + void swap(hopscotch_hash& other) { + using std::swap; + + swap(static_cast<Hash&>(*this), static_cast<Hash&>(other)); + swap(static_cast<KeyEqual&>(*this), static_cast<KeyEqual&>(other)); + swap(static_cast<GrowthPolicy&>(*this), static_cast<GrowthPolicy&>(other)); + swap(m_buckets_data, other.m_buckets_data); + swap(m_overflow_elements, other.m_overflow_elements); + swap(m_buckets, other.m_buckets); + swap(m_nb_elements, other.m_nb_elements); + swap(m_min_load_threshold_rehash, other.m_min_load_threshold_rehash); + swap(m_max_load_threshold_rehash, other.m_max_load_threshold_rehash); + swap(m_max_load_factor, other.m_max_load_factor); + } + + + /* + * Lookup + */ + template<class K, class U = ValueSelect, typename 
std::enable_if<has_mapped_type<U>::value>::type* = nullptr> + typename U::value_type& at(const K& key) { + return at(key, hash_key(key)); + } + + template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> + typename U::value_type& at(const K& key, std::size_t hash) { + return const_cast<typename U::value_type&>(static_cast<const hopscotch_hash*>(this)->at(key, hash)); + } + + + template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> + const typename U::value_type& at(const K& key) const { + return at(key, hash_key(key)); + } + + template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> + const typename U::value_type& at(const K& key, std::size_t hash) const { + using T = typename U::value_type; + + const T* value = find_value_impl(key, hash, m_buckets + bucket_for_hash(hash)); + if(value == nullptr) { + TSL_HH_THROW_OR_TERMINATE(std::out_of_range, "Couldn't find key."); + } + else { + return *value; + } + } + + + template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr> + typename U::value_type& operator[](K&& key) { + using T = typename U::value_type; + + const std::size_t hash = hash_key(key); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + T* value = find_value_impl(key, hash, m_buckets + ibucket_for_hash); + if(value != nullptr) { + return *value; + } + else { + return insert_value(ibucket_for_hash, hash, std::piecewise_construct, + std::forward_as_tuple(std::forward<K>(key)), + std::forward_as_tuple()).first.value(); + } + } + + + template<class K> + size_type count(const K& key) const { + return count(key, hash_key(key)); + } + + template<class K> + size_type count(const K& key, std::size_t hash) const { + return count_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + + template<class K> + iterator find(const K& key) { + return find(key, hash_key(key)); + } + + template<class K> + iterator find(const K& key, std::size_t hash) { + return find_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + + template<class K> + const_iterator find(const K& key) const { + return find(key, hash_key(key)); + } + + template<class K> + const_iterator find(const K& key, std::size_t hash) const { + return find_impl(key, hash, m_buckets + bucket_for_hash(hash)); + } + + + template<class K> + bool contains(const K& key) const { + return contains(key, hash_key(key)); + } + + template<class K> + bool contains(const K& key, std::size_t hash) const { + return count(key, hash) != 0; + } + + + template<class K> + std::pair<iterator, iterator> equal_range(const K& key) { + return equal_range(key, hash_key(key)); + } + + template<class K> + std::pair<iterator, iterator> equal_range(const K& key, std::size_t hash) { + iterator it = find(key, hash); + return std::make_pair(it, (it == end())?it:std::next(it)); + } + + + template<class K> + std::pair<const_iterator, const_iterator> equal_range(const K& key) const { + return equal_range(key, hash_key(key)); + } + + template<class K> + std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t hash) const { + const_iterator it = find(key, hash); + return std::make_pair(it, (it == cend())?it:std::next(it)); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { + /* + * So that the last bucket can have NeighborhoodSize neighbors, the size of the bucket array is a little + * bigger than the 
real number of buckets when not empty. + * We could use some of the buckets at the beginning, but it is faster this way as we avoid extra checks. + */ + if(m_buckets_data.empty()) { + return 0; + } + + return m_buckets_data.size() - NeighborhoodSize + 1; + } + + size_type max_bucket_count() const { + const std::size_t max_bucket_count = std::min(GrowthPolicy::max_bucket_count(), m_buckets_data.max_size()); + return max_bucket_count - NeighborhoodSize + 1; + } + + + /* + * Hash policy + */ + float load_factor() const { + if(bucket_count() == 0) { + return 0; + } + + return float(m_nb_elements)/float(bucket_count()); + } + + float max_load_factor() const { + return m_max_load_factor; + } + + void max_load_factor(float ml) { + m_max_load_factor = std::max(0.1f, std::min(ml, 0.95f)); + m_min_load_threshold_rehash = size_type(float(bucket_count())*MIN_LOAD_FACTOR_FOR_REHASH); + m_max_load_threshold_rehash = size_type(float(bucket_count())*m_max_load_factor); + } + + void rehash(size_type count_) { + count_ = std::max(count_, size_type(std::ceil(float(size())/max_load_factor()))); + rehash_impl(count_); + } + + void reserve(size_type count_) { + rehash(size_type(std::ceil(float(count_)/max_load_factor()))); + } + + + /* + * Observers + */ + hasher hash_function() const { + return static_cast<const Hash&>(*this); + } + + key_equal key_eq() const { + return static_cast<const KeyEqual&>(*this); + } + + /* + * Other + */ + iterator mutable_iterator(const_iterator pos) { + if(pos.m_buckets_iterator != pos.m_buckets_end_iterator) { + // Get a non-const iterator + auto it = m_buckets_data.begin() + std::distance(m_buckets_data.cbegin(), pos.m_buckets_iterator); + return iterator(it, m_buckets_data.end(), m_overflow_elements.begin()); + } + else { + // Get a non-const iterator + auto it = mutable_overflow_iterator(pos.m_overflow_iterator); + return iterator(m_buckets_data.end(), m_buckets_data.end(), it); + } + } + + size_type overflow_size() const noexcept { + return m_overflow_elements.size(); + } + + template<class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr> + typename U::key_compare key_comp() const { + return m_overflow_elements.key_comp(); + } + + +private: + template<class K> + std::size_t hash_key(const K& key) const { + return Hash::operator()(key); + } + + template<class K1, class K2> + bool compare_keys(const K1& key1, const K2& key2) const { + return KeyEqual::operator()(key1, key2); + } + + std::size_t bucket_for_hash(std::size_t hash) const { + const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); + tsl_hh_assert(bucket < m_buckets_data.size() || (bucket == 0 && m_buckets_data.empty())); + + return bucket; + } + + template<typename U = value_type, + typename std::enable_if<std::is_nothrow_move_constructible<U>::value>::type* = nullptr> + void rehash_impl(size_type count_) { + hopscotch_hash new_map = new_hopscotch_hash(count_); + + if(!m_overflow_elements.empty()) { + new_map.m_overflow_elements.swap(m_overflow_elements); + new_map.m_nb_elements += new_map.m_overflow_elements.size(); + + for(const value_type& value : new_map.m_overflow_elements) { + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(new_map.hash_key(KeySelect()(value))); + new_map.m_buckets[ibucket_for_hash].set_overflow(true); + } + } + +#ifndef TSL_HH_NO_EXCEPTIONS + try { +#endif + const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for(auto it_bucket = m_buckets_data.begin(); it_bucket != m_buckets_data.end(); ++it_bucket) 
{
+            if(it_bucket->empty()) {
+                continue;
+            }
+
+            const std::size_t hash = use_stored_hash?
+                    it_bucket->truncated_bucket_hash():
+                    new_map.hash_key(KeySelect()(it_bucket->value()));
+            const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+
+            new_map.insert_value(ibucket_for_hash, hash, std::move(it_bucket->value()));
+
+
+            erase_from_bucket(*it_bucket, bucket_for_hash(hash));
+        }
+#ifndef TSL_HH_NO_EXCEPTIONS
+        }
+        /*
+         * The call to insert_value may throw an exception if an element is added to the overflow
+         * list and the memory allocation fails. Roll back the elements in this case.
+         */
+        catch(...) {
+            m_overflow_elements.swap(new_map.m_overflow_elements);
+
+            const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count());
+            for(auto it_bucket = new_map.m_buckets_data.begin(); it_bucket != new_map.m_buckets_data.end(); ++it_bucket) {
+                if(it_bucket->empty()) {
+                    continue;
+                }
+
+                const std::size_t hash = use_stored_hash?
+                        it_bucket->truncated_bucket_hash():
+                        hash_key(KeySelect()(it_bucket->value()));
+                const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+
+                // The elements we insert were not in the overflow list before the switch.
+                // They will not go into the overflow list if we roll back the switch.
+                insert_value(ibucket_for_hash, hash, std::move(it_bucket->value()));
+            }
+
+            throw;
+        }
+#endif
+
+        new_map.swap(*this);
+    }
+
+    template<typename U = value_type,
+             typename std::enable_if<std::is_copy_constructible<U>::value &&
+                                     !std::is_nothrow_move_constructible<U>::value>::type* = nullptr>
+    void rehash_impl(size_type count_) {
+        hopscotch_hash new_map = new_hopscotch_hash(count_);
+
+        const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count());
+        for(const hopscotch_bucket& bucket: m_buckets_data) {
+            if(bucket.empty()) {
+                continue;
+            }
+
+            const std::size_t hash = use_stored_hash?
+                    bucket.truncated_bucket_hash():
+                    new_map.hash_key(KeySelect()(bucket.value()));
+            const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+
+            new_map.insert_value(ibucket_for_hash, hash, bucket.value());
+        }
+
+        for(const value_type& value: m_overflow_elements) {
+            const std::size_t hash = new_map.hash_key(KeySelect()(value));
+            const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash);
+
+            new_map.insert_value(ibucket_for_hash, hash, value);
+        }
+
+        new_map.swap(*this);
+    }
+
+#ifdef TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR
+    iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) {
+        return std::next(m_overflow_elements.begin(), std::distance(m_overflow_elements.cbegin(), it));
+    }
+#else
+    iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) {
+        return m_overflow_elements.erase(it, it);
+    }
+#endif
+
+    // The iterator is in the overflow list
+    iterator_overflow erase_from_overflow(const_iterator_overflow pos, std::size_t ibucket_for_hash) {
+#ifdef TSL_HH_NO_RANGE_ERASE_WITH_CONST_ITERATOR
+        auto it_next = m_overflow_elements.erase(mutable_overflow_iterator(pos));
+#else
+        auto it_next = m_overflow_elements.erase(pos);
+#endif
+        m_nb_elements--;
+
+
+        // Check if we can remove the overflow flag
+        tsl_hh_assert(m_buckets[ibucket_for_hash].has_overflow());
+        for(const value_type& value: m_overflow_elements) {
+            const std::size_t bucket_for_value = bucket_for_hash(hash_key(KeySelect()(value)));
+            if(bucket_for_value == ibucket_for_hash) {
+                return it_next;
+            }
+        }
+
+        m_buckets[ibucket_for_hash].set_overflow(false);
+        return it_next;
+    }
+
+
+    /**
+     * bucket_for_value is the bucket in which the value is.
+     * ibucket_for_hash is the bucket where the value belongs.
+     */
+    void erase_from_bucket(hopscotch_bucket& bucket_for_value, std::size_t ibucket_for_hash) noexcept {
+        const std::size_t ibucket_for_value = std::distance(m_buckets_data.data(), &bucket_for_value);
+        tsl_hh_assert(ibucket_for_value >= ibucket_for_hash);
+
+        bucket_for_value.remove_value();
+        m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_for_value - ibucket_for_hash);
+        m_nb_elements--;
+    }
+
+
+
+    template<class K, class M>
+    std::pair<iterator, bool> insert_or_assign_impl(K&& key, M&& obj) {
+        auto it = try_emplace_impl(std::forward<K>(key), std::forward<M>(obj));
+        if(!it.second) {
+            it.first.value() = std::forward<M>(obj);
+        }
+
+        return it;
+    }
+
+    template<typename P, class... Args>
+    std::pair<iterator, bool> try_emplace_impl(P&& key, Args&&... args_value) {
+        const std::size_t hash = hash_key(key);
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+
+        // Check if the key is already present
+        auto it_find = find_impl(key, hash, m_buckets + ibucket_for_hash);
+        if(it_find != end()) {
+            return std::make_pair(it_find, false);
+        }
+
+        return insert_value(ibucket_for_hash, hash, std::piecewise_construct,
+                            std::forward_as_tuple(std::forward<P>(key)),
+                            std::forward_as_tuple(std::forward<Args>(args_value)...));
+    }
+
+    template<typename P>
+    std::pair<iterator, bool> insert_impl(P&& value) {
+        const std::size_t hash = hash_key(KeySelect()(value));
+        const std::size_t ibucket_for_hash = bucket_for_hash(hash);
+
+        // Check if the key is already present
+        auto it_find = find_impl(KeySelect()(value), hash, m_buckets + ibucket_for_hash);
+        if(it_find != end()) {
+            return std::make_pair(it_find, false);
+        }
+
+
+        return insert_value(ibucket_for_hash, hash, std::forward<P>(value));
+    }
+
+    template<typename... Args>
+    std::pair<iterator, bool> insert_value(std::size_t ibucket_for_hash, std::size_t hash, Args&&... value_type_args) {
+        if((m_nb_elements - m_overflow_elements.size()) >= m_max_load_threshold_rehash) {
+            rehash(GrowthPolicy::next_bucket_count());
+            ibucket_for_hash = bucket_for_hash(hash);
+        }
+
+        std::size_t ibucket_empty = find_empty_bucket(ibucket_for_hash);
+        if(ibucket_empty < m_buckets_data.size()) {
+            do {
+                tsl_hh_assert(ibucket_empty >= ibucket_for_hash);
+
+                // Empty bucket is in range of NeighborhoodSize, use it
+                if(ibucket_empty - ibucket_for_hash < NeighborhoodSize) {
+                    auto it = insert_in_bucket(ibucket_empty, ibucket_for_hash,
+                                               hash, std::forward<Args>(value_type_args)...);
+                    return std::make_pair(iterator(it, m_buckets_data.end(), m_overflow_elements.begin()), true);
+                }
+            }
+            // else, try to swap values to get a closer empty bucket
+            while(swap_empty_bucket_closer(ibucket_empty));
+        }
+
+        // Load factor is too low or a rehash will not change the neighborhood, put the value in the overflow list
+        if(size() < m_min_load_threshold_rehash || !will_neighborhood_change_on_rehash(ibucket_for_hash)) {
+            auto it = insert_in_overflow(ibucket_for_hash, std::forward<Args>(value_type_args)...);
+            return std::make_pair(iterator(m_buckets_data.end(), m_buckets_data.end(), it), true);
+        }
+
+        rehash(GrowthPolicy::next_bucket_count());
+        ibucket_for_hash = bucket_for_hash(hash);
+
+        return insert_value(ibucket_for_hash, hash, std::forward<Args>(value_type_args)...);
+    }
+
+    /*
+     * Return true if a rehash will change the position of a key-value in the neighborhood of
+     * ibucket_neighborhood_check. In this case a rehash is needed instead of putting the value in the overflow list.
+     */
+    bool will_neighborhood_change_on_rehash(size_t ibucket_neighborhood_check) const {
+        std::size_t expand_bucket_count = GrowthPolicy::next_bucket_count();
+        GrowthPolicy expand_growth_policy(expand_bucket_count);
+
+        const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(expand_bucket_count);
+        for(size_t ibucket = ibucket_neighborhood_check;
+            ibucket < m_buckets_data.size() && (ibucket - ibucket_neighborhood_check) < NeighborhoodSize;
+            ++ibucket)
+        {
+            tsl_hh_assert(!m_buckets[ibucket].empty());
+
+            const size_t hash = use_stored_hash?
+                    m_buckets[ibucket].truncated_bucket_hash():
+                    hash_key(KeySelect()(m_buckets[ibucket].value()));
+            if(bucket_for_hash(hash) != expand_growth_policy.bucket_for_hash(hash)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /*
+     * Return the index of an empty bucket in m_buckets_data.
+     * If none, the returned index equals m_buckets_data.size()
+     */
+    std::size_t find_empty_bucket(std::size_t ibucket_start) const {
+        const std::size_t limit = std::min(ibucket_start + MAX_PROBES_FOR_EMPTY_BUCKET, m_buckets_data.size());
+        for(; ibucket_start < limit; ibucket_start++) {
+            if(m_buckets[ibucket_start].empty()) {
+                return ibucket_start;
+            }
+        }
+
+        return m_buckets_data.size();
+    }
+
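Taken together, insert_value above is the classic hopscotch displacement loop. A worked trace may help (the numbers are made up, with the default NeighborhoodSize of 62):

    // ibucket_for_hash = 10; find_empty_bucket probes forward and finds bucket 100 empty.
    // 100 - 10 >= 62, so the empty slot is out of reach of bucket 10's neighborhood.
    // swap_empty_bucket_closer then moves a value from the range [39, 100) into bucket
    // 100, pulling the hole backwards; this repeats until the hole index drops below
    // 10 + 62 = 72. Once in range, insert_in_bucket stores the value there and sets the
    // matching bit in bucket 10's neighborhood bitmap. If no swap is possible, the value
    // goes to the overflow list or a rehash is triggered, depending on the load factor
    // and will_neighborhood_change_on_rehash.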
+    /*
+     * Insert value in ibucket_empty where value originally belongs to ibucket_for_hash
+     *
+     * Return bucket iterator to ibucket_empty
+     */
+    template<typename... Args>
+    iterator_buckets insert_in_bucket(std::size_t ibucket_empty, std::size_t ibucket_for_hash,
+                                      std::size_t hash, Args&&... value_type_args)
+    {
+        tsl_hh_assert(ibucket_empty >= ibucket_for_hash);
+        tsl_hh_assert(m_buckets[ibucket_empty].empty());
+        m_buckets[ibucket_empty].set_value_of_empty_bucket(hopscotch_bucket::truncate_hash(hash), std::forward<Args>(value_type_args)...);
+
+        tsl_hh_assert(!m_buckets[ibucket_for_hash].empty());
+        m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_empty - ibucket_for_hash);
+        m_nb_elements++;
+
+        return m_buckets_data.begin() + ibucket_empty;
+    }
+
+    template<class... Args, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, Args&&... value_type_args) {
+        auto it = m_overflow_elements.emplace(m_overflow_elements.end(), std::forward<Args>(value_type_args)...);
+
+        m_buckets[ibucket_for_hash].set_overflow(true);
+        m_nb_elements++;
+
+        return it;
+    }
+
+    template<class... Args, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, Args&&... value_type_args) {
+        auto it = m_overflow_elements.emplace(std::forward<Args>(value_type_args)...).first;
+
+        m_buckets[ibucket_for_hash].set_overflow(true);
+        m_nb_elements++;
+
+        return it;
+    }
+
+    /*
+     * Try to swap the bucket ibucket_empty_in_out with a bucket preceding it while keeping the neighborhood
+     * conditions correct.
+     *
+     * If a swap was possible, the position of ibucket_empty_in_out will be closer to 0 and true will be returned.
+     */
+    bool swap_empty_bucket_closer(std::size_t& ibucket_empty_in_out) {
+        tsl_hh_assert(ibucket_empty_in_out >= NeighborhoodSize);
+        const std::size_t neighborhood_start = ibucket_empty_in_out - NeighborhoodSize + 1;
+
+        for(std::size_t to_check = neighborhood_start; to_check < ibucket_empty_in_out; to_check++) {
+            neighborhood_bitmap neighborhood_infos = m_buckets[to_check].neighborhood_infos();
+            std::size_t to_swap = to_check;
+
+            while(neighborhood_infos != 0 && to_swap < ibucket_empty_in_out) {
+                if((neighborhood_infos & 1) == 1) {
+                    tsl_hh_assert(m_buckets[ibucket_empty_in_out].empty());
+                    tsl_hh_assert(!m_buckets[to_swap].empty());
+
+                    m_buckets[to_swap].swap_value_into_empty_bucket(m_buckets[ibucket_empty_in_out]);
+
+                    tsl_hh_assert(!m_buckets[to_check].check_neighbor_presence(ibucket_empty_in_out - to_check));
+                    tsl_hh_assert(m_buckets[to_check].check_neighbor_presence(to_swap - to_check));
+
+                    m_buckets[to_check].toggle_neighbor_presence(ibucket_empty_in_out - to_check);
+                    m_buckets[to_check].toggle_neighbor_presence(to_swap - to_check);
+
+
+                    ibucket_empty_in_out = to_swap;
+
+                    return true;
+                }
+
+                to_swap++;
+                neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1);
+            }
+        }
+
+        return false;
+    }
+
+
+
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    typename U::value_type* find_value_impl(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) {
+        return const_cast<typename U::value_type*>(
+                static_cast<const hopscotch_hash*>(this)->find_value_impl(key, hash, bucket_for_hash));
+    }
+
+    /*
+     * Avoid the creation of an iterator to just get the value for operator[] and at() in maps. Faster this way.
+     *
+     * Return null if no value for the key (TODO use std::optional when available).
+     */
+    template<class K, class U = ValueSelect, typename std::enable_if<has_mapped_type<U>::value>::type* = nullptr>
+    const typename U::value_type* find_value_impl(const K& key, std::size_t hash,
+                                                  const hopscotch_bucket* bucket_for_hash) const
+    {
+        const hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash);
+        if(bucket_found != nullptr) {
+            return std::addressof(ValueSelect()(bucket_found->value()));
+        }
+
+        if(bucket_for_hash->has_overflow()) {
+            auto it_overflow = find_in_overflow(key);
+            if(it_overflow != m_overflow_elements.end()) {
+                return std::addressof(ValueSelect()(*it_overflow));
+            }
+        }
+
+        return nullptr;
+    }
+
+    template<class K>
+    size_type count_impl(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const {
+        if(find_in_buckets(key, hash, bucket_for_hash) != nullptr) {
+            return 1;
+        }
+        else if(bucket_for_hash->has_overflow() && find_in_overflow(key) != m_overflow_elements.cend()) {
+            return 1;
+        }
+        else {
+            return 0;
+        }
+    }
+
+    template<class K>
+    iterator find_impl(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) {
+        hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash);
+        if(bucket_found != nullptr) {
+            return iterator(m_buckets_data.begin() + std::distance(m_buckets_data.data(), bucket_found),
+                            m_buckets_data.end(), m_overflow_elements.begin());
+        }
+
+        if(!bucket_for_hash->has_overflow()) {
+            return end();
+        }
+
+        return iterator(m_buckets_data.end(), m_buckets_data.end(), find_in_overflow(key));
+    }
+
+    template<class K>
+    const_iterator find_impl(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const {
+        const hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash);
+        if(bucket_found != nullptr) {
+            return const_iterator(m_buckets_data.cbegin() + std::distance(m_buckets_data.data(), bucket_found),
+                                  m_buckets_data.cend(), m_overflow_elements.cbegin());
+        }
+
+        if(!bucket_for_hash->has_overflow()) {
+            return cend();
+        }
+
+
+        return const_iterator(m_buckets_data.cend(), m_buckets_data.cend(), find_in_overflow(key));
+    }
+
+    template<class K>
+    hopscotch_bucket* find_in_buckets(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) {
+        const hopscotch_bucket* bucket_found =
+                static_cast<const hopscotch_hash*>(this)->find_in_buckets(key, hash, bucket_for_hash);
+        return const_cast<hopscotch_bucket*>(bucket_found);
+    }
+
+
+    /**
+     * Return a pointer to the bucket which has the value, nullptr otherwise.
+     */
+    template<class K>
+    const hopscotch_bucket* find_in_buckets(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const {
+        (void) hash; // Avoid a warning about an unused variable when StoreHash is false.
+
+        // TODO Try to optimize the function.
+        // I tried to use ffs and __builtin_ffs functions but I could not reduce the time the function
+        // takes with -march=native
+
+        neighborhood_bitmap neighborhood_infos = bucket_for_hash->neighborhood_infos();
+        while(neighborhood_infos != 0) {
+            if((neighborhood_infos & 1) == 1) {
+                // Check StoreHash before calling bucket_hash_equal. Functionally it doesn't change anything.
+                // If StoreHash is false, bucket_hash_equal is a no-op. Avoiding the call helps
+                // GCC optimize the `hash` parameter away; it seems unable to do so without this hint.
+                if((!StoreHash || bucket_for_hash->bucket_hash_equal(hash)) &&
+                   compare_keys(KeySelect()(bucket_for_hash->value()), key))
+                {
+                    return bucket_for_hash;
+                }
+            }
+
+            ++bucket_for_hash;
+            neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1);
+        }
+
+        return nullptr;
+    }
+
+
+
+    template<class K, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow find_in_overflow(const K& key) {
+        return std::find_if(m_overflow_elements.begin(), m_overflow_elements.end(),
+                            [&](const value_type& value) {
+                                return compare_keys(key, KeySelect()(value));
+                            });
+    }
+
+    template<class K, class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    const_iterator_overflow find_in_overflow(const K& key) const {
+        return std::find_if(m_overflow_elements.cbegin(), m_overflow_elements.cend(),
+                            [&](const value_type& value) {
+                                return compare_keys(key, KeySelect()(value));
+                            });
+    }
+
+    template<class K, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    iterator_overflow find_in_overflow(const K& key) {
+        return m_overflow_elements.find(key);
+    }
+
+    template<class K, class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    const_iterator_overflow find_in_overflow(const K& key) const {
+        return m_overflow_elements.find(key);
+    }
+
+
+
+    template<class U = OverflowContainer, typename std::enable_if<!has_key_compare<U>::value>::type* = nullptr>
+    hopscotch_hash new_hopscotch_hash(size_type bucket_count) {
+        return hopscotch_hash(bucket_count, static_cast<Hash&>(*this), static_cast<KeyEqual&>(*this),
+                              get_allocator(), m_max_load_factor);
+    }
+
+    template<class U = OverflowContainer, typename std::enable_if<has_key_compare<U>::value>::type* = nullptr>
+    hopscotch_hash new_hopscotch_hash(size_type bucket_count) {
+        return hopscotch_hash(bucket_count, static_cast<Hash&>(*this), static_cast<KeyEqual&>(*this),
+                              get_allocator(), m_max_load_factor, m_overflow_elements.key_comp());
+    }
+
+public:
+    static const size_type DEFAULT_INIT_BUCKETS_SIZE = 0;
+    static constexpr float DEFAULT_MAX_LOAD_FACTOR = (NeighborhoodSize <= 30)?0.8f:0.9f;
+
+private:
+    static const std::size_t MAX_PROBES_FOR_EMPTY_BUCKET = 12*NeighborhoodSize;
+    static constexpr float MIN_LOAD_FACTOR_FOR_REHASH = 0.1f;
+
+    /**
+     * We can only reuse the stored hash on rehash if the size of the hash type is the same as the stored one,
+     * or if we use a power-of-two modulo. In the case of the power-of-two modulo, we just mask
+     * the least significant bytes; we only have to check that truncated_hash_type didn't truncate
+     * away too many bytes.
+     */
+    template<class T = size_type, typename std::enable_if<std::is_same<T, truncated_hash_type>::value>::type* = nullptr>
+    static bool USE_STORED_HASH_ON_REHASH(size_type /*bucket_count*/) {
+        return StoreHash;
+    }
+
+    template<class T = size_type, typename std::enable_if<!std::is_same<T, truncated_hash_type>::value>::type* = nullptr>
+    static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) {
+        (void) bucket_count;
+        if(StoreHash && is_power_of_two_policy<GrowthPolicy>::value) {
+            tsl_hh_assert(bucket_count > 0);
+            return (bucket_count - 1) <= std::numeric_limits<truncated_hash_type>::max();
+        }
+        else {
+            return false;
+        }
+    }
+
+    /**
+     * Return an always valid pointer to a static empty hopscotch_bucket.
+     */
+    hopscotch_bucket* static_empty_bucket_ptr() {
+        static hopscotch_bucket empty_bucket;
+        return &empty_bucket;
+    }
+
+private:
+    buckets_container_type m_buckets_data;
+    overflow_container_type m_overflow_elements;
+
+    /**
+     * Points to m_buckets_data.data() if !m_buckets_data.empty(), otherwise points to static_empty_bucket_ptr.
+     * This variable is useful to avoid the cost of checking if m_buckets_data is empty when trying
+     * to find an element.
+     *
+     * TODO Remove m_buckets_data and only use a pointer+size instead of a pointer+vector to save some space in the hopscotch_hash object.
+     */
+    hopscotch_bucket* m_buckets;
+
+    size_type m_nb_elements;
+
+    /**
+     * Minimum size of the hash table before a rehash can occur automatically (except if m_max_load_threshold_rehash is reached).
+     * If the neighborhood of a bucket is full before this minimum is reached, the elements are put into m_overflow_elements.
+     */
+    size_type m_min_load_threshold_rehash;
+
+    /**
+     * Maximum size of the hash table before a rehash occurs automatically to grow the table.
+     */
+    size_type m_max_load_threshold_rehash;
+
+    float m_max_load_factor;
+};
+
+} // end namespace detail_hopscotch_hash
+
+
+} // end namespace tsl
+
+#endif
diff --git a/benchmarks/others/hopscotch_map.h b/benchmarks/others/hopscotch_map.h
new file mode 100644
index 00000000..f9fa41f0
--- /dev/null
+++ b/benchmarks/others/hopscotch_map.h
@@ -0,0 +1,710 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2017 Thibaut Goetghebuer-Planchon <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TSL_HOPSCOTCH_MAP_H
+#define TSL_HOPSCOTCH_MAP_H
+
+
+#include <algorithm>
+#include <cstddef>
+#include <functional>
+#include <initializer_list>
+#include <list>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include "hopscotch_hash.h"
+
+
+namespace tsl {
+
+/**
+ * Implementation of a hash map using the hopscotch hashing algorithm.
+ *
+ * The Key and the value T must be either nothrow move-constructible, copy-constructible or both.
+ *
+ * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false.
+ * When StoreHash is true, 32 bits of the hash will be stored alongside the neighborhood, limiting
+ * the NeighborhoodSize to <= 30. There is no memory usage difference between
+ * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'.
+ *
+ * Storing the hash may improve performance on insert during the rehash process if the hash takes time
+ * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache miss).
+ * If used with a simple Hash and KeyEqual it may slow things down.
+ *
+ * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy.
+ *
+ * GrowthPolicy defines how the map grows and consequently how a hash value is mapped to a bucket.
+ * By default the map uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets
+ * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo.
+ * You may define your own growth policy; check tsl::power_of_two_growth_policy for the interface.
+ *
+ * If the destructors of Key or T throw an exception, the behaviour of the class is undefined.
+ *
+ * Iterator invalidation:
+ *  - clear, operator=, reserve, rehash: always invalidate the iterators.
+ *  - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators
+ *    when a displacement is needed to resolve a collision (which means that, most of the time,
+ *    insert will invalidate the iterators), or when there is a rehash.
+ *  - erase: the iterator on the erased element is the only one which becomes invalid.
+ */
+template<class Key,
+         class T,
+         class Hash = std::hash<Key>,
+         class KeyEqual = std::equal_to<Key>,
+         class Allocator = std::allocator<std::pair<Key, T>>,
+         unsigned int NeighborhoodSize = 62,
+         bool StoreHash = false,
+         class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>>
+class hopscotch_map {
+private:
+    template<typename U>
+    using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent<U>;
+
+    class KeySelect {
+    public:
+        using key_type = Key;
+
+        const key_type& operator()(const std::pair<Key, T>& key_value) const {
+            return key_value.first;
+        }
+
+        key_type& operator()(std::pair<Key, T>& key_value) {
+            return key_value.first;
+        }
+    };
+
+    class ValueSelect {
+    public:
+        using value_type = T;
+
+        const value_type& operator()(const std::pair<Key, T>& key_value) const {
+            return key_value.second;
+        }
+
+        value_type& operator()(std::pair<Key, T>& key_value) {
+            return key_value.second;
+        }
+    };
+
+
+    using overflow_container_type = std::list<std::pair<Key, T>, Allocator>;
+    using ht = detail_hopscotch_hash::hopscotch_hash<std::pair<Key, T>, KeySelect, ValueSelect,
+                                                     Hash, KeyEqual,
+                                                     Allocator, NeighborhoodSize,
+                                                     StoreHash, GrowthPolicy,
+                                                     overflow_container_type>;
+
+public:
+    using key_type = typename ht::key_type;
+    using mapped_type = T;
+    using value_type = typename ht::value_type;
+    using size_type = typename ht::size_type;
+    using difference_type = typename ht::difference_type;
+    using hasher = typename ht::hasher;
+    using key_equal = typename ht::key_equal;
+    using allocator_type = typename ht::allocator_type;
+    using reference = typename ht::reference;
+    using const_reference = typename ht::const_reference;
+    using pointer = typename ht::pointer;
+    using const_pointer = typename ht::const_pointer;
+    using iterator = typename ht::iterator;
+    using const_iterator = typename ht::const_iterator;
+
+
+
+    /*
+     * Constructors
+     */
+    hopscotch_map() : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) {
+    }
+
+    explicit hopscotch_map(size_type bucket_count,
+                           const Hash& hash = Hash(),
+                           const KeyEqual& equal = KeyEqual(),
+                           const Allocator& alloc = Allocator()) :
+                           m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR)
+    {
+    }
+
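The class documentation above describes the knobs; for orientation, here is a minimal usage sketch with the default template parameters (illustrative only, assuming hopscotch_map.h is on the include path):

    #include "hopscotch_map.h"
    #include <cstdio>
    #include <string>

    int main() {
        // Defaults: NeighborhoodSize = 62, StoreHash = false, power_of_two_growth_policy<2>.
        tsl::hopscotch_map<std::string, int> map = {{"a", 1}, {"b", 2}};
        map["c"] = 3;                   // operator[] default-constructs the value, then assigns
        map.insert_or_assign("a", 10);  // overwrites the existing mapping

        for(auto it = map.begin(); it != map.end(); ++it) {
            // it.value() yields a mutable reference to the mapped value; (*it).second is const.
            std::printf("%s -> %d\n", it->first.c_str(), it.value());
        }
    }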
hopscotch_map(size_type bucket_count, + const Allocator& alloc) : hopscotch_map(bucket_count, Hash(), KeyEqual(), alloc) + { + } + + hopscotch_map(size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : hopscotch_map(bucket_count, hash, KeyEqual(), alloc) + { + } + + explicit hopscotch_map(const Allocator& alloc) : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { + } + + template<class InputIt> + hopscotch_map(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : hopscotch_map(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template<class InputIt> + hopscotch_map(InputIt first, InputIt last, + size_type bucket_count, + const Allocator& alloc) : hopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) + { + } + + template<class InputIt> + hopscotch_map(InputIt first, InputIt last, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : hopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc) + { + } + + hopscotch_map(std::initializer_list<value_type> init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : + hopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) + { + } + + hopscotch_map(std::initializer_list<value_type> init, + size_type bucket_count, + const Allocator& alloc) : + hopscotch_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + { + } + + hopscotch_map(std::initializer_list<value_type> init, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : + hopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + { + } + + + hopscotch_map& operator=(std::initializer_list<value_type> ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + + + + std::pair<iterator, bool> insert(const value_type& value) { + return m_ht.insert(value); + } + + template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr> + std::pair<iterator, bool> insert(P&& value) { + return m_ht.insert(std::forward<P>(value)); + } + + std::pair<iterator, bool> insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + + template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return m_ht.insert(hint, std::forward<P>(value)); + } + + iterator 
insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + + template<class InputIt> + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list<value_type> ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + + + + template<class M> + std::pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj) { + return m_ht.insert_or_assign(k, std::forward<M>(obj)); + } + + template<class M> + std::pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj) { + return m_ht.insert_or_assign(std::move(k), std::forward<M>(obj)); + } + + template<class M> + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + return m_ht.insert_or_assign(hint, k, std::forward<M>(obj)); + } + + template<class M> + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + return m_ht.insert_or_assign(hint, std::move(k), std::forward<M>(obj)); + } + + + + + /** + * Due to the way elements are stored, emplace will need to move or copy the key-value once. + * The method is equivalent to insert(value_type(std::forward<Args>(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template<class... Args> + std::pair<iterator, bool> emplace(Args&&... args) { + return m_ht.emplace(std::forward<Args>(args)...); + } + + + + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. + * The method is equivalent to insert(hint, value_type(std::forward<Args>(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template<class... Args> + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward<Args>(args)...); + } + + + + + template<class... Args> + std::pair<iterator, bool> try_emplace(const key_type& k, Args&&... args) { + return m_ht.try_emplace(k, std::forward<Args>(args)...); + } + + template<class... Args> + std::pair<iterator, bool> try_emplace(key_type&& k, Args&&... args) { + return m_ht.try_emplace(std::move(k), std::forward<Args>(args)...); + } + + template<class... Args> + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + return m_ht.try_emplace(hint, k, std::forward<Args>(args)...); + } + + template<class... Args> + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + return m_ht.try_emplace(hint, std::move(k), std::forward<Args>(args)...); + } + + + + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. 
+ */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + size_type erase(const K& key) { return m_ht.erase(key); } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup to the value if you already have the hash. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + + + + void swap(hopscotch_map& other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + T& at(const Key& key) { return m_ht.at(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } + + + const T& at(const Key& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const Key& key, std::size_t precalculated_hash) + */ + const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } + + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + T& at(const K& key) { return m_ht.at(key); } + + /** + * @copydoc at(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } + + + /** + * @copydoc at(const K& key) + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + const T& at(const K& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const K& key, std::size_t precalculated_hash) + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } + + + + + T& operator[](const Key& key) { return m_ht[key]; } + T& operator[](Key&& key) { return m_ht[std::move(key)]; } + + + + + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. 
+ */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + size_type count(const K& key) const { return m_ht.count(key); } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + + + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + iterator find(const K& key) { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + /** + * @copydoc find(const K& key) + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + const_iterator find(const K& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + + + + bool contains(const Key& key) const { return m_ht.contains(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + bool contains(const Key& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. 
+ */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + bool contains(const K& key) const { return m_ht.contains(key); } + + /** + * @copydoc contains(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + bool contains(const K& key, std::size_t precalculated_hash) const { + return m_ht.contains(key, precalculated_hash); + } + + + + + std::pair<iterator, iterator> equal_range(const Key& key) { return m_ht.equal_range(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. + */ + std::pair<iterator, iterator> equal_range(const Key& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair<const_iterator, const_iterator> equal_range(const Key& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair<const_iterator, const_iterator> equal_range(const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + std::pair<iterator, iterator> equal_range(const K& key) { return m_ht.equal_range(key); } + + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Useful to speed-up the lookup if you already have the hash. 
+ */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + std::pair<iterator, iterator> equal_range(const K& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + std::pair<const_iterator, const_iterator> equal_range(const K& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template<class K, class KE = KeyEqual, typename std::enable_if<has_is_transparent<KE>::value>::type* = nullptr> + std::pair<const_iterator, const_iterator> equal_range(const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + + + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const hopscotch_map& lhs, const hopscotch_map& rhs) { + if(lhs.size() != rhs.size()) { + return false; + } + + for(const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs.first); + if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) { + return false; + } + } + + return true; + } + + friend bool operator!=(const hopscotch_map& lhs, const hopscotch_map& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(hopscotch_map& lhs, hopscotch_map& rhs) { + lhs.swap(rhs); + } + + + +private: + ht m_ht; +}; + + +/** + * Same as `tsl::hopscotch_map<Key, T, Hash, KeyEqual, Allocator, NeighborhoodSize, StoreHash, tsl::hh::prime_growth_policy>`. 
+ */ +template<class Key, + class T, + class Hash = std::hash<Key>, + class KeyEqual = std::equal_to<Key>, + class Allocator = std::allocator<std::pair<Key, T>>, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false> +using hopscotch_pg_map = hopscotch_map<Key, T, Hash, KeyEqual, Allocator, NeighborhoodSize, StoreHash, tsl::hh::prime_growth_policy>; + +} // end namespace tsl + +#endif diff --git a/benchmarks/others/khash.h b/benchmarks/others/khash.h new file mode 100644 index 00000000..61dabc4d --- /dev/null +++ b/benchmarks/others/khash.h @@ -0,0 +1,595 @@ +/* The MIT License + Copyright (c) 2008, 2009, 2011 by Attractive Chaos <[email protected]> + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + An example: +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2013-05-02 (0.2.8): + * Use quadratic probing. When the capacity is power of 2, stepping function + i*(i+1)/2 guarantees to traverse each bucket. It is better than double + hashing on cache performance and is more robust than linear probing. + In theory, double hashing should be more robust than quadratic probing. + However, my implementation is probably not for large hash tables, because + the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php + 2011-12-29 (0.2.7): + * Minor code clean up; no actual effect. + 2011-09-16 (0.2.6): + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + - http://code.google.com/p/ulib/ + - http://nothings.org/computer/judy/ + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + 2011-02-14 (0.2.5): + * Allow to declare global functions. 
+ 2009-09-26 (0.2.4): + * Improve portability + 2008-09-19 (0.2.3): + * Corrected the example + * Improved interfaces + 2008-09-11 (0.2.2): + * Improved speed a little in kh_put() + 2008-09-10 (0.2.1): + * Added kh_clear() + * Fixed a compiling error + 2008-09-02 (0.2.0): + * Changed to token concatenation which increases flexibility. + 2008-08-31 (0.1.2): + * Fixed a bug in kh_get(), which has not been tested previously. + 2008-08-31 (0.1.1): + * Added destructor +*/ + + +#ifndef __AC_KHASH_H +#define __AC_KHASH_H + +/*! + @header + Generic hash table library. + */ + +#define AC_VERSION_KHASH_H "0.2.8" + +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +/* compiler specific configuration */ + +#if UINT_MAX == 0xffffffffu +typedef unsigned int khint32_t; +#elif ULONG_MAX == 0xffffffffu +typedef unsigned long khint32_t; +#endif + +#if ULONG_MAX == ULLONG_MAX +typedef unsigned long khint64_t; +#else +typedef unsigned long long khint64_t; +#endif + +#ifndef kh_inline +#ifdef _MSC_VER +#define kh_inline __inline +#else +#define kh_inline inline +#endif +#endif /* kh_inline */ + +#ifndef klib_unused +#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__ ((__unused__)) +#else +#define klib_unused +#endif +#endif /* klib_unused */ + +typedef khint32_t khint_t; +typedef khint_t khiter_t; + +#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) +#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) +#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) +#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) +#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) +#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) +#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) + +#define __ac_fsize(m) ((m) < 16? 
1 : (m)>>4) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#ifndef kcalloc +#define kcalloc(N,Z) calloc(N,Z) +#endif +#ifndef kmalloc +#define kmalloc(Z) malloc(Z) +#endif +#ifndef krealloc +#define krealloc(P,Z) realloc(P,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + +static const double __ac_HASH_UPPER = 0.77; + +#define __KHASH_TYPE(name, khkey_t, khval_t) \ + typedef struct kh_##name##_s { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; + +#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ + extern kh_##name##_t *kh_init_##name(void); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khint_t x); + +#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + kfree((void *)h->keys); kfree(h->flags); \ + kfree((void *)h->vals); \ + kfree(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khint_t k, i, last, mask, step = 0; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + (++step)) & mask; \ + if (i == last) return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i)? h->n_buckets : i; \ + } else return 0; \ + } \ + SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. 
*/ \ + khint32_t *new_flags = 0; \ + khint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_flags) return -1; \ + memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { kfree(new_flags); return -1; } \ + h->keys = new_keys; \ + if (kh_is_map) { \ + khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + if (!new_vals) { kfree(new_flags); return -1; } \ + h->vals = new_vals; \ + } \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isdel_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t k, i, step = 0; \ + k = __hash_func(key); \ + i = k & new_mask; \ + while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ + __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + if (kh_is_map) h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + kfree(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + return 0; \ + } \ + SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) { \ + if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ + *ret = -1; return h->n_buckets; \ + } \ + } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ + *ret = -1; return h->n_buckets; \ + } \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + { \ + khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + (++step)) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if 
(__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ + { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_PROTOTYPES(name, khkey_t, khval_t) + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + __KHASH_TYPE(name, khkey_t, khval_t) \ + __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +/* --- BEGIN OF HASH FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [khint32_t] + @return The hash value [khint_t] + */ +#define kh_int_hash_func(key) (khint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [khint64_t] + @return The hash value [khint_t] + */ +#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +static kh_inline khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +static kh_inline khint_t __ac_Wang_hash(khint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*! + @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name() + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Reset a hash table without deallocating memory. 
+ @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! @function + @abstract Insert a key to the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @param r Extra return code: -1 if the operation failed; + 0 if the key is present in the hash table; + 1 if the bucket is empty (never used); 2 if the element in + the bucket has been deleted [int*] + @return Iterator to the inserted element [khint_t] + */ +#define kh_put(name, h, k, r) kh_put_##name(h, k, r) + +/*! @function + @abstract Retrieve a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] + */ +#define kh_get(name, h, k) kh_get_##name(h, k) + +/*! @function + @abstract Remove a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Iterator to the element to be deleted [khint_t] + */ +#define kh_del(name, h, k) kh_del_##name(h, k) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get value given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->vals[x]) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) ((h)->vals[x]) + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The end iterator [khint_t] + */ +#define kh_end(h) ((h)->n_buckets) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->size) + +/*! @function + @abstract Get the number of buckets in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of buckets in the hash table [khint_t] + */ +#define kh_n_buckets(h) ((h)->n_buckets) + +/*! 
@function + @abstract Iterate over the entries in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param kvar Variable to which key will be assigned + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (kvar) = kh_key(h,__i); \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/*! @function + @abstract Iterate over the values in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @param vvar Variable to which value will be assigned + @param code Block of code to execute + */ +#define kh_foreach_value(h, vvar, code) { khint_t __i; \ + for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ + if (!kh_exist(h,__i)) continue; \ + (vvar) = kh_val(h,__i); \ + code; \ + } } + +/* More convenient interfaces */ + +/*! @function + @abstract Instantiate a hash set containing integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash set containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + +#endif /* __AC_KHASH_H */ diff --git a/benchmarks/others/khashl.h b/benchmarks/others/khashl.h new file mode 100644 index 00000000..3542ba98 --- /dev/null +++ b/benchmarks/others/khashl.h @@ -0,0 +1,345 @@ +/* The MIT License + Copyright (c) 2019 by Attractive Chaos <[email protected]> + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef __AC_KHASHL_H +#define __AC_KHASHL_H + +#define AC_VERSION_KHASHL_H "0.1" + +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +/************************************ + * Compiler specific configurations * + ************************************/ + +#if UINT_MAX == 0xffffffffu +typedef unsigned int khint32_t; +#elif ULONG_MAX == 0xffffffffu +typedef unsigned long khint32_t; +#endif + +#if ULONG_MAX == ULLONG_MAX +typedef unsigned long khint64_t; +#else +typedef unsigned long long khint64_t; +#endif + +#ifndef kh_inline +#ifdef _MSC_VER +#define kh_inline __inline +#else +#define kh_inline inline +#endif +#endif /* kh_inline */ + +#ifndef klib_unused +#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) +#define klib_unused __attribute__ ((__unused__)) +#else +#define klib_unused +#endif +#endif /* klib_unused */ + +#define KH_LOCAL static kh_inline klib_unused + +typedef khint32_t khint_t; + +/****************** + * malloc aliases * + ******************/ + +#ifndef kcalloc +#define kcalloc(N,Z) calloc(N,Z) +#endif +#ifndef kmalloc +#define kmalloc(Z) malloc(Z) +#endif +#ifndef krealloc +#define krealloc(P,Z) realloc(P,Z) +#endif +#ifndef kfree +#define kfree(P) free(P) +#endif + +/**************************** + * Simple private functions * + ****************************/ + +#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) +#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) +#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) + +#define __kh_fsize(m) ((m) < 32? 
1 : (m)>>5) + +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } + +/******************* + * Hash table base * + *******************/ + +#define __KHASHL_TYPE(HType, khkey_t) \ + typedef struct { \ + khint_t bits, count; \ + khint32_t *used; \ + khkey_t *keys; \ + } HType; + +#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ + extern HType *prefix##_init(void); \ + extern void prefix##_destroy(HType *h); \ + extern void prefix##_clear(HType *h); \ + extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ + extern int prefix##_resize(HType *h, khint_t new_n_buckets); \ + extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ + extern void prefix##_del(HType *h, khint_t k); + +#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + SCOPE HType *prefix##_init(void) { \ + return (HType*)kcalloc(1, sizeof(HType)); \ + } \ + SCOPE void prefix##_destroy(HType *h) { \ + if (!h) return; \ + kfree((void *)h->keys); kfree(h->used); \ + kfree(h); \ + } \ + SCOPE void prefix##_clear(HType *h) { \ + if (h && h->used) { \ + uint32_t n_buckets = 1U << h->bits; \ + memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ + h->count = 0; \ + } \ + } + +#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { \ + khint_t i, last, n_buckets, mask; \ + if (h->keys == 0) return 0; \ + n_buckets = 1U << h->bits; \ + mask = n_buckets - 1U; \ + i = last = __kh_h2b(__hash_fn(*key), h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) return n_buckets; \ + } \ + return !__kh_used(h->used, i)? n_buckets : i; \ + } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp(h, &key); } + +#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { \ + khint32_t *new_used = 0; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) ++j; \ + new_bits = j > 2? j : 2; \ + new_n_buckets = 1U << new_bits; \ + if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return 0; /* requested size is too small */ \ + new_used = (khint32_t*)kmalloc(__kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ + memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (!new_used) return -1; /* not enough memory */ \ + n_buckets = h->keys? 
1U<<h->bits : 0U; \ + if (n_buckets < new_n_buckets) { /* expand */ \ + khkey_t *new_keys = (khkey_t*)krealloc((void*)h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (!new_keys) { kfree(new_used); return -1; } \ + h->keys = new_keys; \ + } /* otherwise shrink */ \ + new_mask = new_n_buckets - 1; \ + for (j = 0; j != n_buckets; ++j) { \ + khkey_t key; \ + if (!__kh_used(h->used, j)) continue; \ + key = h->keys[j]; \ + __kh_set_unused(h->used, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t i; \ + i = __kh_h2b(__hash_fn(key), new_bits); \ + while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ + __kh_set_used(new_used, i); \ + if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + break; \ + } \ + } \ + } \ + if (n_buckets > new_n_buckets) /* shrink the hash table */ \ + h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ + kfree(h->used); /* free the working space */ \ + h->used = new_used, h->bits = new_bits; \ + return 0; \ + } + +#define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { \ + khint_t n_buckets, i, last, mask; \ + n_buckets = h->keys? 1U<<h->bits : 0U; \ + *absent = -1; \ + if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \ + if (prefix##_resize(h, n_buckets + 1U) < 0) \ + return n_buckets; \ + n_buckets = 1U<<h->bits; \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + mask = n_buckets - 1; \ + i = last = __kh_h2b(__hash_fn(*key), h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) break; \ + } \ + if (!__kh_used(h->used, i)) { /* not present at all */ \ + h->keys[i] = *key; \ + __kh_set_used(h->used, i); \ + ++h->count; \ + *absent = 1; \ + } else *absent = 0; /* Don't touch h->keys[i] if present */ \ + return i; \ + } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp(h, &key, absent); } + +#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ + SCOPE int prefix##_del(HType *h, khint_t i) { \ + khint_t j = i, k, mask, n_buckets; \ + if (h->keys == 0) return 0; \ + n_buckets = 1U<<h->bits; \ + mask = n_buckets - 1U; \ + while (1) { \ + j = (j + 1U) & mask; \ + if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ + k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ + if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ + h->keys[i] = h->keys[j], i = j; \ + } \ + __kh_set_unused(h->used, i); \ + --h->count; \ + return 1; \ + } + +#define KHASHL_DECLARE(HType, prefix, khkey_t) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_PROTOTYPES(HType, prefix, khkey_t) + +#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) + 
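+/* Illustrative sketch of the map interface generated below ("map32" is a
+ * hypothetical instantiation name, not part of this header):
+ *
+ * KHASHL_MAP_INIT(KH_LOCAL, map32_t, map32, khint32_t, int, kh_hash_uint32, kh_eq_generic)
+ *
+ * int absent;
+ * map32_t *h = map32_init();
+ * khint_t k = map32_put(h, 5, &absent); // absent == 1 on first insertion
+ * kh_val(h, k) = 10;
+ * k = map32_get(h, 5); // k == kh_end(h) when the key is missing
+ * map32_del(h, k);
+ * map32_destroy(h);
+ */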
+/***************************** + * More convenient interface * + *****************************/ + +#define __kh_packed __attribute__ ((__packed__)) +#define __kh_cached_hash(x) ((x).hash) + +#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \ + static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } + +#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + +#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ + static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } + +#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \ + static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ + SCOPE HType *prefix##_init(void) { return 
prefix##_cm_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); } + +/************************** + * Public macro functions * + **************************/ + +#define kh_bucket(h, x) ((h)->keys[x]) +#define kh_size(h) ((h)->count) +#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U) +#define kh_end(h) kh_capacity(h) + +#define kh_key(h, x) ((h)->keys[x].key) +#define kh_val(h, x) ((h)->keys[x].val) + +/************************************** + * Common hash and equality functions * + **************************************/ + +#define kh_eq_generic(a, b) ((a) == (b)) +#define kh_eq_str(a, b) (strcmp((a), (b)) == 0) +#define kh_hash_dummy(x) ((khint_t)(x)) + +static kh_inline khint_t kh_hash_uint32(khint_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} + +static kh_inline khint_t kh_hash_uint64(khint64_t key) { + key = ~key + (key << 21); + key = key ^ key >> 24; + key = (key + (key << 3)) + (key << 8); + key = key ^ key >> 14; + key = (key + (key << 2)) + (key << 4); + key = key ^ key >> 28; + key = key + (key << 31); + return (khint_t)key; +} + +static kh_inline khint_t kh_hash_str(const char *s) { + khint_t h = (khint_t)*s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; + return h; +} + +#endif /* __AC_KHASHL_H */ diff --git a/benchmarks/others/robin_hood.hpp b/benchmarks/others/robin_hood.hpp new file mode 100644 index 00000000..2cf9e029 --- /dev/null +++ b/benchmarks/others/robin_hood.hpp @@ -0,0 +1,2366 @@ +// ______ _____ ______ _________
+// ______________ ___ /_ ___(_)_______ ___ /_ ______ ______ ______ /
+// __ ___/_ __ \__ __ \__ / __ __ \ __ __ \_ __ \_ __ \_ __ /
+// _ / / /_/ /_ /_/ /_ / _ / / / _ / / // /_/ // /_/ // /_/ /
+// /_/ \____/ /_.___/ /_/ /_/ /_/ ________/_/ /_/ \____/ \____/ \__,_/
+// _/_____/
+//
+// Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20
+// version 3.8.1
+// https://github.com/martinus/robin-hood-hashing
+//
+// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2020 Martin Ankerl <http://martin.ankerl.com>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef ROBIN_HOOD_H_INCLUDED
+#define ROBIN_HOOD_H_INCLUDED
+
+// see https://semver.org/
+#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes
+#define ROBIN_HOOD_VERSION_MINOR 8 // for adding functionality in a backwards-compatible manner
+#define ROBIN_HOOD_VERSION_PATCH 1 // for backwards-compatible bug fixes
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <memory> // only to support hash of smart pointers
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <utility>
+#if __cplusplus >= 201703L
+# include <string_view>
+#endif
+
+// #define ROBIN_HOOD_LOG_ENABLED
+#ifdef ROBIN_HOOD_LOG_ENABLED
+# include <iostream>
+# define ROBIN_HOOD_LOG(x) std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl
+#else
+# define ROBIN_HOOD_LOG(x)
+#endif
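+// e.g. ROBIN_HOOD_LOG("resize to " << n) prints "<function>@<line>: resize to ..."
+// when ROBIN_HOOD_LOG_ENABLED is defined (n being a hypothetical variable), and
+// expands to nothing otherwise.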
+
+// #define ROBIN_HOOD_TRACE_ENABLED
+#ifdef ROBIN_HOOD_TRACE_ENABLED
+# include <iostream>
+# define ROBIN_HOOD_TRACE(x) \
+ std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl
+#else
+# define ROBIN_HOOD_TRACE(x)
+#endif
+
+// #define ROBIN_HOOD_COUNT_ENABLED
+#ifdef ROBIN_HOOD_COUNT_ENABLED
+# include <iostream>
+# define ROBIN_HOOD_COUNT(x) ++counts().x;
+namespace robin_hood {
+struct Counts {
+ uint64_t shiftUp{};
+ uint64_t shiftDown{};
+};
+inline std::ostream& operator<<(std::ostream& os, Counts const& c) {
+ return os << c.shiftUp << " shiftUp" << std::endl << c.shiftDown << " shiftDown" << std::endl;
+}
+
+static Counts& counts() {
+ static Counts counts{};
+ return counts;
+}
+} // namespace robin_hood
+#else
+# define ROBIN_HOOD_COUNT(x)
+#endif
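+// e.g. ROBIN_HOOD_COUNT(shiftUp) increments counts().shiftUp when
+// ROBIN_HOOD_COUNT_ENABLED is defined, and expands to nothing otherwise.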
+
+// all non-argument macros should use this facility. See
+// https://www.fluentcpp.com/2019/05/28/better-macros-better-flags/
+#define ROBIN_HOOD(x) ROBIN_HOOD_PRIVATE_DEFINITION_##x()
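+// e.g. ROBIN_HOOD(BITNESS) expands to ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS(),
+// which evaluates to 32 or 64 (see the bitness section below).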
+
+// mark unused members with this macro
+#define ROBIN_HOOD_UNUSED(identifier)
+
+// bitness
+#if SIZE_MAX == UINT32_MAX
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 32
+#elif SIZE_MAX == UINT64_MAX
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BITNESS() 64
+#else
+# error Unsupported bitness
+#endif
+
+// endianness
+#ifdef _MSC_VER
+# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() 1
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() 0
+#else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_LITTLE_ENDIAN() \
+ (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BIG_ENDIAN() (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#endif
+
+// inline
+#ifdef _MSC_VER
+# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __declspec(noinline)
+#else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_NOINLINE() __attribute__((noinline))
+#endif
+
+// exceptions
+#if !defined(__cpp_exceptions) && !defined(__EXCEPTIONS) && !defined(_CPPUNWIND)
+# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 0
+#else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_EXCEPTIONS() 1
+#endif
+
+// count leading/trailing bits
+#if ((defined __i386 || defined __x86_64__) && defined __BMI__) || defined _M_IX86 || defined _M_X64
+# ifdef _MSC_VER
+# include <intrin.h>
+# else
+# include <x86intrin.h>
+# endif
+# if ROBIN_HOOD(BITNESS) == 32
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u32
+# else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() _tzcnt_u64
+# endif
+# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ROBIN_HOOD(CTZ)(x)
+#elif defined _MSC_VER
+# if ROBIN_HOOD(BITNESS) == 32
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward
+# else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64
+# endif
+# include <intrin.h>
+# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD))
+# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \
+ [](size_t mask) noexcept -> int { \
+ unsigned long index; \
+ return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast<int>(index) \
+ : ROBIN_HOOD(BITNESS); \
+ }(x)
+#else
+# if ROBIN_HOOD(BITNESS) == 32
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl
+# else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll
+# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll
+# endif
+# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS))
+# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS))
+#endif
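+// Sanity check (assuming a 64-bit build): on every path above,
+// ROBIN_HOOD_COUNT_TRAILING_ZEROES(0x8u) == 3 and
+// ROBIN_HOOD_COUNT_TRAILING_ZEROES(0) == ROBIN_HOOD(BITNESS).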
+
+// fallthrough
+#ifndef __has_cpp_attribute // For backwards compatibility
+# define __has_cpp_attribute(x) 0
+#endif
+#if __has_cpp_attribute(clang::fallthrough)
+# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[clang::fallthrough]]
+#elif __has_cpp_attribute(gnu::fallthrough)
+# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH() [[gnu::fallthrough]]
+#else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_FALLTHROUGH()
+#endif
+
+// likely/unlikely
+#ifdef _MSC_VER
+# define ROBIN_HOOD_LIKELY(condition) condition
+# define ROBIN_HOOD_UNLIKELY(condition) condition
+#else
+# define ROBIN_HOOD_LIKELY(condition) __builtin_expect(condition, 1)
+# define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0)
+#endif
+
+// detect if native wchar_t type is available in MSVC
+#ifdef _MSC_VER
+# ifdef _NATIVE_WCHAR_T_DEFINED
+# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1
+# else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0
+# endif
+#else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1
+#endif
+
+// workaround missing "is_trivially_copyable" in g++ < 5.0
+// See https://stackoverflow.com/a/31798726/48181
+#if defined(__GNUC__) && __GNUC__ < 5
+# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)
+#else
+# define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value
+#endif
+
+// helpers for C++ versions, see https://gcc.gnu.org/onlinedocs/cpp/Standard-Predefined-Macros.html
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX() __cplusplus
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX98() 199711L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX11() 201103L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX14() 201402L
+#define ROBIN_HOOD_PRIVATE_DEFINITION_CXX17() 201703L
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17)
+# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD() [[nodiscard]]
+#else
+# define ROBIN_HOOD_PRIVATE_DEFINITION_NODISCARD()
+#endif
+
+namespace robin_hood {
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14)
+# define ROBIN_HOOD_STD std
+#else
+
+// c++11 compatibility layer
+namespace ROBIN_HOOD_STD {
+template <class T>
+struct alignment_of
+ : std::integral_constant<std::size_t, alignof(typename std::remove_all_extents<T>::type)> {};
+
+template <class T, T... Ints>
+class integer_sequence {
+public:
+ using value_type = T;
+ static_assert(std::is_integral<value_type>::value, "not integral type");
+ static constexpr std::size_t size() noexcept {
+ return sizeof...(Ints);
+ }
+};
+template <std::size_t... Inds>
+using index_sequence = integer_sequence<std::size_t, Inds...>;
+
+namespace detail_ {
+template <class T, T Begin, T End, bool>
+struct IntSeqImpl {
+ using TValue = T;
+ static_assert(std::is_integral<TValue>::value, "not integral type");
+ static_assert(Begin >= 0 && Begin < End, "unexpected argument (Begin < 0 || End <= Begin)");
+
+ template <class, class>
+ struct IntSeqCombiner;
+
+ template <TValue... Inds0, TValue... Inds1>
+ struct IntSeqCombiner<integer_sequence<TValue, Inds0...>, integer_sequence<TValue, Inds1...>> {
+ using TResult = integer_sequence<TValue, Inds0..., Inds1...>;
+ };
+
+ using TResult =
+ typename IntSeqCombiner<typename IntSeqImpl<TValue, Begin, Begin + (End - Begin) / 2,
+ (End - Begin) / 2 == 1>::TResult,
+ typename IntSeqImpl<TValue, Begin + (End - Begin) / 2, End,
+ (End - Begin + 1) / 2 == 1>::TResult>::TResult;
+};
+
+template <class T, T Begin>
+struct IntSeqImpl<T, Begin, Begin, false> {
+ using TValue = T;
+ static_assert(std::is_integral<TValue>::value, "not integral type");
+ static_assert(Begin >= 0, "unexpected argument (Begin<0)");
+ using TResult = integer_sequence<TValue>;
+};
+
+template <class T, T Begin, T End>
+struct IntSeqImpl<T, Begin, End, true> {
+ using TValue = T;
+ static_assert(std::is_integral<TValue>::value, "not integral type");
+ static_assert(Begin >= 0, "unexpected argument (Begin<0)");
+ using TResult = integer_sequence<TValue, Begin>;
+};
+} // namespace detail_
+
+template <class T, T N>
+using make_integer_sequence = typename detail_::IntSeqImpl<T, 0, N, (N - 0) == 1>::TResult;
+
+template <std::size_t N>
+using make_index_sequence = make_integer_sequence<std::size_t, N>;
+
+template <class... T>
+using index_sequence_for = make_index_sequence<sizeof...(T)>;
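+
+// For illustration: make_index_sequence<4> here is
+// integer_sequence<std::size_t, 0, 1, 2, 3>; IntSeqImpl assembles it by
+// recursively halving the range, which keeps template recursion depth at
+// O(log N) rather than O(N).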
+
+} // namespace ROBIN_HOOD_STD
+
+#endif
+
+namespace detail {
+
+template <typename T>
+T rotr(T x, unsigned k) {
+ return (x >> k) | (x << (8U * sizeof(T) - k));
+}
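+
+// For example, rotr<uint32_t>(0x80000001, 1) == 0xC0000000. Note that callers
+// in this header always pass 0 < k < 8 * sizeof(T); k == 0 would shift by the
+// full bit width, which is undefined behavior.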
+
+// This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to
+// 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with
+// care!
+template <typename T>
+inline T reinterpret_cast_no_cast_align_warning(void* ptr) noexcept {
+ return reinterpret_cast<T>(ptr);
+}
+
+template <typename T>
+inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept {
+ return reinterpret_cast<T>(ptr);
+}
+
+// make sure this is not inlined as it is slow and dramatically enlarges code, thus making other
+// inlinings more difficult. Throws are also generally the slow path.
+template <typename E, typename... Args>
+ROBIN_HOOD(NOINLINE)
+#if ROBIN_HOOD(HAS_EXCEPTIONS)
+void doThrow(Args&&... args) {
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
+ throw E(std::forward<Args>(args)...);
+}
+#else
+void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) {
+ abort();
+}
+#endif
+
+template <typename E, typename T, typename... Args>
+T* assertNotNull(T* t, Args&&... args) {
+ if (ROBIN_HOOD_UNLIKELY(nullptr == t)) {
+ doThrow<E>(std::forward<Args>(args)...);
+ }
+ return t;
+}
+
+template <typename T>
+inline T unaligned_load(void const* ptr) noexcept {
+ // using memcpy so we don't get into unaligned load problems.
+ // the compiler should optimize this very well anyway.
+ T t;
+ std::memcpy(&t, ptr, sizeof(T));
+ return t;
+}
+
+// Allocates bulks of memory for objects of type T. This deallocates the memory in the destructor,
+// and keeps a linked list of the allocated memory around. Overhead per allocation is the size of a
+// pointer.
+template <typename T, size_t MinNumAllocs = 4, size_t MaxNumAllocs = 256>
+class BulkPoolAllocator {
+public:
+ BulkPoolAllocator() noexcept = default;
+
+ // does not copy anything, just creates a new allocator.
+ BulkPoolAllocator(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept
+ : mHead(nullptr)
+ , mListForFree(nullptr) {}
+
+ BulkPoolAllocator(BulkPoolAllocator&& o) noexcept
+ : mHead(o.mHead)
+ , mListForFree(o.mListForFree) {
+ o.mListForFree = nullptr;
+ o.mHead = nullptr;
+ }
+
+ BulkPoolAllocator& operator=(BulkPoolAllocator&& o) noexcept {
+ reset();
+ mHead = o.mHead;
+ mListForFree = o.mListForFree;
+ o.mListForFree = nullptr;
+ o.mHead = nullptr;
+ return *this;
+ }
+
+ BulkPoolAllocator&
+ // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp)
+ operator=(const BulkPoolAllocator& ROBIN_HOOD_UNUSED(o) /*unused*/) noexcept {
+ // does not do anything
+ return *this;
+ }
+
+ ~BulkPoolAllocator() noexcept {
+ reset();
+ }
+
+ // Deallocates all allocated memory.
+ void reset() noexcept {
+ while (mListForFree) {
+ T* tmp = *mListForFree;
+ free(mListForFree);
+ mListForFree = reinterpret_cast_no_cast_align_warning<T**>(tmp);
+ }
+ mHead = nullptr;
+ }
+
+ // allocates, but does NOT initialize. Use in-place new constructor, e.g.
+ // T* obj = pool.allocate();
+ // ::new (static_cast<void*>(obj)) T();
+ T* allocate() {
+ T* tmp = mHead;
+ if (!tmp) {
+ tmp = performAllocation();
+ }
+
+ mHead = *reinterpret_cast_no_cast_align_warning<T**>(tmp);
+ return tmp;
+ }
+
+ // does not actually deallocate but puts it in store.
+ // make sure you have already called the destructor! e.g. with
+ // obj->~T();
+ // pool.deallocate(obj);
+ void deallocate(T* obj) noexcept {
+ *reinterpret_cast_no_cast_align_warning<T**>(obj) = mHead;
+ mHead = obj;
+ }
+
+ // Adds an already allocated block of memory to the allocator. This allocator is from now on
+ // responsible for freeing the data (with free()). If the provided data is not large enough to
+ // make use of, it is immediately freed. Otherwise it is reused and freed in the destructor.
+ void addOrFree(void* ptr, const size_t numBytes) noexcept {
+ // calculate number of available elements in ptr
+ if (numBytes < ALIGNMENT + ALIGNED_SIZE) {
+ // not enough data for at least one element. Free and return.
+ free(ptr);
+ } else {
+ add(ptr, numBytes);
+ }
+ }
+
+ void swap(BulkPoolAllocator<T, MinNumAllocs, MaxNumAllocs>& other) noexcept {
+ using std::swap;
+ swap(mHead, other.mHead);
+ swap(mListForFree, other.mListForFree);
+ }
+
+private:
+ // iterates the list of allocated memory to calculate how many to alloc next.
+ // Recalculating this each time saves us a size_t member.
+ // This ignores the fact that memory blocks might have been added manually with addOrFree. In
+ // practice, this should not matter much.
+ ROBIN_HOOD(NODISCARD) size_t calcNumElementsToAlloc() const noexcept {
+ auto tmp = mListForFree;
+ size_t numAllocs = MinNumAllocs;
+
+ while (numAllocs * 2 <= MaxNumAllocs && tmp) {
+ auto x = reinterpret_cast<T***>(tmp);
+ tmp = *x;
+ numAllocs *= 2;
+ }
+
+ return numAllocs;
+ }
+
+ // WARNING: Underflow if numBytes < ALIGNMENT! This is guarded in addOrFree().
+ void add(void* ptr, const size_t numBytes) noexcept {
+ const size_t numElements = (numBytes - ALIGNMENT) / ALIGNED_SIZE;
+
+ auto data = reinterpret_cast<T**>(ptr);
+
+ // link free list
+ auto x = reinterpret_cast<T***>(data);
+ *x = mListForFree;
+ mListForFree = data;
+
+ // create linked list for newly allocated data
+ auto* const headT =
+ reinterpret_cast_no_cast_align_warning<T*>(reinterpret_cast<char*>(ptr) + ALIGNMENT);
+
+ auto* const head = reinterpret_cast<char*>(headT);
+
+ // Visual Studio compiler automatically unrolls this loop, which is pretty cool
+ for (size_t i = 0; i < numElements; ++i) {
+ *reinterpret_cast_no_cast_align_warning<char**>(head + i * ALIGNED_SIZE) =
+ head + (i + 1) * ALIGNED_SIZE;
+ }
+
+ // last one points to 0
+ *reinterpret_cast_no_cast_align_warning<T**>(head + (numElements - 1) * ALIGNED_SIZE) =
+ mHead;
+ mHead = headT;
+ }
+
+ // Called when no memory is available (mHead == 0).
+ // Don't inline this slow path.
+ ROBIN_HOOD(NOINLINE) T* performAllocation() {
+ size_t const numElementsToAlloc = calcNumElementsToAlloc();
+
+ // alloc new memory: [prev |T, T, ... T]
+ // std::cout << (sizeof(T*) + ALIGNED_SIZE * numElementsToAlloc) << " bytes" << std::endl;
+ size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc;
+ add(assertNotNull<std::bad_alloc>(malloc(bytes)), bytes);
+ return mHead;
+ }
+
+ // enforce byte alignment of the T's
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX14)
+ static constexpr size_t ALIGNMENT =
+ (std::max)(std::alignment_of<T>::value, std::alignment_of<T*>::value);
+#else
+ static const size_t ALIGNMENT =
+ (ROBIN_HOOD_STD::alignment_of<T>::value > ROBIN_HOOD_STD::alignment_of<T*>::value)
+ ? ROBIN_HOOD_STD::alignment_of<T>::value
+ : +ROBIN_HOOD_STD::alignment_of<T*>::value; // the unary + is a compiler workaround
+#endif
+
+ static constexpr size_t ALIGNED_SIZE = ((sizeof(T) - 1) / ALIGNMENT + 1) * ALIGNMENT;
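+ // Worked example for ALIGNED_SIZE: with sizeof(T) == 12 and ALIGNMENT == 8,
+ // ((12 - 1) / 8 + 1) * 8 == 16, so every slot fits a T and is aligned well
+ // enough to host the embedded free-list pointer while the slot is unused.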
+
+ static_assert(MinNumAllocs >= 1, "MinNumAllocs");
+ static_assert(MaxNumAllocs >= MinNumAllocs, "MaxNumAllocs");
+ static_assert(ALIGNED_SIZE >= sizeof(T*), "ALIGNED_SIZE");
+ static_assert(0 == (ALIGNED_SIZE % sizeof(T*)), "ALIGNED_SIZE mod");
+ static_assert(ALIGNMENT >= sizeof(T*), "ALIGNMENT");
+
+ T* mHead{nullptr};
+ T** mListForFree{nullptr};
+};
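+
+// Minimal usage sketch (illustrative only; Foo stands for any type):
+//
+// BulkPoolAllocator<Foo> pool;
+// Foo* f = pool.allocate(); // raw, uninitialized slot
+// ::new (static_cast<void*>(f)) Foo(); // construct in place
+// f->~Foo(); // destroy before returning the slot
+// pool.deallocate(f); // slot goes back onto the free list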
+
+template <typename T, size_t MinSize, size_t MaxSize, bool IsFlat>
+struct NodeAllocator;
+
+// dummy allocator that does nothing
+template <typename T, size_t MinSize, size_t MaxSize>
+struct NodeAllocator<T, MinSize, MaxSize, true> {
+
+ // we are not using the data, so just free it.
+ void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept {
+ free(ptr);
+ }
+};
+
+template <typename T, size_t MinSize, size_t MaxSize>
+struct NodeAllocator<T, MinSize, MaxSize, false> : public BulkPoolAllocator<T, MinSize, MaxSize> {};
+
+// dummy hash, used as a mixer when robin_hood::hash is already used
+template <typename T>
+struct identity_hash {
+ constexpr size_t operator()(T const& obj) const noexcept {
+ return static_cast<size_t>(obj);
+ }
+};
+
+// c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making
+// my own here.
+namespace swappable {
+#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17)
+using std::swap;
+template <typename T>
+struct nothrow {
+ static const bool value = noexcept(swap(std::declval<T&>(), std::declval<T&>()));
+};
+#else
+template <typename T>
+struct nothrow {
+ static const bool value = std::is_nothrow_swappable<T>::value;
+};
+#endif
+} // namespace swappable
+
+} // namespace detail
+
+struct is_transparent_tag {};
+
+// A custom pair implementation is used in the map because std::pair is not trivially copyable,
+// which means it would not be allowed to be used with std::memcpy. This struct is copyable, which
+// is also tested.
+template <typename T1, typename T2>
+struct pair {
+ using first_type = T1;
+ using second_type = T2;
+
+ template <typename U1 = T1, typename U2 = T2,
+ typename = typename std::enable_if<std::is_default_constructible<U1>::value &&
+ std::is_default_constructible<U2>::value>::type>
+ constexpr pair() noexcept(noexcept(U1()) && noexcept(U2()))
+ : first()
+ , second() {}
+
+ // pair constructors are explicit so we don't accidentally call this ctor when we don't have to.
+ explicit constexpr pair(std::pair<T1, T2> const& o) noexcept(
+ noexcept(T1(std::declval<T1 const&>())) && noexcept(T2(std::declval<T2 const&>())))
+ : first(o.first)
+ , second(o.second) {}
+
+ // pair constructors are explicit so we don't accidentally call this ctor when we don't have to.
+ explicit constexpr pair(std::pair<T1, T2>&& o) noexcept(noexcept(
+ T1(std::move(std::declval<T1&&>()))) && noexcept(T2(std::move(std::declval<T2&&>()))))
+ : first(std::move(o.first))
+ , second(std::move(o.second)) {}
+
+ constexpr pair(T1&& a, T2&& b) noexcept(noexcept(
+ T1(std::move(std::declval<T1&&>()))) && noexcept(T2(std::move(std::declval<T2&&>()))))
+ : first(std::move(a))
+ , second(std::move(b)) {}
+
+ template <typename U1, typename U2>
+ constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward<U1>(
+ std::declval<U1&&>()))) && noexcept(T2(std::forward<U2>(std::declval<U2&&>()))))
+ : first(std::forward<U1>(a))
+ , second(std::forward<U2>(b)) {}
+
+ template <typename... U1, typename... U2>
+ constexpr pair(
+ std::piecewise_construct_t /*unused*/, std::tuple<U1...> a,
+ std::tuple<U2...> b) noexcept(noexcept(pair(std::declval<std::tuple<U1...>&>(),
+ std::declval<std::tuple<U2...>&>(),
+ ROBIN_HOOD_STD::index_sequence_for<U1...>(),
+ ROBIN_HOOD_STD::index_sequence_for<U2...>())))
+ : pair(a, b, ROBIN_HOOD_STD::index_sequence_for<U1...>(),
+ ROBIN_HOOD_STD::index_sequence_for<U2...>()) {}
+
+ // constructor called from the std::piecewise_construct_t ctor
+ template <typename... U1, size_t... I1, typename... U2, size_t... I2>
+ pair(std::tuple<U1...>& a, std::tuple<U2...>& b,
+ ROBIN_HOOD_STD::index_sequence<I1...> /*unused*/,
+ ROBIN_HOOD_STD::index_sequence<I2...> /*unused*/)
+ noexcept(noexcept(T1(std::forward<U1>(std::get<I1>(std::declval<std::tuple<U1...>&>()))...)) &&
+ noexcept(T2(std::forward<U2>(std::get<I2>(std::declval<std::tuple<U2...>&>()))...)))
+ : first(std::forward<U1>(std::get<I1>(a))...)
+ , second(std::forward<U2>(std::get<I2>(b))...) {
+ // make visual studio compiler happy about warning about unused a & b.
+ // Visual studio's pair implementation disables warning 4100.
+ (void)a;
+ (void)b;
+ }
+
+ void swap(pair<T1, T2>& o) noexcept((detail::swappable::nothrow<T1>::value) &&
+ (detail::swappable::nothrow<T2>::value)) {
+ using std::swap;
+ swap(first, o.first);
+ swap(second, o.second);
+ }
+
+ T1 first; // NOLINT(misc-non-private-member-variables-in-classes)
+ T2 second; // NOLINT(misc-non-private-member-variables-in-classes)
+};
+
+template <typename A, typename B>
+inline void swap(pair<A, B>& a, pair<A, B>& b) noexcept(
+ noexcept(std::declval<pair<A, B>&>().swap(std::declval<pair<A, B>&>()))) {
+ a.swap(b);
+}
+
+template <typename A, typename B>
+inline constexpr bool operator==(pair<A, B> const& x, pair<A, B> const& y) {
+ return (x.first == y.first) && (x.second == y.second);
+}
+template <typename A, typename B>
+inline constexpr bool operator!=(pair<A, B> const& x, pair<A, B> const& y) {
+ return !(x == y);
+}
+template <typename A, typename B>
+inline constexpr bool operator<(pair<A, B> const& x, pair<A, B> const& y) noexcept(noexcept(
+ std::declval<A const&>() < std::declval<A const&>()) && noexcept(std::declval<B const&>() <
+ std::declval<B const&>())) {
+ return x.first < y.first || (!(y.first < x.first) && x.second < y.second);
+}
+template <typename A, typename B>
+inline constexpr bool operator>(pair<A, B> const& x, pair<A, B> const& y) {
+ return y < x;
+}
+template <typename A, typename B>
+inline constexpr bool operator<=(pair<A, B> const& x, pair<A, B> const& y) {
+ return !(x > y);
+}
+template <typename A, typename B>
+inline constexpr bool operator>=(pair<A, B> const& x, pair<A, B> const& y) {
+ return !(x < y);
+}
+
+inline size_t hash_bytes(void const* ptr, size_t const len) noexcept {
+ static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
+ static constexpr uint64_t seed = UINT64_C(0xe17a1465);
+ static constexpr unsigned int r = 47;
+
+ auto const* const data64 = static_cast<uint64_t const*>(ptr);
+ uint64_t h = seed ^ (len * m);
+
+ size_t const n_blocks = len / 8;
+ for (size_t i = 0; i < n_blocks; ++i) {
+ auto k = detail::unaligned_load<uint64_t>(data64 + i);
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h ^= k;
+ h *= m;
+ }
+
+ auto const* const data8 = reinterpret_cast<uint8_t const*>(data64 + n_blocks);
+ switch (len & 7U) {
+ case 7:
+ h ^= static_cast<uint64_t>(data8[6]) << 48U;
+ ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+ case 6:
+ h ^= static_cast<uint64_t>(data8[5]) << 40U;
+ ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+ case 5:
+ h ^= static_cast<uint64_t>(data8[4]) << 32U;
+ ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+ case 4:
+ h ^= static_cast<uint64_t>(data8[3]) << 24U;
+ ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+ case 3:
+ h ^= static_cast<uint64_t>(data8[2]) << 16U;
+ ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+ case 2:
+ h ^= static_cast<uint64_t>(data8[1]) << 8U;
+ ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+ case 1:
+ h ^= static_cast<uint64_t>(data8[0]);
+ h *= m;
+ ROBIN_HOOD(FALLTHROUGH); // FALLTHROUGH
+ default:
+ break;
+ }
+
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
+ return static_cast<size_t>(h);
+}
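+
+// Note: the block loop and finalizer above follow the well-known 64-bit
+// MurmurHash2 construction (multiply constant m, shift r == 47) with a fixed
+// seed, processing 8 bytes per iteration and mixing the trailing len % 8
+// bytes in the switch.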
+
+inline size_t hash_int(uint64_t x) noexcept {
+ // inspired by lemire's strongly universal hashing
+ // https://lemire.me/blog/2018/08/15/fast-strongly-universal-64-bit-hashing-everywhere/
+ //
+ // Instead of shifts, we use rotations so we don't lose any bits.
+ //
+ // Added a final multiplication with a constant for more mixing. It is most important that the
+ // lower bits are well mixed.
+ auto h1 = x * UINT64_C(0xA24BAED4963EE407);
+ auto h2 = detail::rotr(x, 32U) * UINT64_C(0x9FB21C651E98DF25);
+ auto h = detail::rotr(h1 + h2, 32U);
+ return static_cast<size_t>(h);
+}
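+
+// For intuition: with the two odd 64-bit constants above this computes
+// rotr(x * c1 + rotr(x, 32) * c2, 32). Multiplication only propagates bits
+// upwards, so also hashing the rotated input and rotating the sum lets every
+// input bit influence the low result bits, which matter most for indexing.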
+
+// A thin wrapper around std::hash, performing an additional simple mixing step of the result.
+template <typename T>
+struct hash : public std::hash<T> {
+ size_t operator()(T const& obj) const
+ noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>()))) {
+ // call base hash
+ auto result = std::hash<T>::operator()(obj);
+ // return a mixed version of that, to be safe against an identity hash
+ return hash_int(static_cast<uint64_t>(result));
+ }
+};
+
+template <typename CharT>
+struct hash<std::basic_string<CharT>> {
+ size_t operator()(std::basic_string<CharT> const& str) const noexcept {
+ return hash_bytes(str.data(), sizeof(CharT) * str.size());
+ }
+};
+
+#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17)
+template <typename CharT>
+struct hash<std::basic_string_view<CharT>> {
+ size_t operator()(std::basic_string_view<CharT> const& sv) const noexcept {
+ return hash_bytes(sv.data(), sizeof(CharT) * sv.size());
+ }
+};
+#endif
+
+template <class T>
+struct hash<T*> {
+ size_t operator()(T* ptr) const noexcept {
+ return hash_int(reinterpret_cast<size_t>(ptr));
+ }
+};
+
+template <class T>
+struct hash<std::unique_ptr<T>> {
+ size_t operator()(std::unique_ptr<T> const& ptr) const noexcept {
+ return hash_int(reinterpret_cast<size_t>(ptr.get()));
+ }
+};
+
+template <class T>
+struct hash<std::shared_ptr<T>> {
+ size_t operator()(std::shared_ptr<T> const& ptr) const noexcept {
+ return hash_int(reinterpret_cast<size_t>(ptr.get()));
+ }
+};
+
+#define ROBIN_HOOD_HASH_INT(T) \
+ template <> \
+ struct hash<T> { \
+ size_t operator()(T const& obj) const noexcept { \
+ return hash_int(static_cast<uint64_t>(obj)); \
+ } \
+ }
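+
+// For example, ROBIN_HOOD_HASH_INT(int) expands to a hash<int> specialization
+// that widens the value to uint64_t and mixes it through hash_int().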
+
+#if defined(__GNUC__) && !defined(__clang__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wuseless-cast"
+#endif
+// see https://en.cppreference.com/w/cpp/utility/hash
+ROBIN_HOOD_HASH_INT(bool);
+ROBIN_HOOD_HASH_INT(char);
+ROBIN_HOOD_HASH_INT(signed char);
+ROBIN_HOOD_HASH_INT(unsigned char);
+ROBIN_HOOD_HASH_INT(char16_t);
+ROBIN_HOOD_HASH_INT(char32_t);
+#if ROBIN_HOOD(HAS_NATIVE_WCHART)
+ROBIN_HOOD_HASH_INT(wchar_t);
+#endif
+ROBIN_HOOD_HASH_INT(short);
+ROBIN_HOOD_HASH_INT(unsigned short);
+ROBIN_HOOD_HASH_INT(int);
+ROBIN_HOOD_HASH_INT(unsigned int);
+ROBIN_HOOD_HASH_INT(long);
+ROBIN_HOOD_HASH_INT(long long);
+ROBIN_HOOD_HASH_INT(unsigned long);
+ROBIN_HOOD_HASH_INT(unsigned long long);
+#if defined(__GNUC__) && !defined(__clang__)
+# pragma GCC diagnostic pop
+#endif
+namespace detail {
+
+template <typename T>
+struct void_type {
+ using type = void;
+};
+
+template <typename T, typename = void>
+struct has_is_transparent : public std::false_type {};
+
+template <typename T>
+struct has_is_transparent<T, typename void_type<typename T::is_transparent>::type>
+ : public std::true_type {};
+
+// using wrapper classes for hash and key_equal prevents the diamond problem when the same type
+// is used. see https://stackoverflow.com/a/28771920/48181
+template <typename T>
+struct WrapHash : public T {
+ WrapHash() = default;
+ explicit WrapHash(T const& o) noexcept(noexcept(T(std::declval<T const&>())))
+ : T(o) {}
+};
+
+template <typename T>
+struct WrapKeyEqual : public T {
+ WrapKeyEqual() = default;
+ explicit WrapKeyEqual(T const& o) noexcept(noexcept(T(std::declval<T const&>())))
+ : T(o) {}
+};
+
+// A highly optimized hashmap implementation, using the Robin Hood algorithm.
+//
+// In most cases, this map should be usable as a drop-in replacement for std::unordered_map; it
+// is often about 2x faster and requires far fewer allocations.
+//
+// This implementation uses the following memory layout:
+//
+// [Node, Node, ... Node | info, info, ... infoSentinel ]
+//
+// * Node: either a DataNode that directly has the std::pair<key, val> as member,
+// or a DataNode with a pointer to std::pair<key,val>. Which DataNode representation to use
+// depends on how fast the swap() operation is. Heuristically, this is automatically chosen
+// based on sizeof(). There are always 2^n Nodes.
+//
+// * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes.
+// Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the
+// corresponding node contains data. Set to 2 means the corresponding Node is filled, but it
+// actually belongs to the previous position and was pushed out because that place is already
+// taken.
+//
+// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the
+// need for an idx variable.
+//
+// According to STL, the order of template parameters has an effect on throughput. That's why
+// I've moved the boolean to the front.
+// https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/
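+//
+// Illustrative info-byte encoding (the exact values scale with mInfoInc,
+// initially 32): a byte in [32, 63] marks an element in its home bucket,
+// [64, 95] an element displaced by one slot, and so on; the bits below
+// mInfoInc carry extra hash bits that are checked before the full key
+// comparison.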
+template <bool IsFlat, size_t MaxLoadFactor100, typename Key, typename T, typename Hash,
+ typename KeyEqual>
+class Table
+ : public WrapHash<Hash>,
+ public WrapKeyEqual<KeyEqual>,
+ detail::NodeAllocator<
+ typename std::conditional<
+ std::is_void<T>::value, Key,
+ robin_hood::pair<typename std::conditional<IsFlat, Key, Key const>::type, T>>::type,
+ 4, 16384, IsFlat> {
+public:
+ static constexpr bool is_flat = IsFlat;
+ static constexpr bool is_map = !std::is_void<T>::value;
+ static constexpr bool is_set = !is_map;
+ static constexpr bool is_transparent =
+ has_is_transparent<Hash>::value && has_is_transparent<KeyEqual>::value;
+
+ using key_type = Key;
+ using mapped_type = T;
+ using value_type = typename std::conditional<
+ is_set, Key,
+ robin_hood::pair<typename std::conditional<is_flat, Key, Key const>::type, T>>::type;
+ using size_type = size_t;
+ using hasher = Hash;
+ using key_equal = KeyEqual;
+ using Self = Table<IsFlat, MaxLoadFactor100, key_type, mapped_type, hasher, key_equal>;
+
+private:
+ static_assert(MaxLoadFactor100 > 10 && MaxLoadFactor100 < 100,
+ "MaxLoadFactor100 needs to be >10 && < 100");
+
+ using WHash = WrapHash<Hash>;
+ using WKeyEqual = WrapKeyEqual<KeyEqual>;
+
+ // configuration defaults
+
+ // make sure we have 8 elements, needed to quickly rehash mInfo
+ static constexpr size_t InitialNumElements = sizeof(uint64_t);
+ static constexpr uint32_t InitialInfoNumBits = 5;
+ static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits;
+ static constexpr uint8_t InitialInfoHashShift = sizeof(size_t) * 8 - InitialInfoNumBits;
+ using DataPool = detail::NodeAllocator<value_type, 4, 16384, IsFlat>;
+
+ // type needs to be wider than uint8_t.
+ using InfoType = uint32_t;
+
+ // DataNode ////////////////////////////////////////////////////////
+
+ // Primary template for the data node. We have special implementations for small and big
+ // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these
+ // on the heap so swap merely swaps a pointer.
+ template <typename M, bool>
+ class DataNode {};
+
+ // Small: just allocate on the stack.
+ template <typename M>
+ class DataNode<M, true> final {
+ public:
+ template <typename... Args>
+ explicit DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, Args&&... args) noexcept(
+ noexcept(value_type(std::forward<Args>(args)...)))
+ : mData(std::forward<Args>(args)...) {}
+
+ DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode<M, true>&& n) noexcept(
+ std::is_nothrow_move_constructible<value_type>::value)
+ : mData(std::move(n.mData)) {}
+
+ // doesn't do anything
+ void destroy(M& ROBIN_HOOD_UNUSED(map) /*unused*/) noexcept {}
+ void destroyDoNotDeallocate() noexcept {}
+
+ value_type const* operator->() const noexcept {
+ return &mData;
+ }
+ value_type* operator->() noexcept {
+ return &mData;
+ }
+
+ const value_type& operator*() const noexcept {
+ return mData;
+ }
+
+ value_type& operator*() noexcept {
+ return mData;
+ }
+
+ template <typename VT = value_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_map, typename VT::first_type&>::type getFirst() noexcept {
+ return mData.first;
+ }
+ template <typename VT = value_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_set, VT&>::type getFirst() noexcept {
+ return mData;
+ }
+
+ template <typename VT = value_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_map, typename VT::first_type const&>::type
+ getFirst() const noexcept {
+ return mData.first;
+ }
+ template <typename VT = value_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_set, VT const&>::type getFirst() const noexcept {
+ return mData;
+ }
+
+ template <typename MT = mapped_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_map, MT&>::type getSecond() noexcept {
+ return mData.second;
+ }
+
+ template <typename MT = mapped_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_map, MT const&>::type getSecond() const noexcept {
+ return mData.second;
+ }
+
+ void swap(DataNode<M, true>& o) noexcept(
+ noexcept(std::declval<value_type>().swap(std::declval<value_type>()))) {
+ mData.swap(o.mData);
+ }
+
+ private:
+ value_type mData;
+ };
+
+ // big object: allocate on heap.
+ template <typename M>
+ class DataNode<M, false> {
+ public:
+ template <typename... Args>
+ explicit DataNode(M& map, Args&&... args)
+ : mData(map.allocate()) {
+ ::new (static_cast<void*>(mData)) value_type(std::forward<Args>(args)...);
+ }
+
+ DataNode(M& ROBIN_HOOD_UNUSED(map) /*unused*/, DataNode<M, false>&& n) noexcept
+ : mData(std::move(n.mData)) {}
+
+ void destroy(M& map) noexcept {
+ // don't deallocate, just put it into list of datapool.
+ mData->~value_type();
+ map.deallocate(mData);
+ }
+
+ void destroyDoNotDeallocate() noexcept {
+ mData->~value_type();
+ }
+
+ value_type const* operator->() const noexcept {
+ return mData;
+ }
+
+ value_type* operator->() noexcept {
+ return mData;
+ }
+
+ const value_type& operator*() const {
+ return *mData;
+ }
+
+ value_type& operator*() {
+ return *mData;
+ }
+
+ template <typename VT = value_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_map, typename VT::first_type&>::type getFirst() noexcept {
+ return mData->first;
+ }
+ template <typename VT = value_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_set, VT&>::type getFirst() noexcept {
+ return *mData;
+ }
+
+ template <typename VT = value_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_map, typename VT::first_type const&>::type
+ getFirst() const noexcept {
+ return mData->first;
+ }
+ template <typename VT = value_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_set, VT const&>::type getFirst() const noexcept {
+ return *mData;
+ }
+
+ template <typename MT = mapped_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_map, MT&>::type getSecond() noexcept {
+ return mData->second;
+ }
+
+ template <typename MT = mapped_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<is_map, MT const&>::type getSecond() const noexcept {
+ return mData->second;
+ }
+
+ void swap(DataNode<M, false>& o) noexcept {
+ using std::swap;
+ swap(mData, o.mData);
+ }
+
+ private:
+ value_type* mData;
+ };
+
+ using Node = DataNode<Self, IsFlat>;
+
+ // helpers for doInsert: extract first entry (only const required)
+ ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept {
+ return n.getFirst();
+ }
+
+ // in case we have void mapped_type, we are not using a pair, thus we just route k through.
+ // No need to disable this because it's just not used if not applicable.
+ ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(key_type const& k) const noexcept {
+ return k;
+ }
+
+ // in case we have non-void mapped_type, we have a standard robin_hood::pair
+ template <typename Q = mapped_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<!std::is_void<Q>::value, key_type const&>::type
+ getFirstConst(value_type const& vt) const noexcept {
+ return vt.first;
+ }
+
+ // Cloner //////////////////////////////////////////////////////////
+
+ template <typename M, bool UseMemcpy>
+ struct Cloner;
+
+ // fast path: Just copy data, without allocating anything.
+ template <typename M>
+ struct Cloner<M, true> {
+ void operator()(M const& source, M& target) const {
+ auto const* const src = reinterpret_cast<char const*>(source.mKeyVals);
+ auto* tgt = reinterpret_cast<char*>(target.mKeyVals);
+ auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1);
+ std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt);
+ }
+ };
+
+ template <typename M>
+ struct Cloner<M, false> {
+ void operator()(M const& s, M& t) const {
+ auto const numElementsWithBuffer = t.calcNumElementsWithBuffer(t.mMask + 1);
+ std::copy(s.mInfo, s.mInfo + t.calcNumBytesInfo(numElementsWithBuffer), t.mInfo);
+
+ for (size_t i = 0; i < numElementsWithBuffer; ++i) {
+ if (t.mInfo[i]) {
+ ::new (static_cast<void*>(t.mKeyVals + i)) Node(t, *s.mKeyVals[i]);
+ }
+ }
+ }
+ };
+
+ // Destroyer ///////////////////////////////////////////////////////
+
+ template <typename M, bool IsFlatAndTrivial>
+ struct Destroyer {};
+
+ template <typename M>
+ struct Destroyer<M, true> {
+ void nodes(M& m) const noexcept {
+ m.mNumElements = 0;
+ }
+
+ void nodesDoNotDeallocate(M& m) const noexcept {
+ m.mNumElements = 0;
+ }
+ };
+
+ template <typename M>
+ struct Destroyer<M, false> {
+ void nodes(M& m) const noexcept {
+ m.mNumElements = 0;
+ // clear also resets mInfo to 0, that's sometimes not necessary.
+ auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1);
+
+ for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) {
+ if (0 != m.mInfo[idx]) {
+ Node& n = m.mKeyVals[idx];
+ n.destroy(m);
+ n.~Node();
+ }
+ }
+ }
+
+ void nodesDoNotDeallocate(M& m) const noexcept {
+ m.mNumElements = 0;
+ // clear also resets mInfo to 0, that's sometimes not necessary.
+ auto const numElementsWithBuffer = m.calcNumElementsWithBuffer(m.mMask + 1);
+ for (size_t idx = 0; idx < numElementsWithBuffer; ++idx) {
+ if (0 != m.mInfo[idx]) {
+ Node& n = m.mKeyVals[idx];
+ n.destroyDoNotDeallocate();
+ n.~Node();
+ }
+ }
+ }
+ };
+
+ // Iter ////////////////////////////////////////////////////////////
+
+ struct fast_forward_tag {};
+
+ // generic iterator for both const_iterator and iterator.
+ template <bool IsConst>
+ // NOLINTNEXTLINE(hicpp-special-member-functions,cppcoreguidelines-special-member-functions)
+ class Iter {
+ private:
+ using NodePtr = typename std::conditional<IsConst, Node const*, Node*>::type;
+
+ public:
+ using difference_type = std::ptrdiff_t;
+ using value_type = typename Self::value_type;
+ using reference = typename std::conditional<IsConst, value_type const&, value_type&>::type;
+ using pointer = typename std::conditional<IsConst, value_type const*, value_type*>::type;
+ using iterator_category = std::forward_iterator_tag;
+
+ // default constructed iterator can be compared to itself, but WON'T return true when
+ // compared to end().
+ Iter() = default;
+
+ // Rule of zero: nothing specified. The conversion constructor is only enabled for
+ // iterator to const_iterator, so it doesn't accidentally work as a copy ctor.
+
+ // Conversion constructor from iterator to const_iterator.
+ template <bool OtherIsConst,
+ typename = typename std::enable_if<IsConst && !OtherIsConst>::type>
+ // NOLINTNEXTLINE(hicpp-explicit-conversions)
+ Iter(Iter<OtherIsConst> const& other) noexcept
+ : mKeyVals(other.mKeyVals)
+ , mInfo(other.mInfo) {}
+
+ Iter(NodePtr valPtr, uint8_t const* infoPtr) noexcept
+ : mKeyVals(valPtr)
+ , mInfo(infoPtr) {}
+
+ Iter(NodePtr valPtr, uint8_t const* infoPtr,
+ fast_forward_tag ROBIN_HOOD_UNUSED(tag) /*unused*/) noexcept
+ : mKeyVals(valPtr)
+ , mInfo(infoPtr) {
+ fastForward();
+ }
+
+ template <bool OtherIsConst,
+ typename = typename std::enable_if<IsConst && !OtherIsConst>::type>
+ Iter& operator=(Iter<OtherIsConst> const& other) noexcept {
+ mKeyVals = other.mKeyVals;
+ mInfo = other.mInfo;
+ return *this;
+ }
+
+ // prefix increment. Undefined behavior if we are at end()!
+ Iter& operator++() noexcept {
+ mInfo++;
+ mKeyVals++;
+ fastForward();
+ return *this;
+ }
+
+ Iter operator++(int) noexcept {
+ Iter tmp = *this;
+ ++(*this);
+ return tmp;
+ }
+
+ reference operator*() const {
+ return **mKeyVals;
+ }
+
+ pointer operator->() const {
+ return &**mKeyVals;
+ }
+
+ template <bool O>
+ bool operator==(Iter<O> const& o) const noexcept {
+ return mKeyVals == o.mKeyVals;
+ }
+
+ template <bool O>
+ bool operator!=(Iter<O> const& o) const noexcept {
+ return mKeyVals != o.mKeyVals;
+ }
+
+ private:
+ // fast forward to the next non-free info byte
+ // I've tried a few variants that don't depend on intrinsics, but unfortunately they are
+ // quite a bit slower than this one. So I've reverted that change again. See map_benchmark.
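+ // It loads sizeof(size_t) info bytes at a time; on a little-endian target,
+ // count-trailing-zeroes / 8 then indexes the first non-zero byte, e.g.
+ // n == 0x200000 (info byte 2 is 0x20) advances mInfo and mKeyVals by 2.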
+ void fastForward() noexcept {
+ size_t n = 0;
+ while (0U == (n = detail::unaligned_load<size_t>(mInfo))) {
+ mInfo += sizeof(size_t);
+ mKeyVals += sizeof(size_t);
+ }
+#if ROBIN_HOOD(LITTLE_ENDIAN)
+ auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8;
+#else
+ auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8;
+#endif
+ mInfo += inc;
+ mKeyVals += inc;
+ }
+
+ friend class Table<IsFlat, MaxLoadFactor100, key_type, mapped_type, hasher, key_equal>;
+ NodePtr mKeyVals{nullptr};
+ uint8_t const* mInfo{nullptr};
+ };
+
+ ////////////////////////////////////////////////////////////////////
+
+ // Highly performance-relevant code.
+ // Lower bits are used for indexing into the array (2^n size).
+ // The upper 1-5 bits need to be a reasonably good hash, to save comparisons.
+ template <typename HashKey>
+ void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const {
+ // for a user-specified hash that is *not* robin_hood::hash, apply robin_hood::hash as
+ // an additional mixing step. This serves as bad-hash prevention in case the given data is
+ // badly mixed.
+ using Mix =
+ typename std::conditional<std::is_same<::robin_hood::hash<key_type>, hasher>::value,
+ ::robin_hood::detail::identity_hash<size_t>,
+ ::robin_hood::hash<size_t>>::type;
+ *idx = Mix{}(WHash::operator()(key));
+
+ *info = mInfoInc + static_cast<InfoType>(*idx >> mInfoHashShift);
+ *idx &= mMask;
+ }
+
+ // forwards the index by one; the trailing buffer slots make an explicit wrap-around unnecessary
+ void next(InfoType* info, size_t* idx) const noexcept {
+ *idx = *idx + 1;
+ *info += mInfoInc;
+ }
+
+ void nextWhileLess(InfoType* info, size_t* idx) const noexcept {
+ // unrolling this by hand did not bring any speedups.
+ while (*info < mInfo[*idx]) {
+ next(info, idx);
+ }
+ }
+
+ // Shift everything up by one element. Tries to move stuff around.
+ void
+ shiftUp(size_t startIdx,
+ size_t const insertion_idx) noexcept(std::is_nothrow_move_assignable<Node>::value) {
+ auto idx = startIdx;
+ ::new (static_cast<void*>(mKeyVals + idx)) Node(std::move(mKeyVals[idx - 1]));
+ while (--idx != insertion_idx) {
+ mKeyVals[idx] = std::move(mKeyVals[idx - 1]);
+ }
+
+ idx = startIdx;
+ while (idx != insertion_idx) {
+ ROBIN_HOOD_COUNT(shiftUp)
+ mInfo[idx] = static_cast<uint8_t>(mInfo[idx - 1] + mInfoInc);
+ if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) {
+ mMaxNumElementsAllowed = 0;
+ }
+ --idx;
+ }
+ }
+
+ void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable<Node>::value) {
+ // TODO(martinus) we don't need to move everything, just the last one for the same
+ // bucket.
+ mKeyVals[idx].destroy(*this);
+
+ // until we find one that is either empty or has zero offset.
+ while (mInfo[idx + 1] >= 2 * mInfoInc) {
+ ROBIN_HOOD_COUNT(shiftDown)
+ mInfo[idx] = static_cast<uint8_t>(mInfo[idx + 1] - mInfoInc);
+ mKeyVals[idx] = std::move(mKeyVals[idx + 1]);
+ ++idx;
+ }
+
+ mInfo[idx] = 0;
+ // don't destroy, we've moved it
+ // mKeyVals[idx].destroy(*this);
+ mKeyVals[idx].~Node();
+ }
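+
+ // In effect shiftDown() is "backward shift deletion": rather than leaving a
+ // tombstone, the displaced elements following the erased slot each move one
+ // slot back toward their home bucket, so probe chains never accumulate dead
+ // entries.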
+
+ // shared lookup used by find(), at() and count(): returns the index of the key if found,
+ // otherwise the index of the sentinel, which corresponds to end().
+ template <typename Other>
+ ROBIN_HOOD(NODISCARD)
+ size_t findIdx(Other const& key) const {
+ size_t idx{};
+ InfoType info{};
+ keyToIdx(key, &idx, &info);
+
+ do {
+ // unrolling this twice gives a bit of a speedup. More unrolling did not help.
+ if (info == mInfo[idx] &&
+ ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) {
+ return idx;
+ }
+ next(&info, &idx);
+ if (info == mInfo[idx] &&
+ ROBIN_HOOD_LIKELY(WKeyEqual::operator()(key, mKeyVals[idx].getFirst()))) {
+ return idx;
+ }
+ next(&info, &idx);
+ } while (info <= mInfo[idx]);
+
+ // nothing found!
+ return mMask == 0 ? 0
+ : static_cast<size_t>(std::distance(
+ mKeyVals, reinterpret_cast_no_cast_align_warning<Node*>(mInfo)));
+ }
+
+ void cloneData(const Table& o) {
+ Cloner<Table, IsFlat && ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(Node)>()(o, *this);
+ }
+
+ // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized.
+ // @return index where the element was created
+ size_t insert_move(Node&& keyval) {
+ // we don't retry, fail if overflowing
+ // don't need to check max num elements
+ if (0 == mMaxNumElementsAllowed && !try_increase_info()) {
+ throwOverflowError(); // impossible to reach LCOV_EXCL_LINE
+ }
+
+ size_t idx{};
+ InfoType info{};
+ keyToIdx(keyval.getFirst(), &idx, &info);
+
+ // skip forward. Use <= because we are certain that the element is not there.
+ while (info <= mInfo[idx]) {
+ idx = idx + 1;
+ info += mInfoInc;
+ }
+
+ // key not found, so we are now exactly where we want to insert it.
+ auto const insertion_idx = idx;
+ auto const insertion_info = static_cast<uint8_t>(info);
+ if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) {
+ mMaxNumElementsAllowed = 0;
+ }
+
+ // find an empty spot
+ while (0 != mInfo[idx]) {
+ next(&info, &idx);
+ }
+
+ auto& l = mKeyVals[insertion_idx];
+ if (idx == insertion_idx) {
+ ::new (static_cast<void*>(&l)) Node(std::move(keyval));
+ } else {
+ shiftUp(idx, insertion_idx);
+ l = std::move(keyval);
+ }
+
+ // put at empty spot
+ mInfo[insertion_idx] = insertion_info;
+
+ ++mNumElements;
+ return insertion_idx;
+ }
+
+public:
+ using iterator = Iter<false>;
+ using const_iterator = Iter<true>;
+
+ // Creates an empty hash map. Nothing is allocated yet; that happens at the first insert.
+ // This tremendously speeds up ctor & dtor of a map that never receives an element. The
+ // penalty is paid at the first insert, and not before. Lookup of this empty map works
+ // because everybody points to DummyInfoByte::b. Parameter bucket_count is dictated by the
+ // standard, but we can ignore it.
+ explicit Table(
+ size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{},
+ const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal)))
+ : WHash(h)
+ , WKeyEqual(equal) {
+ ROBIN_HOOD_TRACE(this)
+ }
+
+ template <typename Iter>
+ Table(Iter first, Iter last, size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0,
+ const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{})
+ : WHash(h)
+ , WKeyEqual(equal) {
+ ROBIN_HOOD_TRACE(this)
+ insert(first, last);
+ }
+
+ Table(std::initializer_list<value_type> initlist,
+ size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{},
+ const KeyEqual& equal = KeyEqual{})
+ : WHash(h)
+ , WKeyEqual(equal) {
+ ROBIN_HOOD_TRACE(this)
+ insert(initlist.begin(), initlist.end());
+ }
+
+ Table(Table&& o) noexcept
+ : WHash(std::move(static_cast<WHash&>(o)))
+ , WKeyEqual(std::move(static_cast<WKeyEqual&>(o)))
+ , DataPool(std::move(static_cast<DataPool&>(o))) {
+ ROBIN_HOOD_TRACE(this)
+ if (o.mMask) {
+ mKeyVals = std::move(o.mKeyVals);
+ mInfo = std::move(o.mInfo);
+ mNumElements = std::move(o.mNumElements);
+ mMask = std::move(o.mMask);
+ mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed);
+ mInfoInc = std::move(o.mInfoInc);
+ mInfoHashShift = std::move(o.mInfoHashShift);
+ // set other's mask to 0 so its destructor won't do anything
+ o.init();
+ }
+ }
+
+ Table& operator=(Table&& o) noexcept {
+ ROBIN_HOOD_TRACE(this)
+ if (&o != this) {
+ if (o.mMask) {
+ // only move stuff if the other map actually has some data
+ destroy();
+ mKeyVals = std::move(o.mKeyVals);
+ mInfo = std::move(o.mInfo);
+ mNumElements = std::move(o.mNumElements);
+ mMask = std::move(o.mMask);
+ mMaxNumElementsAllowed = std::move(o.mMaxNumElementsAllowed);
+ mInfoInc = std::move(o.mInfoInc);
+ mInfoHashShift = std::move(o.mInfoHashShift);
+ WHash::operator=(std::move(static_cast<WHash&>(o)));
+ WKeyEqual::operator=(std::move(static_cast<WKeyEqual&>(o)));
+ DataPool::operator=(std::move(static_cast<DataPool&>(o)));
+
+ o.init();
+
+ } else {
+ // nothing in the other map => just clear us.
+ clear();
+ }
+ }
+ return *this;
+ }
+
+ Table(const Table& o)
+ : WHash(static_cast<const WHash&>(o))
+ , WKeyEqual(static_cast<const WKeyEqual&>(o))
+ , DataPool(static_cast<const DataPool&>(o)) {
+ ROBIN_HOOD_TRACE(this)
+ if (!o.empty()) {
+ // not empty: create an exact copy. it is also possible to just iterate through all
+ // elements and insert them, but copying is probably faster.
+
+ auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1);
+ mKeyVals = static_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
+ malloc(calcNumBytesTotal(numElementsWithBuffer))));
+ // no need for calloc because cloneData does memcpy
+ mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
+ mNumElements = o.mNumElements;
+ mMask = o.mMask;
+ mMaxNumElementsAllowed = o.mMaxNumElementsAllowed;
+ mInfoInc = o.mInfoInc;
+ mInfoHashShift = o.mInfoHashShift;
+ cloneData(o);
+ }
+ }
+
+ // Creates a copy of the given map. Copy constructor of each entry is used.
+ // Not sure why clang-tidy thinks this doesn't handle self assignment, it does
+ // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp)
+ Table& operator=(Table const& o) {
+ ROBIN_HOOD_TRACE(this)
+ if (&o == this) {
+ // prevent assigning of itself
+ return *this;
+ }
+
+ // we keep using the old allocator and don't assign the new one, because we want to keep
+ // the memory available when it is the same size.
+ if (o.empty()) {
+ if (0 == mMask) {
+ // nothing to do, we are empty too
+ return *this;
+ }
+
+ // not empty: destroy what we have there
+ // clear also resets mInfo to 0, that's sometimes not necessary.
+ destroy();
+ init();
+ WHash::operator=(static_cast<const WHash&>(o));
+ WKeyEqual::operator=(static_cast<const WKeyEqual&>(o));
+ DataPool::operator=(static_cast<DataPool const&>(o));
+
+ return *this;
+ }
+
+ // clean up old stuff
+ Destroyer<Self, IsFlat && std::is_trivially_destructible<Node>::value>{}.nodes(*this);
+
+ if (mMask != o.mMask) {
+ // no luck: we don't have the same array size allocated, so we need to realloc.
+ if (0 != mMask) {
+ // only deallocate if we actually have data!
+ free(mKeyVals);
+ }
+
+ auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1);
+ mKeyVals = static_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
+ malloc(calcNumBytesTotal(numElementsWithBuffer))));
+
+ // no need for calloc here because cloneData performs a memcpy.
+ mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
+ // sentinel is set in cloneData
+ }
+ WHash::operator=(static_cast<const WHash&>(o));
+ WKeyEqual::operator=(static_cast<const WKeyEqual&>(o));
+ DataPool::operator=(static_cast<DataPool const&>(o));
+ mNumElements = o.mNumElements;
+ mMask = o.mMask;
+ mMaxNumElementsAllowed = o.mMaxNumElementsAllowed;
+ mInfoInc = o.mInfoInc;
+ mInfoHashShift = o.mInfoHashShift;
+ cloneData(o);
+
+ return *this;
+ }
+
+ // Swaps everything between the two maps.
+ void swap(Table& o) {
+ ROBIN_HOOD_TRACE(this)
+ using std::swap;
+ swap(o, *this);
+ }
+
+ // Clears all data, without resizing.
+ void clear() {
+ ROBIN_HOOD_TRACE(this)
+ if (empty()) {
+ // don't do anything! also important because we don't want to write to
+ // DummyInfoByte::b, even though we would just write 0 to it.
+ return;
+ }
+
+ Destroyer<Self, IsFlat && std::is_trivially_destructible<Node>::value>{}.nodes(*this);
+
+ auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1);
+ // clear everything, then set the sentinel again
+ uint8_t const z = 0;
+ std::fill(mInfo, mInfo + calcNumBytesInfo(numElementsWithBuffer), z);
+ mInfo[numElementsWithBuffer] = 1;
+
+ mInfoInc = InitialInfoInc;
+ mInfoHashShift = InitialInfoHashShift;
+ }
+
+ // Destroys the map and all its contents.
+ ~Table() {
+ ROBIN_HOOD_TRACE(this)
+ destroy();
+ }
+
+ // Checks if both tables contain the same entries. Order is irrelevant.
+ bool operator==(const Table& other) const {
+ ROBIN_HOOD_TRACE(this)
+ if (other.size() != size()) {
+ return false;
+ }
+ for (auto const& otherEntry : other) {
+ if (!has(otherEntry)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool operator!=(const Table& other) const {
+ ROBIN_HOOD_TRACE(this)
+ return !operator==(other);
+ }
+
+ template <typename Q = mapped_type>
+ typename std::enable_if<!std::is_void<Q>::value, Q&>::type operator[](const key_type& key) {
+ ROBIN_HOOD_TRACE(this)
+ return doCreateByKey(key);
+ }
+
+ template <typename Q = mapped_type>
+ typename std::enable_if<!std::is_void<Q>::value, Q&>::type operator[](key_type&& key) {
+ ROBIN_HOOD_TRACE(this)
+ return doCreateByKey(std::move(key));
+ }
+
+ template <typename Iter>
+ void insert(Iter first, Iter last) {
+ for (; first != last; ++first) {
+ // value_type ctor needed because this might be called with std::pair's
+ insert(value_type(*first));
+ }
+ }
+
+ template <typename... Args>
+ std::pair<iterator, bool> emplace(Args&&... args) {
+ ROBIN_HOOD_TRACE(this)
+ Node n{*this, std::forward<Args>(args)...};
+ auto r = doInsert(std::move(n));
+ if (!r.second) {
+ // insertion not possible: destroy node
+ // NOLINTNEXTLINE(bugprone-use-after-move)
+ n.destroy(*this);
+ }
+ return r;
+ }
+
+ template <typename... Args>
+ std::pair<iterator, bool> try_emplace(const key_type& key, Args&&... args) {
+ return try_emplace_impl(key, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ std::pair<iterator, bool> try_emplace(key_type&& key, Args&&... args) {
+ return try_emplace_impl(std::move(key), std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ std::pair<iterator, bool> try_emplace(const_iterator hint, const key_type& key,
+ Args&&... args) {
+ (void)hint;
+ return try_emplace_impl(key, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ std::pair<iterator, bool> try_emplace(const_iterator hint, key_type&& key, Args&&... args) {
+ (void)hint;
+ return try_emplace_impl(std::move(key), std::forward<Args>(args)...);
+ }
+
+ template <typename Mapped>
+ std::pair<iterator, bool> insert_or_assign(const key_type& key, Mapped&& obj) {
+ return insert_or_assign_impl(key, std::forward<Mapped>(obj));
+ }
+
+ template <typename Mapped>
+ std::pair<iterator, bool> insert_or_assign(key_type&& key, Mapped&& obj) {
+ return insert_or_assign_impl(std::move(key), std::forward<Mapped>(obj));
+ }
+
+ template <typename Mapped>
+ std::pair<iterator, bool> insert_or_assign(const_iterator hint, const key_type& key,
+ Mapped&& obj) {
+ (void)hint;
+ return insert_or_assign_impl(key, std::forward<Mapped>(obj));
+ }
+
+ template <typename Mapped>
+ std::pair<iterator, bool> insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) {
+ (void)hint;
+ return insert_or_assign_impl(std::move(key), std::forward<Mapped>(obj));
+ }
+
+ std::pair<iterator, bool> insert(const value_type& keyval) {
+ ROBIN_HOOD_TRACE(this)
+ return doInsert(keyval);
+ }
+
+ std::pair<iterator, bool> insert(value_type&& keyval) {
+ return doInsert(std::move(keyval));
+ }
+
+ // Returns 1 if key is found, 0 otherwise.
+ size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ auto kv = mKeyVals + findIdx(key);
+ if (kv != reinterpret_cast_no_cast_align_warning<Node*>(mInfo)) {
+ return 1;
+ }
+ return 0;
+ }
+
+ template <typename OtherKey, typename Self_ = Self>
+ // NOLINTNEXTLINE(modernize-use-nodiscard)
+ typename std::enable_if<Self_::is_transparent, size_t>::type count(const OtherKey& key) const {
+ ROBIN_HOOD_TRACE(this)
+ auto kv = mKeyVals + findIdx(key);
+ if (kv != reinterpret_cast_no_cast_align_warning<Node*>(mInfo)) {
+ return 1;
+ }
+ return 0;
+ }
+
+ bool contains(const key_type& key) const { // NOLINT(modernize-use-nodiscard)
+ return 1U == count(key);
+ }
+
+ template <typename OtherKey, typename Self_ = Self>
+ // NOLINTNEXTLINE(modernize-use-nodiscard)
+ typename std::enable_if<Self_::is_transparent, bool>::type contains(const OtherKey& key) const {
+ return 1U == count(key);
+ }
+
+ // Returns a reference to the value found for key.
+ // Throws std::out_of_range if element cannot be found
+ template <typename Q = mapped_type>
+ // NOLINTNEXTLINE(modernize-use-nodiscard)
+ typename std::enable_if<!std::is_void<Q>::value, Q&>::type at(key_type const& key) {
+ ROBIN_HOOD_TRACE(this)
+ auto kv = mKeyVals + findIdx(key);
+ if (kv == reinterpret_cast_no_cast_align_warning<Node*>(mInfo)) {
+ doThrow<std::out_of_range>("key not found");
+ }
+ return kv->getSecond();
+ }
+
+ // Returns a reference to the value found for key.
+ // Throws std::out_of_range if element cannot be found
+ template <typename Q = mapped_type>
+ // NOLINTNEXTLINE(modernize-use-nodiscard)
+ typename std::enable_if<!std::is_void<Q>::value, Q const&>::type at(key_type const& key) const {
+ ROBIN_HOOD_TRACE(this)
+ auto kv = mKeyVals + findIdx(key);
+ if (kv == reinterpret_cast_no_cast_align_warning<Node*>(mInfo)) {
+ doThrow<std::out_of_range>("key not found");
+ }
+ return kv->getSecond();
+ }
+
+ const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ const size_t idx = findIdx(key);
+ return const_iterator{mKeyVals + idx, mInfo + idx};
+ }
+
+ template <typename OtherKey>
+ const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const {
+ ROBIN_HOOD_TRACE(this)
+ const size_t idx = findIdx(key);
+ return const_iterator{mKeyVals + idx, mInfo + idx};
+ }
+
+ template <typename OtherKey, typename Self_ = Self>
+ typename std::enable_if<Self_::is_transparent, // NOLINT(modernize-use-nodiscard)
+ const_iterator>::type // NOLINT(modernize-use-nodiscard)
+ find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ const size_t idx = findIdx(key);
+ return const_iterator{mKeyVals + idx, mInfo + idx};
+ }
+
+ iterator find(const key_type& key) {
+ ROBIN_HOOD_TRACE(this)
+ const size_t idx = findIdx(key);
+ return iterator{mKeyVals + idx, mInfo + idx};
+ }
+
+ template <typename OtherKey>
+ iterator find(const OtherKey& key, is_transparent_tag /*unused*/) {
+ ROBIN_HOOD_TRACE(this)
+ const size_t idx = findIdx(key);
+ return iterator{mKeyVals + idx, mInfo + idx};
+ }
+
+ template <typename OtherKey, typename Self_ = Self>
+ typename std::enable_if<Self_::is_transparent, iterator>::type find(const OtherKey& key) {
+ ROBIN_HOOD_TRACE(this)
+ const size_t idx = findIdx(key);
+ return iterator{mKeyVals + idx, mInfo + idx};
+ }
+
+ iterator begin() {
+ ROBIN_HOOD_TRACE(this)
+ if (empty()) {
+ return end();
+ }
+ return iterator(mKeyVals, mInfo, fast_forward_tag{});
+ }
+ const_iterator begin() const { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ return cbegin();
+ }
+ const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ if (empty()) {
+ return cend();
+ }
+ return const_iterator(mKeyVals, mInfo, fast_forward_tag{});
+ }
+
+ iterator end() {
+ ROBIN_HOOD_TRACE(this)
+ // no need to supply valid info pointer: end() must not be dereferenced, and only node
+ // pointer is compared.
+ return iterator{reinterpret_cast_no_cast_align_warning<Node*>(mInfo), nullptr};
+ }
+ const_iterator end() const { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ return cend();
+ }
+ const_iterator cend() const { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ return const_iterator{reinterpret_cast_no_cast_align_warning<Node*>(mInfo), nullptr};
+ }
+
+ iterator erase(const_iterator pos) {
+ ROBIN_HOOD_TRACE(this)
+ // it's safe to perform a const cast here
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
+ return erase(iterator{const_cast<Node*>(pos.mKeyVals), const_cast<uint8_t*>(pos.mInfo)});
+ }
+
+ // Erases element at pos, returns iterator to the next element.
+ iterator erase(iterator pos) {
+ ROBIN_HOOD_TRACE(this)
+ // we assume that pos always points to a valid entry, and not end().
+ auto const idx = static_cast<size_t>(pos.mKeyVals - mKeyVals);
+
+ shiftDown(idx);
+ --mNumElements;
+
+ if (*pos.mInfo) {
+ // we've backward shifted, return this again
+ return pos;
+ }
+
+ // no backward shift, return next element
+ return ++pos;
+ }
+
+ size_t erase(const key_type& key) {
+ ROBIN_HOOD_TRACE(this)
+ size_t idx{};
+ InfoType info{};
+ keyToIdx(key, &idx, &info);
+
+ // check while info matches with the source idx
+ do {
+ if (info == mInfo[idx] && WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) {
+ shiftDown(idx);
+ --mNumElements;
+ return 1;
+ }
+ next(&info, &idx);
+ } while (info <= mInfo[idx]);
+
+ // nothing found to delete
+ return 0;
+ }
+
+ // reserves space for the specified number of elements. Makes sure the old data fits.
+ // exactly the same as reserve(c).
+ void rehash(size_t c) {
+ reserve(c);
+ }
+
+ // reserves space for the specified number of elements. Makes sure the old data fits.
+ // Exactly the same as rehash(c). Use rehash(0) to shrink to fit.
+ void reserve(size_t c) {
+ ROBIN_HOOD_TRACE(this)
+ auto const minElementsAllowed = (std::max)(c, mNumElements);
+ auto newSize = InitialNumElements;
+ while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) {
+ newSize *= 2;
+ }
+ if (ROBIN_HOOD_UNLIKELY(newSize == 0)) {
+ throwOverflowError();
+ }
+
+ rehashPowerOfTwo(newSize);
+ }
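+
+ // Worked example (assuming MaxLoadFactor100 == 80): reserve(100) doubles
+ // newSize 8 -> 16 -> ... -> 128, since 128 * 80 / 100 == 102 is the first
+ // capacity whose allowed element count reaches 100.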
+
+ size_type size() const noexcept { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ return mNumElements;
+ }
+
+ size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ return static_cast<size_type>(-1);
+ }
+
+ ROBIN_HOOD(NODISCARD) bool empty() const noexcept {
+ ROBIN_HOOD_TRACE(this)
+ return 0 == mNumElements;
+ }
+
+ float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ return MaxLoadFactor100 / 100.0F;
+ }
+
+    // Average number of elements per bucket. Since we allow only 1 element per
+    // bucket, this is also the fraction of buckets currently in use.
+ float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard)
+ ROBIN_HOOD_TRACE(this)
+ return static_cast<float>(size()) / static_cast<float>(mMask + 1);
+ }
+
+ ROBIN_HOOD(NODISCARD) size_t mask() const noexcept {
+ ROBIN_HOOD_TRACE(this)
+ return mMask;
+ }
+
+ ROBIN_HOOD(NODISCARD) size_t calcMaxNumElementsAllowed(size_t maxElements) const noexcept {
+ if (ROBIN_HOOD_LIKELY(maxElements <= (std::numeric_limits<size_t>::max)() / 100)) {
+ return maxElements * MaxLoadFactor100 / 100;
+ }
+
+        // we might be a bit imprecise, but since maxElements is quite large that doesn't matter
+ return (maxElements / 100) * MaxLoadFactor100;
+ }
+
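+    // Worked example (illustrative): with MaxLoadFactor100 == 80,
+    // calcMaxNumElementsAllowed(1024) == 1024 * 80 / 100 == 819. For very
+    // large inputs the operands are reordered to (maxElements / 100) * 80 so
+    // the multiplication itself cannot overflow size_t.
+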
+ ROBIN_HOOD(NODISCARD) size_t calcNumBytesInfo(size_t numElements) const noexcept {
+ // we add a uint64_t, which houses the sentinel (first byte) and padding so we can load
+ // 64bit types.
+ return numElements + sizeof(uint64_t);
+ }
+
+ ROBIN_HOOD(NODISCARD)
+ size_t calcNumElementsWithBuffer(size_t numElements) const noexcept {
+ auto maxNumElementsAllowed = calcMaxNumElementsAllowed(numElements);
+ return numElements + (std::min)(maxNumElementsAllowed, (static_cast<size_t>(0xFF)));
+ }
+
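+    // Worked example (illustrative): for numElements == 1024 and
+    // MaxLoadFactor100 == 80, the buffer is min(819, 255) == 255, giving
+    // 1279 slots in total. The extra slots let displaced elements and probe
+    // sequences run past the nominal end of the table without wrapping around.
+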
+ // calculation only allowed for 2^n values
+ ROBIN_HOOD(NODISCARD) size_t calcNumBytesTotal(size_t numElements) const {
+#if ROBIN_HOOD(BITNESS) == 64
+ return numElements * sizeof(Node) + calcNumBytesInfo(numElements);
+#else
+ // make sure we're doing 64bit operations, so we are at least safe against 32bit overflows.
+ auto const ne = static_cast<uint64_t>(numElements);
+ auto const s = static_cast<uint64_t>(sizeof(Node));
+ auto const infos = static_cast<uint64_t>(calcNumBytesInfo(numElements));
+
+ auto const total64 = ne * s + infos;
+ auto const total = static_cast<size_t>(total64);
+
+ if (ROBIN_HOOD_UNLIKELY(static_cast<uint64_t>(total) != total64)) {
+ throwOverflowError();
+ }
+ return total;
+#endif
+ }
+
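+    // Worked example (illustrative): on a 32-bit target, numElements == 2^28
+    // with sizeof(Node) == 16 yields 2^32 bytes, which wraps to 0 in 32-bit
+    // arithmetic; computing total64 in uint64_t and comparing it against the
+    // truncated size_t value catches exactly this case.
+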
+private:
+ template <typename Q = mapped_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<!std::is_void<Q>::value, bool>::type has(const value_type& e) const {
+ ROBIN_HOOD_TRACE(this)
+ auto it = find(e.first);
+ return it != end() && it->second == e.second;
+ }
+
+ template <typename Q = mapped_type>
+ ROBIN_HOOD(NODISCARD)
+ typename std::enable_if<std::is_void<Q>::value, bool>::type has(const value_type& e) const {
+ ROBIN_HOOD_TRACE(this)
+ return find(e) != end();
+ }
+
+ // reserves space for at least the specified number of elements.
+    // only works if numBuckets is a power of two
+ void rehashPowerOfTwo(size_t numBuckets) {
+ ROBIN_HOOD_TRACE(this)
+
+ Node* const oldKeyVals = mKeyVals;
+ uint8_t const* const oldInfo = mInfo;
+
+ const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1);
+
+ // resize operation: move stuff
+ init_data(numBuckets);
+ if (oldMaxElementsWithBuffer > 1) {
+ for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) {
+ if (oldInfo[i] != 0) {
+ insert_move(std::move(oldKeyVals[i]));
+ // destroy the node but DON'T destroy the data.
+ oldKeyVals[i].~Node();
+ }
+ }
+
+ // don't destroy old data: put it into the pool instead
+ DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer));
+ }
+ }
+
+ ROBIN_HOOD(NOINLINE) void throwOverflowError() const {
+#if ROBIN_HOOD(HAS_EXCEPTIONS)
+ throw std::overflow_error("robin_hood::map overflow");
+#else
+ abort();
+#endif
+ }
+
+ template <typename OtherKey, typename... Args>
+ std::pair<iterator, bool> try_emplace_impl(OtherKey&& key, Args&&... args) {
+ ROBIN_HOOD_TRACE(this)
+ auto it = find(key);
+ if (it == end()) {
+ return emplace(std::piecewise_construct,
+ std::forward_as_tuple(std::forward<OtherKey>(key)),
+ std::forward_as_tuple(std::forward<Args>(args)...));
+ }
+ return {it, false};
+ }
+
+ template <typename OtherKey, typename Mapped>
+ std::pair<iterator, bool> insert_or_assign_impl(OtherKey&& key, Mapped&& obj) {
+ ROBIN_HOOD_TRACE(this)
+ auto it = find(key);
+ if (it == end()) {
+ return emplace(std::forward<OtherKey>(key), std::forward<Mapped>(obj));
+ }
+ it->second = std::forward<Mapped>(obj);
+ return {it, false};
+ }
+
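+    // Usage sketch (illustrative; uses the public try_emplace/insert_or_assign
+    // wrappers defined earlier in this header):
+#if 0
+    robin_hood::unordered_map<std::string, int> m;
+    m.try_emplace("a", 1);      // inserts {"a", 1}
+    m.try_emplace("a", 2);      // key exists: no-op, returns {it, false}
+    m.insert_or_assign("a", 3); // key exists: overwrites, m["a"] == 3
+#endif
+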
+ void init_data(size_t max_elements) {
+ mNumElements = 0;
+ mMask = max_elements - 1;
+ mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements);
+
+ auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements);
+
+ // calloc also zeroes everything
+ mKeyVals = reinterpret_cast<Node*>(detail::assertNotNull<std::bad_alloc>(
+ calloc(1, calcNumBytesTotal(numElementsWithBuffer))));
+ mInfo = reinterpret_cast<uint8_t*>(mKeyVals + numElementsWithBuffer);
+
+ // set sentinel
+ mInfo[numElementsWithBuffer] = 1;
+
+ mInfoInc = InitialInfoInc;
+ mInfoHashShift = InitialInfoHashShift;
+ }
+
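+    // Illustrative layout of the single calloc'ed block (N is
+    // numElementsWithBuffer):
+    //
+    //   mKeyVals                     mInfo
+    //   v                            v
+    //   [ Node 0 ... Node N-1 ]      [ info 0 ... info N-1 | sentinel=1 | pad ]
+    //
+    // calloc zero-fills the info bytes, which marks every bucket as empty;
+    // the single non-zero sentinel byte terminates iteration without a
+    // separate bounds check.
+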
+ template <typename Arg, typename Q = mapped_type>
+ typename std::enable_if<!std::is_void<Q>::value, Q&>::type doCreateByKey(Arg&& key) {
+ while (true) {
+ size_t idx{};
+ InfoType info{};
+ keyToIdx(key, &idx, &info);
+ nextWhileLess(&info, &idx);
+
+ // while we potentially have a match. Can't do a do-while here because when mInfo is
+ // 0 we don't want to skip forward
+ while (info == mInfo[idx]) {
+ if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) {
+ // key already exists, do not insert.
+ return mKeyVals[idx].getSecond();
+ }
+ next(&info, &idx);
+ }
+
+ // unlikely that this evaluates to true
+ if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) {
+ increase_size();
+ continue;
+ }
+
+ // key not found, so we are now exactly where we want to insert it.
+ auto const insertion_idx = idx;
+ auto const insertion_info = info;
+ if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) {
+ mMaxNumElementsAllowed = 0;
+ }
+
+ // find an empty spot
+ while (0 != mInfo[idx]) {
+ next(&info, &idx);
+ }
+
+ auto& l = mKeyVals[insertion_idx];
+ if (idx == insertion_idx) {
+ // put at empty spot. This forwards all arguments into the node where the object
+ // is constructed exactly where it is needed.
+ ::new (static_cast<void*>(&l))
+ Node(*this, std::piecewise_construct,
+ std::forward_as_tuple(std::forward<Arg>(key)), std::forward_as_tuple());
+ } else {
+ shiftUp(idx, insertion_idx);
+ l = Node(*this, std::piecewise_construct,
+ std::forward_as_tuple(std::forward<Arg>(key)), std::forward_as_tuple());
+ }
+
+ // mKeyVals[idx].getFirst() = std::move(key);
+ mInfo[insertion_idx] = static_cast<uint8_t>(insertion_info);
+
+ ++mNumElements;
+ return mKeyVals[insertion_idx].getSecond();
+ }
+ }
+
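+    // Usage sketch (illustrative): doCreateByKey backs operator[], which
+    // default-constructs the mapped value on first access.
+#if 0
+    robin_hood::unordered_map<std::string, int> freq;
+    ++freq["word"]; // inserts {"word", 0}, then increments it to 1
+#endif
+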
+ // This is exactly the same code as operator[], except for the return values
+ template <typename Arg>
+ std::pair<iterator, bool> doInsert(Arg&& keyval) {
+ while (true) {
+ size_t idx{};
+ InfoType info{};
+ keyToIdx(getFirstConst(keyval), &idx, &info);
+ nextWhileLess(&info, &idx);
+
+ // while we potentially have a match
+ while (info == mInfo[idx]) {
+ if (WKeyEqual::operator()(getFirstConst(keyval), mKeyVals[idx].getFirst())) {
+ // key already exists, do NOT insert.
+ // see http://en.cppreference.com/w/cpp/container/unordered_map/insert
+ return std::make_pair<iterator, bool>(iterator(mKeyVals + idx, mInfo + idx),
+ false);
+ }
+ next(&info, &idx);
+ }
+
+ // unlikely that this evaluates to true
+ if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) {
+ increase_size();
+ continue;
+ }
+
+ // key not found, so we are now exactly where we want to insert it.
+ auto const insertion_idx = idx;
+ auto const insertion_info = info;
+ if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) {
+ mMaxNumElementsAllowed = 0;
+ }
+
+ // find an empty spot
+ while (0 != mInfo[idx]) {
+ next(&info, &idx);
+ }
+
+ auto& l = mKeyVals[insertion_idx];
+ if (idx == insertion_idx) {
+ ::new (static_cast<void*>(&l)) Node(*this, std::forward<Arg>(keyval));
+ } else {
+ shiftUp(idx, insertion_idx);
+ l = Node(*this, std::forward<Arg>(keyval));
+ }
+
+ // put at empty spot
+ mInfo[insertion_idx] = static_cast<uint8_t>(insertion_info);
+
+ ++mNumElements;
+ return std::make_pair(iterator(mKeyVals + insertion_idx, mInfo + insertion_idx), true);
+ }
+ }
+
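+    // Usage sketch (illustrative): as with std::unordered_map::insert, the
+    // returned bool reports whether a new element was actually inserted.
+#if 0
+    robin_hood::unordered_map<int, std::string> m;
+    auto r1 = m.insert({5, "five"}); // r1.second == true: newly inserted
+    auto r2 = m.insert({5, "FIVE"}); // r2.second == false: value unchanged
+#endif
+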
+ bool try_increase_info() {
+ ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements
+ << ", maxNumElementsAllowed="
+ << calcMaxNumElementsAllowed(mMask + 1))
+ if (mInfoInc <= 2) {
+ // need to be > 2 so that shift works (otherwise undefined behavior!)
+ return false;
+ }
+ // we got space left, try to make info smaller
+ mInfoInc = static_cast<uint8_t>(mInfoInc >> 1U);
+
+ // remove one bit of the hash, leaving more space for the distance info.
+ // This is extremely fast because we can operate on 8 bytes at once.
+ ++mInfoHashShift;
+ auto const numElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1);
+
+ for (size_t i = 0; i < numElementsWithBuffer; i += 8) {
+ auto val = unaligned_load<uint64_t>(mInfo + i);
+ val = (val >> 1U) & UINT64_C(0x7f7f7f7f7f7f7f7f);
+ std::memcpy(mInfo + i, &val, sizeof(val));
+ }
+ // update sentinel, which might have been cleared out!
+ mInfo[numElementsWithBuffer] = 1;
+
+ mMaxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1);
+ return true;
+ }
+
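+    // Worked example (illustrative): halving mInfoInc frees one bit per info
+    // byte for distance information. The loop then shifts all info bytes right
+    // by one, eight at a time: after the 64-bit shift, the mask
+    // 0x7f7f7f7f7f7f7f7f clears the bit that leaked into each byte from its
+    // higher neighbor.
+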
+ void increase_size() {
+ // nothing allocated yet? just allocate InitialNumElements
+ if (0 == mMask) {
+ init_data(InitialNumElements);
+ return;
+ }
+
+ auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1);
+ if (mNumElements < maxNumElementsAllowed && try_increase_info()) {
+ return;
+ }
+
+ ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed="
+ << maxNumElementsAllowed << ", load="
+ << (static_cast<double>(mNumElements) * 100.0 /
+ (static_cast<double>(mMask) + 1)))
+ // it seems we have a really bad hash function! don't try to resize again
+ if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) {
+ throwOverflowError();
+ }
+
+ rehashPowerOfTwo((mMask + 1) * 2);
+ }
+
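+    // Illustrative: growth happens in two stages. If the table is not yet at
+    // its allowed maximum, try_increase_info() trades hash bits for distance
+    // bits in place (no allocation); only when that fails does the table
+    // rehash into twice as many buckets.
+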
+ void destroy() {
+ if (0 == mMask) {
+ // don't deallocate!
+ return;
+ }
+
+ Destroyer<Self, IsFlat && std::is_trivially_destructible<Node>::value>{}
+ .nodesDoNotDeallocate(*this);
+
+ // This protection against not deleting mMask shouldn't be needed as it's sufficiently
+        // protected with the 0==mMask check, but I have this anyway because g++ 7 otherwise
+ // reports a compile error: attempt to free a non-heap object ‘fm’
+ // [-Werror=free-nonheap-object]
+ if (mKeyVals != reinterpret_cast_no_cast_align_warning<Node*>(&mMask)) {
+ free(mKeyVals);
+ }
+ }
+
+ void init() noexcept {
+ mKeyVals = reinterpret_cast_no_cast_align_warning<Node*>(&mMask);
+ mInfo = reinterpret_cast<uint8_t*>(&mMask);
+ mNumElements = 0;
+ mMask = 0;
+ mMaxNumElementsAllowed = 0;
+ mInfoInc = InitialInfoInc;
+ mInfoHashShift = InitialInfoHashShift;
+ }
+
+ // members are sorted so no padding occurs
+ Node* mKeyVals = reinterpret_cast_no_cast_align_warning<Node*>(&mMask); // 8 byte 8
+ uint8_t* mInfo = reinterpret_cast<uint8_t*>(&mMask); // 8 byte 16
+ size_t mNumElements = 0; // 8 byte 24
+ size_t mMask = 0; // 8 byte 32
+ size_t mMaxNumElementsAllowed = 0; // 8 byte 40
+ InfoType mInfoInc = InitialInfoInc; // 4 byte 44
+ InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 48
+ // 16 byte 56 if NodeAllocator
+};
+
+} // namespace detail
+
+// map
+
+template <typename Key, typename T, typename Hash = hash<Key>,
+ typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_flat_map = detail::Table<true, MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
+template <typename Key, typename T, typename Hash = hash<Key>,
+ typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_node_map = detail::Table<false, MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
+template <typename Key, typename T, typename Hash = hash<Key>,
+ typename KeyEqual = std::equal_to<Key>, size_t MaxLoadFactor100 = 80>
+using unordered_map =
+ detail::Table<sizeof(robin_hood::pair<Key, T>) <= sizeof(size_t) * 6 &&
+ std::is_nothrow_move_constructible<robin_hood::pair<Key, T>>::value &&
+ std::is_nothrow_move_assignable<robin_hood::pair<Key, T>>::value,
+ MaxLoadFactor100, Key, T, Hash, KeyEqual>;
+
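+// Usage sketch (illustrative; assumes this header is included under the name
+// it has in this tree, benchmarks/others/robin_hood.hpp):
+#if 0
+#include <string>
+#include "robin_hood.hpp"
+
+int main() {
+    // unordered_map resolves to the flat table when the pair is small
+    // (<= 6 machine words) and nothrow-movable, otherwise to the node table.
+    robin_hood::unordered_map<std::string, int> counts;
+    counts["apple"] += 1;
+    return 0;
+}
+#endif
+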
+// set
+
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+ size_t MaxLoadFactor100 = 80>
+using unordered_flat_set = detail::Table<true, MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+ size_t MaxLoadFactor100 = 80>
+using unordered_node_set = detail::Table<false, MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
+template <typename Key, typename Hash = hash<Key>, typename KeyEqual = std::equal_to<Key>,
+ size_t MaxLoadFactor100 = 80>
+using unordered_set = detail::Table<sizeof(Key) <= sizeof(size_t) * 6 &&
+ std::is_nothrow_move_constructible<Key>::value &&
+ std::is_nothrow_move_assignable<Key>::value,
+ MaxLoadFactor100, Key, void, Hash, KeyEqual>;
+
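+// Usage sketch (illustrative):
+#if 0
+robin_hood::unordered_set<int> seen;
+bool first_time  = seen.insert(42).second; // true: newly inserted
+bool second_time = seen.insert(42).second; // false: 42 already present
+#endif
+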
+} // namespace robin_hood
+
+#endif
diff --git a/benchmarks/others/sparsepp/spp.h b/benchmarks/others/sparsepp/spp.h new file mode 100644 index 00000000..35d58492 --- /dev/null +++ b/benchmarks/others/sparsepp/spp.h @@ -0,0 +1,4358 @@ +#if !defined(sparsepp_h_guard_) +#define sparsepp_h_guard_ + + +// ---------------------------------------------------------------------- +// Copyright (c) 2016, Gregory Popovitch - [email protected] +// All rights reserved. +// +// This work is derived from Google's sparsehash library +// +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// ---------------------------------------------------------------------- + + +// some macros for portability +// --------------------------- +// includes +// -------- +#include <cassert> +#include <cstring> +#include <string> +#include <limits> // for numeric_limits +#include <algorithm> // For swap(), eg +#include <iterator> // for iterator tags +#include <functional> // for equal_to<>, select1st<>, std::unary_function, etc +#include <memory> // for alloc, uninitialized_copy, uninitialized_fill +#include <cstdlib> // for malloc/realloc/free +#include <cstddef> // for ptrdiff_t +#include <new> // for placement new +#include <stdexcept> // For length_error +#include <utility> // for pair<> +#include <cstdio> +#include <iosfwd> +#include <ios> + +#include "spp_stdint.h" // includes spp_config.h +#include "spp_traits.h" +#include "spp_utils.h" + +#ifdef SPP_INCLUDE_SPP_ALLOC + #include "spp_dlalloc.h" +#endif + +#if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST) + #include <initializer_list> +#endif + +#if (SPP_GROUP_SIZE == 32) + #define SPP_SHIFT_ 5 + #define SPP_MASK_ 0x1F + typedef uint32_t group_bm_type; +#elif (SPP_GROUP_SIZE == 64) + #define SPP_SHIFT_ 6 + #define SPP_MASK_ 0x3F + typedef uint64_t group_bm_type; +#else + #error "SPP_GROUP_SIZE must be either 32 or 64" +#endif + +namespace spp_ { + +// ---------------------------------------------------------------------- +// U T I L F U N C T I O N S +// ---------------------------------------------------------------------- +template <class E> +inline void throw_exception(const E& exception) +{ +#if !defined(SPP_NO_EXCEPTIONS) + throw exception; +#else + assert(0); + abort(); +#endif +} + +// ---------------------------------------------------------------------- +// M U T A B L E P A I R H A C K +// turn std::pair<const K, V> into mutable std::pair<K, V> +// ---------------------------------------------------------------------- +template <class T> +struct cvt +{ + typedef T type; +}; + +template <class K, class V> +struct cvt<std::pair<const K, V> > +{ + typedef std::pair<K, V> type; +}; + +template <class K, class V> +struct cvt<const std::pair<const K, V> > +{ + typedef const std::pair<K, V> type; +}; + +// ---------------------------------------------------------------------- +// M O V E I T E R A T O R +// ---------------------------------------------------------------------- +#ifdef SPP_NO_CXX11_RVALUE_REFERENCES + #define MK_MOVE_IT(p) (p) +#else + #define MK_MOVE_IT(p) std::make_move_iterator(p) +#endif + + +// ---------------------------------------------------------------------- +// I N T E R N A L S T U F F +// ---------------------------------------------------------------------- +#ifdef SPP_NO_CXX11_STATIC_ASSERT + template <bool> struct SppCompileAssert { }; + #define SPP_COMPILE_ASSERT(expr, msg) \ + SPP_ATTRIBUTE_UNUSED typedef SppCompileAssert<(bool(expr))> spp_bogus_[bool(expr) ? 1 : -1] +#else + #define SPP_COMPILE_ASSERT static_assert +#endif + +namespace sparsehash_internal +{ + +// Adaptor methods for reading/writing data from an INPUT or OUPTUT +// variable passed to serialize() or unserialize(). For now we +// have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note +// they are pointers, unlike typical use), or else a pointer to +// something that supports a Read()/Write() method. +// +// For technical reasons, we implement read_data/write_data in two +// stages. 
The actual work is done in *_data_internal, which takes +// the stream argument twice: once as a template type, and once with +// normal type information. (We only use the second version.) We do +// this because of how C++ picks what function overload to use. If we +// implemented this the naive way: +// bool read_data(istream* is, const void* data, size_t length); +// template<typename T> read_data(T* fp, const void* data, size_t length); +// C++ would prefer the second version for every stream type except +// istream. However, we want C++ to prefer the first version for +// streams that are *subclasses* of istream, such as istringstream. +// This is not possible given the way template types are resolved. So +// we split the stream argument in two, one of which is templated and +// one of which is not. The specialized functions (like the istream +// version above) ignore the template arg and use the second, 'type' +// arg, getting subclass matching as normal. The 'catch-all' +// functions (the second version above) use the template arg to deduce +// the type, and use a second, void* arg to achieve the desired +// 'catch-all' semantics. + + // ----- low-level I/O for FILE* ---- + + template<typename Ignored> + inline bool read_data_internal(Ignored* /*unused*/, FILE* fp, + void* data, size_t length) + { + return fread(data, length, 1, fp) == 1; + } + + template<typename Ignored> + inline bool write_data_internal(Ignored* /*unused*/, FILE* fp, + const void* data, size_t length) + { + return fwrite(data, length, 1, fp) == 1; + } + + // ----- low-level I/O for iostream ---- + + // We want the caller to be responsible for #including <iostream>, not + // us, because iostream is a big header! According to the standard, + // it's only legal to delay the instantiation the way we want to if + // the istream/ostream is a template type. So we jump through hoops. + template<typename ISTREAM> + inline bool read_data_internal_for_istream(ISTREAM* fp, + void* data, size_t length) + { + return fp->read(reinterpret_cast<char*>(data), + static_cast<std::streamsize>(length)).good(); + } + template<typename Ignored> + inline bool read_data_internal(Ignored* /*unused*/, std::istream* fp, + void* data, size_t length) + { + return read_data_internal_for_istream(fp, data, length); + } + + template<typename OSTREAM> + inline bool write_data_internal_for_ostream(OSTREAM* fp, + const void* data, size_t length) + { + return fp->write(reinterpret_cast<const char*>(data), + static_cast<std::streamsize>(length)).good(); + } + template<typename Ignored> + inline bool write_data_internal(Ignored* /*unused*/, std::ostream* fp, + const void* data, size_t length) + { + return write_data_internal_for_ostream(fp, data, length); + } + + // ----- low-level I/O for custom streams ---- + + // The INPUT type needs to support a Read() method that takes a + // buffer and a length and returns the number of bytes read. + template <typename INPUT> + inline bool read_data_internal(INPUT* fp, void* /*unused*/, + void* data, size_t length) + { + return static_cast<size_t>(fp->Read(data, length)) == length; + } + + // The OUTPUT type needs to support a Write() operation that takes + // a buffer and a length and returns the number of bytes written. 
+ template <typename OUTPUT> + inline bool write_data_internal(OUTPUT* fp, void* /*unused*/, + const void* data, size_t length) + { + return static_cast<size_t>(fp->Write(data, length)) == length; + } + + // ----- low-level I/O: the public API ---- + + template <typename INPUT> + inline bool read_data(INPUT* fp, void* data, size_t length) + { + return read_data_internal(fp, fp, data, length); + } + + template <typename OUTPUT> + inline bool write_data(OUTPUT* fp, const void* data, size_t length) + { + return write_data_internal(fp, fp, data, length); + } + + // Uses read_data() and write_data() to read/write an integer. + // length is the number of bytes to read/write (which may differ + // from sizeof(IntType), allowing us to save on a 32-bit system + // and load on a 64-bit system). Excess bytes are taken to be 0. + // INPUT and OUTPUT must match legal inputs to read/write_data (above). + // -------------------------------------------------------------------- + template <typename INPUT, typename IntType> + bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) + { + *value = 0; + unsigned char byte; + // We require IntType to be unsigned or else the shifting gets all screwy. + SPP_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0), "serializing_int_requires_an_unsigned_type"); + for (size_t i = 0; i < length; ++i) + { + if (!read_data(fp, &byte, sizeof(byte))) + return false; + *value |= static_cast<IntType>(byte) << ((length - 1 - i) * 8); + } + return true; + } + + template <typename OUTPUT, typename IntType> + bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) + { + unsigned char byte; + // We require IntType to be unsigned or else the shifting gets all screwy. + SPP_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0), "serializing_int_requires_an_unsigned_type"); + for (size_t i = 0; i < length; ++i) + { + byte = (sizeof(value) <= length-1 - i) + ? static_cast<unsigned char>(0) : static_cast<unsigned char>((value >> ((length-1 - i) * 8)) & 255); + if (!write_data(fp, &byte, sizeof(byte))) return false; + } + return true; + } + + // If your keys and values are simple enough, you can pass this + // serializer to serialize()/unserialize(). "Simple enough" means + // value_type is a POD type that contains no pointers. Note, + // however, we don't try to normalize endianness. + // This is the type used for NopointerSerializer. + // --------------------------------------------------------------- + template <typename value_type> struct pod_serializer + { + template <typename INPUT> + bool operator()(INPUT* fp, value_type* value) const + { + return read_data(fp, value, sizeof(*value)); + } + + template <typename OUTPUT> + bool operator()(OUTPUT* fp, const value_type& value) const + { + return write_data(fp, &value, sizeof(value)); + } + }; + + + // Settings contains parameters for growing and shrinking the table. + // It also packages zero-size functor (ie. hasher). + // + // It does some munging of the hash value for the cases where + // the original hash function is not be very good. 
+ // --------------------------------------------------------------- + template<typename Key, typename HashFunc, typename SizeType, int HT_MIN_BUCKETS> + class sh_hashtable_settings : public HashFunc + { + private: +#ifndef SPP_MIX_HASH + template <class T, int sz> struct Mixer + { + inline T operator()(T h) const { return h; } + }; +#else + template <class T, int sz> struct Mixer + { + inline T operator()(T h) const; + }; + + template <class T> struct Mixer<T, 4> + { + inline T operator()(T h) const + { + // from Thomas Wang - https://gist.github.com/badboy/6267743 + // --------------------------------------------------------- + h = (h ^ 61) ^ (h >> 16); + h = h + (h << 3); + h = h ^ (h >> 4); + h = h * 0x27d4eb2d; + h = h ^ (h >> 15); + return h; + } + }; + + template <class T> struct Mixer<T, 8> + { + inline T operator()(T h) const + { + // from Thomas Wang - https://gist.github.com/badboy/6267743 + // --------------------------------------------------------- + h = (~h) + (h << 21); // h = (h << 21) - h - 1; + h = h ^ (h >> 24); + h = (h + (h << 3)) + (h << 8); // h * 265 + h = h ^ (h >> 14); + h = (h + (h << 2)) + (h << 4); // h * 21 + h = h ^ (h >> 28); + h = h + (h << 31); + return h; + } + }; +#endif + + public: + typedef Key key_type; + typedef HashFunc hasher; + typedef SizeType size_type; + + public: + sh_hashtable_settings(const hasher& hf, + const float ht_occupancy_flt, + const float ht_empty_flt) + : hasher(hf), + enlarge_threshold_(0), + shrink_threshold_(0), + consider_shrink_(false), + num_ht_copies_(0) + { + set_enlarge_factor(ht_occupancy_flt); + set_shrink_factor(ht_empty_flt); + } + + size_t hash(const key_type& v) const + { + size_t h = hasher::operator()(v); + Mixer<size_t, sizeof(size_t)> mixer; + + return mixer(h); + } + + float enlarge_factor() const { return enlarge_factor_; } + void set_enlarge_factor(float f) { enlarge_factor_ = f; } + float shrink_factor() const { return shrink_factor_; } + void set_shrink_factor(float f) { shrink_factor_ = f; } + + size_type enlarge_threshold() const { return enlarge_threshold_; } + void set_enlarge_threshold(size_type t) { enlarge_threshold_ = t; } + size_type shrink_threshold() const { return shrink_threshold_; } + void set_shrink_threshold(size_type t) { shrink_threshold_ = t; } + + size_type enlarge_size(size_type x) const { return static_cast<size_type>(x * enlarge_factor_); } + size_type shrink_size(size_type x) const { return static_cast<size_type>(x * shrink_factor_); } + + bool consider_shrink() const { return consider_shrink_; } + void set_consider_shrink(bool t) { consider_shrink_ = t; } + + unsigned int num_ht_copies() const { return num_ht_copies_; } + void inc_num_ht_copies() { ++num_ht_copies_; } + + // Reset the enlarge and shrink thresholds + void reset_thresholds(size_type num_buckets) + { + set_enlarge_threshold(enlarge_size(num_buckets)); + set_shrink_threshold(shrink_size(num_buckets)); + // whatever caused us to reset already considered + set_consider_shrink(false); + } + + // Caller is resposible for calling reset_threshold right after + // set_resizing_parameters. 
+ // ------------------------------------------------------------ + void set_resizing_parameters(float shrink, float grow) + { + assert(shrink >= 0); + assert(grow <= 1); + if (shrink > grow/2.0f) + shrink = grow / 2.0f; // otherwise we thrash hashtable size + set_shrink_factor(shrink); + set_enlarge_factor(grow); + } + + // This is the smallest size a hashtable can be without being too crowded + // If you like, you can give a min #buckets as well as a min #elts + // ---------------------------------------------------------------------- + size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) + { + float enlarge = enlarge_factor(); + size_type sz = HT_MIN_BUCKETS; // min buckets allowed + while (sz < min_buckets_wanted || + num_elts >= static_cast<size_type>(sz * enlarge)) + { + // This just prevents overflowing size_type, since sz can exceed + // max_size() here. + // ------------------------------------------------------------- + if (static_cast<size_type>(sz * 2) < sz) + throw_exception(std::length_error("resize overflow")); // protect against overflow + sz *= 2; + } + return sz; + } + + private: + size_type enlarge_threshold_; // table.size() * enlarge_factor + size_type shrink_threshold_; // table.size() * shrink_factor + float enlarge_factor_; // how full before resize + float shrink_factor_; // how empty before resize + bool consider_shrink_; // if we should try to shrink before next insert + + unsigned int num_ht_copies_; // num_ht_copies is a counter incremented every Copy/Move + }; + +} // namespace sparsehash_internal + +#undef SPP_COMPILE_ASSERT + +// ---------------------------------------------------------------------- +// S P A R S E T A B L E +// ---------------------------------------------------------------------- +// +// A sparsetable is a random container that implements a sparse array, +// that is, an array that uses very little memory to store unassigned +// indices (in this case, between 1-2 bits per unassigned index). For +// instance, if you allocate an array of size 5 and assign a[2] = <big +// struct>, then a[2] will take up a lot of memory but a[0], a[1], +// a[3], and a[4] will not. Array elements that have a value are +// called "assigned". Array elements that have no value yet, or have +// had their value cleared using erase() or clear(), are called +// "unassigned". +// +// Unassigned values seem to have the default value of T (see below). +// Nevertheless, there is a difference between an unassigned index and +// one explicitly assigned the value of T(). The latter is considered +// assigned. +// +// Access to an array element is constant time, as is insertion and +// deletion. Insertion and deletion may be fairly slow, however: +// because of this container's memory economy, each insert and delete +// causes a memory reallocation. +// +// NOTE: You should not test(), get(), or set() any index that is +// greater than sparsetable.size(). If you need to do that, call +// resize() first. +// +// --- Template parameters +// PARAMETER DESCRIPTION DEFAULT +// T The value of the array: the type of -- +// object that is stored in the array. +// +// Alloc: Allocator to use to allocate memory. +// +// --- Model of +// Random Access Container +// +// --- Type requirements +// T must be Copy Constructible. It need not be Assignable. +// +// --- Public base classes +// None. +// +// --- Members +// +// [*] All iterators are const in a sparsetable (though nonempty_iterators +// may not be). Use get() and set() to assign values, not iterators. 
+// +// [+] iterators are random-access iterators. nonempty_iterators are +// bidirectional iterators. + +// [*] If you shrink a sparsetable using resize(), assigned elements +// past the end of the table are removed using erase(). If you grow +// a sparsetable, new unassigned indices are created. +// +// [+] Note that operator[] returns a const reference. You must use +// set() to change the value of a table element. +// +// [!] Unassignment also calls the destructor. +// +// Iterators are invalidated whenever an item is inserted or +// deleted (ie set() or erase() is used) or when the size of +// the table changes (ie resize() or clear() is used). + + + +// --------------------------------------------------------------------------- +// Our iterator as simple as iterators can be: basically it's just +// the index into our table. Dereference, the only complicated +// thing, we punt to the table class. This just goes to show how +// much machinery STL requires to do even the most trivial tasks. +// +// A NOTE ON ASSIGNING: +// A sparse table does not actually allocate memory for entries +// that are not filled. Because of this, it becomes complicated +// to have a non-const iterator: we don't know, if the iterator points +// to a not-filled bucket, whether you plan to fill it with something +// or whether you plan to read its value (in which case you'll get +// the default bucket value). Therefore, while we can define const +// operations in a pretty 'normal' way, for non-const operations, we +// define something that returns a helper object with operator= and +// operator& that allocate a bucket lazily. We use this for table[] +// and also for regular table iterators. + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +// Our iterator as simple as iterators can be: basically it's just +// the index into our table. Dereference, the only complicated +// thing, we punt to the table class. This just goes to show how +// much machinery STL requires to do even the most trivial tasks. +// +// By templatizing over tabletype, we have one iterator type which +// we can use for both sparsetables and sparsebins. In fact it +// works on any class that allows size() and operator[] (eg vector), +// as long as it does the standard STL typedefs too (eg value_type). + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template <class tabletype> +class table_iterator +{ +public: + typedef table_iterator iterator; + + typedef std::random_access_iterator_tag iterator_category; + typedef typename tabletype::value_type value_type; + typedef typename tabletype::difference_type difference_type; + typedef typename tabletype::size_type size_type; + + explicit table_iterator(tabletype *tbl = 0, size_type p = 0) : + table(tbl), pos(p) + { } + + // Helper function to assert things are ok; eg pos is still in range + void check() const + { + assert(table); + assert(pos <= table->size()); + } + + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. 
:-) + iterator& operator+=(size_type t) { pos += t; check(); return *this; } + iterator& operator-=(size_type t) { pos -= t; check(); return *this; } + iterator& operator++() { ++pos; check(); return *this; } + iterator& operator--() { --pos; check(); return *this; } + iterator operator++(int) + { + iterator tmp(*this); // for x++ + ++pos; check(); return tmp; + } + + iterator operator--(int) + { + iterator tmp(*this); // for x-- + --pos; check(); return tmp; + } + + iterator operator+(difference_type i) const + { + iterator tmp(*this); + tmp += i; return tmp; + } + + iterator operator-(difference_type i) const + { + iterator tmp(*this); + tmp -= i; return tmp; + } + + difference_type operator-(iterator it) const + { + // for "x = it2 - it" + assert(table == it.table); + return pos - it.pos; + } + + // Comparisons. + bool operator==(const iterator& it) const + { + return table == it.table && pos == it.pos; + } + + bool operator<(const iterator& it) const + { + assert(table == it.table); // life is bad bad bad otherwise + return pos < it.pos; + } + + bool operator!=(const iterator& it) const { return !(*this == it); } + bool operator<=(const iterator& it) const { return !(it < *this); } + bool operator>(const iterator& it) const { return it < *this; } + bool operator>=(const iterator& it) const { return !(*this < it); } + + // Here's the info we actually need to be an iterator + tabletype *table; // so we can dereference and bounds-check + size_type pos; // index into the table +}; + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template <class tabletype> +class const_table_iterator +{ +public: + typedef table_iterator<tabletype> iterator; + typedef const_table_iterator const_iterator; + + typedef std::random_access_iterator_tag iterator_category; + typedef typename tabletype::value_type value_type; + typedef typename tabletype::difference_type difference_type; + typedef typename tabletype::size_type size_type; + typedef typename tabletype::const_reference reference; // we're const-only + typedef typename tabletype::const_pointer pointer; + + // The "real" constructor + const_table_iterator(const tabletype *tbl, size_type p) + : table(tbl), pos(p) { } + + // The default constructor, used when I define vars of type table::iterator + const_table_iterator() : table(NULL), pos(0) { } + + // The copy constructor, for when I say table::iterator foo = tbl.begin() + // Also converts normal iterators to const iterators // not explicit on purpose + const_table_iterator(const iterator &from) + : table(from.table), pos(from.pos) { } + + // The default destructor is fine; we don't define one + // The default operator= is fine; we don't define one + + // The main thing our iterator does is dereference. If the table entry + // we point to is empty, we return the default value type. + reference operator*() const { return (*table)[pos]; } + pointer operator->() const { return &(operator*()); } + + // Helper function to assert things are ok; eg pos is still in range + void check() const + { + assert(table); + assert(pos <= table->size()); + } + + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. 
:-) + const_iterator& operator+=(size_type t) { pos += t; check(); return *this; } + const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; } + const_iterator& operator++() { ++pos; check(); return *this; } + const_iterator& operator--() { --pos; check(); return *this; } + const_iterator operator++(int) + { + const_iterator tmp(*this); // for x++ + ++pos; check(); + return tmp; + } + const_iterator operator--(int) + { + const_iterator tmp(*this); // for x-- + --pos; check(); + return tmp; + } + const_iterator operator+(difference_type i) const + { + const_iterator tmp(*this); + tmp += i; + return tmp; + } + const_iterator operator-(difference_type i) const + { + const_iterator tmp(*this); + tmp -= i; + return tmp; + } + difference_type operator-(const_iterator it) const + { + // for "x = it2 - it" + assert(table == it.table); + return pos - it.pos; + } + reference operator[](difference_type n) const + { + return *(*this + n); // simple though not totally efficient + } + + // Comparisons. + bool operator==(const const_iterator& it) const + { + return table == it.table && pos == it.pos; + } + + bool operator<(const const_iterator& it) const + { + assert(table == it.table); // life is bad bad bad otherwise + return pos < it.pos; + } + bool operator!=(const const_iterator& it) const { return !(*this == it); } + bool operator<=(const const_iterator& it) const { return !(it < *this); } + bool operator>(const const_iterator& it) const { return it < *this; } + bool operator>=(const const_iterator& it) const { return !(*this < it); } + + // Here's the info we actually need to be an iterator + const tabletype *table; // so we can dereference and bounds-check + size_type pos; // index into the table +}; + +// --------------------------------------------------------------------------- +// This is a 2-D iterator. You specify a begin and end over a list +// of *containers*. We iterate over each container by iterating over +// it. It's actually simple: +// VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---, +// | ________________________________________________/ +// | \_> VECTOR[1].begin() --------> VECTOR[1].end() -, +// | ___________________________________________________/ +// v \_> ...... +// VECTOR.end() +// +// It's impossible to do random access on one of these things in constant +// time, so it's just a bidirectional iterator. +// +// Unfortunately, because we need to use this for a non-empty iterator, +// we use ne_begin() and ne_end() instead of begin() and end() +// (though only going across, not down). 
+// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template <class T, class row_it, class col_it, class iter_type> +class Two_d_iterator +{ +public: + typedef Two_d_iterator iterator; + typedef iter_type iterator_category; + typedef T value_type; + typedef std::ptrdiff_t difference_type; + typedef T* pointer; + typedef T& reference; + + explicit Two_d_iterator(row_it curr) : row_current(curr), col_current(0) + { + if (row_current && !row_current->is_marked()) + { + col_current = row_current->ne_begin(); + advance_past_end(); // in case cur->begin() == cur->end() + } + } + + explicit Two_d_iterator(row_it curr, col_it col) : row_current(curr), col_current(col) + { + assert(col); + } + + // The default constructor + Two_d_iterator() : row_current(0), col_current(0) { } + + // Need this explicitly so we can convert normal iterators <=> const iterators + // not explicit on purpose + // --------------------------------------------------------------------------- + template <class T2, class row_it2, class col_it2, class iter_type2> + Two_d_iterator(const Two_d_iterator<T2, row_it2, col_it2, iter_type2>& it) : + row_current (*(row_it *)&it.row_current), + col_current (*(col_it *)&it.col_current) + { } + + // The default destructor is fine; we don't define one + // The default operator= is fine; we don't define one + + value_type& operator*() const { return *(col_current); } + value_type* operator->() const { return &(operator*()); } + + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. :-) + // NOTE: this is not amortized constant time! What do we do about it? + // ------------------------------------------------------------------ + void advance_past_end() + { + // used when col_current points to end() + while (col_current == row_current->ne_end()) + { + // end of current row + // ------------------ + ++row_current; // go to beginning of next + if (!row_current->is_marked()) // col is irrelevant at end + col_current = row_current->ne_begin(); + else + break; // don't go past row_end + } + } + + friend size_t operator-(iterator l, iterator f) + { + if (f.row_current->is_marked()) + return 0; + + size_t diff(0); + while (f != l) + { + ++diff; + ++f; + } + return diff; + } + + iterator& operator++() + { + // assert(!row_current->is_marked()); // how to ++ from there? + ++col_current; + advance_past_end(); // in case col_current is at end() + return *this; + } + + iterator& operator--() + { + while (row_current->is_marked() || + col_current == row_current->ne_begin()) + { + --row_current; + col_current = row_current->ne_end(); // this is 1 too far + } + --col_current; + return *this; + } + iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } + iterator operator--(int) { iterator tmp(*this); --*this; return tmp; } + + + // Comparisons. 
+ bool operator==(const iterator& it) const + { + return (row_current == it.row_current && + (!row_current || row_current->is_marked() || col_current == it.col_current)); + } + + bool operator!=(const iterator& it) const { return !(*this == it); } + + // Here's the info we actually need to be an iterator + // These need to be public so we convert from iterator to const_iterator + // --------------------------------------------------------------------- + row_it row_current; + col_it col_current; +}; + + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template <class T, class row_it, class col_it, class iter_type, class Alloc> +class Two_d_destructive_iterator : public Two_d_iterator<T, row_it, col_it, iter_type> +{ +public: + typedef Two_d_destructive_iterator iterator; + + Two_d_destructive_iterator(Alloc &alloc, row_it curr) : + _alloc(alloc) + { + this->row_current = curr; + this->col_current = 0; + if (this->row_current && !this->row_current->is_marked()) + { + this->col_current = this->row_current->ne_begin(); + advance_past_end(); // in case cur->begin() == cur->end() + } + } + + // Arithmetic: we just do arithmetic on pos. We don't even need to + // do bounds checking, since STL doesn't consider that its job. :-) + // NOTE: this is not amortized constant time! What do we do about it? + // ------------------------------------------------------------------ + void advance_past_end() + { + // used when col_current points to end() + while (this->col_current == this->row_current->ne_end()) + { + this->row_current->clear(_alloc, true); // This is what differs from non-destructive iterators above + + // end of current row + // ------------------ + ++this->row_current; // go to beginning of next + if (!this->row_current->is_marked()) // col is irrelevant at end + this->col_current = this->row_current->ne_begin(); + else + break; // don't go past row_end + } + } + + iterator& operator++() + { + // assert(!this->row_current->is_marked()); // how to ++ from there? + ++this->col_current; + advance_past_end(); // in case col_current is at end() + return *this; + } + +private: + Two_d_destructive_iterator& operator=(const Two_d_destructive_iterator &o); + + Alloc &_alloc; +}; + + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +#if defined(SPP_POPCNT_CHECK) +static inline bool spp_popcount_check() +{ + int cpuInfo[4] = { -1 }; + spp_cpuid(cpuInfo, 1); + if (cpuInfo[2] & (1 << 23)) + return true; // means SPP_POPCNT supported + return false; +} +#endif + +#if defined(SPP_POPCNT_CHECK) && defined(SPP_POPCNT) + +static inline uint32_t spp_popcount(uint32_t i) +{ + static const bool s_ok = spp_popcount_check(); + return s_ok ? SPP_POPCNT(i) : s_spp_popcount_default(i); +} + +#else + +static inline uint32_t spp_popcount(uint32_t i) +{ +#if defined(SPP_POPCNT) + return static_cast<uint32_t>(SPP_POPCNT(i)); +#else + return s_spp_popcount_default(i); +#endif +} + +#endif + +#if defined(SPP_POPCNT_CHECK) && defined(SPP_POPCNT64) + +static inline uint32_t spp_popcount(uint64_t i) +{ + static const bool s_ok = spp_popcount_check(); + return s_ok ? 
(uint32_t)SPP_POPCNT64(i) : s_spp_popcount_default(i); +} + +#else + +static inline uint32_t spp_popcount(uint64_t i) +{ +#if defined(SPP_POPCNT64) + return static_cast<uint32_t>(SPP_POPCNT64(i)); +#elif 1 + return s_spp_popcount_default(i); +#endif +} + +#endif + +// --------------------------------------------------------------------------- +// SPARSE-TABLE +// ------------ +// The idea is that a table with (logically) t buckets is divided +// into t/M *groups* of M buckets each. (M is a constant, typically +// 32) Each group is stored sparsely. +// Thus, inserting into the table causes some array to grow, which is +// slow but still constant time. Lookup involves doing a +// logical-position-to-sparse-position lookup, which is also slow but +// constant time. The larger M is, the slower these operations are +// but the less overhead (slightly). +// +// To store the sparse array, we store a bitmap B, where B[i] = 1 iff +// bucket i is non-empty. Then to look up bucket i we really look up +// array[# of 1s before i in B]. This is constant time for fixed M. +// +// Terminology: the position of an item in the overall table (from +// 1 .. t) is called its "location." The logical position in a group +// (from 1 .. M) is called its "position." The actual location in +// the array (from 1 .. # of non-empty buckets in the group) is +// called its "offset." +// --------------------------------------------------------------------------- + +template <class T, class Alloc> +class sparsegroup +{ +public: + // Basic types + typedef T value_type; + typedef Alloc allocator_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + typedef uint8_t size_type; // max # of buckets + + // These are our special iterators, that go over non-empty buckets in a + // group. These aren't const-only because you can change non-empty bcks. 
+ // --------------------------------------------------------------------- + typedef pointer ne_iterator; + typedef const_pointer const_ne_iterator; + typedef std::reverse_iterator<ne_iterator> reverse_ne_iterator; + typedef std::reverse_iterator<const_ne_iterator> const_reverse_ne_iterator; + + // We'll have versions for our special non-empty iterator too + // ---------------------------------------------------------- + ne_iterator ne_begin() { return reinterpret_cast<pointer>(_group); } + const_ne_iterator ne_begin() const { return reinterpret_cast<pointer>(_group); } + const_ne_iterator ne_cbegin() const { return reinterpret_cast<pointer>(_group); } + ne_iterator ne_end() { return reinterpret_cast<pointer>(_group + _num_items()); } + const_ne_iterator ne_end() const { return reinterpret_cast<pointer>(_group + _num_items()); } + const_ne_iterator ne_cend() const { return reinterpret_cast<pointer>(_group + _num_items()); } + reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); } + const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_cend()); } + const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_cend()); } + reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); } + const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_cbegin()); } + const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_cbegin()); } + +private: + // T can be std::pair<const K, V>, but sometime we need to cast to a mutable type + // ------------------------------------------------------------------------------ + typedef typename spp_::cvt<T>::type mutable_value_type; + typedef mutable_value_type & mutable_reference; + typedef mutable_value_type * mutable_pointer; + typedef const mutable_value_type * const_mutable_pointer; + + bool _bmtest(size_type i) const { return !!(_bitmap & (static_cast<group_bm_type>(1) << i)); } + void _bmset(size_type i) { _bitmap |= static_cast<group_bm_type>(1) << i; } + void _bmclear(size_type i) { _bitmap &= ~(static_cast<group_bm_type>(1) << i); } + + bool _bme_test(size_type i) const { return !!(_bm_erased & (static_cast<group_bm_type>(1) << i)); } + void _bme_set(size_type i) { _bm_erased |= static_cast<group_bm_type>(1) << i; } + void _bme_clear(size_type i) { _bm_erased &= ~(static_cast<group_bm_type>(1) << i); } + + bool _bmtest_strict(size_type i) const + { return !!((_bitmap | _bm_erased) & (static_cast<group_bm_type>(1) << i)); } + + + static uint32_t _sizing(uint32_t n) + { +#if !defined(SPP_ALLOC_SZ) || (SPP_ALLOC_SZ == 0) + // aggressive allocation first, then decreasing as sparsegroups fill up + // -------------------------------------------------------------------- + struct alloc_batch_size + { + // 32 bit bitmap + // ........ .... .... .. .. .. .. . . . . . . . . + // 8 12 16 18 20 22 24 25 26 ... 32 + // ------------------------------------------------------ + SPP_CXX14_CONSTEXPR alloc_batch_size() + : data() + { + uint8_t group_sz = SPP_GROUP_SIZE / 4; + uint8_t group_start_alloc = SPP_GROUP_SIZE / 8; //4; + uint8_t alloc_sz = group_start_alloc; + for (int i=0; i<4; ++i) + { + for (int j=0; j<group_sz; ++j) + { + if (j && j % group_start_alloc == 0) + alloc_sz += group_start_alloc; + data[i * group_sz + j] = alloc_sz; + } + if (group_start_alloc > 2) + group_start_alloc /= 2; + alloc_sz += group_start_alloc; + } + } + uint8_t data[SPP_GROUP_SIZE]; + }; + + static alloc_batch_size s_alloc_batch_sz; + return n ? 
static_cast<uint32_t>(s_alloc_batch_sz.data[n-1]) : 0; // more aggressive alloc at the beginning + +#elif (SPP_ALLOC_SZ == 1) + // use as little memory as possible - slowest insert/delete in table + // ----------------------------------------------------------------- + return n; +#else + // decent compromise when SPP_ALLOC_SZ == 2 + // ---------------------------------------- + static size_type sz_minus_1 = SPP_ALLOC_SZ - 1; + return (n + sz_minus_1) & ~sz_minus_1; +#endif + } + + pointer _allocate_group(allocator_type &alloc, uint32_t n /* , bool tight = false */) + { + // ignore tight since we don't store num_alloc + // num_alloc = (uint8_t)(tight ? n : _sizing(n)); + + uint32_t num_alloc = (uint8_t)_sizing(n); + _set_num_alloc(num_alloc); + pointer retval = alloc.allocate(static_cast<size_type>(num_alloc)); + if (retval == NULL) + { + // the allocator is supposed to throw an exception if the allocation fails. + throw_exception(std::bad_alloc()); + } + return retval; + } + + void _free_group(allocator_type &alloc, uint32_t num_alloc) + { + if (_group) + { + uint32_t num_buckets = _num_items(); + if (num_buckets) + { + mutable_pointer end_it = (mutable_pointer)(_group + num_buckets); + for (mutable_pointer p = (mutable_pointer)_group; p != end_it; ++p) + p->~mutable_value_type(); + } + alloc.deallocate(_group, (typename allocator_type::size_type)num_alloc); + _group = NULL; + } + } + + // private because should not be called - no allocator! + sparsegroup &operator=(const sparsegroup& x); + + static size_type _pos_to_offset(group_bm_type bm, size_type pos) + { + //return (size_type)((uint32_t)~((int32_t(-1) + pos) >> 31) & spp_popcount(bm << (SPP_GROUP_SIZE - pos))); + //return (size_type)(pos ? spp_popcount(bm << (SPP_GROUP_SIZE - pos)) : 0); + return static_cast<size_type>(spp_popcount(bm & ((static_cast<group_bm_type>(1) << pos) - 1))); + } + +public: + + // get_iter() in sparsetable needs it + size_type pos_to_offset(size_type pos) const + { + return _pos_to_offset(_bitmap, pos); + } + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + + // Returns the (logical) position in the bm[] array, i, such that + // bm[i] is the offset-th set bit in the array. It is the inverse + // of pos_to_offset. get_pos() uses this function to find the index + // of an ne_iterator in the table. Bit-twiddling from + // http://hackersdelight.org/basics.pdf + // ----------------------------------------------------------------- + static size_type offset_to_pos(group_bm_type bm, size_type offset) + { + for (; offset > 0; offset--) + bm &= (bm-1); // remove right-most set bit + + // Clear all bits to the left of the rightmost bit (the &), + // and then clear the rightmost bit but set all bits to the + // right of it (the -1). 
+ // -------------------------------------------------------- + bm = (bm & -bm) - 1; + return static_cast<size_type>(spp_popcount(bm)); + } + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + size_type offset_to_pos(size_type offset) const + { + return offset_to_pos(_bitmap, offset); + } + +public: + // Constructors -- default and copy -- and destructor + explicit sparsegroup() : + _group(0), _bitmap(0), _bm_erased(0) + { + _set_num_items(0); + _set_num_alloc(0); + } + + sparsegroup(const sparsegroup& x) : + _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased) + { + _set_num_items(0); + _set_num_alloc(0); + assert(_group == 0); + } + + sparsegroup(const sparsegroup& x, allocator_type& a) : + _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased) + { + _set_num_items(0); + _set_num_alloc(0); + + uint32_t num_items = x._num_items(); + if (num_items) + { + _group = _allocate_group(a, num_items /* , true */); + _set_num_items(num_items); + std::uninitialized_copy(x._group, x._group + num_items, _group); + } + } + + ~sparsegroup() { assert(_group == 0); } + + void destruct(allocator_type& a) { _free_group(a, _num_alloc()); } + + // Many STL algorithms use swap instead of copy constructors + void swap(sparsegroup& x) + { + using std::swap; + + swap(_group, x._group); + swap(_bitmap, x._bitmap); + swap(_bm_erased, x._bm_erased); +#ifdef SPP_STORE_NUM_ITEMS + swap(_num_buckets, x._num_buckets); + swap(_num_allocated, x._num_allocated); +#endif + } + + // It's always nice to be able to clear a table without deallocating it + void clear(allocator_type &alloc, bool erased) + { + _free_group(alloc, _num_alloc()); + _bitmap = 0; + if (erased) + _bm_erased = 0; + _set_num_items(0); + _set_num_alloc(0); + } + + // Functions that tell you about size. Alas, these aren't so useful + // because our table is always fixed size. + size_type size() const { return static_cast<size_type>(SPP_GROUP_SIZE); } + size_type max_size() const { return static_cast<size_type>(SPP_GROUP_SIZE); } + + bool empty() const { return false; } + + // We also may want to know how many *used* buckets there are + size_type num_nonempty() const { return (size_type)_num_items(); } + + // TODO(csilvers): make protected + friend + // This is used by sparse_hashtable to get an element from the table + // when we know it exists. 
+ reference unsafe_get(size_type i) const + { + // assert(_bmtest(i)); + return (reference)_group[pos_to_offset(i)]; + } + + typedef std::pair<pointer, bool> SetResult; + +private: + //typedef spp_::integral_constant<bool, spp_::is_relocatable<value_type>::value> check_relocatable; + typedef spp_::true_type realloc_ok_type; + typedef spp_::false_type realloc_not_ok_type; + + //typedef spp_::zero_type libc_reloc_type; + //typedef spp_::one_type spp_reloc_type; + //typedef spp_::two_type spp_not_reloc_type; + //typedef spp_::three_type generic_alloc_type; + +#if 1 + typedef typename if_<((spp_::is_same<allocator_type, libc_allocator<value_type> >::value || + spp_::is_same<allocator_type, spp_allocator<value_type> >::value) && + spp_::is_relocatable<value_type>::value), realloc_ok_type, realloc_not_ok_type>::type + check_alloc_type; +#else + typedef typename if_<spp_::is_same<allocator_type, spp_allocator<value_type> >::value, + typename if_<spp_::is_relocatable<value_type>::value, spp_reloc_type, spp_not_reloc_type>::type, + typename if_<(spp_::is_same<allocator_type, libc_allocator<value_type> >::value && + spp_::is_relocatable<value_type>::value), libc_reloc_type, generic_alloc_type>::type >::type + check_alloc_type; +#endif + + + //typedef if_<spp_::is_same<allocator_type, libc_allocator<value_type> >::value, + // libc_alloc_type, + // if_<spp_::is_same<allocator_type, spp_allocator<value_type> >::value, + // spp_alloc_type, user_alloc_type> > check_alloc_type; + + //typedef spp_::integral_constant<bool, + // (spp_::is_relocatable<value_type>::value && + // (spp_::is_same<allocator_type, spp_allocator<value_type> >::value || + // spp_::is_same<allocator_type, libc_allocator<value_type> >::value)) > + // realloc_and_memmove_ok; + + // ------------------------- memory at *p is uninitialized => need to construct + void _init_val(mutable_value_type *p, reference val) + { +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + ::new (p) value_type(std::move((mutable_reference)val)); +#else + ::new (p) value_type((mutable_reference)val); +#endif + } + + // ------------------------- memory at *p is uninitialized => need to construct + void _init_val(mutable_value_type *p, const_reference val) + { + ::new (p) value_type(val); + } + + // ------------------------------------------------ memory at *p is initialized + void _set_val(value_type *p, reference val) + { +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + *(mutable_pointer)p = std::move((mutable_reference)val); +#else + using std::swap; + swap(*(mutable_pointer)p, *(mutable_pointer)&val); +#endif + } + + // ------------------------------------------------ memory at *p is initialized + void _set_val(value_type *p, const_reference val) + { + *(mutable_pointer)p = *(const_mutable_pointer)&val; + } + + // Create space at _group[offset], assuming value_type is relocatable, and the + // allocator_type is the spp allocator. + // return true if the slot was constructed (i.e. 
contains a valid value_type + // --------------------------------------------------------------------------------- + template <class Val> + void _set_aux(allocator_type &alloc, size_type offset, Val &val, realloc_ok_type) + { + //static int x=0; if (++x < 10) printf("x\n"); // check we are getting here + + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + if (num_items == num_alloc) + { + num_alloc = _sizing(num_items + 1); + _group = alloc.reallocate(_group, num_alloc); + _set_num_alloc(num_alloc); + } + + for (uint32_t i = num_items; i > offset; --i) + memcpy(static_cast<void *>(_group + i), _group + i-1, sizeof(*_group)); + + _init_val((mutable_pointer)(_group + offset), val); + } + + // Create space at _group[offset], assuming value_type is *not* relocatable, and the + // allocator_type is the spp allocator. + // return true if the slot was constructed (i.e. contains a valid value_type + // --------------------------------------------------------------------------------- + template <class Val> + void _set_aux(allocator_type &alloc, size_type offset, Val &val, realloc_not_ok_type) + { + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + //assert(num_alloc == (uint32_t)_num_allocated); + if (num_items < num_alloc) + { + // create new object at end and rotate it to position + _init_val((mutable_pointer)&_group[num_items], val); + std::rotate((mutable_pointer)(_group + offset), + (mutable_pointer)(_group + num_items), + (mutable_pointer)(_group + num_items + 1)); + return; + } + + // This is valid because 0 <= offset <= num_items + pointer p = _allocate_group(alloc, _sizing(num_items + 1)); + if (offset) + std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)_group), + MK_MOVE_IT((mutable_pointer)(_group + offset)), + (mutable_pointer)p); + if (num_items > offset) + std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset)), + MK_MOVE_IT((mutable_pointer)(_group + num_items)), + (mutable_pointer)(p + offset + 1)); + _init_val((mutable_pointer)(p + offset), val); + _free_group(alloc, num_alloc); + _group = p; + } + + // ---------------------------------------------------------------------------------- + template <class Val> + void _set(allocator_type &alloc, size_type i, size_type offset, Val &val) + { + if (!_bmtest(i)) + { + _set_aux(alloc, offset, val, check_alloc_type()); + _incr_num_items(); + _bmset(i); + } + else + _set_val(&_group[offset], val); + } + +public: + + // This returns the pointer to the inserted item + // --------------------------------------------- + template <class Val> + pointer set(allocator_type &alloc, size_type i, Val &val) + { + _bme_clear(i); // in case this was an "erased" location + + size_type offset = pos_to_offset(i); + _set(alloc, i, offset, val); // may change _group pointer + return (pointer)(_group + offset); + } + + // We let you see if a bucket is non-empty without retrieving it + // ------------------------------------------------------------- + bool test(size_type i) const + { + return _bmtest(i); + } + + // also tests for erased values + // ---------------------------- + bool test_strict(size_type i) const + { + return _bmtest_strict(i); + } + +private: + // Shrink the array, assuming value_type is relocatable, and the + // allocator_type is the libc allocator (supporting reallocate). 
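// --------------------------------------------------------------------------
// When value_type cannot be moved with memcpy, _set_aux above constructs the
// new element at the end of the dense array and rotates it into place. A
// minimal sketch of the same maneuver, with std::vector standing in for the
// raw group storage the real code manages through its allocator:
// --------------------------------------------------------------------------
#include <vector>
#include <string>
#include <algorithm>
#include <cassert>

int main()
{
    std::vector<std::string> dense = {"a", "b", "d"};   // occupied slots

    size_t offset = 2;                                  // insertion offset
    dense.push_back("c");                               // construct at end
    std::rotate(dense.begin() + offset,                 // rotate into place
                dense.end() - 1, dense.end());

    assert((dense == std::vector<std::string>{"a", "b", "c", "d"}));
}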
+ // ------------------------------------------------------------- + void _group_erase_aux(allocator_type &alloc, size_type offset, realloc_ok_type) + { + // static int x=0; if (++x < 10) printf("Y\n"); // check we are getting here + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + if (num_items == 1) + { + assert(offset == 0); + _free_group(alloc, num_alloc); + _set_num_alloc(0); + return; + } + + _group[offset].~value_type(); + + for (size_type i = offset; i < num_items - 1; ++i) + memcpy(static_cast<void *>(_group + i), _group + i + 1, sizeof(*_group)); + + if (_sizing(num_items - 1) != num_alloc) + { + num_alloc = _sizing(num_items - 1); + assert(num_alloc); // because we have at least 1 item left + _set_num_alloc(num_alloc); + _group = alloc.reallocate(_group, num_alloc); + } + } + + // Shrink the array, without any special assumptions about value_type and + // allocator_type. + // -------------------------------------------------------------------------- + void _group_erase_aux(allocator_type &alloc, size_type offset, realloc_not_ok_type) + { + uint32_t num_items = _num_items(); + uint32_t num_alloc = _sizing(num_items); + + if (_sizing(num_items - 1) != num_alloc) + { + pointer p = 0; + if (num_items > 1) + { + p = _allocate_group(alloc, num_items - 1); + if (offset) + std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group)), + MK_MOVE_IT((mutable_pointer)(_group + offset)), + (mutable_pointer)(p)); + if (static_cast<uint32_t>(offset + 1) < num_items) + std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset + 1)), + MK_MOVE_IT((mutable_pointer)(_group + num_items)), + (mutable_pointer)(p + offset)); + } + else + { + assert(offset == 0); + _set_num_alloc(0); + } + _free_group(alloc, num_alloc); + _group = p; + } + else + { + std::rotate((mutable_pointer)(_group + offset), + (mutable_pointer)(_group + offset + 1), + (mutable_pointer)(_group + num_items)); + ((mutable_pointer)(_group + num_items - 1))->~mutable_value_type(); + } + } + + void _group_erase(allocator_type &alloc, size_type offset) + { + _group_erase_aux(alloc, offset, check_alloc_type()); + } + +public: + template <class twod_iter> + bool erase_ne(allocator_type &alloc, twod_iter &it) + { + assert(_group && it.col_current != ne_end()); + size_type offset = (size_type)(it.col_current - ne_begin()); + size_type pos = offset_to_pos(offset); + + if (_num_items() <= 1) + { + clear(alloc, false); + it.col_current = 0; + } + else + { + _group_erase(alloc, offset); + _decr_num_items(); + _bmclear(pos); + + // in case _group_erase reallocated the buffer + it.col_current = reinterpret_cast<pointer>(_group) + offset; + } + _bme_set(pos); // remember that this position has been erased + it.advance_past_end(); + return true; + } + + + // This takes the specified elements out of the group. This is + // "undefining", rather than "clearing". + // TODO(austern): Make this exception safe: handle exceptions from + // value_type's copy constructor. + // --------------------------------------------------------------- + void erase(allocator_type &alloc, size_type i) + { + if (_bmtest(i)) + { + // trivial to erase empty bucket + if (_num_items() == 1) + clear(alloc, false); + else + { + _group_erase(alloc, pos_to_offset(i)); + _decr_num_items(); + _bmclear(i); + } + _bme_set(i); // remember that this position has been erased + } + } + + // I/O + // We support reading and writing groups to disk. 
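// --------------------------------------------------------------------------
// erase() above removes the value but leaves a tombstone: the slot's bit is
// cleared in _bitmap and set in _bm_erased, so a probe can tell "never
// occupied" (safe to stop) from "erased" (must keep probing). A tiny sketch
// of that two-bitmap state, with hypothetical names and slot indices < 64:
// --------------------------------------------------------------------------
#include <cstdint>
#include <cassert>

struct group_bits_sketch {
    uint64_t bitmap = 0;   // slots currently holding a value
    uint64_t erased = 0;   // slots that once held a value

    bool test(uint32_t i) const        { return (bitmap >> i) & 1; }
    bool test_strict(uint32_t i) const { return ((bitmap | erased) >> i) & 1; }
    void set(uint32_t i)   { bitmap |=  uint64_t(1) << i; erased &= ~(uint64_t(1) << i); }
    void erase(uint32_t i) { bitmap &= ~(uint64_t(1) << i); erased |=  uint64_t(1) << i; }
};

int main()
{
    group_bits_sketch g;
    g.set(5);
    g.erase(5);
    assert(!g.test(5) && g.test_strict(5));   // tombstone: probing continues
    assert(!g.test_strict(6));                // truly empty: probing may stop
}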
We don't store + // the actual array contents (which we don't know how to store), + // just the bitmap and size. Meant to be used with table I/O. + // -------------------------------------------------------------- + template <typename OUTPUT> bool write_metadata(OUTPUT *fp) const + { + // warning: we write 4 or 8 bytes for the bitmap, instead of 6 in the + // original google sparsehash + // ------------------------------------------------------------------ + if (!sparsehash_internal::write_data(fp, &_bitmap, sizeof(_bitmap))) + return false; + + return true; + } + + // Reading destroys the old group contents! Returns true if all was ok. + template <typename INPUT> bool read_metadata(allocator_type &alloc, INPUT *fp) + { + clear(alloc, true); + + if (!sparsehash_internal::read_data(fp, &_bitmap, sizeof(_bitmap))) + return false; + + // We'll allocate the space, but we won't fill it: it will be + // left as uninitialized raw memory. + uint32_t num_items = spp_popcount(_bitmap); // yes, _num_buckets not set + _set_num_items(num_items); + _group = num_items ? _allocate_group(alloc, num_items/* , true */) : 0; + return true; + } + + // Again, only meaningful if value_type is a POD. + template <typename INPUT> bool read_nopointer_data(INPUT *fp) + { + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!sparsehash_internal::read_data(fp, &(*it), sizeof(*it))) + return false; + return true; + } + + // If your keys and values are simple enough, we can write them + // to disk for you. "simple enough" means POD and no pointers. + // However, we don't try to normalize endianness. + // ------------------------------------------------------------ + template <typename OUTPUT> bool write_nopointer_data(OUTPUT *fp) const + { + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!sparsehash_internal::write_data(fp, &(*it), sizeof(*it))) + return false; + return true; + } + + + // Comparisons. We only need to define == and < -- we get + // != > <= >= via relops.h (which we happily included above). + // Note the comparisons are pretty arbitrary: we compare + // values of the first index that isn't equal (using default + // value for empty buckets). + // --------------------------------------------------------- + bool operator==(const sparsegroup& x) const + { + return (_bitmap == x._bitmap && + _bm_erased == x._bm_erased && + std::equal(_group, _group + _num_items(), x._group)); + } + + bool operator<(const sparsegroup& x) const + { + // also from <algorithm> + return std::lexicographical_compare(_group, _group + _num_items(), + x._group, x._group + x._num_items()); + } + + bool operator!=(const sparsegroup& x) const { return !(*this == x); } + bool operator<=(const sparsegroup& x) const { return !(x < *this); } + bool operator> (const sparsegroup& x) const { return x < *this; } + bool operator>=(const sparsegroup& x) const { return !(*this < x); } + + void mark() { _group = (value_type *)static_cast<uintptr_t>(-1); } + bool is_marked() const { return _group == (value_type *)static_cast<uintptr_t>(-1); } + +private: + // --------------------------------------------------------------------------- + template <class A> + class alloc_impl : public A + { + public: + typedef typename A::pointer pointer; + typedef typename A::size_type size_type; + + // Convert a normal allocator to one that has realloc_or_die() + explicit alloc_impl(const A& a) : A(a) { } + + // realloc_or_die should only be used when using the default + // allocator (spp::spp_allocator). 
+ pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) + { + throw_exception(std::runtime_error("realloc_or_die is only supported for spp::spp_allocator\n")); + return NULL; + } + }; + + // A template specialization of alloc_impl for + // spp::libc_allocator that can handle realloc_or_die. + // ----------------------------------------------------------- + template <class A> + class alloc_impl<spp_::libc_allocator<A> > : public spp_::libc_allocator<A> + { + public: + typedef typename spp_::libc_allocator<A>::pointer pointer; + typedef typename spp_::libc_allocator<A>::size_type size_type; + + explicit alloc_impl(const spp_::libc_allocator<A>& a) + : spp_::libc_allocator<A>(a) + { } + + pointer realloc_or_die(pointer ptr, size_type n) + { + pointer retval = this->reallocate(ptr, n); + if (retval == NULL) + { + // the allocator is supposed to throw an exception if the allocation fails. + throw_exception(std::bad_alloc()); + } + return retval; + } + }; + + // A template specialization of alloc_impl for + // spp::spp_allocator that can handle realloc_or_die. + // ----------------------------------------------------------- + template <class A> + class alloc_impl<spp_::spp_allocator<A> > : public spp_::spp_allocator<A> + { + public: + typedef typename spp_::spp_allocator<A>::pointer pointer; + typedef typename spp_::spp_allocator<A>::size_type size_type; + + explicit alloc_impl(const spp_::spp_allocator<A>& a) + : spp_::spp_allocator<A>(a) + { } + + pointer realloc_or_die(pointer ptr, size_type n) + { + pointer retval = this->reallocate(ptr, n); + if (retval == NULL) + { + // the allocator is supposed to throw an exception if the allocation fails. + throw_exception(std::bad_alloc()); + } + return retval; + } + }; + + +#ifdef SPP_STORE_NUM_ITEMS + uint32_t _num_items() const { return (uint32_t)_num_buckets; } + void _set_num_items(uint32_t val) { _num_buckets = static_cast<size_type>(val); } + void _incr_num_items() { ++_num_buckets; } + void _decr_num_items() { --_num_buckets; } + uint32_t _num_alloc() const { return (uint32_t)_num_allocated; } + void _set_num_alloc(uint32_t val) { _num_allocated = static_cast<size_type>(val); } +#else + uint32_t _num_items() const { return spp_popcount(_bitmap); } + void _set_num_items(uint32_t ) { } + void _incr_num_items() { } + void _decr_num_items() { } + uint32_t _num_alloc() const { return _sizing(_num_items()); } + void _set_num_alloc(uint32_t val) { } +#endif + + // The actual data + // --------------- + value_type * _group; // (small) array of T's + group_bm_type _bitmap; + group_bm_type _bm_erased; // ones where items have been erased + +#ifdef SPP_STORE_NUM_ITEMS + size_type _num_buckets; + size_type _num_allocated; +#endif +}; + +// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +template <class T, class Alloc> +class sparsetable +{ +public: + typedef T value_type; + typedef Alloc allocator_type; + typedef sparsegroup<value_type, allocator_type> group_type; + +private: + typedef typename Alloc::template rebind<group_type>::other group_alloc_type; + typedef typename group_alloc_type::size_type group_size_type; + +public: + // Basic types + // ----------- + typedef typename allocator_type::size_type size_type; + typedef typename allocator_type::difference_type difference_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + typedef 
group_type& GroupsReference; + typedef const group_type& GroupsConstReference; + + typedef typename group_type::ne_iterator ColIterator; + typedef typename group_type::const_ne_iterator ColConstIterator; + + typedef table_iterator<sparsetable<T, allocator_type> > iterator; // defined with index + typedef const_table_iterator<sparsetable<T, allocator_type> > const_iterator; // defined with index + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + typedef std::reverse_iterator<iterator> reverse_iterator; + + // These are our special iterators, that go over non-empty buckets in a + // table. These aren't const only because you can change non-empty bcks. + // ---------------------------------------------------------------------- + typedef Two_d_iterator<T, + group_type *, + ColIterator, + std::bidirectional_iterator_tag> ne_iterator; + + typedef Two_d_iterator<const T, + const group_type *, + ColConstIterator, + std::bidirectional_iterator_tag> const_ne_iterator; + + // Another special iterator: it frees memory as it iterates (used to resize). + // Obviously, you can only iterate over it once, which is why it's an input iterator + // --------------------------------------------------------------------------------- + typedef Two_d_destructive_iterator<T, + group_type *, + ColIterator, + std::input_iterator_tag, + allocator_type> destructive_iterator; + + typedef std::reverse_iterator<ne_iterator> reverse_ne_iterator; + typedef std::reverse_iterator<const_ne_iterator> const_reverse_ne_iterator; + + + // Iterator functions + // ------------------ + iterator begin() { return iterator(this, 0); } + const_iterator begin() const { return const_iterator(this, 0); } + const_iterator cbegin() const { return const_iterator(this, 0); } + iterator end() { return iterator(this, size()); } + const_iterator end() const { return const_iterator(this, size()); } + const_iterator cend() const { return const_iterator(this, size()); } + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(cend()); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(cbegin()); } + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + // Versions for our special non-empty iterator + // ------------------------------------------ + ne_iterator ne_begin() { return ne_iterator (_first_group); } + const_ne_iterator ne_begin() const { return const_ne_iterator(_first_group); } + const_ne_iterator ne_cbegin() const { return const_ne_iterator(_first_group); } + ne_iterator ne_end() { return ne_iterator (_last_group); } + const_ne_iterator ne_end() const { return const_ne_iterator(_last_group); } + const_ne_iterator ne_cend() const { return const_ne_iterator(_last_group); } + + reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); } + const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_end()); } + const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_end()); } + reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); } + const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_begin()); } + const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_begin()); } + + destructive_iterator destructive_begin() + { + 
return destructive_iterator(_alloc, _first_group); + } + + destructive_iterator destructive_end() + { + return destructive_iterator(_alloc, _last_group); + } + + // How to deal with the proper group + static group_size_type num_groups(size_type num) + { + // how many to hold num buckets + return num == 0 ? (group_size_type)0 : + (group_size_type)(((num-1) / SPP_GROUP_SIZE) + 1); + } + + typename group_type::size_type pos_in_group(size_type i) const + { + return static_cast<typename group_type::size_type>(i & SPP_MASK_); + } + + size_type group_num(size_type i) const + { + return (size_type)(i >> SPP_SHIFT_); + } + + GroupsReference which_group(size_type i) + { + return _first_group[group_num(i)]; + } + + GroupsConstReference which_group(size_type i) const + { + return _first_group[group_num(i)]; + } + + void _alloc_group_array(group_size_type sz, group_type *&first, group_type *&last) + { + if (sz) + { + first = _group_alloc.allocate((size_type)(sz + 1)); // + 1 for end marker + first[sz].mark(); // for the ne_iterator + last = first + sz; + } + } + + void _free_group_array(group_type *&first, group_type *&last) + { + if (first) + { + _group_alloc.deallocate(first, (group_size_type)(last - first + 1)); // + 1 for end marker + first = last = 0; + } + } + + void _allocate_groups(size_type sz) + { + if (sz) + { + _alloc_group_array(sz, _first_group, _last_group); + std::uninitialized_fill(_first_group, _last_group, group_type()); + } + } + + void _free_groups() + { + if (_first_group) + { + for (group_type *g = _first_group; g != _last_group; ++g) + g->destruct(_alloc); + _free_group_array(_first_group, _last_group); + } + } + + void _cleanup() + { + _free_groups(); // sets _first_group = _last_group = 0 + _table_size = 0; + _num_buckets = 0; + } + + void _init() + { + _first_group = 0; + _last_group = 0; + _table_size = 0; + _num_buckets = 0; + } + + void _copy(const sparsetable &o) + { + _table_size = o._table_size; + _num_buckets = o._num_buckets; + _alloc = o._alloc; // todo - copy or move allocator according to... + _group_alloc = o._group_alloc; // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map + + group_size_type sz = (group_size_type)(o._last_group - o._first_group); + if (sz) + { + _alloc_group_array(sz, _first_group, _last_group); + for (group_size_type i=0; i<sz; ++i) + new (_first_group + i) group_type(o._first_group[i], _alloc); + } + } + +public: + // Constructors -- default, normal (when you specify size), and copy + explicit sparsetable(size_type sz = 0, const allocator_type &alloc = allocator_type()) : + _first_group(0), + _last_group(0), + _table_size(sz), + _num_buckets(0), + _group_alloc(alloc), + _alloc(alloc) + // todo - copy or move allocator according to + // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map + { + _allocate_groups(num_groups(sz)); + } + + ~sparsetable() + { + _free_groups(); + } + + sparsetable(const sparsetable &o) + { + _init(); + _copy(o); + } + + sparsetable& operator=(const sparsetable &o) + { + _cleanup(); + _copy(o); + return *this; + } + + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + sparsetable(sparsetable&& o) + { + _init(); + this->swap(o); + } + + sparsetable(sparsetable&& o, const allocator_type &alloc) + { + _init(); + this->swap(o); + _alloc = alloc; // [gp todo] is this correct? 
+ } + + sparsetable& operator=(sparsetable&& o) + { + _cleanup(); + this->swap(o); + return *this; + } +#endif + + // Many STL algorithms use swap instead of copy constructors + void swap(sparsetable& o) + { + using std::swap; + + swap(_first_group, o._first_group); + swap(_last_group, o._last_group); + swap(_table_size, o._table_size); + swap(_num_buckets, o._num_buckets); + if (_alloc != o._alloc) + swap(_alloc, o._alloc); + if (_group_alloc != o._group_alloc) + swap(_group_alloc, o._group_alloc); + } + + // It's always nice to be able to clear a table without deallocating it + void clear() + { + _free_groups(); + _num_buckets = 0; + _table_size = 0; + } + + inline allocator_type get_allocator() const + { + return _alloc; + } + + + // Functions that tell you about size. + // NOTE: empty() is non-intuitive! It does not tell you the number + // of not-empty buckets (use num_nonempty() for that). Instead + // it says whether you've allocated any buckets or not. + // ---------------------------------------------------------------- + size_type size() const { return _table_size; } + size_type max_size() const { return _alloc.max_size(); } + bool empty() const { return _table_size == 0; } + size_type num_nonempty() const { return _num_buckets; } + + // OK, we'll let you resize one of these puppies + void resize(size_type new_size) + { + group_size_type sz = num_groups(new_size); + group_size_type old_sz = (group_size_type)(_last_group - _first_group); + + if (sz != old_sz) + { + // resize group array + // ------------------ + group_type *first = 0, *last = 0; + if (sz) + { + _alloc_group_array(sz, first, last); + if (old_sz) + memcpy(static_cast<void *>(first), _first_group, sizeof(*first) * (std::min)(sz, old_sz)); + } + + if (sz < old_sz) + { + for (group_type *g = _first_group + sz; g != _last_group; ++g) + g->destruct(_alloc); + } + else + std::uninitialized_fill(first + old_sz, last, group_type()); + + _free_group_array(_first_group, _last_group); + _first_group = first; + _last_group = last; + } +#if 0 + // used only in test program + // todo: fix if sparsetable to be used directly + // -------------------------------------------- + if (new_size < _table_size) + { + // lower num_buckets, clear last group + if (pos_in_group(new_size) > 0) // need to clear inside last group + groups.back().erase(_alloc, groups.back().begin() + pos_in_group(new_size), + groups.back().end()); + _num_buckets = 0; // refigure # of used buckets + for (const group_type *group = _first_group; group != _last_group; ++group) + _num_buckets += group->num_nonempty(); + } +#endif + _table_size = new_size; + } + + // We let you see if a bucket is non-empty without retrieving it + // ------------------------------------------------------------- + bool test(size_type i) const + { + // assert(i < _table_size); + return which_group(i).test(pos_in_group(i)); + } + + // also tests for erased values + // ---------------------------- + bool test_strict(size_type i) const + { + // assert(i < _table_size); + return which_group(i).test_strict(pos_in_group(i)); + } + + friend struct GrpPos; + + struct GrpPos + { + typedef typename sparsetable::ne_iterator ne_iter; + GrpPos(const sparsetable &table, size_type i) : + grp(table.which_group(i)), pos(table.pos_in_group(i)) {} + + bool test_strict() const { return grp.test_strict(pos); } + bool test() const { return grp.test(pos); } + typename sparsetable::reference unsafe_get() const { return grp.unsafe_get(pos); } + ne_iter get_iter(typename sparsetable::reference ref) + { + return 
ne_iter((group_type *)&grp, &ref); + } + + void erase(sparsetable &table) // item *must* be present + { + assert(table._num_buckets); + ((group_type &)grp).erase(table._alloc, pos); + --table._num_buckets; + } + + private: + GrpPos* operator=(const GrpPos&); + + const group_type &grp; + typename group_type::size_type pos; + }; + + bool test(iterator pos) const + { + return which_group(pos.pos).test(pos_in_group(pos.pos)); + } + + bool test(const_iterator pos) const + { + return which_group(pos.pos).test(pos_in_group(pos.pos)); + } + + // TODO(csilvers): make protected + friend + // This is used by sparse_hashtable to get an element from the table + // when we know it exists (because the caller has called test(i)). + // ----------------------------------------------------------------- + reference unsafe_get(size_type i) const + { + assert(i < _table_size); + // assert(test(i)); + return which_group(i).unsafe_get(pos_in_group(i)); + } + + // Needed for hashtables, gets as a ne_iterator. Crashes for empty bcks + const_ne_iterator get_iter(size_type i) const + { + //assert(test(i)); // how can a ne_iterator point to an empty bucket? + + size_type grp_idx = group_num(i); + + return const_ne_iterator(_first_group + grp_idx, + (_first_group[grp_idx].ne_begin() + + _first_group[grp_idx].pos_to_offset(pos_in_group(i)))); + } + + const_ne_iterator get_iter(size_type i, ColIterator col_it) const + { + return const_ne_iterator(_first_group + group_num(i), col_it); + } + + // For nonempty we can return a non-const version + ne_iterator get_iter(size_type i) + { + //assert(test(i)); // how can a nonempty_iterator point to an empty bucket? + + size_type grp_idx = group_num(i); + + return ne_iterator(_first_group + grp_idx, + (_first_group[grp_idx].ne_begin() + + _first_group[grp_idx].pos_to_offset(pos_in_group(i)))); + } + + ne_iterator get_iter(size_type i, ColIterator col_it) + { + return ne_iterator(_first_group + group_num(i), col_it); + } + + // And the reverse transformation. + size_type get_pos(const const_ne_iterator& it) const + { + difference_type current_row = it.row_current - _first_group; + difference_type current_col = (it.col_current - _first_group[current_row].ne_begin()); + return ((current_row * SPP_GROUP_SIZE) + + _first_group[current_row].offset_to_pos(current_col)); + } + + // Val can be reference or const_reference + // --------------------------------------- + template <class Val> + reference set(size_type i, Val &val) + { + assert(i < _table_size); + group_type &group = which_group(i); + typename group_type::size_type old_numbuckets = group.num_nonempty(); + pointer p(group.set(_alloc, pos_in_group(i), val)); + _num_buckets += group.num_nonempty() - old_numbuckets; + return *p; + } + + // used in _move_from (where we can move the old value instead of copying it + void move(size_type i, reference val) + { + assert(i < _table_size); + which_group(i).set(_alloc, pos_in_group(i), val); + ++_num_buckets; + } + + // This takes the specified elements out of the table. 
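// --------------------------------------------------------------------------
// sparsetable addresses bucket i by splitting it into a group number and a
// position inside that group (group_num()/pos_in_group() above, inverted by
// get_pos()). A sketch of the arithmetic, assuming SPP_GROUP_SIZE == 64 so
// that SPP_SHIFT_ == 6 and SPP_MASK_ == 63:
// --------------------------------------------------------------------------
#include <cstdint>
#include <cassert>

int main()
{
    const uint64_t GROUP_SIZE = 64, SHIFT = 6, MASK = GROUP_SIZE - 1;

    uint64_t i   = 1000;          // a bucket index in the sparsetable
    uint64_t grp = i >> SHIFT;    // group_num(i)    -> group 15
    uint64_t pos = i & MASK;      // pos_in_group(i) -> slot 40

    assert(grp == 15 && pos == 40);
    assert(grp * GROUP_SIZE + pos == i);   // get_pos() reverses the split
}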
+ // -------------------------------------------------- + void erase(size_type i) + { + assert(i < _table_size); + + GroupsReference grp(which_group(i)); + typename group_type::size_type old_numbuckets = grp.num_nonempty(); + grp.erase(_alloc, pos_in_group(i)); + _num_buckets += grp.num_nonempty() - old_numbuckets; + } + + void erase(iterator pos) + { + erase(pos.pos); + } + + void erase(iterator start_it, iterator end_it) + { + // This could be more efficient, but then we'd need to figure + // out if we spanned groups or not. Doesn't seem worth it. + for (; start_it != end_it; ++start_it) + erase(start_it); + } + + const_ne_iterator erase(const_ne_iterator it) + { + ne_iterator res(it); + if (res.row_current->erase_ne(_alloc, res)) + _num_buckets--; + return res; + } + + const_ne_iterator erase(const_ne_iterator f, const_ne_iterator l) + { + size_t diff = l - f; + while (diff--) + f = erase(f); + return f; + } + + // We support reading and writing tables to disk. We don't store + // the actual array contents (which we don't know how to store), + // just the groups and sizes. Returns true if all went ok. + +private: + // Every time the disk format changes, this should probably change too + typedef unsigned long MagicNumberType; + static const MagicNumberType MAGIC_NUMBER = 0x24687531; + + // Old versions of this code write all data in 32 bits. We need to + // support these files as well as having support for 64-bit systems. + // So we use the following encoding scheme: for values < 2^32-1, we + // store in 4 bytes in big-endian order. For values > 2^32, we + // store 0xFFFFFFF followed by 8 bytes in big-endian order. This + // causes us to mis-read old-version code that stores exactly + // 0xFFFFFFF, but I don't think that is likely to have happened for + // these particular values. + template <typename OUTPUT, typename IntType> + static bool write_32_or_64(OUTPUT* fp, IntType value) + { + if (value < 0xFFFFFFFFULL) // fits in 4 bytes + { + if (!sparsehash_internal::write_bigendian_number(fp, value, 4)) + return false; + } + else + { + if (!sparsehash_internal::write_bigendian_number(fp, 0xFFFFFFFFUL, 4)) + return false; + if (!sparsehash_internal::write_bigendian_number(fp, value, 8)) + return false; + } + return true; + } + + template <typename INPUT, typename IntType> + static bool read_32_or_64(INPUT* fp, IntType *value) + { + // reads into value + MagicNumberType first4 = 0; // a convenient 32-bit unsigned type + if (!sparsehash_internal::read_bigendian_number(fp, &first4, 4)) + return false; + + if (first4 < 0xFFFFFFFFULL) + { + *value = first4; + } + else + { + if (!sparsehash_internal::read_bigendian_number(fp, value, 8)) + return false; + } + return true; + } + +public: + // read/write_metadata() and read_write/nopointer_data() are DEPRECATED. + // Use serialize() and unserialize(), below, for new code. + + template <typename OUTPUT> + bool write_metadata(OUTPUT *fp) const + { + if (!write_32_or_64(fp, MAGIC_NUMBER)) return false; + if (!write_32_or_64(fp, _table_size)) return false; + if (!write_32_or_64(fp, _num_buckets)) return false; + + for (const group_type *group = _first_group; group != _last_group; ++group) + if (group->write_metadata(fp) == false) + return false; + return true; + } + + // Reading destroys the old table contents! Returns true if read ok. 
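// --------------------------------------------------------------------------
// A self-contained sketch of the write_32_or_64 encoding above: values below
// 0xFFFFFFFF take 4 big-endian bytes; larger ones are written as the 4-byte
// escape marker 0xFFFFFFFF followed by all 8 bytes. Helper names are
// hypothetical, and a byte vector stands in for the output stream:
// --------------------------------------------------------------------------
#include <cstdint>
#include <vector>
#include <cassert>

static void put_be(std::vector<unsigned char>& out, uint64_t v, int width)
{
    for (int i = width - 1; i >= 0; --i)           // most significant byte first
        out.push_back((unsigned char)((v >> (8 * i)) & 0xFF));
}

static void write_32_or_64_sketch(std::vector<unsigned char>& out, uint64_t v)
{
    if (v < 0xFFFFFFFFULL)
        put_be(out, v, 4);                         // fits: 4 bytes
    else {
        put_be(out, 0xFFFFFFFFULL, 4);             // escape marker...
        put_be(out, v, 8);                         // ...then the full value
    }
}

int main()
{
    std::vector<unsigned char> buf;
    write_32_or_64_sketch(buf, 1000);              // small: 4 bytes
    assert(buf.size() == 4);
    write_32_or_64_sketch(buf, uint64_t(1) << 40); // large: 4 + 8 bytes
    assert(buf.size() == 16);
}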
+ template <typename INPUT> + bool read_metadata(INPUT *fp) + { + size_type magic_read = 0; + if (!read_32_or_64(fp, &magic_read)) return false; + if (magic_read != MAGIC_NUMBER) + { + clear(); // just to be consistent + return false; + } + + if (!read_32_or_64(fp, &_table_size)) return false; + if (!read_32_or_64(fp, &_num_buckets)) return false; + + resize(_table_size); // so the vector's sized ok + for (group_type *group = _first_group; group != _last_group; ++group) + if (group->read_metadata(_alloc, fp) == false) + return false; + return true; + } + + // This code is identical to that for SparseGroup + // If your keys and values are simple enough, we can write them + // to disk for you. "simple enough" means no pointers. + // However, we don't try to normalize endianness + bool write_nopointer_data(FILE *fp) const + { + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!fwrite(&*it, sizeof(*it), 1, fp)) + return false; + return true; + } + + // When reading, we have to override the potential const-ness of *it + bool read_nopointer_data(FILE *fp) + { + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp)) + return false; + return true; + } + + // INPUT and OUTPUT must be either a FILE, *or* a C++ stream + // (istream, ostream, etc) *or* a class providing + // Read(void*, size_t) and Write(const void*, size_t) + // (respectively), which writes a buffer into a stream + // (which the INPUT/OUTPUT instance presumably owns). + + typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer; + + // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) + template <typename ValueSerializer, typename OUTPUT> + bool serialize(ValueSerializer serializer, OUTPUT *fp) + { + if (!write_metadata(fp)) + return false; + for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!serializer(fp, *it)) + return false; + return true; + } + + // ValueSerializer: a functor. operator()(INPUT*, value_type*) + template <typename ValueSerializer, typename INPUT> + bool unserialize(ValueSerializer serializer, INPUT *fp) + { + clear(); + if (!read_metadata(fp)) + return false; + for (ne_iterator it = ne_begin(); it != ne_end(); ++it) + if (!serializer(fp, &*it)) + return false; + return true; + } + + // Comparisons. Note the comparisons are pretty arbitrary: we + // compare values of the first index that isn't equal (using default + // value for empty buckets). 
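// --------------------------------------------------------------------------
// serialize()/unserialize() above accept any functor with these two call
// shapes (the bundled NopointerSerializer plays the same role for PODs). A
// minimal sketch for int values over a FILE*, with hypothetical names:
// --------------------------------------------------------------------------
#include <cstdio>

struct int_serializer_sketch {
    // bool operator()(OUTPUT*, const value_type&) -- used when writing
    bool operator()(std::FILE* fp, const int& v) const
        { return std::fwrite(&v, sizeof v, 1, fp) == 1; }
    // bool operator()(INPUT*, value_type*)        -- used when reading
    bool operator()(std::FILE* fp, int* v) const
        { return std::fread(v, sizeof *v, 1, fp) == 1; }
};

int main()
{
    std::FILE* fp = std::tmpfile();
    if (!fp) return 1;
    int out = 42, in = 0;
    int_serializer_sketch ser;
    bool ok = ser(fp, out);           // write one value...
    std::rewind(fp);
    ok = ok && ser(fp, &in);          // ...and read it back
    std::fclose(fp);
    return (ok && in == 42) ? 0 : 1;
}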
+ bool operator==(const sparsetable& x) const + { + return (_table_size == x._table_size && + _num_buckets == x._num_buckets && + _first_group == x._first_group); + } + + bool operator<(const sparsetable& x) const + { + return std::lexicographical_compare(begin(), end(), x.begin(), x.end()); + } + bool operator!=(const sparsetable& x) const { return !(*this == x); } + bool operator<=(const sparsetable& x) const { return !(x < *this); } + bool operator>(const sparsetable& x) const { return x < *this; } + bool operator>=(const sparsetable& x) const { return !(*this < x); } + + +private: + // The actual data + // --------------- + group_type * _first_group; + group_type * _last_group; + size_type _table_size; // how many buckets they want + size_type _num_buckets; // number of non-empty buckets + group_alloc_type _group_alloc; + allocator_type _alloc; +}; + +// ---------------------------------------------------------------------- +// S P A R S E _ H A S H T A B L E +// ---------------------------------------------------------------------- +// Hashtable class, used to implement the hashed associative containers +// hash_set and hash_map. +// +// Value: what is stored in the table (each bucket is a Value). +// Key: something in a 1-to-1 correspondence to a Value, that can be used +// to search for a Value in the table (find() takes a Key). +// HashFcn: Takes a Key and returns an integer, the more unique the better. +// ExtractKey: given a Value, returns the unique Key associated with it. +// Must inherit from unary_function, or at least have a +// result_type enum indicating the return type of operator(). +// EqualKey: Given two Keys, says whether they are the same (that is, +// if they are both associated with the same Value). +// Alloc: STL allocator to use to allocate memory. +// +// ---------------------------------------------------------------------- + +// The probing method +// ------------------ +// Linear probing +// #define JUMP_(key, num_probes) ( 1 ) +// Quadratic probing +#define JUMP_(key, num_probes) ( num_probes ) + + +// ------------------------------------------------------------------- +// ------------------------------------------------------------------- +template <class Value, class Key, class HashFcn, + class ExtractKey, class SetKey, class EqualKey, class Alloc> +class sparse_hashtable +{ +public: + typedef Key key_type; + typedef Value value_type; + typedef HashFcn hasher; // user provided or spp_hash<Key> + typedef EqualKey key_equal; + typedef Alloc allocator_type; + + typedef typename allocator_type::size_type size_type; + typedef typename allocator_type::difference_type difference_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + // Table is the main storage class. + typedef sparsetable<value_type, allocator_type> Table; + typedef typename Table::ne_iterator ne_it; + typedef typename Table::const_ne_iterator cne_it; + typedef typename Table::destructive_iterator dest_it; + typedef typename Table::ColIterator ColIterator; + + typedef ne_it iterator; + typedef cne_it const_iterator; + typedef dest_it destructive_iterator; + + // These come from tr1. For us they're the same as regular iterators. 
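// --------------------------------------------------------------------------
// JUMP_ above selects quadratic probing: the n-th probe advances by n, so
// from a start bucket h the visited buckets are h, h+1, h+3, h+6, ...
// (triangular offsets). For any power-of-two table size this sequence is a
// permutation -- every bucket is reached exactly once -- which the sketch
// below verifies by brute force:
// --------------------------------------------------------------------------
#include <cstdint>
#include <vector>
#include <cassert>

int main()
{
    const uint32_t n_buckets = 32;               // must be a power of two
    std::vector<bool> seen(n_buckets, false);

    uint32_t bucknum = 17;                       // arbitrary starting hash
    for (uint32_t num_probes = 0; num_probes < n_buckets; ++num_probes) {
        assert(!seen[bucknum]);                  // never revisits a bucket
        seen[bucknum] = true;
        bucknum = (bucknum + (num_probes + 1)) & (n_buckets - 1);
    }
}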
+ // ------------------------------------------------------------------- + typedef iterator local_iterator; + typedef const_iterator const_local_iterator; + + // How full we let the table get before we resize + // ---------------------------------------------- + static const int HT_OCCUPANCY_PCT; // = 80 (out of 100); + + // How empty we let the table get before we resize lower, by default. + // (0.0 means never resize lower.) + // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing + // ------------------------------------------------------------------ + static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT; + + // Minimum size we're willing to let hashtables be. + // Must be a power of two, and at least 4. + // Note, however, that for a given hashtable, the initial size is a + // function of the first constructor arg, and may be >HT_MIN_BUCKETS. + // ------------------------------------------------------------------ + static const size_type HT_MIN_BUCKETS = 4; + + // By default, if you don't specify a hashtable size at + // construction-time, we use this size. Must be a power of two, and + // at least HT_MIN_BUCKETS. + // ----------------------------------------------------------------- + static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; + + // iterators + // --------- + iterator begin() { return _mk_iterator(table.ne_begin()); } + iterator end() { return _mk_iterator(table.ne_end()); } + const_iterator begin() const { return _mk_const_iterator(table.ne_cbegin()); } + const_iterator end() const { return _mk_const_iterator(table.ne_cend()); } + const_iterator cbegin() const { return _mk_const_iterator(table.ne_cbegin()); } + const_iterator cend() const { return _mk_const_iterator(table.ne_cend()); } + + // These come from tr1 unordered_map. They iterate over 'bucket' n. + // For sparsehashtable, we could consider each 'group' to be a bucket, + // I guess, but I don't really see the point. We'll just consider + // bucket n to be the n-th element of the sparsetable, if it's occupied, + // or some empty element, otherwise. + // --------------------------------------------------------------------- + local_iterator begin(size_type i) + { + return _mk_iterator(table.test(i) ? table.get_iter(i) : table.ne_end()); + } + + local_iterator end(size_type i) + { + local_iterator it = begin(i); + if (table.test(i)) + ++it; + return _mk_iterator(it); + } + + const_local_iterator begin(size_type i) const + { + return _mk_const_iterator(table.test(i) ? table.get_iter(i) : table.ne_cend()); + } + + const_local_iterator end(size_type i) const + { + const_local_iterator it = begin(i); + if (table.test(i)) + ++it; + return _mk_const_iterator(it); + } + + const_local_iterator cbegin(size_type i) const { return begin(i); } + const_local_iterator cend(size_type i) const { return end(i); } + + // This is used when resizing + // -------------------------- + destructive_iterator destructive_begin() { return _mk_destructive_iterator(table.destructive_begin()); } + destructive_iterator destructive_end() { return _mk_destructive_iterator(table.destructive_end()); } + + + // accessor functions for the things we templatize on, basically + // ------------------------------------------------------------- + hasher hash_funct() const { return settings; } + key_equal key_eq() const { return key_info; } + allocator_type get_allocator() const { return table.get_allocator(); } + + // Accessor function for statistics gathering. 
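// --------------------------------------------------------------------------
// A sketch of the default thresholds above: with HT_OCCUPANCY_PCT == 80 the
// table grows once it is more than 80% full, and with HT_EMPTY_PCT == 32
// (0.4 * 80) a shrink is considered below 32% occupancy. Integer arithmetic
// stands in for the float factors the Settings class actually keeps:
// --------------------------------------------------------------------------
#include <cstddef>
#include <cassert>

int main()
{
    const std::size_t buckets = 256;
    const std::size_t enlarge_threshold = buckets * 80 / 100;  // grow above this
    const std::size_t shrink_threshold  = buckets * 32 / 100;  // shrink below this

    assert(enlarge_threshold == 204);
    assert(shrink_threshold  == 81);
}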
+ unsigned int num_table_copies() const { return settings.num_ht_copies(); } + +private: + // This is used as a tag for the copy constructor, saying to destroy its + // arg We have two ways of destructively copying: with potentially growing + // the hashtable as we copy, and without. To make sure the outside world + // can't do a destructive copy, we make the typename private. + // ----------------------------------------------------------------------- + enum MoveDontCopyT {MoveDontCopy, MoveDontGrow}; + + // creating iterators from sparsetable::ne_iterators + // ------------------------------------------------- + iterator _mk_iterator(ne_it it) const { return it; } + const_iterator _mk_const_iterator(cne_it it) const { return it; } + destructive_iterator _mk_destructive_iterator(dest_it it) const { return it; } + +public: + size_type size() const { return table.num_nonempty(); } + size_type max_size() const { return table.max_size(); } + bool empty() const { return size() == 0; } + size_type bucket_count() const { return table.size(); } + size_type max_bucket_count() const { return max_size(); } + // These are tr1 methods. Their idea of 'bucket' doesn't map well to + // what we do. We just say every bucket has 0 or 1 items in it. + size_type bucket_size(size_type i) const + { + return (size_type)(begin(i) == end(i) ? 0 : 1); + } + +private: + // Because of the above, size_type(-1) is never legal; use it for errors + // --------------------------------------------------------------------- + static const size_type ILLEGAL_BUCKET = size_type(-1); + + // Used after a string of deletes. Returns true if we actually shrunk. + // TODO(csilvers): take a delta so we can take into account inserts + // done after shrinking. Maybe make part of the Settings class? + // -------------------------------------------------------------------- + bool _maybe_shrink() + { + assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two + assert(bucket_count() >= HT_MIN_BUCKETS); + bool retval = false; + + // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, + // we'll never shrink until you get relatively big, and we'll never + // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something + // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will + // shrink us down to HT_MIN_BUCKETS buckets, which is too small. + // --------------------------------------------------------------- + const size_type num_remain = table.num_nonempty(); + const size_type shrink_threshold = settings.shrink_threshold(); + if (shrink_threshold > 0 && num_remain < shrink_threshold && + bucket_count() > HT_DEFAULT_STARTING_BUCKETS) + { + const float shrink_factor = settings.shrink_factor(); + size_type sz = (size_type)(bucket_count() / 2); // find how much we should shrink + while (sz > HT_DEFAULT_STARTING_BUCKETS && + num_remain < static_cast<size_type>(sz * shrink_factor)) + { + sz /= 2; // stay a power of 2 + } + sparse_hashtable tmp(MoveDontCopy, *this, sz); + swap(tmp); // now we are tmp + retval = true; + } + settings.set_consider_shrink(false); // because we just considered it + return retval; + } + + // We'll let you resize a hashtable -- though this makes us copy all! + // When you resize, you say, "make it big enough for this many more elements" + // Returns true if we actually resized, false if size was already ok. 
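// --------------------------------------------------------------------------
// A sketch of the halving loop in _maybe_shrink() above: starting from half
// the current bucket count, keep halving (so the size stays a power of two)
// while the surviving items still fit under the shrink factor, but never go
// below HT_DEFAULT_STARTING_BUCKETS. The constants here are illustrative:
// --------------------------------------------------------------------------
#include <cstddef>
#include <cassert>

int main()
{
    const double      shrink_factor = 0.32;      // 0.4 * HT_OCCUPANCY_PCT/100
    const std::size_t min_buckets   = 32;        // HT_DEFAULT_STARTING_BUCKETS
    std::size_t       num_remain    = 10;        // items left after deletes
    std::size_t       sz            = 1024 / 2;  // start from half the table

    while (sz > min_buckets && num_remain < (std::size_t)(sz * shrink_factor))
        sz /= 2;

    assert(sz == 32);   // 10 items shrink the table all the way to the floor
}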
+    // --------------------------------------------------------------------------
+    bool _resize_delta(size_type delta)
+    {
+        bool did_resize = false;
+        if (settings.consider_shrink())
+        {
+            // see if lots of deletes happened
+            if (_maybe_shrink())
+                did_resize = true;
+        }
+        if (table.num_nonempty() >=
+            (std::numeric_limits<size_type>::max)() - delta)
+        {
+            throw_exception(std::length_error("resize overflow"));
+        }
+
+        size_type num_occupied = (size_type)(table.num_nonempty() + num_deleted);
+
+        if (bucket_count() >= HT_MIN_BUCKETS &&
+            (num_occupied + delta) <= settings.enlarge_threshold())
+            return did_resize;                        // we're ok as we are
+
+        // Sometimes, we need to resize just to get rid of all the
+        // "deleted" buckets that are clogging up the hashtable. So when
+        // deciding whether to resize, count the deleted buckets (which
+        // are currently taking up room).
+        // -------------------------------------------------------------
+        const size_type needed_size =
+            settings.min_buckets((size_type)(num_occupied + delta), (size_type)0);
+
+        if (needed_size <= bucket_count())            // we have enough buckets
+            return did_resize;
+
+        size_type resize_to = settings.min_buckets((size_type)(num_occupied + delta), bucket_count());
+
+        if (resize_to < needed_size &&                // may double resize_to
+            resize_to < (std::numeric_limits<size_type>::max)() / 2)
+        {
+            // This situation means that we have enough deleted elements,
+            // that once we purge them, we won't actually have needed to
+            // grow. But we may want to grow anyway: if we just purge one
+            // element, say, we'll have to grow anyway next time we
+            // insert. Might as well grow now, since we're already going
+            // through the trouble of copying (in order to purge the
+            // deleted elements).
+            const size_type target =
+                static_cast<size_type>(settings.shrink_size((size_type)(resize_to*2)));
+            if (table.num_nonempty() + delta >= target)
+            {
+                // Good, we won't be below the shrink threshold even if we double.
+ resize_to *= 2; + } + } + + sparse_hashtable tmp(MoveDontCopy, *this, resize_to); + swap(tmp); // now we are tmp + return true; + } + + // Used to actually do the rehashing when we grow/shrink a hashtable + // ----------------------------------------------------------------- + void _copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted) + { + clear(); // clear table, set num_deleted to 0 + + // If we need to change the size of our table, do it now + const size_type resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); + + if (resize_to > bucket_count()) + { + // we don't have enough buckets + table.resize(resize_to); // sets the number of buckets + settings.reset_thresholds(bucket_count()); + } + + // We use a normal iterator to get bcks from ht + // We could use insert() here, but since we know there are + // no duplicates, we can be more efficient + assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two + for (const_iterator it = ht.begin(); it != ht.end(); ++it) + { + size_type num_probes = 0; // how many times we've probed + size_type bucknum; + const size_type bucket_count_minus_one = bucket_count() - 1; + for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; + table.test(bucknum); // table.test() OK since no erase() + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) + { + ++num_probes; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + table.set(bucknum, *it); // copies the value to here + } + settings.inc_num_ht_copies(); + } + + // Implementation is like _copy_from, but it destroys the table of the + // "from" guy by freeing sparsetable memory as we iterate. This is + // useful in resizing, since we're throwing away the "from" guy anyway. + // -------------------------------------------------------------------- + void _move_from(MoveDontCopyT mover, sparse_hashtable &ht, + size_type min_buckets_wanted) + { + clear(); + + // If we need to change the size of our table, do it now + size_type resize_to; + if (mover == MoveDontGrow) + resize_to = ht.bucket_count(); // keep same size as old ht + else // MoveDontCopy + resize_to = settings.min_buckets(ht.size(), min_buckets_wanted); + if (resize_to > bucket_count()) + { + // we don't have enough buckets + table.resize(resize_to); // sets the number of buckets + settings.reset_thresholds(bucket_count()); + } + + // We use a normal iterator to get bcks from ht + // We could use insert() here, but since we know there are + // no duplicates, we can be more efficient + assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two + const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1); + + // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM(): + for (destructive_iterator it = ht.destructive_begin(); + it != ht.destructive_end(); ++it) + { + size_type num_probes = 0; + size_type bucknum; + for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; + table.test(bucknum); // table.test() OK since no erase() + bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & (bucket_count()-1))) + { + ++num_probes; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + table.move(bucknum, *it); // moves the value to here + } + settings.inc_num_ht_copies(); + } + + + // Required by the spec for hashed associative container +public: + // Though the docs say this should be num_buckets, I think it's much + // more useful as num_elements. 
As a special feature, calling with + // req_elements==0 will cause us to shrink if we can, saving space. + // ----------------------------------------------------------------- + void resize(size_type req_elements) + { + // resize to this or larger + if (settings.consider_shrink() || req_elements == 0) + _maybe_shrink(); + if (req_elements > table.num_nonempty()) // we only grow + _resize_delta((size_type)(req_elements - table.num_nonempty())); + } + + // Get and change the value of shrink_factor and enlarge_factor. The + // description at the beginning of this file explains how to choose + // the values. Setting the shrink parameter to 0.0 ensures that the + // table never shrinks. + // ------------------------------------------------------------------ + void get_resizing_parameters(float* shrink, float* grow) const + { + *shrink = settings.shrink_factor(); + *grow = settings.enlarge_factor(); + } + + float get_shrink_factor() const { return settings.shrink_factor(); } + float get_enlarge_factor() const { return settings.enlarge_factor(); } + + void set_resizing_parameters(float shrink, float grow) + { + settings.set_resizing_parameters(shrink, grow); + settings.reset_thresholds(bucket_count()); + } + + void set_shrink_factor(float shrink) + { + set_resizing_parameters(shrink, get_enlarge_factor()); + } + + void set_enlarge_factor(float grow) + { + set_resizing_parameters(get_shrink_factor(), grow); + } + + // CONSTRUCTORS -- as required by the specs, we take a size, + // but also let you specify a hashfunction, key comparator, + // and key extractor. We also define a copy constructor and =. + // DESTRUCTOR -- the default is fine, surprisingly. + // ------------------------------------------------------------ + explicit sparse_hashtable(size_type expected_max_items_in_table = 0, + const HashFcn& hf = HashFcn(), + const EqualKey& eql = EqualKey(), + const ExtractKey& ext = ExtractKey(), + const SetKey& set = SetKey(), + const allocator_type& alloc = allocator_type()) + : settings(hf), + key_info(ext, set, eql), + num_deleted(0), + table((expected_max_items_in_table == 0 + ? HT_DEFAULT_STARTING_BUCKETS + : settings.min_buckets(expected_max_items_in_table, 0)), + alloc) + { + settings.reset_thresholds(bucket_count()); + } + + // As a convenience for resize(), we allow an optional second argument + // which lets you make this new hashtable a different size than ht. + // We also provide a mechanism of saying you want to "move" the ht argument + // into us instead of copying. 
+ // ------------------------------------------------------------------------ + sparse_hashtable(const sparse_hashtable& ht, + size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) + : settings(ht.settings), + key_info(ht.key_info), + num_deleted(0), + table(0) + { + settings.reset_thresholds(bucket_count()); + _copy_from(ht, min_buckets_wanted); + } + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + + sparse_hashtable(sparse_hashtable&& o, const allocator_type& alloc = allocator_type()) : + settings(o.settings), + key_info(o.key_info), + num_deleted(0), + table(HT_DEFAULT_STARTING_BUCKETS, alloc) + { + settings.reset_thresholds(bucket_count()); + this->swap(o); + } + + sparse_hashtable& operator=(sparse_hashtable&& o) + { + this->swap(o); + return *this; + } +#endif + + sparse_hashtable(MoveDontCopyT mover, + sparse_hashtable& ht, + size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) + : settings(ht.settings), + key_info(ht.key_info), + num_deleted(0), + table(min_buckets_wanted, ht.table.get_allocator()) + //table(min_buckets_wanted) + { + settings.reset_thresholds(bucket_count()); + _move_from(mover, ht, min_buckets_wanted); + } + + sparse_hashtable& operator=(const sparse_hashtable& ht) + { + if (&ht == this) + return *this; // don't copy onto ourselves + settings = ht.settings; + key_info = ht.key_info; + num_deleted = ht.num_deleted; + + // _copy_from() calls clear and sets num_deleted to 0 too + _copy_from(ht, HT_MIN_BUCKETS); + + // we purposefully don't copy the allocator, which may not be copyable + return *this; + } + + // Many STL algorithms use swap instead of copy constructors + void swap(sparse_hashtable& ht) + { + using std::swap; + + swap(settings, ht.settings); + swap(key_info, ht.key_info); + swap(num_deleted, ht.num_deleted); + table.swap(ht.table); + settings.reset_thresholds(bucket_count()); // also resets consider_shrink + ht.settings.reset_thresholds(ht.bucket_count()); + // we purposefully don't swap the allocator, which may not be swap-able + } + + // It's always nice to be able to clear a table without deallocating it + void clear() + { + if (!empty() || num_deleted != 0) + { + table.clear(); + table = Table(HT_DEFAULT_STARTING_BUCKETS, table.get_allocator()); + } + settings.reset_thresholds(bucket_count()); + num_deleted = 0; + } + + // LOOKUP ROUTINES +private: + + enum pos_type { pt_empty = 0, pt_erased, pt_full }; + // ------------------------------------------------------------------- + class Position + { + public: + + Position() : _t(pt_empty) {} + Position(pos_type t, size_type idx) : _t(t), _idx(idx) {} + + pos_type _t; + size_type _idx; + }; + + // Returns a pair: + // - 'first' is a code, 2 if key already present, 0 or 1 otherwise. + // - 'second' is a position, where the key should go + // Note: because of deletions where-to-insert is not trivial: it's the + // first deleted bucket we see, as long as we don't find the key later + // ------------------------------------------------------------------- + Position _find_position(const key_type &key) const + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1); + size_type bucknum = hash(key) & bucket_count_minus_one; + Position pos; + + while (1) + { + // probe until something happens + // ----------------------------- + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + { + // bucket is empty => key not present + return pos._t ? 
pos : Position(pt_empty, bucknum); + } + else if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + return Position(pt_full, bucknum); + } + else if (pos._t == pt_empty) + { + // first erased position + pos._t = pt_erased; + pos._idx = bucknum; + } + + ++num_probes; // we're doing another probe + bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one); + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + +public: + // I hate to duplicate find() like that, but it is + // significantly faster to not have the intermediate pair + // ------------------------------------------------------------------ + iterator find(const key_type& key) + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + return end(); // bucket is empty + if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + return grp_pos.get_iter(ref); + } + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + // Wish I could avoid the duplicate find() const and non-const. + // ------------------------------------------------------------ + const_iterator find(const key_type& key) const + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + return end(); // bucket is empty + else if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + return _mk_const_iterator(table.get_iter(bucknum, &ref)); + } + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + // This is a tr1 method: the bucket a given key is in, or what bucket + // it would be put in, if it were to be inserted. Shrug. + // ------------------------------------------------------------------ + size_type bucket(const key_type& key) const + { + Position pos = _find_position(key); + return pos._idx; + } + + // Counts how many elements have key key. For maps, it's either 0 or 1. + // --------------------------------------------------------------------- + size_type count(const key_type &key) const + { + Position pos = _find_position(key); + return (size_type)(pos._t == pt_full ? 1 : 0); + } + + // Likewise, equal_range doesn't really make sense for us. Oh well. 
+ // ----------------------------------------------------------------- + std::pair<iterator,iterator> equal_range(const key_type& key) + { + iterator pos = find(key); // either an iterator or end + if (pos == end()) + return std::pair<iterator,iterator>(pos, pos); + else + { + const iterator startpos = pos++; + return std::pair<iterator,iterator>(startpos, pos); + } + } + + std::pair<const_iterator,const_iterator> equal_range(const key_type& key) const + { + const_iterator pos = find(key); // either an iterator or end + if (pos == end()) + return std::pair<const_iterator,const_iterator>(pos, pos); + else + { + const const_iterator startpos = pos++; + return std::pair<const_iterator,const_iterator>(startpos, pos); + } + } + + + // INSERTION ROUTINES +private: + // Private method used by insert_noresize and find_or_insert. + template <class T> + reference _insert_at(T& obj, size_type pos, bool erased) + { + if (size() >= max_size()) + { + throw_exception(std::length_error("insert overflow")); + } + if (erased) + { + assert(num_deleted); + --num_deleted; + } + return table.set(pos, obj); + } + + // If you know *this is big enough to hold obj, use this routine + template <class T> + std::pair<iterator, bool> _insert_noresize(T& obj) + { + Position pos = _find_position(get_key(obj)); + bool already_there = (pos._t == pt_full); + + if (!already_there) + { + reference ref(_insert_at(obj, pos._idx, pos._t == pt_erased)); + return std::pair<iterator, bool>(_mk_iterator(table.get_iter(pos._idx, &ref)), true); + } + return std::pair<iterator,bool>(_mk_iterator(table.get_iter(pos._idx)), false); + } + + // Specializations of insert(it, it) depending on the power of the iterator: + // (1) Iterator supports operator-, resize before inserting + template <class ForwardIterator> + void _insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag /*unused*/) + { + int64_t dist = std::distance(f, l); + if (dist < 0 || static_cast<size_t>(dist) >= (std::numeric_limits<size_type>::max)()) + throw_exception(std::length_error("insert-range overflow")); + + _resize_delta(static_cast<size_type>(dist)); + + for (; dist > 0; --dist, ++f) + _insert_noresize(*f); + } + + // (2) Arbitrary iterator, can't tell how much to resize + template <class InputIterator> + void _insert(InputIterator f, InputIterator l, std::input_iterator_tag /*unused*/) + { + for (; f != l; ++f) + _insert(*f); + } + +public: + +#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template <class... Args> + std::pair<iterator, bool> emplace(Args&&... 
args) + { + _resize_delta(1); + value_type obj(std::forward<Args>(args)...); + return _insert_noresize(obj); + } +#endif + + // This is the normal insert routine, used by the outside world + std::pair<iterator, bool> insert(const_reference obj) + { + _resize_delta(1); // adding an object, grow if need be + return _insert_noresize(obj); + } + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + template< class P > + std::pair<iterator, bool> insert(P &&obj) + { + _resize_delta(1); // adding an object, grow if need be + value_type val(std::forward<P>(obj)); + return _insert_noresize(val); + } +#endif + + // When inserting a lot at a time, we specialize on the type of iterator + template <class InputIterator> + void insert(InputIterator f, InputIterator l) + { + // specializes on iterator type + _insert(f, l, + typename std::iterator_traits<InputIterator>::iterator_category()); + } + + // DefaultValue is a functor that takes a key and returns a value_type + // representing the default value to be inserted if none is found. +#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template <class DefaultValue, class KT> + value_type& find_or_insert(KT&& key) +#else + template <class DefaultValue> + value_type& find_or_insert(const key_type& key) +#endif + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + DefaultValue default_value; + size_type erased_pos = 0; + bool erased = false; + + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + { + // not found +#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + auto&& def(default_value(std::forward<KT>(key))); +#else + value_type def(default_value(key)); +#endif + if (_resize_delta(1)) + { + // needed to rehash to make room + // Since we resized, we can't use pos, so recalculate where to insert. + return *(_insert_noresize(def).first); + } + else + { + // no need to rehash, insert right here + return _insert_at(def, erased ? 
erased_pos : bucknum, erased); + } + } + if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + return ref; + } + else if (!erased) + { + // first erased position + erased_pos = bucknum; + erased = true; + } + + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + size_type erase(const key_type& key) + { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + + while (1) // probe until something happens + { + typename Table::GrpPos grp_pos(table, bucknum); + + if (!grp_pos.test_strict()) + return 0; // bucket is empty, we deleted nothing + if (grp_pos.test()) + { + reference ref(grp_pos.unsafe_get()); + + if (equals(key, get_key(ref))) + { + grp_pos.erase(table); + ++num_deleted; + settings.set_consider_shrink(true); // will think about shrink after next insert + return 1; // because we deleted one thing + } + } + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + const_iterator erase(const_iterator pos) + { + if (pos == cend()) + return cend(); // sanity check + + const_iterator nextpos = table.erase(pos); + ++num_deleted; + settings.set_consider_shrink(true); + return nextpos; + } + + const_iterator erase(const_iterator f, const_iterator l) + { + if (f == cend()) + return cend(); // sanity check + + size_type num_before = table.num_nonempty(); + const_iterator nextpos = table.erase(f, l); + num_deleted += num_before - table.num_nonempty(); + settings.set_consider_shrink(true); + return nextpos; + } + + // Deleted key routines - just to keep google test framework happy + // we don't actually use the deleted key + // --------------------------------------------------------------- + void set_deleted_key(const key_type&) + { + } + + void clear_deleted_key() + { + } + + bool operator==(const sparse_hashtable& ht) const + { + if (this == &ht) + return true; + + if (size() != ht.size()) + return false; + + for (const_iterator it = begin(); it != end(); ++it) + { + const_iterator it2 = ht.find(get_key(*it)); + if ((it2 == ht.end()) || (*it != *it2)) + return false; + } + + return true; + } + + bool operator!=(const sparse_hashtable& ht) const + { + return !(*this == ht); + } + + + // I/O + // We support reading and writing hashtables to disk. NOTE that + // this only stores the hashtable metadata, not the stuff you've + // actually put in the hashtable! Alas, since I don't know how to + // write a hasher or key_equal, you have to make sure everything + // but the table is the same. We compact before writing. + // + // The OUTPUT type needs to support a Write() operation. File and + // OutputBuffer are appropriate types to pass in. + // + // The INPUT type needs to support a Read() operation. File and + // InputBuffer are appropriate types to pass in. 
+ // ------------------------------------------------------------- + template <typename OUTPUT> + bool write_metadata(OUTPUT *fp) + { + return table.write_metadata(fp); + } + + template <typename INPUT> + bool read_metadata(INPUT *fp) + { + num_deleted = 0; // since we got rid before writing + const bool result = table.read_metadata(fp); + settings.reset_thresholds(bucket_count()); + return result; + } + + // Only meaningful if value_type is a POD. + template <typename OUTPUT> + bool write_nopointer_data(OUTPUT *fp) + { + return table.write_nopointer_data(fp); + } + + // Only meaningful if value_type is a POD. + template <typename INPUT> + bool read_nopointer_data(INPUT *fp) + { + return table.read_nopointer_data(fp); + } + + // INPUT and OUTPUT must be either a FILE, *or* a C++ stream + // (istream, ostream, etc) *or* a class providing + // Read(void*, size_t) and Write(const void*, size_t) + // (respectively), which writes a buffer into a stream + // (which the INPUT/OUTPUT instance presumably owns). + + typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer; + + // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) + template <typename ValueSerializer, typename OUTPUT> + bool serialize(ValueSerializer serializer, OUTPUT *fp) + { + return table.serialize(serializer, fp); + } + + // ValueSerializer: a functor. operator()(INPUT*, value_type*) + template <typename ValueSerializer, typename INPUT> + bool unserialize(ValueSerializer serializer, INPUT *fp) + { + num_deleted = 0; // since we got rid before writing + const bool result = table.unserialize(serializer, fp); + settings.reset_thresholds(bucket_count()); + return result; + } + +private: + + // Package templated functors with the other types to eliminate memory + // needed for storing these zero-size operators. Since ExtractKey and + // hasher's operator() might have the same function signature, they + // must be packaged in different classes. + // ------------------------------------------------------------------------- + struct Settings : + sparsehash_internal::sh_hashtable_settings<key_type, hasher, + size_type, HT_MIN_BUCKETS> + { + explicit Settings(const hasher& hf) + : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type, + HT_MIN_BUCKETS> + (hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {} + }; + + // KeyInfo stores delete key and packages zero-size functors: + // ExtractKey and SetKey. 
+ // --------------------------------------------------------- + class KeyInfo : public ExtractKey, public SetKey, public EqualKey + { + public: + KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq) + : ExtractKey(ek), SetKey(sk), EqualKey(eq) + { + } + + // We want to return the exact same type as ExtractKey: Key or const Key& + typename ExtractKey::result_type get_key(const_reference v) const + { + return ExtractKey::operator()(v); + } + + bool equals(const key_type& a, const key_type& b) const + { + return EqualKey::operator()(a, b); + } + }; + + // Utility functions to access the templated operators + size_t hash(const key_type& v) const + { + return settings.hash(v); + } + + bool equals(const key_type& a, const key_type& b) const + { + return key_info.equals(a, b); + } + + typename ExtractKey::result_type get_key(const_reference v) const + { + return key_info.get_key(v); + } + +private: + // Actual data + // ----------- + Settings settings; + KeyInfo key_info; + size_type num_deleted; + Table table; // holds num_buckets and num_elements too +}; + +#undef JUMP_ + +// ----------------------------------------------------------------------------- +template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> +const typename sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type +sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET; + +// How full we let the table get before we resize. Knuth says .8 is +// good -- higher causes us to probe too much, though saves memory +// ----------------------------------------------------------------------------- +template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> +const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 50; + +// How empty we let the table get before we resize lower. +// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing +// ----------------------------------------------------------------------------- +template <class V, class K, class HF, class ExK, class SetK, class EqK, class A> +const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT += static_cast<int>(0.4 * + sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT); + + +// ---------------------------------------------------------------------- +// S P A R S E _ H A S H _ M A P +// ---------------------------------------------------------------------- +template <class Key, class T, + class HashFcn = spp_hash<Key>, + class EqualKey = std::equal_to<Key>, + class Alloc = SPP_DEFAULT_ALLOCATOR<std::pair<const Key, T> > > +class sparse_hash_map +{ +public: + typedef typename std::pair<const Key, T> value_type; + +private: + // Apparently select1st is not stl-standard, so we define our own + struct SelectKey + { + typedef const Key& result_type; + + inline const Key& operator()(const value_type& p) const + { + return p.first; + } + }; + + struct SetKey + { + inline void operator()(value_type* value, const Key& new_key) const + { + *const_cast<Key*>(&value->first) = new_key; + } + }; + + // For operator[]. 
+ struct DefaultValue + { +#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template <class KT> + inline value_type operator()(KT&& key) const + { + return { std::forward<KT>(key), T() }; + } +#else + inline value_type operator()(const Key& key) const + { + return std::make_pair(key, T()); + } +#endif + }; + + // The actual data + typedef sparse_hashtable<value_type, Key, HashFcn, SelectKey, + SetKey, EqualKey, Alloc> ht; + +public: + typedef typename ht::key_type key_type; + typedef T data_type; + typedef T mapped_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef Alloc allocator_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::pointer pointer; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::reference reference; + typedef typename ht::const_reference const_reference; + + typedef typename ht::iterator iterator; + typedef typename ht::const_iterator const_iterator; + typedef typename ht::local_iterator local_iterator; + typedef typename ht::const_local_iterator const_local_iterator; + + // Iterator functions + iterator begin() { return rep.begin(); } + iterator end() { return rep.end(); } + const_iterator begin() const { return rep.cbegin(); } + const_iterator end() const { return rep.cend(); } + const_iterator cbegin() const { return rep.cbegin(); } + const_iterator cend() const { return rep.cend(); } + + // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. + local_iterator begin(size_type i) { return rep.begin(i); } + local_iterator end(size_type i) { return rep.end(i); } + const_local_iterator begin(size_type i) const { return rep.begin(i); } + const_local_iterator end(size_type i) const { return rep.end(i); } + const_local_iterator cbegin(size_type i) const { return rep.cbegin(i); } + const_local_iterator cend(size_type i) const { return rep.cend(i); } + + // Accessor functions + // ------------------ + allocator_type get_allocator() const { return rep.get_allocator(); } + hasher hash_funct() const { return rep.hash_funct(); } + hasher hash_function() const { return hash_funct(); } + key_equal key_eq() const { return rep.key_eq(); } + + + // Constructors + // ------------ + explicit sparse_hash_map(size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(n, hf, eql, SelectKey(), SetKey(), alloc) + { + } + + explicit sparse_hash_map(const allocator_type& alloc) : + rep(0, hasher(), key_equal(), SelectKey(), SetKey(), alloc) + { + } + + sparse_hash_map(size_type n, const allocator_type& alloc) : + rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc) + { + } + + sparse_hash_map(size_type n, const hasher& hf, const allocator_type& alloc) : + rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc) + { + } + + template <class InputIterator> + sparse_hash_map(InputIterator f, InputIterator l, + size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(n, hf, eql, SelectKey(), SetKey(), alloc) + { + rep.insert(f, l); + } + + template <class InputIterator> + sparse_hash_map(InputIterator f, InputIterator l, + size_type n, const allocator_type& alloc) + : rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc) + { + rep.insert(f, l); + } + + template <class InputIterator> + sparse_hash_map(InputIterator f, InputIterator l, + size_type n, 
const hasher& hf, const allocator_type& alloc) + : rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc) + { + rep.insert(f, l); + } + + sparse_hash_map(const sparse_hash_map &o) : + rep(o.rep) + {} + + sparse_hash_map(const sparse_hash_map &o, + const allocator_type& alloc) : + rep(o.rep, alloc) + {} + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + sparse_hash_map(sparse_hash_map &&o) : + rep(std::move(o.rep)) + {} + + sparse_hash_map(sparse_hash_map &&o, + const allocator_type& alloc) : + rep(std::move(o.rep), alloc) + {} + + sparse_hash_map& operator=(sparse_hash_map &&o) = default; +#endif + +#if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST) + sparse_hash_map(std::initializer_list<value_type> init, + size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(n, hf, eql, SelectKey(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_map(std::initializer_list<value_type> init, + size_type n, const allocator_type& alloc) : + rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_map(std::initializer_list<value_type> init, + size_type n, const hasher& hf, const allocator_type& alloc) : + rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_map& operator=(std::initializer_list<value_type> init) + { + rep.clear(); + rep.insert(init.begin(), init.end()); + return *this; + } + + void insert(std::initializer_list<value_type> init) + { + rep.insert(init.begin(), init.end()); + } +#endif + + sparse_hash_map& operator=(const sparse_hash_map &o) + { + rep = o.rep; + return *this; + } + + void clear() { rep.clear(); } + void swap(sparse_hash_map& hs) { rep.swap(hs.rep); } + + // Functions concerning size + // ------------------------- + size_type size() const { return rep.size(); } + size_type max_size() const { return rep.max_size(); } + bool empty() const { return rep.empty(); } + size_type bucket_count() const { return rep.bucket_count(); } + size_type max_bucket_count() const { return rep.max_bucket_count(); } + + size_type bucket_size(size_type i) const { return rep.bucket_size(i); } + size_type bucket(const key_type& key) const { return rep.bucket(key); } + float load_factor() const { return size() * 1.0f / bucket_count(); } + + float max_load_factor() const { return rep.get_enlarge_factor(); } + void max_load_factor(float grow) { rep.set_enlarge_factor(grow); } + + float min_load_factor() const { return rep.get_shrink_factor(); } + void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); } + + void set_resizing_parameters(float shrink, float grow) + { + rep.set_resizing_parameters(shrink, grow); + } + + void resize(size_type cnt) { rep.resize(cnt); } + void rehash(size_type cnt) { resize(cnt); } // c++11 name + void reserve(size_type cnt) { resize(cnt); } // c++11 + + // Lookup + // ------ + iterator find(const key_type& key) { return rep.find(key); } + const_iterator find(const key_type& key) const { return rep.find(key); } + bool contains(const key_type& key) const { return rep.find(key) != rep.end(); } + +#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template <class KT> + mapped_type& operator[](KT&& key) + { + return rep.template find_or_insert<DefaultValue>(std::forward<KT>(key)).second; + } +#else + mapped_type& operator[](const key_type& key) + { + return rep.template find_or_insert<DefaultValue>(key).second; + } +#endif + + 
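+    // A minimal usage sketch for the map interface above (illustrative comment
+    // only, not part of the upstream header; assumes "sparsepp/spp.h" is on the
+    // include path and the default spp namespace):
+    //
+    //     spp::sparse_hash_map<std::string, int> counts;
+    //     counts["apple"] += 1;                    // default-inserts {key, T()} via find_or_insert
+    //     counts.insert(std::make_pair(std::string("pear"), 2));
+    //     bool have = counts.contains("apple");    // true
+    //     size_t n = counts.size();                // 2
+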
size_type count(const key_type& key) const { return rep.count(key); } + + std::pair<iterator, iterator> + equal_range(const key_type& key) { return rep.equal_range(key); } + + std::pair<const_iterator, const_iterator> + equal_range(const key_type& key) const { return rep.equal_range(key); } + + mapped_type& at(const key_type& key) + { + iterator it = rep.find(key); + if (it == rep.end()) + throw_exception(std::out_of_range("at: key not present")); + return it->second; + } + + const mapped_type& at(const key_type& key) const + { + const_iterator it = rep.find(key); + if (it == rep.cend()) + throw_exception(std::out_of_range("at: key not present")); + return it->second; + } + +#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template <class... Args> + std::pair<iterator, bool> emplace(Args&&... args) + { + return rep.emplace(std::forward<Args>(args)...); + } + + template <class... Args> + iterator emplace_hint(const_iterator , Args&&... args) + { + return rep.emplace(std::forward<Args>(args)...).first; + } +#endif + + // Insert + // ------ + std::pair<iterator, bool> + insert(const value_type& obj) { return rep.insert(obj); } + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + template< class P > + std::pair<iterator, bool> insert(P&& obj) { return rep.insert(std::forward<P>(obj)); } +#endif + + template <class InputIterator> + void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } + + void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } + + iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; } + iterator insert(const_iterator /*unused*/, const value_type& obj) { return insert(obj).first; } + + // Deleted key routines - just to keep google test framework happy + // we don't actually use the deleted key + // --------------------------------------------------------------- + void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } + void clear_deleted_key() { rep.clear_deleted_key(); } + key_type deleted_key() const { return rep.deleted_key(); } + + // Erase + // ----- + size_type erase(const key_type& key) { return rep.erase(key); } + iterator erase(iterator it) { return rep.erase(it); } + iterator erase(iterator f, iterator l) { return rep.erase(f, l); } + iterator erase(const_iterator it) { return rep.erase(it); } + iterator erase(const_iterator f, const_iterator l){ return rep.erase(f, l); } + + // Comparison + // ---------- + bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; } + bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; } + + + // I/O -- this is an add-on for writing metainformation to disk + // + // For maximum flexibility, this does not assume a particular + // file type (though it will probably be a FILE *). We just pass + // the fp through to rep. + + // If your keys and values are simple enough, you can pass this + // serializer to serialize()/unserialize(). "Simple enough" means + // value_type is a POD type that contains no pointers. Note, + // however, we don't try to normalize endianness. + // --------------------------------------------------------------- + typedef typename ht::NopointerSerializer NopointerSerializer; + + // serializer: a class providing operator()(OUTPUT*, const value_type&) + // (writing value_type to OUTPUT). You can specify a + // NopointerSerializer object if appropriate (see above). 
+ // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a + // pointer to a class providing size_t Write(const void*, size_t), + // which writes a buffer into a stream (which fp presumably + // owns) and returns the number of bytes successfully written. + // Note basic_ostream<not_char> is not currently supported. + // --------------------------------------------------------------- + template <typename ValueSerializer, typename OUTPUT> + bool serialize(ValueSerializer serializer, OUTPUT* fp) + { + return rep.serialize(serializer, fp); + } + + // serializer: a functor providing operator()(INPUT*, value_type*) + // (reading from INPUT and into value_type). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a + // pointer to a class providing size_t Read(void*, size_t), + // which reads into a buffer from a stream (which fp presumably + // owns) and returns the number of bytes successfully read. + // Note basic_istream<not_char> is not currently supported. + // NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer + // may need to do a const cast in order to fill in the key. + // NOTE: if Key or T are not POD types, the serializer MUST use + // placement-new to initialize their values, rather than a normal + // equals-assignment or similar. (The value_type* passed into the + // serializer points to garbage memory.) + // --------------------------------------------------------------- + template <typename ValueSerializer, typename INPUT> + bool unserialize(ValueSerializer serializer, INPUT* fp) + { + return rep.unserialize(serializer, fp); + } + + // The four methods below are DEPRECATED. + // Use serialize() and unserialize() for new code. + // ----------------------------------------------- + template <typename OUTPUT> + bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); } + + template <typename INPUT> + bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); } + + template <typename OUTPUT> + bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); } + + template <typename INPUT> + bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); } + + +private: + // The actual data + // --------------- + ht rep; +}; + +// ---------------------------------------------------------------------- +// S P A R S E _ H A S H _ S E T +// ---------------------------------------------------------------------- + +template <class Value, + class HashFcn = spp_hash<Value>, + class EqualKey = std::equal_to<Value>, + class Alloc = SPP_DEFAULT_ALLOCATOR<Value> > +class sparse_hash_set +{ +private: + // Apparently identity is not stl-standard, so we define our own + struct Identity + { + typedef const Value& result_type; + inline const Value& operator()(const Value& v) const { return v; } + }; + + struct SetKey + { + inline void operator()(Value* value, const Value& new_key) const + { + *value = new_key; + } + }; + + typedef sparse_hashtable<Value, Value, HashFcn, Identity, SetKey, + EqualKey, Alloc> ht; + +public: + typedef typename ht::key_type key_type; + typedef typename ht::value_type value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef Alloc allocator_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::const_pointer pointer; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::const_reference 
reference; + typedef typename ht::const_reference const_reference; + + typedef typename ht::const_iterator iterator; + typedef typename ht::const_iterator const_iterator; + typedef typename ht::const_local_iterator local_iterator; + typedef typename ht::const_local_iterator const_local_iterator; + + + // Iterator functions -- recall all iterators are const + iterator begin() const { return rep.begin(); } + iterator end() const { return rep.end(); } + const_iterator cbegin() const { return rep.cbegin(); } + const_iterator cend() const { return rep.cend(); } + + // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. + local_iterator begin(size_type i) const { return rep.begin(i); } + local_iterator end(size_type i) const { return rep.end(i); } + local_iterator cbegin(size_type i) const { return rep.cbegin(i); } + local_iterator cend(size_type i) const { return rep.cend(i); } + + + // Accessor functions + // ------------------ + allocator_type get_allocator() const { return rep.get_allocator(); } + hasher hash_funct() const { return rep.hash_funct(); } + hasher hash_function() const { return hash_funct(); } // tr1 name + key_equal key_eq() const { return rep.key_eq(); } + + + // Constructors + // ------------ + explicit sparse_hash_set(size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) : + rep(n, hf, eql, Identity(), SetKey(), alloc) + { + } + + explicit sparse_hash_set(const allocator_type& alloc) : + rep(0, hasher(), key_equal(), Identity(), SetKey(), alloc) + { + } + + sparse_hash_set(size_type n, const allocator_type& alloc) : + rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc) + { + } + + sparse_hash_set(size_type n, const hasher& hf, + const allocator_type& alloc) : + rep(n, hf, key_equal(), Identity(), SetKey(), alloc) + { + } + + template <class InputIterator> + sparse_hash_set(InputIterator f, InputIterator l, + size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(n, hf, eql, Identity(), SetKey(), alloc) + { + rep.insert(f, l); + } + + template <class InputIterator> + sparse_hash_set(InputIterator f, InputIterator l, + size_type n, const allocator_type& alloc) + : rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc) + { + rep.insert(f, l); + } + + template <class InputIterator> + sparse_hash_set(InputIterator f, InputIterator l, + size_type n, const hasher& hf, const allocator_type& alloc) + : rep(n, hf, key_equal(), Identity(), SetKey(), alloc) + { + rep.insert(f, l); + } + + sparse_hash_set(const sparse_hash_set &o) : + rep(o.rep) + {} + + sparse_hash_set(const sparse_hash_set &o, + const allocator_type& alloc) : + rep(o.rep, alloc) + {} + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + sparse_hash_set(sparse_hash_set &&o) : + rep(std::move(o.rep)) + {} + + sparse_hash_set(sparse_hash_set &&o, + const allocator_type& alloc) : + rep(std::move(o.rep), alloc) + {} +#endif + +#if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST) + sparse_hash_set(std::initializer_list<value_type> init, + size_type n = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) : + rep(n, hf, eql, Identity(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_set(std::initializer_list<value_type> init, + size_type n, const allocator_type& alloc) : + rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc) + 
{ + rep.insert(init.begin(), init.end()); + } + + sparse_hash_set(std::initializer_list<value_type> init, + size_type n, const hasher& hf, + const allocator_type& alloc) : + rep(n, hf, key_equal(), Identity(), SetKey(), alloc) + { + rep.insert(init.begin(), init.end()); + } + + sparse_hash_set& operator=(std::initializer_list<value_type> init) + { + rep.clear(); + rep.insert(init.begin(), init.end()); + return *this; + } + + void insert(std::initializer_list<value_type> init) + { + rep.insert(init.begin(), init.end()); + } + +#endif + + sparse_hash_set& operator=(const sparse_hash_set &o) + { + rep = o.rep; + return *this; + } + + void clear() { rep.clear(); } + void swap(sparse_hash_set& hs) { rep.swap(hs.rep); } + + + // Functions concerning size + // ------------------------- + size_type size() const { return rep.size(); } + size_type max_size() const { return rep.max_size(); } + bool empty() const { return rep.empty(); } + size_type bucket_count() const { return rep.bucket_count(); } + size_type max_bucket_count() const { return rep.max_bucket_count(); } + + size_type bucket_size(size_type i) const { return rep.bucket_size(i); } + size_type bucket(const key_type& key) const { return rep.bucket(key); } + + float load_factor() const { return size() * 1.0f / bucket_count(); } + + float max_load_factor() const { return rep.get_enlarge_factor(); } + void max_load_factor(float grow) { rep.set_enlarge_factor(grow); } + + float min_load_factor() const { return rep.get_shrink_factor(); } + void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); } + + void set_resizing_parameters(float shrink, float grow) + { + rep.set_resizing_parameters(shrink, grow); + } + + void resize(size_type cnt) { rep.resize(cnt); } + void rehash(size_type cnt) { resize(cnt); } // c++11 name + void reserve(size_type cnt) { resize(cnt); } // c++11 + + // Lookup + // ------ + iterator find(const key_type& key) const { return rep.find(key); } + bool contains(const key_type& key) const { return rep.find(key) != rep.end(); } + + size_type count(const key_type& key) const { return rep.count(key); } + + std::pair<iterator, iterator> + equal_range(const key_type& key) const { return rep.equal_range(key); } + +#if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES) + template <class... Args> + std::pair<iterator, bool> emplace(Args&&... args) + { + return rep.emplace(std::forward<Args>(args)...); + } + + template <class... Args> + iterator emplace_hint(const_iterator , Args&&... 
args) + { + return rep.emplace(std::forward<Args>(args)...).first; + } +#endif + + // Insert + // ------ + std::pair<iterator, bool> insert(const value_type& obj) + { + std::pair<typename ht::iterator, bool> p = rep.insert(obj); + return std::pair<iterator, bool>(p.first, p.second); // const to non-const + } + +#if !defined(SPP_NO_CXX11_RVALUE_REFERENCES) + template<class P> + std::pair<iterator, bool> insert(P&& obj) { return rep.insert(std::forward<P>(obj)); } +#endif + + template <class InputIterator> + void insert(InputIterator f, InputIterator l) { rep.insert(f, l); } + + void insert(const_iterator f, const_iterator l) { rep.insert(f, l); } + + iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; } + + // Deleted key - do nothing - just to keep google test framework happy + // ------------------------------------------------------------------- + void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } + void clear_deleted_key() { rep.clear_deleted_key(); } + key_type deleted_key() const { return rep.deleted_key(); } + + // Erase + // ----- + size_type erase(const key_type& key) { return rep.erase(key); } + iterator erase(iterator it) { return rep.erase(it); } + iterator erase(iterator f, iterator l) { return rep.erase(f, l); } + + // Comparison + // ---------- + bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; } + bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; } + + + // I/O -- this is an add-on for writing metainformation to disk + // + // For maximum flexibility, this does not assume a particular + // file type (though it will probably be a FILE *). We just pass + // the fp through to rep. + + // If your keys and values are simple enough, you can pass this + // serializer to serialize()/unserialize(). "Simple enough" means + // value_type is a POD type that contains no pointers. Note, + // however, we don't try to normalize endianness. + // --------------------------------------------------------------- + typedef typename ht::NopointerSerializer NopointerSerializer; + + // serializer: a class providing operator()(OUTPUT*, const value_type&) + // (writing value_type to OUTPUT). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a + // pointer to a class providing size_t Write(const void*, size_t), + // which writes a buffer into a stream (which fp presumably + // owns) and returns the number of bytes successfully written. + // Note basic_ostream<not_char> is not currently supported. + // --------------------------------------------------------------- + template <typename ValueSerializer, typename OUTPUT> + bool serialize(ValueSerializer serializer, OUTPUT* fp) + { + return rep.serialize(serializer, fp); + } + + // serializer: a functor providing operator()(INPUT*, value_type*) + // (reading from INPUT and into value_type). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a + // pointer to a class providing size_t Read(void*, size_t), + // which reads into a buffer from a stream (which fp presumably + // owns) and returns the number of bytes successfully read. + // Note basic_istream<not_char> is not currently supported. + // NOTE: Since value_type is const Key, ValueSerializer + // may need to do a const cast in order to fill in the key. 
+    // NOTE: if Key is not a POD type, the serializer MUST use
+    // placement-new to initialize its value, rather than a normal
+    // equals-assignment or similar. (The value_type* passed into
+    // the serializer points to garbage memory.)
+    // ---------------------------------------------------------------
+    template <typename ValueSerializer, typename INPUT>
+    bool unserialize(ValueSerializer serializer, INPUT* fp)
+    {
+        return rep.unserialize(serializer, fp);
+    }
+
+    // The four methods below are DEPRECATED.
+    // Use serialize() and unserialize() for new code.
+    // -----------------------------------------------
+    template <typename OUTPUT>
+    bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); }
+
+    template <typename INPUT>
+    bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); }
+
+    template <typename OUTPUT>
+    bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); }
+
+    template <typename INPUT>
+    bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); }
+
+private:
+    // The actual data
+    // ---------------
+    ht rep;
+};
+
+} // spp_ namespace
+
+
+// We need a global swap for all our classes as well
+// -------------------------------------------------
+
+template <class T, class Alloc>
+inline void swap(spp_::sparsegroup<T,Alloc> &x, spp_::sparsegroup<T,Alloc> &y)
+{
+    x.swap(y);
+}
+
+template <class T, class Alloc>
+inline void swap(spp_::sparsetable<T,Alloc> &x, spp_::sparsetable<T,Alloc> &y)
+{
+    x.swap(y);
+}
+
+template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
+inline void swap(spp_::sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
+                 spp_::sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &y)
+{
+    x.swap(y);
+}
+
+template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
+inline void swap(spp_::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
+                 spp_::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2)
+{
+    hm1.swap(hm2);
+}
+
+template <class Val, class HashFcn, class EqualKey, class Alloc>
+inline void swap(spp_::sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
+                 spp_::sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2)
+{
+    hs1.swap(hs2);
+}
+
+#endif // sparsepp_h_guard_
diff --git a/benchmarks/others/sparsepp/spp_config.h b/benchmarks/others/sparsepp/spp_config.h
new file mode 100644
index 00000000..46eeee5c
--- /dev/null
+++ b/benchmarks/others/sparsepp/spp_config.h
@@ -0,0 +1,781 @@
+#if !defined(spp_config_h_guard)
+#define spp_config_h_guard
+
+// --------------------------------------------------
+// Sparsepp config macros
+// some can be overridden on the command line
+// --------------------------------------------------
+#ifndef SPP_NAMESPACE
+    #define SPP_NAMESPACE spp
+#endif
+
+#ifndef spp_
+    #define spp_ SPP_NAMESPACE
+#endif
+
+#ifndef SPP_DEFAULT_ALLOCATOR
+    #if (defined(SPP_USE_SPP_ALLOC) && SPP_USE_SPP_ALLOC) && defined(_MSC_VER)
+        // -----------------------------------------------------------------------------
+        // When building with the Microsoft compiler, we use a custom allocator because
+        // the default one fragments memory when reallocating. This is desirable only
+        // when creating large sparsepp hash maps. If you create lots of small hash_maps,
+        // define the following before including spp.h:
+        //     #define SPP_DEFAULT_ALLOCATOR spp::libc_allocator
+        // -----------------------------------------------------------------------------
+        #define SPP_DEFAULT_ALLOCATOR spp_::spp_allocator
+        #define SPP_INCLUDE_SPP_ALLOC
+    #else
+        #define SPP_DEFAULT_ALLOCATOR spp_::libc_allocator
+    #endif
+#endif
+
+#ifndef SPP_GROUP_SIZE
+    // must be 32 or 64
+    #define SPP_GROUP_SIZE 32
+#endif
+
+#ifndef SPP_ALLOC_SZ
+    // must be a power of 2 (0 = aggressive alloc, 1 = smallest memory usage, 2 = good compromise)
+    #define SPP_ALLOC_SZ 0
+#endif
+
+#ifndef SPP_STORE_NUM_ITEMS
+    // 1 uses a little bit more memory, but is faster!!
+    #define SPP_STORE_NUM_ITEMS 1
+#endif
+
+
+// ---------------------------------------------------------------------------
+// Compiler detection code (SPP_ preprocessor macros) derived from Boost
+// libraries. The Boost Software License is therefore reproduced below.
+// ---------------------------------------------------------------------------
+// Boost Software License - Version 1.0 - August 17th, 2003
+//
+// Permission is hereby granted, free of charge, to any person or organization
+// obtaining a copy of the software and accompanying documentation covered by
+// this license (the "Software") to use, reproduce, display, distribute,
+// execute, and transmit the Software, and to prepare derivative works of the
+// Software, and to permit third-parties to whom the Software is furnished to
+// do so, all subject to the following:
+//
+// The copyright notices in the Software and this entire statement, including
+// the above license grant, this restriction and the following disclaimer,
+// must be included in all copies of the Software, in whole or in part, and
+// all derivative works of the Software, unless such copies or derivative
+// works are solely in the form of machine-executable object code generated by
+// a source language processor.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
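+// Overriding the defaults above must happen before the first include of spp.h.
+// A minimal sketch from client code (illustrative comment only, not part of the
+// upstream header; the values shown are the documented alternatives):
+//
+//     #define SPP_GROUP_SIZE 64                          // must be 32 or 64
+//     #define SPP_ALLOC_SZ 1                             // smallest memory usage
+//     #define SPP_DEFAULT_ALLOCATOR spp::libc_allocator  // see allocator note above
+//     #include "sparsepp/spp.h"
+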
+// --------------------------------------------------------------------------- + +// Boost like configuration +// ------------------------ +#if defined __clang__ + + #if defined(i386) + #include <cpuid.h> + inline void spp_cpuid(int info[4], int InfoType) { + __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); + } + #endif + + #define SPP_POPCNT __builtin_popcount + #define SPP_POPCNT64 __builtin_popcountll + + #define SPP_HAS_CSTDINT + + #ifndef __has_extension + #define __has_extension __has_feature + #endif + + #if !__has_feature(cxx_exceptions) && !defined(SPP_NO_EXCEPTIONS) + #define SPP_NO_EXCEPTIONS + #endif + + #if !__has_feature(cxx_rtti) && !defined(SPP_NO_RTTI) + #define SPP_NO_RTTI + #endif + + #if !__has_feature(cxx_rtti) && !defined(SPP_NO_TYPEID) + #define SPP_NO_TYPEID + #endif + + #if defined(__int64) && !defined(__GNUC__) + #define SPP_HAS_MS_INT64 + #endif + + #define SPP_HAS_NRVO + + // Branch prediction hints + #if defined(__has_builtin) + #if __has_builtin(__builtin_expect) + #define SPP_LIKELY(x) __builtin_expect(x, 1) + #define SPP_UNLIKELY(x) __builtin_expect(x, 0) + #endif + #endif + + // Clang supports "long long" in all compilation modes. + #define SPP_HAS_LONG_LONG + + #if !__has_feature(cxx_constexpr) + #define SPP_NO_CXX11_CONSTEXPR + #endif + + #if !__has_feature(cxx_decltype) + #define SPP_NO_CXX11_DECLTYPE + #endif + + #if !__has_feature(cxx_decltype_incomplete_return_types) + #define SPP_NO_CXX11_DECLTYPE_N3276 + #endif + + #if !__has_feature(cxx_defaulted_functions) + #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS + #endif + + #if !__has_feature(cxx_deleted_functions) + #define SPP_NO_CXX11_DELETED_FUNCTIONS + #endif + + #if !__has_feature(cxx_explicit_conversions) + #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS + #endif + + #if !__has_feature(cxx_default_function_template_args) + #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS + #endif + + #if !__has_feature(cxx_generalized_initializers) + #define SPP_NO_CXX11_HDR_INITIALIZER_LIST + #endif + + #if !__has_feature(cxx_lambdas) + #define SPP_NO_CXX11_LAMBDAS + #endif + + #if !__has_feature(cxx_local_type_template_args) + #define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS + #endif + + #if !__has_feature(cxx_raw_string_literals) + #define SPP_NO_CXX11_RAW_LITERALS + #endif + + #if !__has_feature(cxx_reference_qualified_functions) + #define SPP_NO_CXX11_REF_QUALIFIERS + #endif + + #if !__has_feature(cxx_generalized_initializers) + #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX + #endif + + #if !__has_feature(cxx_rvalue_references) + #define SPP_NO_CXX11_RVALUE_REFERENCES + #endif + + #if !__has_feature(cxx_static_assert) + #define SPP_NO_CXX11_STATIC_ASSERT + #endif + + #if !__has_feature(cxx_alias_templates) + #define SPP_NO_CXX11_TEMPLATE_ALIASES + #endif + + #if !__has_feature(cxx_variadic_templates) + #define SPP_NO_CXX11_VARIADIC_TEMPLATES + #endif + + #if !__has_feature(cxx_user_literals) + #define SPP_NO_CXX11_USER_DEFINED_LITERALS + #endif + + #if !__has_feature(cxx_alignas) + #define SPP_NO_CXX11_ALIGNAS + #endif + + #if !__has_feature(cxx_trailing_return) + #define SPP_NO_CXX11_TRAILING_RESULT_TYPES + #endif + + #if !__has_feature(cxx_inline_namespaces) + #define SPP_NO_CXX11_INLINE_NAMESPACES + #endif + + #if !__has_feature(cxx_override_control) + #define SPP_NO_CXX11_FINAL + #endif + + #if !(__has_feature(__cxx_binary_literals__) || __has_extension(__cxx_binary_literals__)) + #define SPP_NO_CXX14_BINARY_LITERALS + #endif + + #if !__has_feature(__cxx_decltype_auto__) + #define 
SPP_NO_CXX14_DECLTYPE_AUTO
+    #endif
+
+    #if !__has_feature(__cxx_init_captures__)
+        #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES
+    #endif
+
+    #if !__has_feature(__cxx_generic_lambdas__)
+        #define SPP_NO_CXX14_GENERIC_LAMBDAS
+    #endif
+
+
+    #if !__has_feature(__cxx_generic_lambdas__) || !__has_feature(__cxx_relaxed_constexpr__)
+        #define SPP_NO_CXX14_CONSTEXPR
+    #endif
+
+    #if !__has_feature(__cxx_return_type_deduction__)
+        #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION
+    #endif
+
+    #if !__has_feature(__cxx_variable_templates__)
+        #define SPP_NO_CXX14_VARIABLE_TEMPLATES
+    #endif
+
+    #if __cplusplus < 201400
+        #define SPP_NO_CXX14_DIGIT_SEPARATORS
+    #endif
+
+    #if defined(__has_builtin) && __has_builtin(__builtin_unreachable)
+        #define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable();
+    #endif
+
+    #define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__))
+
+    #ifndef SPP_COMPILER
+        #define SPP_COMPILER "Clang version " __clang_version__
+    #endif
+
+    #define SPP_CLANG 1
+
+
+#elif defined __GNUC__
+
+    #define SPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+
+    // definition to expand macro then apply to pragma message
+    // #define VALUE_TO_STRING(x) #x
+    // #define VALUE(x) VALUE_TO_STRING(x)
+    // #define VAR_NAME_VALUE(var) #var "=" VALUE(var)
+    // #pragma message(VAR_NAME_VALUE(SPP_GCC_VERSION))
+
+    #if defined(i386)
+        #include <cpuid.h>
+        inline void spp_cpuid(int info[4], int InfoType) {
+            __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
+        }
+    #endif
+
+    // __POPCNT__ is defined when compiled with popcount support
+    // (the -mpopcnt compiler option is given, for example)
+    #ifdef __POPCNT__
+        // slower unless compiled with -mpopcnt
+        #define SPP_POPCNT __builtin_popcount
+        #define SPP_POPCNT64 __builtin_popcountll
+    #endif
+
+    #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L)
+        #define SPP_GCC_CXX11
+    #endif
+
+    #if __GNUC__ == 3
+        #if defined (__PATHSCALE__)
+            #define SPP_NO_TWO_PHASE_NAME_LOOKUP
+            #define SPP_NO_IS_ABSTRACT
+        #endif
+
+        #if __GNUC_MINOR__ < 4
+            #define SPP_NO_IS_ABSTRACT
+        #endif
+
+        #define SPP_NO_CXX11_EXTERN_TEMPLATE
+    #endif
+
+    #if __GNUC__ < 4
+        //
+        // All problems with gcc-3.x and earlier go here:
+        //
+        #define SPP_NO_TWO_PHASE_NAME_LOOKUP
+        #ifdef __OPEN64__
+            #define SPP_NO_IS_ABSTRACT
+        #endif
+    #endif
+
+    // GCC prior to 3.4 had #pragma once too but it didn't work well with filesystem links
+    #if SPP_GCC_VERSION >= 30400
+        #define SPP_HAS_PRAGMA_ONCE
+    #endif
+
+    #if SPP_GCC_VERSION < 40400
+        // Previous versions of GCC did not completely implement value-initialization:
+        // GCC Bug 30111, "Value-initialization of POD base class doesn't initialize
+        // members", reported by Jonathan Wakely in 2006,
+        // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30111 (fixed for GCC 4.4)
+        // GCC Bug 33916, "Default constructor fails to initialize array members",
+        // reported by Michael Elizabeth Chastain in 2007,
+        // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33916 (fixed for GCC 4.2.4)
+        // See also: http://www.boost.org/libs/utility/value_init.htm #compiler_issues
+        #define SPP_NO_COMPLETE_VALUE_INITIALIZATION
+    #endif
+
+    #if !defined(__EXCEPTIONS) && !defined(SPP_NO_EXCEPTIONS)
+        #define SPP_NO_EXCEPTIONS
+    #endif
+
+    //
+    // Threading support: Turn this on unconditionally here (except for
+    // those platforms where we can know for sure). It will get turned off again
+    // later if no threading API is detected.
+ // + #if !defined(__MINGW32__) && !defined(linux) && !defined(__linux) && !defined(__linux__) + #define SPP_HAS_THREADS + #endif + + // + // gcc has "long long" + // Except on Darwin with standard compliance enabled (-pedantic) + // Apple gcc helpfully defines this macro we can query + // + #if !defined(__DARWIN_NO_LONG_LONG) + #define SPP_HAS_LONG_LONG + #endif + + // + // gcc implements the named return value optimization since version 3.1 + // + #define SPP_HAS_NRVO + + // Branch prediction hints + #define SPP_LIKELY(x) __builtin_expect(x, 1) + #define SPP_UNLIKELY(x) __builtin_expect(x, 0) + + // + // Dynamic shared object (DSO) and dynamic-link library (DLL) support + // + #if __GNUC__ >= 4 + #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) && !defined(__CYGWIN__) + // All Win32 development environments, including 64-bit Windows and MinGW, define + // _WIN32 or one of its variant spellings. Note that Cygwin is a POSIX environment, + // so does not define _WIN32 or its variants. + #define SPP_HAS_DECLSPEC + #define SPP_SYMBOL_EXPORT __attribute__((__dllexport__)) + #define SPP_SYMBOL_IMPORT __attribute__((__dllimport__)) + #else + #define SPP_SYMBOL_EXPORT __attribute__((__visibility__("default"))) + #define SPP_SYMBOL_IMPORT + #endif + + #define SPP_SYMBOL_VISIBLE __attribute__((__visibility__("default"))) + #else + // config/platform/win32.hpp will define SPP_SYMBOL_EXPORT, etc., unless already defined + #define SPP_SYMBOL_EXPORT + #endif + + // + // RTTI and typeinfo detection is possible post gcc-4.3: + // + #if SPP_GCC_VERSION > 40300 + #ifndef __GXX_RTTI + #ifndef SPP_NO_TYPEID + #define SPP_NO_TYPEID + #endif + #ifndef SPP_NO_RTTI + #define SPP_NO_RTTI + #endif + #endif + #endif + + // + // Recent GCC versions have __int128 when in 64-bit mode. + // + // We disable this if the compiler is really nvcc with C++03 as it + // doesn't actually support __int128 as of CUDA_VERSION=7500 + // even though it defines __SIZEOF_INT128__. + // See https://svn.boost.org/trac/boost/ticket/8048 + // https://svn.boost.org/trac/boost/ticket/11852 + // Only re-enable this for nvcc if you're absolutely sure + // of the circumstances under which it's supported: + // + #if defined(__CUDACC__) + #if defined(SPP_GCC_CXX11) + #define SPP_NVCC_CXX11 + #else + #define SPP_NVCC_CXX03 + #endif + #endif + + #if defined(__SIZEOF_INT128__) && !defined(SPP_NVCC_CXX03) + #define SPP_HAS_INT128 + #endif + // + // Recent GCC versions have a __float128 native type, we need to + // include a std lib header to detect this - not ideal, but we'll + // be including <cstddef> later anyway when we select the std lib. + // + // Nevertheless, as of CUDA 7.5, using __float128 with the host + // compiler in pre-C++11 mode is still not supported. + // See https://svn.boost.org/trac/boost/ticket/11852 + // + #ifdef __cplusplus + #include <cstddef> + #else + #include <stddef.h> + #endif + + #if defined(_GLIBCXX_USE_FLOAT128) && !defined(__STRICT_ANSI__) && !defined(SPP_NVCC_CXX03) + #define SPP_HAS_FLOAT128 + #endif + + // C++0x features in 4.3.n and later + // + #if (SPP_GCC_VERSION >= 40300) && defined(SPP_GCC_CXX11) + // C++0x features are only enabled when -std=c++0x or -std=gnu++0x are + // passed on the command line, which in turn defines + // __GXX_EXPERIMENTAL_CXX0X__. 
+ #define SPP_HAS_DECLTYPE + #define SPP_HAS_RVALUE_REFS + #define SPP_HAS_STATIC_ASSERT + #define SPP_HAS_VARIADIC_TMPL + #define SPP_HAS_CSTDINT + #else + #define SPP_NO_CXX11_DECLTYPE + #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS + #define SPP_NO_CXX11_RVALUE_REFERENCES + #define SPP_NO_CXX11_STATIC_ASSERT + #endif + + // C++0x features in 4.4.n and later + // + #if (SPP_GCC_VERSION < 40400) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_AUTO_DECLARATIONS + #define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS + #define SPP_NO_CXX11_CHAR16_T + #define SPP_NO_CXX11_CHAR32_T + #define SPP_NO_CXX11_HDR_INITIALIZER_LIST + #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS + #define SPP_NO_CXX11_DELETED_FUNCTIONS + #define SPP_NO_CXX11_TRAILING_RESULT_TYPES + #define SPP_NO_CXX11_INLINE_NAMESPACES + #define SPP_NO_CXX11_VARIADIC_TEMPLATES + #endif + + #if SPP_GCC_VERSION < 40500 + #define SPP_NO_SFINAE_EXPR + #endif + + // GCC 4.5 forbids declaration of defaulted functions in private or protected sections + #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 5) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_NON_PUBLIC_DEFAULTED_FUNCTIONS + #endif + + // C++0x features in 4.5.0 and later + // + #if (SPP_GCC_VERSION < 40500) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS + #define SPP_NO_CXX11_LAMBDAS + #define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS + #define SPP_NO_CXX11_RAW_LITERALS + #endif + + // C++0x features in 4.6.n and later + // + #if (SPP_GCC_VERSION < 40600) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_CONSTEXPR + #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX + #endif + + // C++0x features in 4.7.n and later + // + #if (SPP_GCC_VERSION < 40700) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_FINAL + #define SPP_NO_CXX11_TEMPLATE_ALIASES + #define SPP_NO_CXX11_USER_DEFINED_LITERALS + #define SPP_NO_CXX11_FIXED_LENGTH_VARIADIC_TEMPLATE_EXPANSION_PACKS + #endif + + // C++0x features in 4.8.n and later + // + #if (SPP_GCC_VERSION < 40800) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_ALIGNAS + #endif + + // C++0x features in 4.8.1 and later + // + #if (SPP_GCC_VERSION < 40801) || !defined(SPP_GCC_CXX11) + #define SPP_NO_CXX11_DECLTYPE_N3276 + #define SPP_NO_CXX11_REF_QUALIFIERS + #define SPP_NO_CXX14_BINARY_LITERALS + #endif + + // C++14 features in 4.9.0 and later + // + #if (SPP_GCC_VERSION < 40900) || (__cplusplus < 201300) + #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION + #define SPP_NO_CXX14_GENERIC_LAMBDAS + #define SPP_NO_CXX14_DIGIT_SEPARATORS + #define SPP_NO_CXX14_DECLTYPE_AUTO + #if !((SPP_GCC_VERSION >= 40801) && (SPP_GCC_VERSION < 40900) && defined(SPP_GCC_CXX11)) + #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES + #endif + #endif + + + // C++ 14: + #if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) + #define SPP_NO_CXX14_CONSTEXPR + #endif + #if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) + #define SPP_NO_CXX14_VARIABLE_TEMPLATES + #endif + + // + // Unused attribute: + #if __GNUC__ >= 4 + #define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) + #endif + // + // __builtin_unreachable: + #if SPP_GCC_VERSION >= 40800 + #define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); + #endif + + #ifndef SPP_COMPILER + #define SPP_COMPILER "GNU C++ version " __VERSION__ + #endif + + // ConceptGCC compiler: + // http://www.generic-programming.org/software/ConceptGCC/ + #ifdef __GXX_CONCEPTS__ + #define SPP_HAS_CONCEPTS + #define SPP_COMPILER "ConceptGCC version " __VERSION__ + #endif + 
+#elif defined _MSC_VER
 +
 + #include <intrin.h> // for __popcnt()
 +
 + #define SPP_POPCNT_CHECK // slower when defined, but we have to check!
 + #define spp_cpuid(info, x) __cpuid(info, x)
 +
 + #define SPP_POPCNT __popcnt
 + #if (SPP_GROUP_SIZE == 64 && INTPTR_MAX == INT64_MAX)
 + #define SPP_POPCNT64 __popcnt64
 + #endif
 +
 + // Attempt to suppress VC6 warnings about the length of decorated names (obsolete):
 + #pragma warning( disable : 4503 ) // warning: decorated name length exceeded
 +
 + #define SPP_HAS_PRAGMA_ONCE
 + #define SPP_HAS_CSTDINT
 +
 + //
 + // versions check:
 + // we don't support Visual C++ prior to version 7.1:
 + #if _MSC_VER < 1310
 + #error "Antique compiler not supported"
 + #endif
 +
 + #if _MSC_FULL_VER < 180020827
 + #define SPP_NO_FENV_H
 + #endif
 +
 + #if _MSC_VER < 1400
 + // although a conforming signature for swprintf exists in VC7.1
 + // it appears not to actually work:
 + #define SPP_NO_SWPRINTF
 +
 + // Our extern template tests also fail for this compiler:
 + #define SPP_NO_CXX11_EXTERN_TEMPLATE
 +
 + // Variadic macros do not exist for VC7.1 and lower
 + #define SPP_NO_CXX11_VARIADIC_MACROS
 + #endif
 +
 + #if _MSC_VER < 1500 // 140X == VC++ 8.0
 + #undef SPP_HAS_CSTDINT
 + #define SPP_NO_MEMBER_TEMPLATE_FRIENDS
 + #endif
 +
 + #if _MSC_VER < 1600 // 150X == VC++ 9.0
 + // A bug in VC9:
 + #define SPP_NO_ADL_BARRIER
 + #endif
 +
 +
 + // MSVC (including the latest checked version) has not yet completely
 + // implemented value-initialization, as is reported:
 + // "VC++ does not value-initialize members of derived classes without
 + // user-declared constructor", reported in 2009 by Sylvester Hesp:
 + // https://connect.microsoft.com/VisualStudio/feedback/details/484295
 + // "Presence of copy constructor breaks member class initialization",
 + // reported in 2009 by Alex Vakulenko:
 + // https://connect.microsoft.com/VisualStudio/feedback/details/499606
 + // "Value-initialization in new-expression", reported in 2005 by
 + // Pavel Kuznetsov (MetaCommunications Engineering):
 + // https://connect.microsoft.com/VisualStudio/feedback/details/100744
 + // See also: http://www.boost.org/libs/utility/value_init.htm#compiler_issues
 + // (Niels Dekker, LKEB, May 2010)
 + #define SPP_NO_COMPLETE_VALUE_INITIALIZATION
 +
 + #ifndef _NATIVE_WCHAR_T_DEFINED
 + #define SPP_NO_INTRINSIC_WCHAR_T
 + #endif
 +
 + //
 + // check for exception handling support:
 + #if !defined(_CPPUNWIND) && !defined(SPP_NO_EXCEPTIONS)
 + #define SPP_NO_EXCEPTIONS
 + #endif
 +
 + //
 + // __int64 support:
 + //
 + #define SPP_HAS_MS_INT64
 + #if defined(_MSC_EXTENSIONS) || (_MSC_VER >= 1400)
 + #define SPP_HAS_LONG_LONG
 + #else
 + #define SPP_NO_LONG_LONG
 + #endif
 +
 + #if (_MSC_VER >= 1400) && !defined(_DEBUG)
 + #define SPP_HAS_NRVO
 + #endif
 +
 + #if _MSC_VER >= 1500 // 150X == VC++ 9.0
 + #define SPP_HAS_PRAGMA_DETECT_MISMATCH
 + #endif
 +
 + //
 + // disable Win32 API's if compiler extensions are
 + // turned off:
 + //
 + #if !defined(_MSC_EXTENSIONS) && !defined(SPP_DISABLE_WIN32)
 + #define SPP_DISABLE_WIN32
 + #endif
 +
 + #if !defined(_CPPRTTI) && !defined(SPP_NO_RTTI)
 + #define SPP_NO_RTTI
 + #endif
 +
 + //
 + // TR1 features:
 + //
 + #if _MSC_VER >= 1700
 + // #define SPP_HAS_TR1_HASH // don't know if this is true yet.
 + // #define SPP_HAS_TR1_TYPE_TRAITS // don't know if this is true yet. 
+ #define SPP_HAS_TR1_UNORDERED_MAP + #define SPP_HAS_TR1_UNORDERED_SET + #endif + + // + // C++0x features + // + // See above for SPP_NO_LONG_LONG + + // C++ features supported by VC++ 10 (aka 2010) + // + #if _MSC_VER < 1600 + #define SPP_NO_CXX11_AUTO_DECLARATIONS + #define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS + #define SPP_NO_CXX11_LAMBDAS + #define SPP_NO_CXX11_RVALUE_REFERENCES + #define SPP_NO_CXX11_STATIC_ASSERT + #define SPP_NO_CXX11_DECLTYPE + #endif // _MSC_VER < 1600 + + #if _MSC_VER >= 1600 + #define SPP_HAS_STDINT_H + #endif + + // C++11 features supported by VC++ 11 (aka 2012) + // + #if _MSC_VER < 1700 + #define SPP_NO_CXX11_FINAL + #endif // _MSC_VER < 1700 + + // C++11 features supported by VC++ 12 (aka 2013). + // + #if _MSC_FULL_VER < 180020827 + #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS + #define SPP_NO_CXX11_DELETED_FUNCTIONS + #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS + #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS + #define SPP_NO_CXX11_RAW_LITERALS + #define SPP_NO_CXX11_TEMPLATE_ALIASES + #define SPP_NO_CXX11_TRAILING_RESULT_TYPES + #define SPP_NO_CXX11_VARIADIC_TEMPLATES + #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX + #define SPP_NO_CXX11_DECLTYPE_N3276 + #endif + + // C++11 features supported by VC++ 14 (aka 2014) CTP1 + #if (_MSC_FULL_VER < 190021730) + #define SPP_NO_CXX11_REF_QUALIFIERS + #define SPP_NO_CXX11_USER_DEFINED_LITERALS + #define SPP_NO_CXX11_ALIGNAS + #define SPP_NO_CXX11_INLINE_NAMESPACES + #define SPP_NO_CXX14_DECLTYPE_AUTO + #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES + #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION + #define SPP_NO_CXX11_HDR_INITIALIZER_LIST + #endif + + // C++11 features not supported by any versions + #define SPP_NO_CXX11_CHAR16_T + #define SPP_NO_CXX11_CHAR32_T + #define SPP_NO_CXX11_CONSTEXPR + #define SPP_NO_SFINAE_EXPR + #define SPP_NO_TWO_PHASE_NAME_LOOKUP + + // C++ 14: + #if !defined(__cpp_binary_literals) || (__cpp_binary_literals < 201304) + #define SPP_NO_CXX14_BINARY_LITERALS + #endif + + #if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) + #define SPP_NO_CXX14_CONSTEXPR + #endif + + #if (__cplusplus < 201304) // There's no SD6 check for this.... + #define SPP_NO_CXX14_DIGIT_SEPARATORS + #endif + + #if !defined(__cpp_generic_lambdas) || (__cpp_generic_lambdas < 201304) + #define SPP_NO_CXX14_GENERIC_LAMBDAS + #endif + + #if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) + #define SPP_NO_CXX14_VARIABLE_TEMPLATES + #endif + +#endif + +// from boost/config/suffix.hpp +// ---------------------------- +#ifndef SPP_ATTRIBUTE_UNUSED + #define SPP_ATTRIBUTE_UNUSED +#endif + +/* + Try to persuade compilers to inline. 
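 + (On gcc/clang this maps to __attribute__((always_inline)), on MSVC to
 + __forceinline, and otherwise falls back to plain inline.)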
+*/ +#ifndef SPP_FORCEINLINE + #if defined(__GNUC__) + #define SPP_FORCEINLINE __inline __attribute__ ((always_inline)) + #elif defined(_MSC_VER) + #define SPP_FORCEINLINE __forceinline + #else + #define SPP_FORCEINLINE inline + #endif +#endif + + +#endif // spp_config_h_guard diff --git a/benchmarks/others/sparsepp/spp_dlalloc.h b/benchmarks/others/sparsepp/spp_dlalloc.h new file mode 100644 index 00000000..f88aab7c --- /dev/null +++ b/benchmarks/others/sparsepp/spp_dlalloc.h @@ -0,0 +1,4044 @@ +#ifndef spp_dlalloc__h_ +#define spp_dlalloc__h_ + +/* This is a C++ allocator created from Doug Lea's dlmalloc + (Version 2.8.6 Wed Aug 29 06:57:58 2012) + see: http://g.oswego.edu/dl/html/malloc.html +*/ + +#include "spp_utils.h" +#include "spp_smartptr.h" + + +#ifndef SPP_FORCEINLINE + #if defined(__GNUC__) + #define SPP_FORCEINLINE __inline __attribute__ ((always_inline)) + #elif defined(_MSC_VER) + #define SPP_FORCEINLINE __forceinline + #else + #define SPP_FORCEINLINE inline + #endif +#endif + + +#ifndef SPP_IMPL + #define SPP_IMPL SPP_FORCEINLINE +#endif + +#ifndef SPP_API + #define SPP_API static +#endif + + +namespace spp +{ + // ---------------------- allocator internal API ----------------------- + typedef void* mspace; + + /* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different SPP_DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). 
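 +
 + Illustrative usage of this internal API (a sketch; error handling elided):
 +
 +     mspace ms = create_mspace(0, 0);   // default capacity, no locking
 +     void*  p  = mspace_malloc(ms, 128);
 +     mspace_free(ms, p);
 +     destroy_mspace(ms);                // releases the whole space at once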
+ */
 + SPP_API mspace create_mspace(size_t capacity, int locked);
 + SPP_API size_t destroy_mspace(mspace msp);
 + SPP_API void* mspace_malloc(mspace msp, size_t bytes);
 + SPP_API void mspace_free(mspace msp, void* mem);
 + SPP_API void* mspace_realloc(mspace msp, void* mem, size_t newsize);
 +
 +#if 0
 + SPP_API mspace create_mspace_with_base(void* base, size_t capacity, int locked);
 + SPP_API int mspace_track_large_chunks(mspace msp, int enable);
 + SPP_API void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
 + SPP_API void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
 + SPP_API void** mspace_independent_calloc(mspace msp, size_t n_elements,
 + size_t elem_size, void* chunks[]);
 + SPP_API void** mspace_independent_comalloc(mspace msp, size_t n_elements,
 + size_t sizes[], void* chunks[]);
 + SPP_API size_t mspace_footprint(mspace msp);
 + SPP_API size_t mspace_max_footprint(mspace msp);
 + SPP_API size_t mspace_usable_size(const void* mem);
 + SPP_API int mspace_trim(mspace msp, size_t pad);
 + SPP_API int mspace_mallopt(int, int);
 +#endif
 +
 + // -----------------------------------------------------------
 + // -----------------------------------------------------------
 + struct MSpace : public spp_rc
 + {
 + MSpace() :
 + _sp(create_mspace(0, 0))
 + {}
 +
 + ~MSpace()
 + {
 + destroy_mspace(_sp);
 + }
 +
 + mspace _sp;
 + };
 +
 + // -----------------------------------------------------------
 + // -----------------------------------------------------------
 + template<class T>
 + class spp_allocator
 + {
 + public:
 + typedef T value_type;
 + typedef T* pointer;
 + typedef ptrdiff_t difference_type;
 + typedef const T* const_pointer;
 + typedef size_t size_type;
 +
 + MSpace *getSpace() const { return _space.get(); }
 +
 + spp_allocator() : _space(new MSpace) {}
 +
 + template<class U>
 + spp_allocator(const spp_allocator<U> &o) : _space(o.getSpace()) {}
 +
 + template<class U>
 + spp_allocator& operator=(const spp_allocator<U> &o)
 + {
 + if (&o != this)
 + _space = o.getSpace();
 + return *this;
 + }
 +
 + void swap(spp_allocator &o)
 + {
 + std::swap(_space, o._space);
 + }
 +
 + pointer allocate(size_t n, const_pointer /* unused */ = 0)
 + {
 + pointer res = static_cast<pointer>(mspace_malloc(_space->_sp, n * sizeof(T)));
 + if (!res)
 + throw std::bad_alloc();
 + return res;
 + }
 +
 + void deallocate(pointer p, size_t /* unused */)
 + {
 + mspace_free(_space->_sp, p);
 + }
 +
 + pointer reallocate(pointer p, size_t new_size)
 + {
 + pointer res = static_cast<pointer>(mspace_realloc(_space->_sp, p, new_size * sizeof(T)));
 + if (!res)
 + throw std::bad_alloc();
 + return res;
 + }
 +
 + pointer reallocate(pointer p, size_type /* old_size */, size_t new_size)
 + {
 + return reallocate(p, new_size);
 + }
 +
 + size_type max_size() const
 + {
 + return static_cast<size_type>(-1) / sizeof(value_type);
 + }
 +
 + void construct(pointer p, const value_type& val)
 + {
 + new (p) value_type(val);
 + }
 +
 + void destroy(pointer p) { p->~value_type(); }
 +
 + template<class U>
 + struct rebind
 + {
 + // rebind to a spp_allocator of the new value type; the converting
 + // constructor above makes the rebound copy share the same mspace.
 + typedef spp::spp_allocator<U> other;
 + };
 +
 + mspace space() const { return _space->_sp; }
 +
 + // check if we can clear the whole allocator memory at once => works only if the allocator
 + // is not shared. 
If can_clear() returns true, we expect that the next allocator call + // will be clear() - not allocate() or deallocate() + bool can_clear() + { + assert(!_space_to_clear); + _space_to_clear.reset(); + _space_to_clear.swap(_space); + if (_space_to_clear->count() == 1) + return true; + else + _space_to_clear.swap(_space); + return false; + } + + void clear() + { + assert(!_space && !!_space_to_clear); + _space_to_clear.reset(); + _space = new MSpace; + } + + private: + spp_sptr<MSpace> _space; + spp_sptr<MSpace> _space_to_clear; + }; +} + + +// allocators are "equal" whenever memory allocated with one can be deallocated with the other +template<class T> +inline bool operator==(const spp_::spp_allocator<T> &a, const spp_::spp_allocator<T> &b) +{ + return a.space() == b.space(); +} + +template<class T> +inline bool operator!=(const spp_::spp_allocator<T> &a, const spp_::spp_allocator<T> &b) +{ + return !(a == b); +} + +namespace std +{ + template <class T> + inline void swap(spp_::spp_allocator<T> &a, spp_::spp_allocator<T> &b) + { + a.swap(b); + } +} + +#if !defined(SPP_EXCLUDE_IMPLEMENTATION) + +#ifndef WIN32 + #ifdef _WIN32 + #define WIN32 1 + #endif + #ifdef _WIN32_WCE + #define SPP_LACKS_FCNTL_H + #define WIN32 1 + #endif +#endif + +#ifdef WIN32 + #define WIN32_LEAN_AND_MEAN + #include <windows.h> + #include <tchar.h> + #define SPP_HAVE_MMAP 1 + #define SPP_LACKS_UNISTD_H + #define SPP_LACKS_SYS_PARAM_H + #define SPP_LACKS_SYS_MMAN_H + #define SPP_LACKS_STRING_H + #define SPP_LACKS_STRINGS_H + #define SPP_LACKS_SYS_TYPES_H + #define SPP_LACKS_ERRNO_H + #define SPP_LACKS_SCHED_H + #ifndef SPP_MALLOC_FAILURE_ACTION + #define SPP_MALLOC_FAILURE_ACTION + #endif + #ifndef SPP_MMAP_CLEARS + #ifdef _WIN32_WCE /* WINCE reportedly does not clear */ + #define SPP_MMAP_CLEARS 0 + #else + #define SPP_MMAP_CLEARS 1 + #endif + #endif +#endif + +#if defined(DARWIN) || defined(_DARWIN) + #define SPP_HAVE_MMAP 1 + /* OSX allocators provide 16 byte alignment */ + #ifndef SPP_MALLOC_ALIGNMENT + #define SPP_MALLOC_ALIGNMENT ((size_t)16U) + #endif +#endif + +#ifndef SPP_LACKS_SYS_TYPES_H + #include <sys/types.h> /* For size_t */ +#endif + +#ifndef SPP_MALLOC_ALIGNMENT + #define SPP_MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *))) +#endif + +/* ------------------- size_t and alignment properties -------------------- */ +static const size_t spp_max_size_t = ~(size_t)0; +static const size_t spp_size_t_bitsize = sizeof(size_t) << 3; +static const size_t spp_half_max_size_t = spp_max_size_t / 2U; +static const size_t spp_chunk_align_mask = SPP_MALLOC_ALIGNMENT - 1; + +#if defined(SPP_DEBUG) || !defined(NDEBUG) +static bool spp_is_aligned(void *p) { return ((size_t)p & spp_chunk_align_mask) == 0; } +#endif + +// the number of bytes to offset an address to align it +static size_t align_offset(void *p) +{ + return (((size_t)p & spp_chunk_align_mask) == 0) ? 
0 : + ((SPP_MALLOC_ALIGNMENT - ((size_t)p & spp_chunk_align_mask)) & spp_chunk_align_mask); +} + + +#ifndef SPP_FOOTERS + #define SPP_FOOTERS 0 +#endif + +#ifndef SPP_ABORT + #define SPP_ABORT abort() +#endif + +#ifndef SPP_ABORT_ON_ASSERT_FAILURE + #define SPP_ABORT_ON_ASSERT_FAILURE 1 +#endif + +#ifndef SPP_PROCEED_ON_ERROR + #define SPP_PROCEED_ON_ERROR 0 +#endif + +#ifndef SPP_INSECURE + #define SPP_INSECURE 0 +#endif + +#ifndef SPP_MALLOC_INSPECT_ALL + #define SPP_MALLOC_INSPECT_ALL 0 +#endif + +#ifndef SPP_HAVE_MMAP + #define SPP_HAVE_MMAP 1 +#endif + +#ifndef SPP_MMAP_CLEARS + #define SPP_MMAP_CLEARS 1 +#endif + +#ifndef SPP_HAVE_MREMAP + #ifdef linux + #define SPP_HAVE_MREMAP 1 + #ifndef _GNU_SOURCE + #define _GNU_SOURCE /* Turns on mremap() definition */ + #endif + #else + #define SPP_HAVE_MREMAP 0 + #endif +#endif + +#ifndef SPP_MALLOC_FAILURE_ACTION + // ENOMEM = 12 + #define SPP_MALLOC_FAILURE_ACTION errno = 12 +#endif + + +#ifndef SPP_DEFAULT_GRANULARITY + #if defined(WIN32) + #define SPP_DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ + #else + #define SPP_DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) + #endif +#endif + +#ifndef SPP_DEFAULT_TRIM_THRESHOLD + #define SPP_DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#endif + +#ifndef SPP_DEFAULT_MMAP_THRESHOLD + #if SPP_HAVE_MMAP + #define SPP_DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) + #else + #define SPP_DEFAULT_MMAP_THRESHOLD spp_max_size_t + #endif +#endif + +#ifndef SPP_MAX_RELEASE_CHECK_RATE + #if SPP_HAVE_MMAP + #define SPP_MAX_RELEASE_CHECK_RATE 4095 + #else + #define SPP_MAX_RELEASE_CHECK_RATE spp_max_size_t + #endif +#endif + +#ifndef SPP_USE_BUILTIN_FFS + #define SPP_USE_BUILTIN_FFS 0 +#endif + +#ifndef SPP_USE_DEV_RANDOM + #define SPP_USE_DEV_RANDOM 0 +#endif + +#ifndef SPP_NO_SEGMENT_TRAVERSAL + #define SPP_NO_SEGMENT_TRAVERSAL 0 +#endif + + + +/*------------------------------ internal #includes ---------------------- */ + +#ifdef _MSC_VER + #pragma warning( disable : 4146 ) /* no "unsigned" warnings */ +#endif +#ifndef SPP_LACKS_ERRNO_H + #include <errno.h> /* for SPP_MALLOC_FAILURE_ACTION */ +#endif + +#ifdef SPP_DEBUG + #if SPP_ABORT_ON_ASSERT_FAILURE + #undef assert + #define assert(x) if(!(x)) SPP_ABORT + #else + #include <assert.h> + #endif +#else + #ifndef assert + #define assert(x) + #endif + #define SPP_DEBUG 0 +#endif + +#if !defined(WIN32) && !defined(SPP_LACKS_TIME_H) + #include <time.h> /* for magic initialization */ +#endif + +#ifndef SPP_LACKS_STDLIB_H + #include <stdlib.h> /* for abort() */ +#endif + +#ifndef SPP_LACKS_STRING_H + #include <string.h> /* for memset etc */ +#endif + +#if SPP_USE_BUILTIN_FFS + #ifndef SPP_LACKS_STRINGS_H + #include <strings.h> /* for ffs */ + #endif +#endif + +#if SPP_HAVE_MMAP + #ifndef SPP_LACKS_SYS_MMAN_H + /* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ + #if (defined(linux) && !defined(__USE_GNU)) + #define __USE_GNU 1 + #include <sys/mman.h> /* for mmap */ + #undef __USE_GNU + #else + #include <sys/mman.h> /* for mmap */ + #endif + #endif + #ifndef SPP_LACKS_FCNTL_H + #include <fcntl.h> + #endif +#endif + +#ifndef SPP_LACKS_UNISTD_H + #include <unistd.h> /* for sbrk, sysconf */ +#else + #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) + extern void* sbrk(ptrdiff_t); + #endif +#endif + +#include <new> + +namespace spp +{ + +/* Declarations for bit scanning on win32 */ +#if defined(_MSC_VER) && _MSC_VER>=1300 + #ifndef BitScanForward /* Try to avoid pulling 
in WinNT.h */
 + extern "C" {
 + unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
 + unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
 + }
 +
 + #define BitScanForward _BitScanForward
 + #define BitScanReverse _BitScanReverse
 + #pragma intrinsic(_BitScanForward)
 + #pragma intrinsic(_BitScanReverse)
 + #endif /* BitScanForward */
 +#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
 +
 +#ifndef WIN32
 + #ifndef malloc_getpagesize
 + #ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
 + #ifndef _SC_PAGE_SIZE
 + #define _SC_PAGE_SIZE _SC_PAGESIZE
 + #endif
 + #endif
 + #ifdef _SC_PAGE_SIZE
 + #define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
 + #else
 + #if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
 + extern size_t getpagesize();
 + #define malloc_getpagesize getpagesize()
 + #else
 + #ifdef WIN32 /* use supplied emulation of getpagesize */
 + #define malloc_getpagesize getpagesize()
 + #else
 + #ifndef SPP_LACKS_SYS_PARAM_H
 + #include <sys/param.h>
 + #endif
 + #ifdef EXEC_PAGESIZE
 + #define malloc_getpagesize EXEC_PAGESIZE
 + #else
 + #ifdef NBPG
 + #ifndef CLSIZE
 + #define malloc_getpagesize NBPG
 + #else
 + #define malloc_getpagesize (NBPG * CLSIZE)
 + #endif
 + #else
 + #ifdef NBPC
 + #define malloc_getpagesize NBPC
 + #else
 + #ifdef PAGESIZE
 + #define malloc_getpagesize PAGESIZE
 + #else /* just guess */
 + #define malloc_getpagesize ((size_t)4096U)
 + #endif
 + #endif
 + #endif
 + #endif
 + #endif
 + #endif
 + #endif
 + #endif
 +#endif
 +
 +/* -------------------------- MMAP preliminaries ------------------------- */
 +
 +/*
 + If SPP_HAVE_MORECORE or SPP_HAVE_MMAP are false, we just define calls and
 + checks to fail, so the compiler optimizer can delete code rather than
 + using so many "#if"s.
 +*/
 +
 +
 +/* MMAP must return mfail on failure */
 +static void *mfail = (void*)spp_max_size_t;
 +static char *cmfail = (char*)mfail;
 +
 +#if SPP_HAVE_MMAP
 +
 +#ifndef WIN32
 + #define SPP_MUNMAP_DEFAULT(a, s) munmap((a), (s))
 + #define SPP_MMAP_PROT (PROT_READ | PROT_WRITE)
 + #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
 + #define MAP_ANONYMOUS MAP_ANON
 + #endif
 +
 + #ifdef MAP_ANONYMOUS
 + #define SPP_MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
 + #define SPP_MMAP_DEFAULT(s) mmap(0, (s), SPP_MMAP_PROT, SPP_MMAP_FLAGS, -1, 0)
 + #else /* MAP_ANONYMOUS */
 + /*
 + Nearly all versions of mmap support MAP_ANONYMOUS, so the following
 + is unlikely to be needed, but is supplied just in case.
 + */
 + #define SPP_MMAP_FLAGS (MAP_PRIVATE)
 + static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
 + static void* SPP_MMAP_DEFAULT(size_t s)
 + {
 + if (dev_zero_fd < 0)
 + dev_zero_fd = open("/dev/zero", O_RDWR);
 + return mmap(0, s, SPP_MMAP_PROT, SPP_MMAP_FLAGS, dev_zero_fd, 0);
 + }
 + #endif /* MAP_ANONYMOUS */
 +
 + #define SPP_DIRECT_MMAP_DEFAULT(s) SPP_MMAP_DEFAULT(s)
 +
 +#else /* WIN32 */
 +
 + /* Win32 MMAP via VirtualAlloc */
 + static SPP_FORCEINLINE void* win32mmap(size_t size)
 + {
 + void* ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
 + return (ptr != 0) ? ptr : mfail;
 + }
 +
 + /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
 + static SPP_FORCEINLINE void* win32direct_mmap(size_t size)
 + {
 + void* ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN,
 + PAGE_READWRITE);
 + return (ptr != 0) ? 
ptr : mfail;
 + }
 +
 + /* This function supports releasing coalesced segments */
 + static SPP_FORCEINLINE int win32munmap(void* ptr, size_t size)
 + {
 + MEMORY_BASIC_INFORMATION minfo;
 + char* cptr = (char*)ptr;
 + while (size)
 + {
 + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
 + return -1;
 + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
 + minfo.State != MEM_COMMIT || minfo.RegionSize > size)
 + return -1;
 + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
 + return -1;
 + cptr += minfo.RegionSize;
 + size -= minfo.RegionSize;
 + }
 + return 0;
 + }
 +
 + #define SPP_MMAP_DEFAULT(s) win32mmap(s)
 + #define SPP_MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
 + #define SPP_DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
 +#endif /* WIN32 */
 +#endif /* SPP_HAVE_MMAP */
 +
 +#if SPP_HAVE_MREMAP
 + #ifndef WIN32
 + #define SPP_MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
 + #endif
 +#endif
 +
 +/**
 + * Define SPP_CALL_MMAP/SPP_CALL_MUNMAP/SPP_CALL_DIRECT_MMAP
 + */
 +#if SPP_HAVE_MMAP
 + #define USE_MMAP_BIT 1
 +
 + #ifdef SPP_MMAP
 + #define SPP_CALL_MMAP(s) SPP_MMAP(s)
 + #else
 + #define SPP_CALL_MMAP(s) SPP_MMAP_DEFAULT(s)
 + #endif
 +
 + #ifdef SPP_MUNMAP
 + #define SPP_CALL_MUNMAP(a, s) SPP_MUNMAP((a), (s))
 + #else
 + #define SPP_CALL_MUNMAP(a, s) SPP_MUNMAP_DEFAULT((a), (s))
 + #endif
 +
 + #ifdef SPP_DIRECT_MMAP
 + #define SPP_CALL_DIRECT_MMAP(s) SPP_DIRECT_MMAP(s)
 + #else
 + #define SPP_CALL_DIRECT_MMAP(s) SPP_DIRECT_MMAP_DEFAULT(s)
 + #endif
 +
 +#else /* SPP_HAVE_MMAP */
 + #define USE_MMAP_BIT 0
 +
 + #define SPP_MMAP(s) mfail
 + #define SPP_MUNMAP(a, s) (-1)
 + #define SPP_DIRECT_MMAP(s) mfail
 + #define SPP_CALL_DIRECT_MMAP(s) SPP_DIRECT_MMAP(s)
 + #define SPP_CALL_MMAP(s) SPP_MMAP(s)
 + #define SPP_CALL_MUNMAP(a, s) SPP_MUNMAP((a), (s))
 +#endif
 +
 +/**
 + * Define SPP_CALL_MREMAP
 + */
 +#if SPP_HAVE_MMAP && SPP_HAVE_MREMAP
 + #ifdef MREMAP
 + #define SPP_CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
 + #else
 + #define SPP_CALL_MREMAP(addr, osz, nsz, mv) SPP_MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
 + #endif
 +#else
 + #define SPP_CALL_MREMAP(addr, osz, nsz, mv) mfail
 +#endif
 +
 +/* mstate bit set if contiguous morecore disabled or failed */
 +static const unsigned USE_NONCONTIGUOUS_BIT = 4U;
 +
 +/* segment bit set in create_mspace_with_base */
 +static const unsigned EXTERN_BIT = 8U;
 +
 +
 +/* --------------------------- flags ------------------------ */
 +
 +static const unsigned PINUSE_BIT = 1;
 +static const unsigned CINUSE_BIT = 2;
 +static const unsigned FLAG4_BIT = 4;
 +static const unsigned INUSE_BITS = (PINUSE_BIT | CINUSE_BIT);
 +static const unsigned FLAG_BITS = (PINUSE_BIT | CINUSE_BIT | FLAG4_BIT);
 +
 +/* ------------------- Chunk sizes and alignments ----------------------- */
 +
 +#if SPP_FOOTERS
 + static const unsigned CHUNK_OVERHEAD = 2 * sizeof(size_t);
 +#else
 + static const unsigned CHUNK_OVERHEAD = sizeof(size_t);
 +#endif
 +
 +/* MMapped chunks need a second word of overhead ... */
 +static const unsigned SPP_MMAP_CHUNK_OVERHEAD = 2 * sizeof(size_t);
 +
 +/* ... 
and additional padding for fake next-chunk at foot */ +static const unsigned SPP_MMAP_FOOT_PAD = 4 * sizeof(size_t); + +// =============================================================================== +struct malloc_chunk_header +{ + void set_size_and_pinuse_of_free_chunk(size_t s) + { + _head = s | PINUSE_BIT; + set_foot(s); + } + + void set_foot(size_t s) + { + ((malloc_chunk_header *)((char*)this + s))->_prev_foot = s; + } + + // extraction of fields from head words + bool cinuse() const { return !!(_head & CINUSE_BIT); } + bool pinuse() const { return !!(_head & PINUSE_BIT); } + bool flag4inuse() const { return !!(_head & FLAG4_BIT); } + bool is_inuse() const { return (_head & INUSE_BITS) != PINUSE_BIT; } + bool is_mmapped() const { return (_head & INUSE_BITS) == 0; } + + size_t chunksize() const { return _head & ~(FLAG_BITS); } + + void clear_pinuse() { _head &= ~PINUSE_BIT; } + void set_flag4() { _head |= FLAG4_BIT; } + void clear_flag4() { _head &= ~FLAG4_BIT; } + + // Treat space at ptr +/- offset as a chunk + malloc_chunk_header * chunk_plus_offset(size_t s) + { + return (malloc_chunk_header *)((char*)this + s); + } + malloc_chunk_header * chunk_minus_offset(size_t s) + { + return (malloc_chunk_header *)((char*)this - s); + } + + // Ptr to next or previous physical malloc_chunk. + malloc_chunk_header * next_chunk() + { + return (malloc_chunk_header *)((char*)this + (_head & ~FLAG_BITS)); + } + malloc_chunk_header * prev_chunk() + { + return (malloc_chunk_header *)((char*)this - (_prev_foot)); + } + + // extract next chunk's pinuse bit + size_t next_pinuse() { return next_chunk()->_head & PINUSE_BIT; } + + size_t _prev_foot; // Size of previous chunk (if free). + size_t _head; // Size and inuse bits. +}; + +// =============================================================================== +struct malloc_chunk : public malloc_chunk_header +{ + // Set size, pinuse bit, foot, and clear next pinuse + void set_free_with_pinuse(size_t s, malloc_chunk* n) + { + n->clear_pinuse(); + set_size_and_pinuse_of_free_chunk(s); + } + + // Get the internal overhead associated with chunk p + size_t overhead_for() { return is_mmapped() ? SPP_MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD; } + + // Return true if malloced space is not necessarily cleared + bool calloc_must_clear() + { +#if SPP_MMAP_CLEARS + return !is_mmapped(); +#else + return true; +#endif + } + + struct malloc_chunk* _fd; // double links -- used only if free. + struct malloc_chunk* _bk; +}; + +static const unsigned MCHUNK_SIZE = sizeof(malloc_chunk); + +/* The smallest size we can malloc is an aligned minimal chunk */ +static const unsigned MIN_CHUNK_SIZE = (MCHUNK_SIZE + spp_chunk_align_mask) & ~spp_chunk_align_mask; + +typedef malloc_chunk mchunk; +typedef malloc_chunk* mchunkptr; +typedef malloc_chunk_header *hchunkptr; +typedef malloc_chunk* sbinptr; // The type of bins of chunks +typedef unsigned int bindex_t; // Described below +typedef unsigned int binmap_t; // Described below +typedef unsigned int flag_t; // The type of various bit flag sets + +// conversion from malloc headers to user pointers, and back +static SPP_FORCEINLINE void *chunk2mem(const void *p) { return (void *)((char *)p + 2 * sizeof(size_t)); } +static SPP_FORCEINLINE mchunkptr mem2chunk(const void *mem) { return (mchunkptr)((char *)mem - 2 * sizeof(size_t)); } + +// chunk associated with aligned address A +static SPP_FORCEINLINE mchunkptr align_as_chunk(char *A) { return (mchunkptr)(A + align_offset(chunk2mem(A))); } + +// Bounds on request (not chunk) sizes. 
+static const unsigned MAX_REQUEST = (-MIN_CHUNK_SIZE) << 2; +static const unsigned MIN_REQUEST = MIN_CHUNK_SIZE - CHUNK_OVERHEAD - 1; + +// pad request bytes into a usable size +static SPP_FORCEINLINE size_t pad_request(size_t req) +{ + return (req + CHUNK_OVERHEAD + spp_chunk_align_mask) & ~spp_chunk_align_mask; +} + +// pad request, checking for minimum (but not maximum) +static SPP_FORCEINLINE size_t request2size(size_t req) +{ + return req < MIN_REQUEST ? MIN_CHUNK_SIZE : pad_request(req); +} + + +/* ------------------ Operations on head and foot fields ----------------- */ + +/* + The head field of a chunk is or'ed with PINUSE_BIT when previous + adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in + use, unless mmapped, in which case both bits are cleared. + + FLAG4_BIT is not used by this malloc, but might be useful in extensions. +*/ + +// Head value for fenceposts +static const unsigned FENCEPOST_HEAD = INUSE_BITS | sizeof(size_t); + + +/* ---------------------- Overlaid data structures ----------------------- */ + +/* + When chunks are not in use, they are treated as nodes of either + lists or trees. + + "Small" chunks are stored in circular doubly-linked lists, and look + like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Larger chunks are kept in a form of bitwise digital trees (aka + tries) keyed on chunksizes. Because malloc_tree_chunks are only for + free chunks greater than 256 bytes, their size doesn't impose any + constraints on user chunk sizes. Each node looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to left child (child[0]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to right child (child[1]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to parent | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | bin index of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Each tree holding treenodes is a tree of unique chunk sizes. 
Chunks
 + of the same size are arranged in a circularly-linked list, with only
 + the oldest chunk (the next to be used, in our FIFO ordering)
 + actually in the tree. (Tree members are distinguished by a non-null
 + parent pointer.) If a chunk with the same size as an existing node
 + is inserted, it is linked off the existing node using pointers that
 + work in the same way as fd/bk pointers of small chunks.
 +
 + Each tree contains a power of 2 sized range of chunk sizes (the
 + smallest is 0x100 <= x < 0x180), which is divided in half at each
 + tree level, with the chunks in the smaller half of the range (0x100
 + <= x < 0x140 for the top node) in the left subtree and the larger
 + half (0x140 <= x < 0x180) in the right subtree. This is, of course,
 + done by inspecting individual bits.
 +
 + Using these rules, each node's left subtree contains all smaller
 + sizes than its right subtree. However, the node at the root of each
 + subtree has no particular ordering relationship to either. (The
 + dividing line between the subtree sizes is based on trie relation.)
 + If we remove the last chunk of a given size from the interior of the
 + tree, we need to replace it with a leaf node. The tree ordering
 + rules permit a node to be replaced by any leaf below it.
 +
 + The smallest chunk in a tree (a common operation in a best-fit
 + allocator) can be found by walking a path to the leftmost leaf in
 + the tree. Unlike a usual binary tree, where we follow left child
 + pointers until we reach a null, here we follow the right child
 + pointer any time the left one is null, until we reach a leaf with
 + both child pointers null. The smallest chunk in the tree will be
 + somewhere along that path.
 +
 + The worst case number of steps to add, find, or remove a node is
 + bounded by the number of bits differentiating chunks within
 + bins. Under current bin calculations, this ranges from 6 up to 21
 + (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
 + is of course much better.
 +*/
 +
 +// ===============================================================================
 +struct malloc_tree_chunk : public malloc_chunk_header
 +{
 + malloc_tree_chunk *leftmost_child()
 + {
 + return _child[0] ? _child[0] : _child[1];
 + }
 +
 +
 + malloc_tree_chunk* _fd;
 + malloc_tree_chunk* _bk;
 +
 + malloc_tree_chunk* _child[2];
 + malloc_tree_chunk* _parent;
 + bindex_t _index;
 +};
 +
 +typedef malloc_tree_chunk tchunk;
 +typedef malloc_tree_chunk* tchunkptr;
 +typedef malloc_tree_chunk* tbinptr; // The type of bins of trees
 +
 +/* ----------------------------- Segments -------------------------------- */
 +
 +/*
 + Each malloc space may include non-contiguous segments, held in a
 + list headed by an embedded malloc_segment record representing the
 + top-most space. Segments also include flags holding properties of
 + the space. Large chunks that are directly allocated by mmap are not
 + included in this list. They are instead independently created and
 + destroyed without otherwise keeping track of them.
 +
 + Segment management mainly comes into play for spaces allocated by
 + MMAP. Any call to MMAP might or might not return memory that is
 + adjacent to an existing segment. MORECORE normally contiguously
 + extends the current space, so this space is almost always adjacent,
 + which is simpler and faster to deal with. (This is why MORECORE is
 + used preferentially to MMAP when both are available -- see
 + sys_alloc.) 
When allocating using MMAP, we don't use any of the + hinting mechanisms (inconsistently) supported in various + implementations of unix mmap, or distinguish reserving from + committing memory. Instead, we just ask for space, and exploit + contiguity when we get it. It is probably possible to do + better than this on some systems, but no general scheme seems + to be significantly better. + + Management entails a simpler variant of the consolidation scheme + used for chunks to reduce fragmentation -- new adjacent memory is + normally prepended or appended to an existing segment. However, + there are limitations compared to chunk consolidation that mostly + reflect the fact that segment processing is relatively infrequent + (occurring only when getting memory from system) and that we + don't expect to have huge numbers of segments: + + * Segments are not indexed, so traversal requires linear scans. (It + would be possible to index these, but is not worth the extra + overhead and complexity for most programs on most platforms.) + * New segments are only appended to old ones when holding top-most + memory; if they cannot be prepended to others, they are held in + different segments. + + Except for the top-most segment of an mstate, each segment record + is kept at the tail of its segment. Segments are added by pushing + segment records onto the list headed by &mstate.seg for the + containing mstate. + + Segment flags control allocation/merge/deallocation policies: + * If EXTERN_BIT set, then we did not allocate this segment, + and so should not try to deallocate or merge with others. + (This currently holds only for the initial segment passed + into create_mspace_with_base.) + * If USE_MMAP_BIT set, the segment may be merged with + other surrounding mmapped segments and trimmed/de-allocated + using munmap. + * If neither bit is set, then the segment was obtained using + MORECORE so can be merged with surrounding MORECORE'd segments + and deallocated/trimmed using MORECORE with negative arguments. +*/ + +// =============================================================================== +struct malloc_segment +{ + bool is_mmapped_segment() { return !!(_sflags & USE_MMAP_BIT); } + bool is_extern_segment() { return !!(_sflags & EXTERN_BIT); } + + char* _base; // base address + size_t _size; // allocated size + malloc_segment* _next; // ptr to next segment + flag_t _sflags; // mmap and extern flag +}; + +typedef malloc_segment msegment; +typedef malloc_segment* msegmentptr; + +/* ------------- Malloc_params ------------------- */ + +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag. 
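 +
 + For instance, assuming a 4096-byte page size, page_align(5000) rounds up
 + to 8192; with the default 64 KiB granularity, granularity_align(5000)
 + rounds up to 65536.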
+*/
 +
 +// ===============================================================================
 +struct malloc_params
 +{
 + malloc_params() : _magic(0) {}
 +
 + void ensure_initialization()
 + {
 + if (!_magic)
 + _init();
 + }
 +
 + SPP_IMPL int change(int param_number, int value);
 +
 + size_t page_align(size_t sz)
 + {
 + return (sz + (_page_size - 1)) & ~(_page_size - 1);
 + }
 +
 + size_t granularity_align(size_t sz)
 + {
 + return (sz + (_granularity - 1)) & ~(_granularity - 1);
 + }
 +
 + bool is_page_aligned(char *S)
 + {
 + return ((size_t)S & (_page_size - 1)) == 0;
 + }
 +
 + SPP_IMPL int _init();
 +
 + size_t _magic;
 + size_t _page_size;
 + size_t _granularity;
 + size_t _mmap_threshold;
 + size_t _trim_threshold;
 + flag_t _default_mflags;
 +};
 +
 +static malloc_params mparams;
 +
 +/* ---------------------------- malloc_state ----------------------------- */
 +
 +/*
 + A malloc_state holds all of the bookkeeping for a space.
 + The main fields are:
 +
 + Top
 + The topmost chunk of the currently active segment. Its size is
 + cached in topsize. The actual size of topmost space is
 + topsize+TOP_FOOT_SIZE, which includes space reserved for adding
 + fenceposts and segment records if necessary when getting more
 + space from the system. The size at which to autotrim top is
 + cached from mparams in trim_check, except that it is disabled if
 + an autotrim fails.
 +
 + Designated victim (dv)
 + This is the preferred chunk for servicing small requests that
 + don't have exact fits. It is normally the chunk split off most
 + recently to service another small request. Its size is cached in
 + dvsize. The link fields of this chunk are not maintained since it
 + is not kept in a bin.
 +
 + SmallBins
 + An array of bin headers for free chunks. These bins hold chunks
 + with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
 + chunks of all the same size, spaced 8 bytes apart. To simplify
 + use in double-linked lists, each bin header acts as a malloc_chunk
 + pointing to the real first node, if it exists (else pointing to
 + itself). This avoids special-casing for headers. But to avoid
 + waste, we allocate only the fd/bk pointers of bins, and then use
 + repositioning tricks to treat these as the fields of a chunk.
 +
 + TreeBins
 + Treebins are pointers to the roots of trees holding a range of
 + sizes. There are 2 equally spaced treebins for each power of two
 + from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything
 + larger.
 +
 + Bin maps
 + There is one bit map for small bins ("smallmap") and one for
 + treebins ("treemap"). Each bin sets its bit when non-empty, and
 + clears the bit when empty. Bit operations are then used to avoid
 + bin-by-bin searching -- nearly all "search" is done without ever
 + looking at bins that won't be selected. The bit maps
 + conservatively use 32 bits per map word, even on a 64-bit system.
 + For a good description of some of the bit-based techniques used
 + here, see Henry S. Warren Jr's book "Hacker's Delight" (and
 + supplement at http://hackersdelight.org/). Many of these are
 + intended to reduce the branchiness of paths through malloc etc, as
 + well as to reduce the number of memory locations read or written.
 +
 + Segments
 + A list of segments headed by an embedded malloc_segment record
 + representing the initial space.
 +
 + Address check support
 + The least_addr field is the least address ever obtained from
 + MORECORE or MMAP. Attempted frees and reallocs of any address less
 + than this are trapped (unless SPP_INSECURE is defined). 
+
 + Magic tag
 + A cross-check field that should always hold the same value as mparams._magic.
 +
 + Max allowed footprint
 + The maximum allowed bytes to allocate from system (zero means no limit)
 +
 + Flags
 + Bits recording whether to use MMAP, locks, or contiguous MORECORE
 +
 + Statistics
 + Each space keeps track of current and maximum system memory
 + obtained via MORECORE or MMAP.
 +
 + Trim support
 + Fields holding the amount of unused topmost memory that should trigger
 + trimming, and a counter to force periodic scanning to release unused
 + non-topmost segments.
 +
 + Extension support
 + A void* pointer and a size_t field that can be used to help implement
 + extensions to this malloc.
 +*/
 +
 +
 +// ================================================================================
 +class malloc_state
 +{
 +public:
 + /* ----------------------- _malloc, _free, etc... --- */
 + SPP_FORCEINLINE void* _malloc(size_t bytes);
 + SPP_FORCEINLINE void _free(mchunkptr p);
 +
 +
 + /* ------------------------ Relays to internal calls to malloc/free from realloc, memalign etc */
 + void *internal_malloc(size_t b) { return mspace_malloc(this, b); }
 + void internal_free(void *mem) { mspace_free(this, mem); }
 +
 + /* ------------------------ ----------------------- */
 +
 + SPP_IMPL void init_top(mchunkptr p, size_t psize);
 + SPP_IMPL void init_bins();
 + SPP_IMPL void init(char* tbase, size_t tsize);
 +
 + /* ------------------------ System alloc/dealloc -------------------------- */
 + SPP_IMPL void* sys_alloc(size_t nb);
 + SPP_IMPL size_t release_unused_segments();
 + SPP_IMPL int sys_trim(size_t pad);
 + SPP_IMPL void dispose_chunk(mchunkptr p, size_t psize);
 +
 + /* ----------------------- Internal support for realloc, memalign, etc --- */
 + SPP_IMPL mchunkptr try_realloc_chunk(mchunkptr p, size_t nb, int can_move);
 + SPP_IMPL void* internal_memalign(size_t alignment, size_t bytes);
 + SPP_IMPL void** ialloc(size_t n_elements, size_t* sizes, int opts, void* chunks[]);
 + SPP_IMPL size_t internal_bulk_free(void* array[], size_t nelem);
 + SPP_IMPL void internal_inspect_all(void(*handler)(void *start, void *end,
 + size_t used_bytes, void* callback_arg),
 + void* arg);
 +
 + /* -------------------------- system alloc setup (Operations on mflags) ----- */
 + bool use_lock() const { return false; }
 + void enable_lock() {}
 + void set_lock(int) {}
 + void disable_lock() {}
 +
 + bool use_mmap() const { return !!(_mflags & USE_MMAP_BIT); }
 + void enable_mmap() { _mflags |= USE_MMAP_BIT; }
 +
 +#if SPP_HAVE_MMAP
 + void disable_mmap() { _mflags &= ~USE_MMAP_BIT; }
 +#else
 + void disable_mmap() {}
 +#endif
 +
 + /* ----------------------- Runtime Check Support ------------------------- */
 +
 + /*
 + For security, the main invariant is that malloc/free/etc never
 + writes to a static address other than malloc_state, unless static
 + malloc_state itself has been corrupted, which cannot occur via
 + malloc (because of these checks). In essence this means that we
 + believe all pointers, sizes, maps etc held in malloc_state, but
 + check all of those linked or offsetted from other embedded data
 + structures. These checks are interspersed with main code in a way
 + that tends to minimize their run-time cost.
 +
 + When SPP_FOOTERS is defined, in addition to range checking, we also
 + verify footer fields of inuse chunks, which can be used to guarantee
 + that the mstate controlling malloc/free is intact. 
This is a
 + streamlined version of the approach described by William Robertson
 + et al in "Run-time Detection of Heap-based Overflows" LISA'03
 + http://www.usenix.org/events/lisa03/tech/robertson.html The footer
 + of an inuse chunk holds the xor of its mstate and a random seed,
 + which is checked upon calls to free() and realloc(). This is
 + (probabilistically) unguessable from outside the program, but can be
 + computed by any code successfully malloc'ing any chunk, so does not
 + itself provide protection against code that has already broken
 + security through some other means. Unlike Robertson et al, we
 + always dynamically check addresses of all offset chunks (previous,
 + next, etc). This turns out to be cheaper than relying on hashes.
 + */
 +
 +
 +#if !SPP_INSECURE
 + // Check if address a is at least as high as any from MORECORE or MMAP
 + bool ok_address(void *a) const { return (char *)a >= _least_addr; }
 +
 + // Check if address of next chunk n is higher than base chunk p
 + static bool ok_next(void *p, void *n) { return p < n; }
 +
 + // Check if p has inuse status
 + static bool ok_inuse(mchunkptr p) { return p->is_inuse(); }
 +
 + // Check if p has its pinuse bit on
 + static bool ok_pinuse(mchunkptr p) { return p->pinuse(); }
 +
 + // Check if (alleged) mstate m has expected magic field
 + bool ok_magic() const { return _magic == mparams._magic; }
 +
 + // In gcc, use __builtin_expect to minimize impact of checks
 + #if defined(__GNUC__) && __GNUC__ >= 3
 + static bool rtcheck(bool e) { return __builtin_expect(e, 1); }
 + #else
 + static bool rtcheck(bool e) { return e; }
 + #endif
 +#else
 + static bool ok_address(void *) { return true; }
 + static bool ok_next(void *, void *) { return true; }
 + static bool ok_inuse(mchunkptr) { return true; }
 + static bool ok_pinuse(mchunkptr) { return true; }
 + static bool ok_magic() { return true; }
 + static bool rtcheck(bool) { return true; }
 +#endif
 +
 + bool is_initialized() const { return _top != 0; }
 +
 + bool use_noncontiguous() const { return !!(_mflags & USE_NONCONTIGUOUS_BIT); }
 + void disable_contiguous() { _mflags |= USE_NONCONTIGUOUS_BIT; }
 +
 + // Return segment holding given address
 + msegmentptr segment_holding(char* addr) const
 + {
 + msegmentptr sp = (msegmentptr)&_seg;
 + for (;;)
 + {
 + if (addr >= sp->_base && addr < sp->_base + sp->_size)
 + return sp;
 + if ((sp = sp->_next) == 0)
 + return 0;
 + }
 + }
 +
 + // Return true if segment contains a segment link
 + int has_segment_link(msegmentptr ss) const
 + {
 + msegmentptr sp = (msegmentptr)&_seg;
 + for (;;)
 + {
 + if ((char*)sp >= ss->_base && (char*)sp < ss->_base + ss->_size)
 + return 1;
 + if ((sp = sp->_next) == 0)
 + return 0;
 + }
 + }
 +
 + bool should_trim(size_t s) const { return s > _trim_check; }
 +
 + /* -------------------------- Debugging setup ---------------------------- */
 +
 +#if !
SPP_DEBUG
 + void check_free_chunk(mchunkptr) {}
 + void check_inuse_chunk(mchunkptr) {}
 + void check_malloced_chunk(void*, size_t) {}
 + void check_mmapped_chunk(mchunkptr) {}
 + void check_malloc_state() {}
 + void check_top_chunk(mchunkptr) {}
 +#else /* SPP_DEBUG */
 + void check_free_chunk(mchunkptr p) { do_check_free_chunk(p); }
 + void check_inuse_chunk(mchunkptr p) { do_check_inuse_chunk(p); }
 + void check_malloced_chunk(void* p, size_t s) { do_check_malloced_chunk(p, s); }
 + void check_mmapped_chunk(mchunkptr p) { do_check_mmapped_chunk(p); }
 + void check_malloc_state() { do_check_malloc_state(); }
 + void check_top_chunk(mchunkptr p) { do_check_top_chunk(p); }
 +
 + void do_check_any_chunk(mchunkptr p) const;
 + void do_check_top_chunk(mchunkptr p) const;
 + void do_check_mmapped_chunk(mchunkptr p) const;
 + void do_check_inuse_chunk(mchunkptr p) const;
 + void do_check_free_chunk(mchunkptr p) const;
 + void do_check_malloced_chunk(void* mem, size_t s) const;
 + void do_check_tree(tchunkptr t);
 + void do_check_treebin(bindex_t i);
 + void do_check_smallbin(bindex_t i);
 + void do_check_malloc_state();
 + int bin_find(mchunkptr x);
 + size_t traverse_and_check();
 +#endif
 +
 +private:
 +
 + /* ---------------------------- Indexing Bins ---------------------------- */
 +
 + static bool is_small(size_t s) { return (s >> SMALLBIN_SHIFT) < NSMALLBINS; }
 + static bindex_t small_index(size_t s) { return (bindex_t)(s >> SMALLBIN_SHIFT); }
 + static size_t small_index2size(size_t i) { return i << SMALLBIN_SHIFT; }
 + static bindex_t MIN_SMALL_INDEX() { return small_index(MIN_CHUNK_SIZE); }
 +
 + // compute the tree index for size S. Use x86 asm if possible
 +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 + SPP_FORCEINLINE static bindex_t compute_tree_index(size_t S)
 + {
 + unsigned int X = S >> TREEBIN_SHIFT;
 + if (X == 0)
 + return 0;
 + else if (X > 0xFFFF)
 + return NTREEBINS - 1;
 +
 + unsigned int K = (unsigned) sizeof(X) * __CHAR_BIT__ - 1 - (unsigned) __builtin_clz(X);
 + return (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1)));
 + }
 +
 +#elif defined (__INTEL_COMPILER)
 + SPP_FORCEINLINE static bindex_t compute_tree_index(size_t S)
 + {
 + size_t X = S >> TREEBIN_SHIFT;
 + if (X == 0)
 + return 0;
 + else if (X > 0xFFFF)
 + return NTREEBINS - 1;
 +
 + unsigned int K = _bit_scan_reverse(X);
 + return (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1)));
 + }
 +
 +#elif defined(_MSC_VER) && _MSC_VER>=1300
 + SPP_FORCEINLINE static bindex_t compute_tree_index(size_t S)
 + {
 + size_t X = S >> TREEBIN_SHIFT;
 + if (X == 0)
 + return 0;
 + else if (X > 0xFFFF)
 + return NTREEBINS - 1;
 +
 + unsigned int K;
 + _BitScanReverse((DWORD *) &K, (DWORD) X);
 + return (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1)));
 + }
 +
 +#else // portable fallback
 + SPP_FORCEINLINE static bindex_t compute_tree_index(size_t S)
 + {
 + size_t X = S >> TREEBIN_SHIFT;
 + if (X == 0)
 + return 0;
 + else if (X > 0xFFFF)
 + return NTREEBINS - 1;
 +
 + unsigned int Y = (unsigned int)X;
 + unsigned int N = ((Y - 0x100) >> 16) & 8;
 + unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;
 + N += K;
 + N += K = (((Y <<= K) - 0x4000) >> 16) & 2;
 + K = 14 - N + ((Y <<= K) >> 15);
 + return (K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1));
 + }
 +#endif
 +
 + // Shift placing maximum resolved bit in a treebin at i as sign bit
 + static bindex_t leftshift_for_tree_index(bindex_t i)
 + {
 + return (i == NTREEBINS - 1) ? 
0 :
 + ((spp_size_t_bitsize - 1) - ((i >> 1) + TREEBIN_SHIFT - 2));
 + }
 +
 + // The size of the smallest chunk held in bin with index i
 + static bindex_t minsize_for_tree_index(bindex_t i)
 + {
 + return ((size_t)1 << ((i >> 1) + TREEBIN_SHIFT)) |
 + (((size_t)(i & 1)) << ((i >> 1) + TREEBIN_SHIFT - 1));
 + }
 +
 +
 + // ----------- isolate the least set bit of a bitmap
 + static binmap_t least_bit(binmap_t x) { return x & -x; }
 +
 + // ----------- mask with all bits to left of least bit of x on
 + static binmap_t left_bits(binmap_t x) { return (x << 1) | -(x << 1); }
 +
 + // index corresponding to given bit. Use x86 asm if possible
 +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 + static bindex_t compute_bit2idx(binmap_t X)
 + {
 + unsigned int J;
 + J = __builtin_ctz(X);
 + return (bindex_t)J;
 + }
 +
 +#elif defined (__INTEL_COMPILER)
 + static bindex_t compute_bit2idx(binmap_t X)
 + {
 + unsigned int J;
 + J = _bit_scan_forward(X);
 + return (bindex_t)J;
 + }
 +
 +#elif defined(_MSC_VER) && _MSC_VER>=1300
 + static bindex_t compute_bit2idx(binmap_t X)
 + {
 + unsigned int J;
 + _BitScanForward((DWORD *) &J, X);
 + return (bindex_t)J;
 + }
 +
 +#elif SPP_USE_BUILTIN_FFS
 + static bindex_t compute_bit2idx(binmap_t X) { return ffs(X) - 1; }
 +
 +#else
 + static bindex_t compute_bit2idx(binmap_t X)
 + {
 + unsigned int Y = X - 1;
 + unsigned int K = Y >> (16 - 4) & 16;
 + unsigned int N = K; Y >>= K;
 + N += K = Y >> (8 - 3) & 8; Y >>= K;
 + N += K = Y >> (4 - 2) & 4; Y >>= K;
 + N += K = Y >> (2 - 1) & 2; Y >>= K;
 + N += K = Y >> (1 - 0) & 1; Y >>= K;
 + return (bindex_t)(N + Y);
 + }
 +#endif
 +
 + /* ------------------------ Set up inuse chunks with or without footers ---*/
 +#if !SPP_FOOTERS
 + void mark_inuse_foot(malloc_chunk_header *, size_t) {}
 +#else
 + // Set foot of inuse chunk to be xor of mstate and seed
 + void mark_inuse_foot(malloc_chunk_header *p, size_t s)
 + {
 + (((mchunkptr)((char*)p + s))->_prev_foot = (size_t)this ^ mparams._magic);
 + }
 +#endif
 +
 + void set_inuse(malloc_chunk_header *p, size_t s)
 + {
 + p->_head = (p->_head & PINUSE_BIT) | s | CINUSE_BIT;
 + ((mchunkptr)(((char*)p) + s))->_head |= PINUSE_BIT;
 + mark_inuse_foot(p, s);
 + }
 +
 + void set_inuse_and_pinuse(malloc_chunk_header *p, size_t s)
 + {
 + p->_head = s | PINUSE_BIT | CINUSE_BIT;
 + ((mchunkptr)(((char*)p) + s))->_head |= PINUSE_BIT;
 + mark_inuse_foot(p, s);
 + }
 +
 + void set_size_and_pinuse_of_inuse_chunk(malloc_chunk_header *p, size_t s)
 + {
 + p->_head = s | PINUSE_BIT | CINUSE_BIT;
 + mark_inuse_foot(p, s);
 + }
 +
 + /* ------------------------ Addressing by index. 
See about smallbin repositioning --- */
 + sbinptr smallbin_at(bindex_t i) const { return (sbinptr)((char*)&_smallbins[i << 1]); }
 + tbinptr* treebin_at(bindex_t i) { return &_treebins[i]; }
 +
 + /* ----------------------- bit corresponding to given index ---------*/
 + static binmap_t idx2bit(bindex_t i) { return ((binmap_t)1 << i); }
 +
 + // --------------- Mark/Clear bits with given index
 + void mark_smallmap(bindex_t i) { _smallmap |= idx2bit(i); }
 + void clear_smallmap(bindex_t i) { _smallmap &= ~idx2bit(i); }
 + binmap_t smallmap_is_marked(bindex_t i) const { return _smallmap & idx2bit(i); }
 +
 + void mark_treemap(bindex_t i) { _treemap |= idx2bit(i); }
 + void clear_treemap(bindex_t i) { _treemap &= ~idx2bit(i); }
 + binmap_t treemap_is_marked(bindex_t i) const { return _treemap & idx2bit(i); }
 +
 + /* ------------------------ ----------------------- */
 + SPP_FORCEINLINE void insert_small_chunk(mchunkptr P, size_t S);
 + SPP_FORCEINLINE void unlink_small_chunk(mchunkptr P, size_t S);
 + SPP_FORCEINLINE void unlink_first_small_chunk(mchunkptr B, mchunkptr P, bindex_t I);
 + SPP_FORCEINLINE void replace_dv(mchunkptr P, size_t S);
 +
 + /* ------------------------- Operations on trees ------------------------- */
 + SPP_FORCEINLINE void insert_large_chunk(tchunkptr X, size_t S);
 + SPP_FORCEINLINE void unlink_large_chunk(tchunkptr X);
 +
 + /* ------------------------ Relays to large vs small bin operations */
 + SPP_FORCEINLINE void insert_chunk(mchunkptr P, size_t S);
 + SPP_FORCEINLINE void unlink_chunk(mchunkptr P, size_t S);
 +
 + /* ----------------------- Direct-mmapping chunks ----------------------- */
 + SPP_IMPL void* mmap_alloc(size_t nb);
 + SPP_IMPL mchunkptr mmap_resize(mchunkptr oldp, size_t nb, int flags);
 +
 + SPP_IMPL void reset_on_error();
 + SPP_IMPL void* prepend_alloc(char* newbase, char* oldbase, size_t nb);
 + SPP_IMPL void add_segment(char* tbase, size_t tsize, flag_t mmapped);
 +
 + /* ------------------------ malloc --------------------------- */
 + SPP_IMPL void* tmalloc_large(size_t nb);
 + SPP_IMPL void* tmalloc_small(size_t nb);
 +
 + /* ------------------------ Bin types, widths and sizes -------- */
 + static const size_t NSMALLBINS = 32;
 + static const size_t NTREEBINS = 32;
 + static const size_t SMALLBIN_SHIFT = 3;
 + static const size_t SMALLBIN_WIDTH = 1 << SMALLBIN_SHIFT;
 + static const size_t TREEBIN_SHIFT = 8;
 + static const size_t MIN_LARGE_SIZE = 1 << TREEBIN_SHIFT;
 + static const size_t MAX_SMALL_SIZE = (MIN_LARGE_SIZE - 1);
 + static const size_t MAX_SMALL_REQUEST = (MAX_SMALL_SIZE - spp_chunk_align_mask - CHUNK_OVERHEAD);
 +
 + /* ------------------------ data members --------------------------- */
 + binmap_t _smallmap;
 + binmap_t _treemap;
 + size_t _dvsize;
 + size_t _topsize;
 + char* _least_addr;
 + mchunkptr _dv;
 + mchunkptr _top;
 + size_t _trim_check;
 + size_t _release_checks;
 + size_t _magic;
 + mchunkptr _smallbins[(NSMALLBINS + 1) * 2];
 + tbinptr _treebins[NTREEBINS];
 +public:
 + size_t _footprint;
 + size_t _max_footprint;
 + size_t _footprint_limit; // zero means no limit
 + flag_t _mflags;
 +
 + msegment _seg;
 +
 +private:
 + void* _extp; // Unused but available for extensions
 + size_t _exts;
 +};
 +
 +typedef malloc_state* mstate;
 +
 +/* ------------- end malloc_state ------------------- */
 +
 +#if SPP_FOOTERS
 +static malloc_state* get_mstate_for(malloc_chunk_header *p)
 +{
 + return (malloc_state*)(((mchunkptr)((char*)(p) +
 + (p->chunksize())))->_prev_foot ^ mparams._magic);
 +}
 +#endif
 +
 +/* -------------------------- system alloc setup ------------------------- */
 +
 +
 +
 +// For mmap, 
use granularity alignment on windows, else page-align +#ifdef WIN32 + #define mmap_align(S) mparams.granularity_align(S) +#else + #define mmap_align(S) mparams.page_align(S) +#endif + +// True if segment S holds address A +static bool segment_holds(msegmentptr S, mchunkptr A) +{ + return (char*)A >= S->_base && (char*)A < S->_base + S->_size; +} + +/* + top_foot_size is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. +*/ +static SPP_FORCEINLINE size_t top_foot_size() +{ + return align_offset(chunk2mem((void *)0)) + + pad_request(sizeof(struct malloc_segment)) + + MIN_CHUNK_SIZE; +} + + +// For sys_alloc, enough padding to ensure can malloc request on success +static SPP_FORCEINLINE size_t sys_alloc_padding() +{ + return top_foot_size() + SPP_MALLOC_ALIGNMENT; +} + + +#define SPP_USAGE_ERROR_ACTION(m,p) SPP_ABORT + +/* ---------------------------- setting mparams -------------------------- */ + +// Initialize mparams +int malloc_params::_init() +{ +#ifdef NEED_GLOBAL_LOCK_INIT + if (malloc_global_mutex_status <= 0) + init_malloc_global_mutex(); +#endif + + if (_magic == 0) + { + size_t magic; + size_t psize; + size_t gsize; + +#ifndef WIN32 + psize = malloc_getpagesize; + gsize = ((SPP_DEFAULT_GRANULARITY != 0) ? SPP_DEFAULT_GRANULARITY : psize); +#else + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + psize = system_info.dwPageSize; + gsize = ((SPP_DEFAULT_GRANULARITY != 0) ? + SPP_DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); + } +#endif + + /* Sanity-check configuration: + size_t must be unsigned and as wide as pointer type. + ints must be at least 4 bytes. + alignment must be at least 8. + Alignment, min chunk size, and page size must all be powers of 2. + */ + if ((sizeof(size_t) != sizeof(char*)) || + (spp_max_size_t < MIN_CHUNK_SIZE) || + (sizeof(int) < 4) || + (SPP_MALLOC_ALIGNMENT < (size_t)8U) || + ((SPP_MALLOC_ALIGNMENT & (SPP_MALLOC_ALIGNMENT - 1)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE - 1)) != 0) || + ((gsize & (gsize - 1)) != 0) || + ((psize & (psize - 1)) != 0)) + SPP_ABORT; + _granularity = gsize; + _page_size = psize; + _mmap_threshold = SPP_DEFAULT_MMAP_THRESHOLD; + _trim_threshold = SPP_DEFAULT_TRIM_THRESHOLD; + _default_mflags = USE_MMAP_BIT | USE_NONCONTIGUOUS_BIT; + + { +#if SPP_USE_DEV_RANDOM + int fd; + unsigned char buf[sizeof(size_t)]; + // Try to use /dev/urandom, else fall back on using time + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && + read(fd, buf, sizeof(buf)) == sizeof(buf)) + { + magic = *((size_t *) buf); + close(fd); + } + else +#endif + { +#ifdef WIN32 + magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); +#elif defined(SPP_LACKS_TIME_H) + magic = (size_t)&magic ^ (size_t)0x55555555U; +#else + magic = (size_t)(time(0) ^ (size_t)0x55555555U); +#endif + } + magic |= (size_t)8U; // ensure nonzero + magic &= ~(size_t)7U; // improve chances of fault for bad values + // Until memory modes commonly available, use volatile-write + (*(volatile size_t *)(&(_magic))) = magic; + } + } + + return 1; +} + +/* + mallopt tuning options. SVID/XPG defines four standard parameter + numbers for mallopt, normally defined in malloc.h. None of these + are used in this malloc, so setting them has no effect. But this + malloc does support the following options. 
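 +
 + Illustrative calls (parameter values are examples only):
 +
 +     mparams.change(m_granularity, 128 * 1024); // power of two >= page size
 +     mparams.change(m_trim_threshold, -1);      // -1 maps to spp_max_size_t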
+*/ +static const int m_trim_threshold = -1; +static const int m_granularity = -2; +static const int m_mmap_threshold = -3; + +// support for mallopt +int malloc_params::change(int param_number, int value) +{ + size_t val; + ensure_initialization(); + val = (value == -1) ? spp_max_size_t : (size_t)value; + + switch (param_number) + { + case m_trim_threshold: + _trim_threshold = val; + return 1; + + case m_granularity: + if (val >= _page_size && ((val & (val - 1)) == 0)) + { + _granularity = val; + return 1; + } + else + return 0; + + case m_mmap_threshold: + _mmap_threshold = val; + return 1; + + default: + return 0; + } +} + +#if SPP_DEBUG +/* ------------------------- Debugging Support --------------------------- */ + +// Check properties of any chunk, whether free, inuse, mmapped etc +void malloc_state::do_check_any_chunk(mchunkptr p) const +{ + assert((spp_is_aligned(chunk2mem(p))) || (p->_head == FENCEPOST_HEAD)); + assert(ok_address(p)); +} + +// Check properties of top chunk +void malloc_state::do_check_top_chunk(mchunkptr p) const +{ + msegmentptr sp = segment_holding((char*)p); + size_t sz = p->_head & ~INUSE_BITS; // third-lowest bit can be set! + assert(sp != 0); + assert((spp_is_aligned(chunk2mem(p))) || (p->_head == FENCEPOST_HEAD)); + assert(ok_address(p)); + assert(sz == _topsize); + assert(sz > 0); + assert(sz == ((sp->_base + sp->_size) - (char*)p) - top_foot_size()); + assert(p->pinuse()); + assert(!p->chunk_plus_offset(sz)->pinuse()); +} + +// Check properties of (inuse) mmapped chunks +void malloc_state::do_check_mmapped_chunk(mchunkptr p) const +{ + size_t sz = p->chunksize(); + size_t len = (sz + (p->_prev_foot) + SPP_MMAP_FOOT_PAD); + assert(p->is_mmapped()); + assert(use_mmap()); + assert((spp_is_aligned(chunk2mem(p))) || (p->_head == FENCEPOST_HEAD)); + assert(ok_address(p)); + assert(!is_small(sz)); + assert((len & (mparams._page_size - 1)) == 0); + assert(p->chunk_plus_offset(sz)->_head == FENCEPOST_HEAD); + assert(p->chunk_plus_offset(sz + sizeof(size_t))->_head == 0); +} + +// Check properties of inuse chunks +void malloc_state::do_check_inuse_chunk(mchunkptr p) const +{ + do_check_any_chunk(p); + assert(p->is_inuse()); + assert(p->next_pinuse()); + // If not pinuse and not mmapped, previous chunk has OK offset + assert(p->is_mmapped() || p->pinuse() || (mchunkptr)p->prev_chunk()->next_chunk() == p); + if (p->is_mmapped()) + do_check_mmapped_chunk(p); +} + +// Check properties of free chunks +void malloc_state::do_check_free_chunk(mchunkptr p) const +{ + size_t sz = p->chunksize(); + mchunkptr next = (mchunkptr)p->chunk_plus_offset(sz); + do_check_any_chunk(p); + assert(!p->is_inuse()); + assert(!p->next_pinuse()); + assert(!p->is_mmapped()); + if (p != _dv && p != _top) + { + if (sz >= MIN_CHUNK_SIZE) + { + assert((sz & spp_chunk_align_mask) == 0); + assert(spp_is_aligned(chunk2mem(p))); + assert(next->_prev_foot == sz); + assert(p->pinuse()); + assert(next == _top || next->is_inuse()); + assert(p->_fd->_bk == p); + assert(p->_bk->_fd == p); + } + else // markers are always of size sizeof(size_t) + assert(sz == sizeof(size_t)); + } +} + +// Check properties of malloced chunks at the point they are malloced +void malloc_state::do_check_malloced_chunk(void* mem, size_t s) const +{ + if (mem != 0) + { + mchunkptr p = mem2chunk(mem); + size_t sz = p->_head & ~INUSE_BITS; + do_check_inuse_chunk(p); + assert((sz & spp_chunk_align_mask) == 0); + assert(sz >= MIN_CHUNK_SIZE); + assert(sz >= s); + // unless mmapped, size is less than MIN_CHUNK_SIZE more than request + 
assert(p->is_mmapped() || sz < (s + MIN_CHUNK_SIZE)); + } +} + +// Check a tree and its subtrees. +void malloc_state::do_check_tree(tchunkptr t) +{ + tchunkptr head = 0; + tchunkptr u = t; + bindex_t tindex = t->_index; + size_t tsize = t->chunksize(); + bindex_t idx = compute_tree_index(tsize); + assert(tindex == idx); + assert(tsize >= MIN_LARGE_SIZE); + assert(tsize >= minsize_for_tree_index(idx)); + assert((idx == NTREEBINS - 1) || (tsize < minsize_for_tree_index((idx + 1)))); + + do + { + // traverse through chain of same-sized nodes + do_check_any_chunk((mchunkptr)u); + assert(u->_index == tindex); + assert(u->chunksize() == tsize); + assert(!u->is_inuse()); + assert(!u->next_pinuse()); + assert(u->_fd->_bk == u); + assert(u->_bk->_fd == u); + if (u->_parent == 0) + { + assert(u->_child[0] == 0); + assert(u->_child[1] == 0); + } + else + { + assert(head == 0); // only one node on chain has parent + head = u; + assert(u->_parent != u); + assert(u->_parent->_child[0] == u || + u->_parent->_child[1] == u || + *((tbinptr*)(u->_parent)) == u); + if (u->_child[0] != 0) + { + assert(u->_child[0]->_parent == u); + assert(u->_child[0] != u); + do_check_tree(u->_child[0]); + } + if (u->_child[1] != 0) + { + assert(u->_child[1]->_parent == u); + assert(u->_child[1] != u); + do_check_tree(u->_child[1]); + } + if (u->_child[0] != 0 && u->_child[1] != 0) + assert(u->_child[0]->chunksize() < u->_child[1]->chunksize()); + } + u = u->_fd; + } + while (u != t); + assert(head != 0); +} + +// Check all the chunks in a treebin. +void malloc_state::do_check_treebin(bindex_t i) +{ + tbinptr* tb = (tbinptr*)treebin_at(i); + tchunkptr t = *tb; + int empty = (_treemap & (1U << i)) == 0; + if (t == 0) + assert(empty); + if (!empty) + do_check_tree(t); +} + +// Check all the chunks in a smallbin. +void malloc_state::do_check_smallbin(bindex_t i) +{ + sbinptr b = smallbin_at(i); + mchunkptr p = b->_bk; + unsigned int empty = (_smallmap & (1U << i)) == 0; + if (p == b) + assert(empty); + if (!empty) + { + for (; p != b; p = p->_bk) + { + size_t size = p->chunksize(); + mchunkptr q; + // each chunk claims to be free + do_check_free_chunk(p); + // chunk belongs in bin + assert(small_index(size) == i); + assert(p->_bk == b || p->_bk->chunksize() == p->chunksize()); + // chunk is followed by an inuse chunk + q = (mchunkptr)p->next_chunk(); + if (q->_head != FENCEPOST_HEAD) + do_check_inuse_chunk(q); + } + } +} + +// Find x in a bin. Used in other check functions. 
+int malloc_state::bin_find(mchunkptr x) +{ + size_t size = x->chunksize(); + if (is_small(size)) + { + bindex_t sidx = small_index(size); + sbinptr b = smallbin_at(sidx); + if (smallmap_is_marked(sidx)) + { + mchunkptr p = b; + do + { + if (p == x) + return 1; + } + while ((p = p->_fd) != b); + } + } + else + { + bindex_t tidx = compute_tree_index(size); + if (treemap_is_marked(tidx)) + { + tchunkptr t = *treebin_at(tidx); + size_t sizebits = size << leftshift_for_tree_index(tidx); + while (t != 0 && t->chunksize() != size) + { + t = t->_child[(sizebits >> (spp_size_t_bitsize - 1)) & 1]; + sizebits <<= 1; + } + if (t != 0) + { + tchunkptr u = t; + do + { + if (u == (tchunkptr)x) + return 1; + } + while ((u = u->_fd) != t); + } + } + } + return 0; +} + +// Traverse each chunk and check it; return total +size_t malloc_state::traverse_and_check() +{ + size_t sum = 0; + if (is_initialized()) + { + msegmentptr s = (msegmentptr)&_seg; + sum += _topsize + top_foot_size(); + while (s != 0) + { + mchunkptr q = align_as_chunk(s->_base); + mchunkptr lastq = 0; + assert(q->pinuse()); + while (segment_holds(s, q) && + q != _top && q->_head != FENCEPOST_HEAD) + { + sum += q->chunksize(); + if (q->is_inuse()) + { + assert(!bin_find(q)); + do_check_inuse_chunk(q); + } + else + { + assert(q == _dv || bin_find(q)); + assert(lastq == 0 || lastq->is_inuse()); // Not 2 consecutive free + do_check_free_chunk(q); + } + lastq = q; + q = (mchunkptr)q->next_chunk(); + } + s = s->_next; + } + } + return sum; +} + + +// Check all properties of malloc_state. +void malloc_state::do_check_malloc_state() +{ + bindex_t i; + size_t total; + // check bins + for (i = 0; i < NSMALLBINS; ++i) + do_check_smallbin(i); + for (i = 0; i < NTREEBINS; ++i) + do_check_treebin(i); + + if (_dvsize != 0) + { + // check dv chunk + do_check_any_chunk(_dv); + assert(_dvsize == _dv->chunksize()); + assert(_dvsize >= MIN_CHUNK_SIZE); + assert(bin_find(_dv) == 0); + } + + if (_top != 0) + { + // check top chunk + do_check_top_chunk(_top); + //assert(topsize == top->chunksize()); redundant + assert(_topsize > 0); + assert(bin_find(_top) == 0); + } + + total = traverse_and_check(); + assert(total <= _footprint); + assert(_footprint <= _max_footprint); +} +#endif // SPP_DEBUG + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* + Various forms of linking and unlinking are defined as macros. Even + the ones for trees, which are very long but have very short typical + paths. This is ugly but reduces reliance on inlining support of + compilers. 
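+
+  Each smallbin header B is a circular doubly-linked list, and an empty bin
+  points at itself (init_bins sets B->_fd == B->_bk == B). Inserting p at
+  the front amounts to the following sketch:
+
+      mchunkptr F = B->_fd;      // old first chunk, or B itself if empty
+      B->_fd = p;  F->_bk = p;
+      p->_fd = F;  p->_bk = B;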
+*/ + +// Link a free chunk into a smallbin +void malloc_state::insert_small_chunk(mchunkptr p, size_t s) +{ + bindex_t I = small_index(s); + mchunkptr B = smallbin_at(I); + mchunkptr F = B; + assert(s >= MIN_CHUNK_SIZE); + if (!smallmap_is_marked(I)) + mark_smallmap(I); + else if (rtcheck(ok_address(B->_fd))) + F = B->_fd; + else + SPP_ABORT; + B->_fd = p; + F->_bk = p; + p->_fd = F; + p->_bk = B; +} + +// Unlink a chunk from a smallbin +void malloc_state::unlink_small_chunk(mchunkptr p, size_t s) +{ + mchunkptr F = p->_fd; + mchunkptr B = p->_bk; + bindex_t I = small_index(s); + assert(p != B); + assert(p != F); + assert(p->chunksize() == small_index2size(I)); + if (rtcheck(F == smallbin_at(I) || (ok_address(F) && F->_bk == p))) + { + if (B == F) + clear_smallmap(I); + else if (rtcheck(B == smallbin_at(I) || + (ok_address(B) && B->_fd == p))) + { + F->_bk = B; + B->_fd = F; + } + else + SPP_ABORT; + } + else + SPP_ABORT; +} + +// Unlink the first chunk from a smallbin +void malloc_state::unlink_first_small_chunk(mchunkptr B, mchunkptr p, bindex_t I) +{ + mchunkptr F = p->_fd; + assert(p != B); + assert(p != F); + assert(p->chunksize() == small_index2size(I)); + if (B == F) + clear_smallmap(I); + else if (rtcheck(ok_address(F) && F->_bk == p)) + { + F->_bk = B; + B->_fd = F; + } + else + SPP_ABORT; +} + +// Replace dv node, binning the old one +// Used only when dvsize known to be small +void malloc_state::replace_dv(mchunkptr p, size_t s) +{ + size_t DVS = _dvsize; + assert(is_small(DVS)); + if (DVS != 0) + { + mchunkptr DV = _dv; + insert_small_chunk(DV, DVS); + } + _dvsize = s; + _dv = p; +} + +/* ------------------------- Operations on trees ------------------------- */ + +// Insert chunk into tree +void malloc_state::insert_large_chunk(tchunkptr X, size_t s) +{ + tbinptr* H; + bindex_t I = compute_tree_index(s); + H = treebin_at(I); + X->_index = I; + X->_child[0] = X->_child[1] = 0; + if (!treemap_is_marked(I)) + { + mark_treemap(I); + *H = X; + X->_parent = (tchunkptr)H; + X->_fd = X->_bk = X; + } + else + { + tchunkptr T = *H; + size_t K = s << leftshift_for_tree_index(I); + for (;;) + { + if (T->chunksize() != s) + { + tchunkptr* C = &(T->_child[(K >> (spp_size_t_bitsize - 1)) & 1]); + K <<= 1; + if (*C != 0) + T = *C; + else if (rtcheck(ok_address(C))) + { + *C = X; + X->_parent = T; + X->_fd = X->_bk = X; + break; + } + else + { + SPP_ABORT; + break; + } + } + else + { + tchunkptr F = T->_fd; + if (rtcheck(ok_address(T) && ok_address(F))) + { + T->_fd = F->_bk = X; + X->_fd = F; + X->_bk = T; + X->_parent = 0; + break; + } + else + { + SPP_ABORT; + break; + } + } + } + } +} + +/* + Unlink steps: + + 1. If x is a chained node, unlink it from its same-sized fd/bk links + and choose its bk node as its replacement. + 2. If x was the last node of its size, but not a leaf node, it must + be replaced with a leaf node (not merely one with an open left or + right), to make sure that lefts and rights of descendents + correspond properly to bit masks. We use the rightmost descendent + of x. We could use any other leaf, but this is easy to locate and + tends to counteract removal of leftmosts elsewhere, and so keeps + paths shorter than minimally guaranteed. This doesn't loop much + because on average a node in a tree is near the bottom. + 3. If x is the base of a chain (i.e., has parent links) relink + x's parent and children to x's replacement (or null if none). 
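+
+  The replacement search of step 2, written out as a sketch without the
+  pointer-to-pointer bookkeeping used below:
+
+      R = (x->_child[1] != 0) ? x->_child[1] : x->_child[0];
+      while (R->_child[1] != 0 || R->_child[0] != 0)
+          R = (R->_child[1] != 0) ? R->_child[1] : R->_child[0];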
+*/ + +void malloc_state::unlink_large_chunk(tchunkptr X) +{ + tchunkptr XP = X->_parent; + tchunkptr R; + if (X->_bk != X) + { + tchunkptr F = X->_fd; + R = X->_bk; + if (rtcheck(ok_address(F) && F->_bk == X && R->_fd == X)) + { + F->_bk = R; + R->_fd = F; + } + else + SPP_ABORT; + } + else + { + tchunkptr* RP; + if (((R = *(RP = &(X->_child[1]))) != 0) || + ((R = *(RP = &(X->_child[0]))) != 0)) + { + tchunkptr* CP; + while ((*(CP = &(R->_child[1])) != 0) || + (*(CP = &(R->_child[0])) != 0)) + R = *(RP = CP); + if (rtcheck(ok_address(RP))) + *RP = 0; + else + SPP_ABORT; + } + } + if (XP != 0) + { + tbinptr* H = treebin_at(X->_index); + if (X == *H) + { + if ((*H = R) == 0) + clear_treemap(X->_index); + } + else if (rtcheck(ok_address(XP))) + { + if (XP->_child[0] == X) + XP->_child[0] = R; + else + XP->_child[1] = R; + } + else + SPP_ABORT; + if (R != 0) + { + if (rtcheck(ok_address(R))) + { + tchunkptr C0, C1; + R->_parent = XP; + if ((C0 = X->_child[0]) != 0) + { + if (rtcheck(ok_address(C0))) + { + R->_child[0] = C0; + C0->_parent = R; + } + else + SPP_ABORT; + } + if ((C1 = X->_child[1]) != 0) + { + if (rtcheck(ok_address(C1))) + { + R->_child[1] = C1; + C1->_parent = R; + } + else + SPP_ABORT; + } + } + else + SPP_ABORT; + } + } +} + +// Relays to large vs small bin operations + +void malloc_state::insert_chunk(mchunkptr p, size_t s) +{ + if (is_small(s)) + insert_small_chunk(p, s); + else + { + tchunkptr tp = (tchunkptr)(p); + insert_large_chunk(tp, s); + } +} + +void malloc_state::unlink_chunk(mchunkptr p, size_t s) +{ + if (is_small(s)) + unlink_small_chunk(p, s); + else + { + tchunkptr tp = (tchunkptr)(p); + unlink_large_chunk(tp); + } +} + + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +/* + Directly mmapped chunks are set up with an offset to the start of + the mmapped region stored in the prev_foot field of the chunk. This + allows reconstruction of the required argument to MUNMAP when freed, + and also allows adjustment of the returned chunk to meet alignment + requirements (especially in memalign). 
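+
+  Concretely, the reconstruction performed when such a chunk p is freed is,
+  in sketch form:
+
+      char*  base = (char*)p - p->_prev_foot;                  // region start
+      size_t len  = p->chunksize() + p->_prev_foot + SPP_MMAP_FOOT_PAD;
+      SPP_CALL_MUNMAP(base, len);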
+*/ + +// Malloc using mmap +void* malloc_state::mmap_alloc(size_t nb) +{ + size_t mmsize = mmap_align(nb + 6 * sizeof(size_t) + spp_chunk_align_mask); + if (_footprint_limit != 0) + { + size_t fp = _footprint + mmsize; + if (fp <= _footprint || fp > _footprint_limit) + return 0; + } + if (mmsize > nb) + { + // Check for wrap around 0 + char* mm = (char*)(SPP_CALL_DIRECT_MMAP(mmsize)); + if (mm != cmfail) + { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - SPP_MMAP_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->_prev_foot = offset; + p->_head = psize; + mark_inuse_foot(p, psize); + p->chunk_plus_offset(psize)->_head = FENCEPOST_HEAD; + p->chunk_plus_offset(psize + sizeof(size_t))->_head = 0; + + if (_least_addr == 0 || mm < _least_addr) + _least_addr = mm; + if ((_footprint += mmsize) > _max_footprint) + _max_footprint = _footprint; + assert(spp_is_aligned(chunk2mem(p))); + check_mmapped_chunk(p); + return chunk2mem(p); + } + } + return 0; +} + +// Realloc using mmap +mchunkptr malloc_state::mmap_resize(mchunkptr oldp, size_t nb, int flags) +{ + size_t oldsize = oldp->chunksize(); + (void)flags; // placate people compiling -Wunused + if (is_small(nb)) // Can't shrink mmap regions below small size + return 0; + + // Keep old chunk if big enough but not too big + if (oldsize >= nb + sizeof(size_t) && + (oldsize - nb) <= (mparams._granularity << 1)) + return oldp; + else + { + size_t offset = oldp->_prev_foot; + size_t oldmmsize = oldsize + offset + SPP_MMAP_FOOT_PAD; + size_t newmmsize = mmap_align(nb + 6 * sizeof(size_t) + spp_chunk_align_mask); + char* cp = (char*)SPP_CALL_MREMAP((char*)oldp - offset, + oldmmsize, newmmsize, flags); + if (cp != cmfail) + { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - SPP_MMAP_FOOT_PAD; + newp->_head = psize; + mark_inuse_foot(newp, psize); + newp->chunk_plus_offset(psize)->_head = FENCEPOST_HEAD; + newp->chunk_plus_offset(psize + sizeof(size_t))->_head = 0; + + if (cp < _least_addr) + _least_addr = cp; + if ((_footprint += newmmsize - oldmmsize) > _max_footprint) + _max_footprint = _footprint; + check_mmapped_chunk(newp); + return newp; + } + } + return 0; +} + + +/* -------------------------- mspace management -------------------------- */ + +// Initialize top chunk and its size +void malloc_state::init_top(mchunkptr p, size_t psize) +{ + // Ensure alignment + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char*)p + offset); + psize -= offset; + + _top = p; + _topsize = psize; + p->_head = psize | PINUSE_BIT; + // set size of fake trailing chunk holding overhead space only once + p->chunk_plus_offset(psize)->_head = top_foot_size(); + _trim_check = mparams._trim_threshold; // reset on each update +} + +// Initialize bins for a new mstate that is otherwise zeroed out +void malloc_state::init_bins() +{ + // Establish circular links for smallbins + bindex_t i; + for (i = 0; i < NSMALLBINS; ++i) + { + sbinptr bin = smallbin_at(i); + bin->_fd = bin->_bk = bin; + } +} + +#if SPP_PROCEED_ON_ERROR + +// default corruption action +void malloc_state::reset_on_error() +{ + int i; + ++malloc_corruption_error_count; + // Reinitialize fields to forget about all memory + _smallmap = _treemap = 0; + _dvsize = _topsize = 0; + _seg._base = 0; + _seg._size = 0; + _seg._next = 0; + _top = _dv = 0; + for (i = 0; i < NTREEBINS; ++i) + *treebin_at(i) = 0; + init_bins(); +} +#endif + +/* Allocate chunk and prepend remainder with chunk in successor base. 
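+   Called from sys_alloc when a newly obtained region ends exactly where an
+   existing segment begins: the request is carved out at newbase, and the
+   leftover gap up to oldbase is consolidated into the old first chunk.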
*/ +void* malloc_state::prepend_alloc(char* newbase, char* oldbase, size_t nb) +{ + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (char*)oldfirst - (char*)p; + mchunkptr q = (mchunkptr)p->chunk_plus_offset(nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(p, nb); + + assert((char*)oldfirst > (char*)q); + assert(oldfirst->pinuse()); + assert(qsize >= MIN_CHUNK_SIZE); + + // consolidate remainder with first chunk of old base + if (oldfirst == _top) + { + size_t tsize = _topsize += qsize; + _top = q; + q->_head = tsize | PINUSE_BIT; + check_top_chunk(q); + } + else if (oldfirst == _dv) + { + size_t dsize = _dvsize += qsize; + _dv = q; + q->set_size_and_pinuse_of_free_chunk(dsize); + } + else + { + if (!oldfirst->is_inuse()) + { + size_t nsize = oldfirst->chunksize(); + unlink_chunk(oldfirst, nsize); + oldfirst = (mchunkptr)oldfirst->chunk_plus_offset(nsize); + qsize += nsize; + } + q->set_free_with_pinuse(qsize, oldfirst); + insert_chunk(q, qsize); + check_free_chunk(q); + } + + check_malloced_chunk(chunk2mem(p), nb); + return chunk2mem(p); +} + +// Add a segment to hold a new noncontiguous region +void malloc_state::add_segment(char* tbase, size_t tsize, flag_t mmapped) +{ + // Determine locations and sizes of segment, fenceposts, old top + char* old_top = (char*)_top; + msegmentptr oldsp = segment_holding(old_top); + char* old_end = oldsp->_base + oldsp->_size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char* rawsp = old_end - (ssize + 4 * sizeof(size_t) + spp_chunk_align_mask); + size_t offset = align_offset(chunk2mem(rawsp)); + char* asp = rawsp + offset; + char* csp = (asp < (old_top + MIN_CHUNK_SIZE)) ? old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = (mchunkptr)sp->chunk_plus_offset(ssize); + mchunkptr p = tnext; + int nfences = 0; + + // reset top to new space + init_top((mchunkptr)tbase, tsize - top_foot_size()); + + // Set up segment record + assert(spp_is_aligned(ss)); + set_size_and_pinuse_of_inuse_chunk(sp, ssize); + *ss = _seg; // Push current record + _seg._base = tbase; + _seg._size = tsize; + _seg._sflags = mmapped; + _seg._next = ss; + + // Insert trailing fenceposts + for (;;) + { + mchunkptr nextp = (mchunkptr)p->chunk_plus_offset(sizeof(size_t)); + p->_head = FENCEPOST_HEAD; + ++nfences; + if ((char*)(&(nextp->_head)) < old_end) + p = nextp; + else + break; + } + assert(nfences >= 2); + + // Insert the rest of old top into a bin as an ordinary free chunk + if (csp != old_top) + { + mchunkptr q = (mchunkptr)old_top; + size_t psize = csp - old_top; + mchunkptr tn = (mchunkptr)q->chunk_plus_offset(psize); + q->set_free_with_pinuse(psize, tn); + insert_chunk(q, psize); + } + + check_top_chunk(_top); +} + +/* -------------------------- System allocation -------------------------- */ + +// Get memory from system using MMAP +void* malloc_state::sys_alloc(size_t nb) +{ + char* tbase = cmfail; + size_t tsize = 0; + flag_t mmap_flag = 0; + size_t asize; // allocation size + + mparams.ensure_initialization(); + + // Directly map large chunks, but only if already initialized + if (use_mmap() && nb >= mparams._mmap_threshold && _topsize != 0) + { + void* mem = mmap_alloc(nb); + if (mem != 0) + return mem; + } + + asize = mparams.granularity_align(nb + sys_alloc_padding()); + if (asize <= nb) + return 0; // wraparound + if (_footprint_limit != 0) + { + size_t fp = _footprint + asize; + if (fp <= _footprint || fp > 
_footprint_limit) + return 0; + } + + /* + Try getting memory with a call to MMAP new space (disabled if not SPP_HAVE_MMAP). + We need to request enough bytes from system to ensure + we can malloc nb bytes upon success, so pad with enough space for + top_foot, plus alignment-pad to make sure we don't lose bytes if + not on boundary, and round this up to a granularity unit. + */ + + if (SPP_HAVE_MMAP && tbase == cmfail) + { + // Try MMAP + char* mp = (char*)(SPP_CALL_MMAP(asize)); + if (mp != cmfail) + { + tbase = mp; + tsize = asize; + mmap_flag = USE_MMAP_BIT; + } + } + + if (tbase != cmfail) + { + + if ((_footprint += tsize) > _max_footprint) + _max_footprint = _footprint; + + if (!is_initialized()) + { + // first-time initialization + if (_least_addr == 0 || tbase < _least_addr) + _least_addr = tbase; + _seg._base = tbase; + _seg._size = tsize; + _seg._sflags = mmap_flag; + _magic = mparams._magic; + _release_checks = SPP_MAX_RELEASE_CHECK_RATE; + init_bins(); + + // Offset top by embedded malloc_state + mchunkptr mn = (mchunkptr)mem2chunk(this)->next_chunk(); + init_top(mn, (size_t)((tbase + tsize) - (char*)mn) - top_foot_size()); + } + + else + { + // Try to merge with an existing segment + msegmentptr sp = &_seg; + // Only consider most recent segment if traversal suppressed + while (sp != 0 && tbase != sp->_base + sp->_size) + sp = (SPP_NO_SEGMENT_TRAVERSAL) ? 0 : sp->_next; + if (sp != 0 && + !sp->is_extern_segment() && + (sp->_sflags & USE_MMAP_BIT) == mmap_flag && + segment_holds(sp, _top)) + { + // append + sp->_size += tsize; + init_top(_top, _topsize + tsize); + } + else + { + if (tbase < _least_addr) + _least_addr = tbase; + sp = &_seg; + while (sp != 0 && sp->_base != tbase + tsize) + sp = (SPP_NO_SEGMENT_TRAVERSAL) ? 0 : sp->_next; + if (sp != 0 && + !sp->is_extern_segment() && + (sp->_sflags & USE_MMAP_BIT) == mmap_flag) + { + char* oldbase = sp->_base; + sp->_base = tbase; + sp->_size += tsize; + return prepend_alloc(tbase, oldbase, nb); + } + else + add_segment(tbase, tsize, mmap_flag); + } + } + + if (nb < _topsize) + { + // Allocate from new or extended top space + size_t rsize = _topsize -= nb; + mchunkptr p = _top; + mchunkptr r = _top = (mchunkptr)p->chunk_plus_offset(nb); + r->_head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(p, nb); + check_top_chunk(_top); + check_malloced_chunk(chunk2mem(p), nb); + return chunk2mem(p); + } + } + + SPP_MALLOC_FAILURE_ACTION; + return 0; +} + +/* ----------------------- system deallocation -------------------------- */ + +// Unmap and unlink any mmapped segments that don't contain used chunks +size_t malloc_state::release_unused_segments() +{ + size_t released = 0; + int nsegs = 0; + msegmentptr pred = &_seg; + msegmentptr sp = pred->_next; + while (sp != 0) + { + char* base = sp->_base; + size_t size = sp->_size; + msegmentptr next = sp->_next; + ++nsegs; + if (sp->is_mmapped_segment() && !sp->is_extern_segment()) + { + mchunkptr p = align_as_chunk(base); + size_t psize = p->chunksize(); + // Can unmap if first chunk holds entire segment and not pinned + if (!p->is_inuse() && (char*)p + psize >= base + size - top_foot_size()) + { + tchunkptr tp = (tchunkptr)p; + assert(segment_holds(sp, p)); + if (p == _dv) + { + _dv = 0; + _dvsize = 0; + } + else + unlink_large_chunk(tp); + if (SPP_CALL_MUNMAP(base, size) == 0) + { + released += size; + _footprint -= size; + // unlink obsoleted record + sp = pred; + sp->_next = next; + } + else + { + // back out if cannot unmap + insert_large_chunk(tp, psize); + } + } + } + if 
(SPP_NO_SEGMENT_TRAVERSAL) // scan only first segment + break; + pred = sp; + sp = next; + } + // Reset check counter + _release_checks = (((size_t) nsegs > (size_t) SPP_MAX_RELEASE_CHECK_RATE) ? + (size_t) nsegs : (size_t) SPP_MAX_RELEASE_CHECK_RATE); + return released; +} + +int malloc_state::sys_trim(size_t pad) +{ + size_t released = 0; + mparams.ensure_initialization(); + if (pad < MAX_REQUEST && is_initialized()) + { + pad += top_foot_size(); // ensure enough room for segment overhead + + if (_topsize > pad) + { + // Shrink top space in _granularity - size units, keeping at least one + size_t unit = mparams._granularity; + size_t extra = ((_topsize - pad + (unit - 1)) / unit - + 1) * unit; + msegmentptr sp = segment_holding((char*)_top); + + if (!sp->is_extern_segment()) + { + if (sp->is_mmapped_segment()) + { + if (SPP_HAVE_MMAP && + sp->_size >= extra && + !has_segment_link(sp)) + { + // can't shrink if pinned + size_t newsize = sp->_size - extra; + (void)newsize; // placate people compiling -Wunused-variable + // Prefer mremap, fall back to munmap + if ((SPP_CALL_MREMAP(sp->_base, sp->_size, newsize, 0) != mfail) || + (SPP_CALL_MUNMAP(sp->_base + newsize, extra) == 0)) + released = extra; + } + } + } + + if (released != 0) + { + sp->_size -= released; + _footprint -= released; + init_top(_top, _topsize - released); + check_top_chunk(_top); + } + } + + // Unmap any unused mmapped segments + if (SPP_HAVE_MMAP) + released += release_unused_segments(); + + // On failure, disable autotrim to avoid repeated failed future calls + if (released == 0 && _topsize > _trim_check) + _trim_check = spp_max_size_t; + } + + return (released != 0) ? 1 : 0; +} + +/* Consolidate and bin a chunk. Differs from exported versions + of free mainly in that the chunk need not be marked as inuse. 
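+   (For example, the remainder chunks split off in try_realloc_chunk and
+   internal_memalign are handed here directly.)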
+*/ +void malloc_state::dispose_chunk(mchunkptr p, size_t psize) +{ + mchunkptr next = (mchunkptr)p->chunk_plus_offset(psize); + if (!p->pinuse()) + { + mchunkptr prev; + size_t prevsize = p->_prev_foot; + if (p->is_mmapped()) + { + psize += prevsize + SPP_MMAP_FOOT_PAD; + if (SPP_CALL_MUNMAP((char*)p - prevsize, psize) == 0) + _footprint -= psize; + return; + } + prev = (mchunkptr)p->chunk_minus_offset(prevsize); + psize += prevsize; + p = prev; + if (rtcheck(ok_address(prev))) + { + // consolidate backward + if (p != _dv) + unlink_chunk(p, prevsize); + else if ((next->_head & INUSE_BITS) == INUSE_BITS) + { + _dvsize = psize; + p->set_free_with_pinuse(psize, next); + return; + } + } + else + { + SPP_ABORT; + return; + } + } + if (rtcheck(ok_address(next))) + { + if (!next->cinuse()) + { + // consolidate forward + if (next == _top) + { + size_t tsize = _topsize += psize; + _top = p; + p->_head = tsize | PINUSE_BIT; + if (p == _dv) + { + _dv = 0; + _dvsize = 0; + } + return; + } + else if (next == _dv) + { + size_t dsize = _dvsize += psize; + _dv = p; + p->set_size_and_pinuse_of_free_chunk(dsize); + return; + } + else + { + size_t nsize = next->chunksize(); + psize += nsize; + unlink_chunk(next, nsize); + p->set_size_and_pinuse_of_free_chunk(psize); + if (p == _dv) + { + _dvsize = psize; + return; + } + } + } + else + p->set_free_with_pinuse(psize, next); + insert_chunk(p, psize); + } + else + SPP_ABORT; +} + +/* ---------------------------- malloc --------------------------- */ + +// allocate a large request from the best fitting chunk in a treebin +void* malloc_state::tmalloc_large(size_t nb) +{ + tchunkptr v = 0; + size_t rsize = -nb; // Unsigned negation + tchunkptr t; + bindex_t idx = compute_tree_index(nb); + if ((t = *treebin_at(idx)) != 0) + { + // Traverse tree for this bin looking for node with size == nb + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; // The deepest untaken right subtree + for (;;) + { + tchunkptr rt; + size_t trem = t->chunksize() - nb; + if (trem < rsize) + { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->_child[1]; + t = t->_child[(sizebits >> (spp_size_t_bitsize - 1)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) + { + t = rst; // set t to least subtree holding sizes > nb + break; + } + sizebits <<= 1; + } + } + if (t == 0 && v == 0) + { + // set t to root of next non-empty treebin + binmap_t leftbits = left_bits(idx2bit(idx)) & _treemap; + if (leftbits != 0) + { + binmap_t leastbit = least_bit(leftbits); + bindex_t i = compute_bit2idx(leastbit); + t = *treebin_at(i); + } + } + + while (t != 0) + { + // find smallest of tree or subtree + size_t trem = t->chunksize() - nb; + if (trem < rsize) + { + rsize = trem; + v = t; + } + t = t->leftmost_child(); + } + + // If dv is a better fit, return 0 so malloc will use it + if (v != 0 && rsize < (size_t)(_dvsize - nb)) + { + if (rtcheck(ok_address(v))) + { + // split + mchunkptr r = (mchunkptr)v->chunk_plus_offset(nb); + assert(v->chunksize() == rsize + nb); + if (rtcheck(ok_next(v, r))) + { + unlink_large_chunk(v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(v, (rsize + nb)); + else + { + set_size_and_pinuse_of_inuse_chunk(v, nb); + r->set_size_and_pinuse_of_free_chunk(rsize); + insert_chunk(r, rsize); + } + return chunk2mem(v); + } + } + SPP_ABORT; + } + return 0; +} + +// allocate a small request from the best fitting chunk in a treebin +void* malloc_state::tmalloc_small(size_t nb) +{ + tchunkptr t, v; + size_t rsize; + binmap_t leastbit = 
least_bit(_treemap); + bindex_t i = compute_bit2idx(leastbit); + v = t = *treebin_at(i); + rsize = t->chunksize() - nb; + + while ((t = t->leftmost_child()) != 0) + { + size_t trem = t->chunksize() - nb; + if (trem < rsize) + { + rsize = trem; + v = t; + } + } + + if (rtcheck(ok_address(v))) + { + mchunkptr r = (mchunkptr)v->chunk_plus_offset(nb); + assert(v->chunksize() == rsize + nb); + if (rtcheck(ok_next(v, r))) + { + unlink_large_chunk(v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(v, (rsize + nb)); + else + { + set_size_and_pinuse_of_inuse_chunk(v, nb); + r->set_size_and_pinuse_of_free_chunk(rsize); + replace_dv(r, rsize); + } + return chunk2mem(v); + } + } + + SPP_ABORT; + return 0; +} + +/* ---------------------------- malloc --------------------------- */ + +void* malloc_state::_malloc(size_t bytes) +{ + if (1) + { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) + { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST) ? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = _smallmap >> idx; + + if ((smallbits & 0x3U) != 0) + { + // Remainderless fit to a smallbin. + mchunkptr b, p; + idx += ~smallbits & 1; // Uses next bin if idx empty + b = smallbin_at(idx); + p = b->_fd; + assert(p->chunksize() == small_index2size(idx)); + unlink_first_small_chunk(b, p, idx); + set_inuse_and_pinuse(p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(mem, nb); + goto postaction; + } + + else if (nb > _dvsize) + { + if (smallbits != 0) + { + // Use chunk in next nonempty smallbin + mchunkptr b, p, r; + size_t rsize; + binmap_t leftbits = (smallbits << idx) & left_bits(malloc_state::idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + bindex_t i = compute_bit2idx(leastbit); + b = smallbin_at(i); + p = b->_fd; + assert(p->chunksize() == small_index2size(i)); + unlink_first_small_chunk(b, p, i); + rsize = small_index2size(i) - nb; + // Fit here cannot be remainderless if 4byte sizes + if (sizeof(size_t) != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(p, small_index2size(i)); + else + { + set_size_and_pinuse_of_inuse_chunk(p, nb); + r = (mchunkptr)p->chunk_plus_offset(nb); + r->set_size_and_pinuse_of_free_chunk(rsize); + replace_dv(r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(mem, nb); + goto postaction; + } + + else if (_treemap != 0 && (mem = tmalloc_small(nb)) != 0) + { + check_malloced_chunk(mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = spp_max_size_t; // Too big to allocate. 
Force failure (in sys alloc) + else + { + nb = pad_request(bytes); + if (_treemap != 0 && (mem = tmalloc_large(nb)) != 0) + { + check_malloced_chunk(mem, nb); + goto postaction; + } + } + + if (nb <= _dvsize) + { + size_t rsize = _dvsize - nb; + mchunkptr p = _dv; + if (rsize >= MIN_CHUNK_SIZE) + { + // split dv + mchunkptr r = _dv = (mchunkptr)p->chunk_plus_offset(nb); + _dvsize = rsize; + r->set_size_and_pinuse_of_free_chunk(rsize); + set_size_and_pinuse_of_inuse_chunk(p, nb); + } + else // exhaust dv + { + size_t dvs = _dvsize; + _dvsize = 0; + _dv = 0; + set_inuse_and_pinuse(p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(mem, nb); + goto postaction; + } + + else if (nb < _topsize) + { + // Split top + size_t rsize = _topsize -= nb; + mchunkptr p = _top; + mchunkptr r = _top = (mchunkptr)p->chunk_plus_offset(nb); + r->_head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(p, nb); + mem = chunk2mem(p); + check_top_chunk(_top); + check_malloced_chunk(mem, nb); + goto postaction; + } + + mem = sys_alloc(nb); + +postaction: + return mem; + } + + return 0; +} + +/* ---------------------------- free --------------------------- */ + +void malloc_state::_free(mchunkptr p) +{ + if (1) + { + check_inuse_chunk(p); + if (rtcheck(ok_address(p) && ok_inuse(p))) + { + size_t psize = p->chunksize(); + mchunkptr next = (mchunkptr)p->chunk_plus_offset(psize); + if (!p->pinuse()) + { + size_t prevsize = p->_prev_foot; + if (p->is_mmapped()) + { + psize += prevsize + SPP_MMAP_FOOT_PAD; + if (SPP_CALL_MUNMAP((char*)p - prevsize, psize) == 0) + _footprint -= psize; + goto postaction; + } + else + { + mchunkptr prev = (mchunkptr)p->chunk_minus_offset(prevsize); + psize += prevsize; + p = prev; + if (rtcheck(ok_address(prev))) + { + // consolidate backward + if (p != _dv) + unlink_chunk(p, prevsize); + else if ((next->_head & INUSE_BITS) == INUSE_BITS) + { + _dvsize = psize; + p->set_free_with_pinuse(psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (rtcheck(ok_next(p, next) && ok_pinuse(next))) + { + if (!next->cinuse()) + { + // consolidate forward + if (next == _top) + { + size_t tsize = _topsize += psize; + _top = p; + p->_head = tsize | PINUSE_BIT; + if (p == _dv) + { + _dv = 0; + _dvsize = 0; + } + if (should_trim(tsize)) + sys_trim(0); + goto postaction; + } + else if (next == _dv) + { + size_t dsize = _dvsize += psize; + _dv = p; + p->set_size_and_pinuse_of_free_chunk(dsize); + goto postaction; + } + else + { + size_t nsize = next->chunksize(); + psize += nsize; + unlink_chunk(next, nsize); + p->set_size_and_pinuse_of_free_chunk(psize); + if (p == _dv) + { + _dvsize = psize; + goto postaction; + } + } + } + else + p->set_free_with_pinuse(psize, next); + + if (is_small(psize)) + { + insert_small_chunk(p, psize); + check_free_chunk(p); + } + else + { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(tp, psize); + check_free_chunk(p); + if (--_release_checks == 0) + release_unused_segments(); + } + goto postaction; + } + } +erroraction: + SPP_USAGE_ERROR_ACTION(this, p); +postaction: + ; + } +} + +/* ------------ Internal support for realloc, memalign, etc -------------- */ + +// Try to realloc; only in-place unless can_move true +mchunkptr malloc_state::try_realloc_chunk(mchunkptr p, size_t nb, int can_move) +{ + mchunkptr newp = 0; + size_t oldsize = p->chunksize(); + mchunkptr next = (mchunkptr)p->chunk_plus_offset(oldsize); + if (rtcheck(ok_address(p) && ok_inuse(p) && + ok_next(p, next) && ok_pinuse(next))) + { + if (p->is_mmapped()) + newp = 
mmap_resize(p, nb, can_move); + else if (oldsize >= nb) + { + // already big enough + size_t rsize = oldsize - nb; + if (rsize >= MIN_CHUNK_SIZE) + { + // split off remainder + mchunkptr r = (mchunkptr)p->chunk_plus_offset(nb); + set_inuse(p, nb); + set_inuse(r, rsize); + dispose_chunk(r, rsize); + } + newp = p; + } + else if (next == _top) + { + // extend into top + if (oldsize + _topsize > nb) + { + size_t newsize = oldsize + _topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = (mchunkptr)p->chunk_plus_offset(nb); + set_inuse(p, nb); + newtop->_head = newtopsize | PINUSE_BIT; + _top = newtop; + _topsize = newtopsize; + newp = p; + } + } + else if (next == _dv) + { + // extend into dv + size_t dvs = _dvsize; + if (oldsize + dvs >= nb) + { + size_t dsize = oldsize + dvs - nb; + if (dsize >= MIN_CHUNK_SIZE) + { + mchunkptr r = (mchunkptr)p->chunk_plus_offset(nb); + mchunkptr n = (mchunkptr)r->chunk_plus_offset(dsize); + set_inuse(p, nb); + r->set_size_and_pinuse_of_free_chunk(dsize); + n->clear_pinuse(); + _dvsize = dsize; + _dv = r; + } + else + { + // exhaust dv + size_t newsize = oldsize + dvs; + set_inuse(p, newsize); + _dvsize = 0; + _dv = 0; + } + newp = p; + } + } + else if (!next->cinuse()) + { + // extend into next free chunk + size_t nextsize = next->chunksize(); + if (oldsize + nextsize >= nb) + { + size_t rsize = oldsize + nextsize - nb; + unlink_chunk(next, nextsize); + if (rsize < MIN_CHUNK_SIZE) + { + size_t newsize = oldsize + nextsize; + set_inuse(p, newsize); + } + else + { + mchunkptr r = (mchunkptr)p->chunk_plus_offset(nb); + set_inuse(p, nb); + set_inuse(r, rsize); + dispose_chunk(r, rsize); + } + newp = p; + } + } + } + else + SPP_USAGE_ERROR_ACTION(m, chunk2mem(p)); + return newp; +} + +void* malloc_state::internal_memalign(size_t alignment, size_t bytes) +{ + void* mem = 0; + if (alignment < MIN_CHUNK_SIZE) // must be at least a minimum chunk size + alignment = MIN_CHUNK_SIZE; + if ((alignment & (alignment - 1)) != 0) + { + // Ensure a power of 2 + size_t a = SPP_MALLOC_ALIGNMENT << 1; + while (a < alignment) + a <<= 1; + alignment = a; + } + if (bytes >= MAX_REQUEST - alignment) + SPP_MALLOC_FAILURE_ACTION; + else + { + size_t nb = request2size(bytes); + size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; + mem = internal_malloc(req); + if (mem != 0) + { + mchunkptr p = mem2chunk(mem); + if ((((size_t)(mem)) & (alignment - 1)) != 0) + { + // misaligned + /* + Find an aligned spot inside chunk. Since we need to give + back leading space in a chunk of at least MIN_CHUNK_SIZE, if + the first calculation places us at a spot with less than + MIN_CHUNK_SIZE leader, we can move to the next aligned spot. + We've allocated enough total room so that this is always + possible. + */ + char* br = (char*)mem2chunk((void *)(((size_t)((char*)mem + alignment - 1)) & + -alignment)); + char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE) ? 
+ br : br + alignment; + mchunkptr newp = (mchunkptr)pos; + size_t leadsize = pos - (char*)(p); + size_t newsize = p->chunksize() - leadsize; + + if (p->is_mmapped()) + { + // For mmapped chunks, just adjust offset + newp->_prev_foot = p->_prev_foot + leadsize; + newp->_head = newsize; + } + else + { + // Otherwise, give back leader, use the rest + set_inuse(newp, newsize); + set_inuse(p, leadsize); + dispose_chunk(p, leadsize); + } + p = newp; + } + + // Give back spare room at the end + if (!p->is_mmapped()) + { + size_t size = p->chunksize(); + if (size > nb + MIN_CHUNK_SIZE) + { + size_t remainder_size = size - nb; + mchunkptr remainder = (mchunkptr)p->chunk_plus_offset(nb); + set_inuse(p, nb); + set_inuse(remainder, remainder_size); + dispose_chunk(remainder, remainder_size); + } + } + + mem = chunk2mem(p); + assert(p->chunksize() >= nb); + assert(((size_t)mem & (alignment - 1)) == 0); + check_inuse_chunk(p); + } + } + return mem; +} + +/* + Common support for independent_X routines, handling + all of the combinations that can result. + The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed +*/ +void** malloc_state::ialloc(size_t n_elements, size_t* sizes, int opts, + void* chunks[]) +{ + + size_t element_size; // chunksize of each element, if all same + size_t contents_size; // total size of elements + size_t array_size; // request size of pointer array + void* mem; // malloced aggregate space + mchunkptr p; // corresponding chunk + size_t remainder_size; // remaining bytes while splitting + void** marray; // either "chunks" or malloced ptr array + mchunkptr array_chunk; // chunk for malloced ptr array + flag_t was_enabled; // to disable mmap + size_t size; + size_t i; + + mparams.ensure_initialization(); + // compute array length, if needed + if (chunks != 0) + { + if (n_elements == 0) + return chunks; // nothing to do + marray = chunks; + array_size = 0; + } + else + { + // if empty req, must still return chunk representing empty array + if (n_elements == 0) + return (void**)internal_malloc(0); + marray = 0; + array_size = request2size(n_elements * (sizeof(void*))); + } + + // compute total element size + if (opts & 0x1) + { + // all-same-size + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else + { + // add up all the sizes + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + size = contents_size + array_size; + + /* + Allocate the aggregate chunk. First disable direct-mmapping so + malloc won't use it, since we would not be able to later + free/realloc space internal to a segregated mmap region. 
+    */
+    was_enabled = use_mmap();
+    disable_mmap();
+    mem = internal_malloc(size - CHUNK_OVERHEAD);
+    if (was_enabled)
+        enable_mmap();
+    if (mem == 0)
+        return 0;
+
+    p = mem2chunk(mem);
+    remainder_size = p->chunksize();
+
+    assert(!p->is_mmapped());
+
+    if (opts & 0x2)
+    {
+        // optionally clear the elements
+        memset((size_t*)mem, 0, remainder_size - sizeof(size_t) - array_size);
+    }
+
+    // If not provided, allocate the pointer array as final part of chunk
+    if (marray == 0)
+    {
+        size_t array_chunk_size;
+        array_chunk = (mchunkptr)p->chunk_plus_offset(contents_size);
+        array_chunk_size = remainder_size - contents_size;
+        marray = (void**)(chunk2mem(array_chunk));
+        set_size_and_pinuse_of_inuse_chunk(array_chunk, array_chunk_size);
+        remainder_size = contents_size;
+    }
+
+    // split out elements
+    for (i = 0; ; ++i)
+    {
+        marray[i] = chunk2mem(p);
+        if (i != n_elements - 1)
+        {
+            if (element_size != 0)
+                size = element_size;
+            else
+                size = request2size(sizes[i]);
+            remainder_size -= size;
+            set_size_and_pinuse_of_inuse_chunk(p, size);
+            p = (mchunkptr)p->chunk_plus_offset(size);
+        }
+        else
+        {
+            // the final element absorbs any overallocation slop
+            set_size_and_pinuse_of_inuse_chunk(p, remainder_size);
+            break;
+        }
+    }
+
+#if SPP_DEBUG
+    if (marray != chunks)
+    {
+        // final element must have exactly exhausted chunk
+        if (element_size != 0)
+            assert(remainder_size == element_size);
+        else
+            assert(remainder_size == request2size(sizes[i]));
+        check_inuse_chunk(mem2chunk(marray));
+    }
+    for (i = 0; i != n_elements; ++i)
+        check_inuse_chunk(mem2chunk(marray[i]));
+
+#endif
+
+    return marray;
+}
+
+/* Try to free all pointers in the given array.
+   Note: this could be made faster by delaying consolidation, at the price
+   of disabling some user integrity checks. We still optimize some
+   consolidations by combining adjacent chunks before freeing, which will
+   occur often if allocated with ialloc or the array is sorted.
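+
+   Illustrative use through the public wrapper (a sketch; msp is any mspace
+   obtained from create_mspace):
+
+       void* a[3] = { mspace_malloc(msp, 16),
+                      mspace_malloc(msp, 32),
+                      mspace_malloc(msp, 64) };
+       size_t unfreed = mspace_bulk_free(msp, a, 3); // 0 means all were freed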
+*/
+size_t malloc_state::internal_bulk_free(void* array[], size_t nelem)
+{
+    size_t unfreed = 0;
+    if (1)
+    {
+        void** a;
+        void** fence = &(array[nelem]);
+        for (a = array; a != fence; ++a)
+        {
+            void* mem = *a;
+            if (mem != 0)
+            {
+                mchunkptr p = mem2chunk(mem);
+                size_t psize = p->chunksize();
+#if SPP_FOOTERS
+                if (get_mstate_for(p) != this)
+                {
+                    ++unfreed;
+                    continue;
+                }
+#endif
+                check_inuse_chunk(p);
+                *a = 0;
+                if (rtcheck(ok_address(p) && ok_inuse(p)))
+                {
+                    void ** b = a + 1; // try to merge with next chunk
+                    mchunkptr next = (mchunkptr)p->next_chunk();
+                    if (b != fence && *b == chunk2mem(next))
+                    {
+                        size_t newsize = next->chunksize() + psize;
+                        set_inuse(p, newsize);
+                        *b = chunk2mem(p);
+                    }
+                    else
+                        dispose_chunk(p, psize);
+                }
+                else
+                {
+                    SPP_ABORT;
+                    break;
+                }
+            }
+        }
+        if (should_trim(_topsize))
+            sys_trim(0);
+    }
+    return unfreed;
+}
+
+void malloc_state::init(char* tbase, size_t tsize)
+{
+    _seg._base = _least_addr = tbase;
+    _seg._size = _footprint = _max_footprint = tsize;
+    _magic = mparams._magic;
+    _release_checks = SPP_MAX_RELEASE_CHECK_RATE;
+    _mflags = mparams._default_mflags;
+    _extp = 0;
+    _exts = 0;
+    disable_contiguous();
+    init_bins();
+    mchunkptr mn = (mchunkptr)mem2chunk(this)->next_chunk();
+    init_top(mn, (size_t)((tbase + tsize) - (char*)mn) - top_foot_size());
+    check_top_chunk(_top);
+}
+
+/* Traversal */
+#if SPP_MALLOC_INSPECT_ALL
+void malloc_state::internal_inspect_all(void(*handler)(void *start, void *end,
+                                                       size_t used_bytes,
+                                                       void* callback_arg),
+                                        void* arg)
+{
+    if (is_initialized())
+    {
+        mchunkptr top = _top;
+        msegmentptr s;
+        for (s = &_seg; s != 0; s = s->_next)
+        {
+            mchunkptr q = align_as_chunk(s->_base);
+            while (segment_holds(s, q) && q->_head != FENCEPOST_HEAD)
+            {
+                mchunkptr next = (mchunkptr)q->next_chunk();
+                size_t sz = q->chunksize();
+                size_t used;
+                void* start;
+                if (q->is_inuse())
+                {
+                    used = sz - CHUNK_OVERHEAD; // must not be mmapped
+                    start = chunk2mem(q);
+                }
+                else
+                {
+                    used = 0;
+                    if (is_small(sz))
+                    {
+                        // offset by possible bookkeeping
+                        start = (void*)((char*)q + sizeof(struct malloc_chunk));
+                    }
+                    else
+                        start = (void*)((char*)q + sizeof(struct malloc_tree_chunk));
+                }
+                if (start < (void*)next) // skip if all space is bookkeeping
+                    handler(start, next, used, arg);
+                if (q == top)
+                    break;
+                q = next;
+            }
+        }
+    }
+}
+#endif // SPP_MALLOC_INSPECT_ALL
+
+
+
+/* ----------------------------- user mspaces ---------------------------- */
+
+static mstate init_user_mstate(char* tbase, size_t tsize)
+{
+    size_t msize = pad_request(sizeof(malloc_state));
+    mchunkptr msp = align_as_chunk(tbase);
+    mstate m = (mstate)(chunk2mem(msp));
+    memset(m, 0, msize);
+    msp->_head = (msize | INUSE_BITS);
+    m->init(tbase, tsize);
+    return m;
+}
+
+SPP_API mspace create_mspace(size_t capacity, int locked)
+{
+    mstate m = 0;
+    size_t msize;
+    mparams.ensure_initialization();
+    msize = pad_request(sizeof(malloc_state));
+    if (capacity < (size_t) - (msize + top_foot_size() + mparams._page_size))
+    {
+        size_t rs = ((capacity == 0) ?
mparams._granularity : + (capacity + top_foot_size() + msize)); + size_t tsize = mparams.granularity_align(rs); + char* tbase = (char*)(SPP_CALL_MMAP(tsize)); + if (tbase != cmfail) + { + m = init_user_mstate(tbase, tsize); + m->_seg._sflags = USE_MMAP_BIT; + m->set_lock(locked); + } + } + return (mspace)m; +} + +SPP_API size_t destroy_mspace(mspace msp) +{ + size_t freed = 0; + mstate ms = (mstate)msp; + if (ms->ok_magic()) + { + msegmentptr sp = &ms->_seg; + while (sp != 0) + { + char* base = sp->_base; + size_t size = sp->_size; + flag_t flag = sp->_sflags; + (void)base; // placate people compiling -Wunused-variable + sp = sp->_next; + if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && + SPP_CALL_MUNMAP(base, size) == 0) + freed += size; + } + } + else + SPP_USAGE_ERROR_ACTION(ms, ms); + return freed; +} + +/* ---------------------------- mspace versions of malloc/calloc/free routines -------------------- */ +SPP_API void* mspace_malloc(mspace msp, size_t bytes) +{ + mstate ms = (mstate)msp; + if (!ms->ok_magic()) + { + SPP_USAGE_ERROR_ACTION(ms, ms); + return 0; + } + return ms->_malloc(bytes); +} + +SPP_API void mspace_free(mspace msp, void* mem) +{ + if (mem != 0) + { + mchunkptr p = mem2chunk(mem); +#if SPP_FOOTERS + mstate fm = get_mstate_for(p); + (void)msp; // placate people compiling -Wunused +#else + mstate fm = (mstate)msp; +#endif + if (!fm->ok_magic()) + { + SPP_USAGE_ERROR_ACTION(fm, p); + return; + } + fm->_free(p); + } +} + +SPP_API inline void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) +{ + void* mem; + size_t req = 0; + mstate ms = (mstate)msp; + if (!ms->ok_magic()) + { + SPP_USAGE_ERROR_ACTION(ms, ms); + return 0; + } + if (n_elements != 0) + { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = spp_max_size_t; // force downstream failure on overflow + } + mem = ms->internal_malloc(req); + if (mem != 0 && mem2chunk(mem)->calloc_must_clear()) + memset(mem, 0, req); + return mem; +} + +SPP_API inline void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) +{ + void* mem = 0; + if (oldmem == 0) + mem = mspace_malloc(msp, bytes); + else if (bytes >= MAX_REQUEST) + SPP_MALLOC_FAILURE_ACTION; +#ifdef REALLOC_ZERO_BYTES_FREES + else if (bytes == 0) + mspace_free(msp, oldmem); +#endif + else + { + size_t nb = request2size(bytes); + mchunkptr oldp = mem2chunk(oldmem); +#if ! SPP_FOOTERS + mstate m = (mstate)msp; +#else + mstate m = get_mstate_for(oldp); + if (!m->ok_magic()) + { + SPP_USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif + if (1) + { + mchunkptr newp = m->try_realloc_chunk(oldp, nb, 1); + if (newp != 0) + { + m->check_inuse_chunk(newp); + mem = chunk2mem(newp); + } + else + { + mem = mspace_malloc(m, bytes); + if (mem != 0) + { + size_t oc = oldp->chunksize() - oldp->overhead_for(); + memcpy(mem, oldmem, (oc < bytes) ? 
oc : bytes);
+                    mspace_free(m, oldmem);
+                }
+            }
+        }
+    }
+    return mem;
+}
+
+#if 0
+
+SPP_API mspace create_mspace_with_base(void* base, size_t capacity, int locked)
+{
+    mstate m = 0;
+    size_t msize;
+    mparams.ensure_initialization();
+    msize = pad_request(sizeof(malloc_state));
+    if (capacity > msize + top_foot_size() &&
+        capacity < (size_t) - (msize + top_foot_size() + mparams._page_size))
+    {
+        m = init_user_mstate((char*)base, capacity);
+        m->_seg._sflags = EXTERN_BIT;
+        m->set_lock(locked);
+    }
+    return (mspace)m;
+}
+
+SPP_API int mspace_track_large_chunks(mspace msp, int enable)
+{
+    int ret = 0;
+    mstate ms = (mstate)msp;
+    if (1)
+    {
+        if (!ms->use_mmap())
+            ret = 1;
+        if (!enable)
+            ms->enable_mmap();
+        else
+            ms->disable_mmap();
+    }
+    return ret;
+}
+
+SPP_API void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes)
+{
+    void* mem = 0;
+    if (oldmem != 0)
+    {
+        if (bytes >= MAX_REQUEST)
+            SPP_MALLOC_FAILURE_ACTION;
+        else
+        {
+            size_t nb = request2size(bytes);
+            mchunkptr oldp = mem2chunk(oldmem);
+#if ! SPP_FOOTERS
+            mstate m = (mstate)msp;
+#else
+            mstate m = get_mstate_for(oldp);
+            (void)msp; // placate people compiling -Wunused
+            if (!m->ok_magic())
+            {
+                SPP_USAGE_ERROR_ACTION(m, oldmem);
+                return 0;
+            }
+#endif
+            if (1)
+            {
+                mchunkptr newp = m->try_realloc_chunk(oldp, nb, 0);
+                if (newp == oldp)
+                {
+                    m->check_inuse_chunk(newp);
+                    mem = oldmem;
+                }
+            }
+        }
+    }
+    return mem;
+}
+
+SPP_API void* mspace_memalign(mspace msp, size_t alignment, size_t bytes)
+{
+    mstate ms = (mstate)msp;
+    if (!ms->ok_magic())
+    {
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+        return 0;
+    }
+    if (alignment <= SPP_MALLOC_ALIGNMENT)
+        return mspace_malloc(msp, bytes);
+    return ms->internal_memalign(alignment, bytes);
+}
+
+SPP_API void** mspace_independent_calloc(mspace msp, size_t n_elements,
+                                         size_t elem_size, void* chunks[])
+{
+    size_t sz = elem_size; // serves as 1-element array
+    mstate ms = (mstate)msp;
+    if (!ms->ok_magic())
+    {
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+        return 0;
+    }
+    return ms->ialloc(n_elements, &sz, 3, chunks);
+}
+
+SPP_API void** mspace_independent_comalloc(mspace msp, size_t n_elements,
+                                           size_t sizes[], void* chunks[])
+{
+    mstate ms = (mstate)msp;
+    if (!ms->ok_magic())
+    {
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+        return 0;
+    }
+    return ms->ialloc(n_elements, sizes, 0, chunks);
+}
+
+#endif
+
+SPP_API inline size_t mspace_bulk_free(mspace msp, void* array[], size_t nelem)
+{
+    return ((mstate)msp)->internal_bulk_free(array, nelem);
+}
+
+#if SPP_MALLOC_INSPECT_ALL
+SPP_API void mspace_inspect_all(mspace msp,
+                                void(*handler)(void *start,
+                                               void *end,
+                                               size_t used_bytes,
+                                               void* callback_arg),
+                                void* arg)
+{
+    mstate ms = (mstate)msp;
+    if (ms->ok_magic())
+        ms->internal_inspect_all(handler, arg);
+    else
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+}
+#endif
+
+SPP_API inline int mspace_trim(mspace msp, size_t pad)
+{
+    int result = 0;
+    mstate ms = (mstate)msp;
+    if (ms->ok_magic())
+        result = ms->sys_trim(pad);
+    else
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+    return result;
+}
+
+SPP_API inline size_t mspace_footprint(mspace msp)
+{
+    size_t result = 0;
+    mstate ms = (mstate)msp;
+    if (ms->ok_magic())
+        result = ms->_footprint;
+    else
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+    return result;
+}
+
+SPP_API inline size_t mspace_max_footprint(mspace msp)
+{
+    size_t result = 0;
+    mstate ms = (mstate)msp;
+    if (ms->ok_magic())
+        result = ms->_max_footprint;
+    else
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+    return result;
+}
+
+SPP_API inline size_t mspace_footprint_limit(mspace msp)
+{
+    size_t result
= 0;
+    mstate ms = (mstate)msp;
+    if (ms->ok_magic())
+    {
+        size_t maf = ms->_footprint_limit;
+        result = (maf == 0) ? spp_max_size_t : maf;
+    }
+    else
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+    return result;
+}
+
+SPP_API inline size_t mspace_set_footprint_limit(mspace msp, size_t bytes)
+{
+    size_t result = 0;
+    mstate ms = (mstate)msp;
+    if (ms->ok_magic())
+    {
+        if (bytes == 0)
+            result = mparams.granularity_align(1); // Use minimal size
+        else if (bytes == spp_max_size_t)
+            result = 0; // disable
+        else
+            result = mparams.granularity_align(bytes);
+        ms->_footprint_limit = result;
+    }
+    else
+        SPP_USAGE_ERROR_ACTION(ms, ms);
+    return result;
+}
+
+SPP_API inline size_t mspace_usable_size(const void* mem)
+{
+    if (mem != 0)
+    {
+        mchunkptr p = mem2chunk(mem);
+        if (p->is_inuse())
+            return p->chunksize() - p->overhead_for();
+    }
+    return 0;
+}
+
+SPP_API inline int mspace_mallopt(int param_number, int value)
+{
+    return mparams.change(param_number, value);
+}
+
+} // spp_ namespace
+
+
+#endif // SPP_EXCLUDE_IMPLEMENTATION
+
+#endif // spp_dlalloc__h_
diff --git a/benchmarks/others/sparsepp/spp_memory.h b/benchmarks/others/sparsepp/spp_memory.h
new file mode 100644
index 00000000..cfaa108d
--- /dev/null
+++ b/benchmarks/others/sparsepp/spp_memory.h
@@ -0,0 +1,190 @@
+#if !defined(spp_memory_h_guard)
+#define spp_memory_h_guard
+
+#include <cstdint>
+#include <cstring>
+#include <cstdlib>
+
+#if defined(_WIN32) || defined( __CYGWIN__)
+    #define SPP_WIN
+#endif
+
+#ifdef SPP_WIN
+    #include <windows.h>
+    #include <Psapi.h>
+    #undef min
+    #undef max
+#elif defined(__linux__)
+    #include <sys/types.h>
+    #include <sys/sysinfo.h>
+#elif defined(__FreeBSD__)
+    #include <paths.h>
+    #include <fcntl.h>
+    #include <kvm.h>
+    #include <unistd.h>
+    #include <sys/sysctl.h>
+    #include <sys/user.h>
+#endif
+
+namespace spp
+{
+    uint64_t GetSystemMemory()
+    {
+#ifdef SPP_WIN
+        MEMORYSTATUSEX memInfo;
+        memInfo.dwLength = sizeof(MEMORYSTATUSEX);
+        GlobalMemoryStatusEx(&memInfo);
+        return static_cast<uint64_t>(memInfo.ullTotalPageFile);
+#elif defined(__linux__)
+        struct sysinfo memInfo;
+        sysinfo (&memInfo);
+        auto totalVirtualMem = memInfo.totalram;
+
+        totalVirtualMem += memInfo.totalswap;
+        totalVirtualMem *= memInfo.mem_unit;
+        return static_cast<uint64_t>(totalVirtualMem);
+#elif defined(__FreeBSD__)
+        kvm_t *kd;
+        u_int pageCnt;
+        size_t pageCntLen = sizeof(pageCnt);
+        u_int pageSize;
+        struct kvm_swap kswap;
+        uint64_t totalVirtualMem;
+
+        pageSize = static_cast<u_int>(getpagesize());
+
+        sysctlbyname("vm.stats.vm.v_page_count", &pageCnt, &pageCntLen, NULL, 0);
+        totalVirtualMem = pageCnt * pageSize;
+
+        kd = kvm_open(NULL, _PATH_DEVNULL, NULL, O_RDONLY, "kvm_open");
+        kvm_getswapinfo(kd, &kswap, 1, 0);
+        kvm_close(kd);
+        totalVirtualMem += kswap.ksw_total * pageSize;
+
+        return totalVirtualMem;
+#else
+        return 0;
+#endif
+    }
+
+    uint64_t GetTotalMemoryUsed()
+    {
+#ifdef SPP_WIN
+        MEMORYSTATUSEX memInfo;
+        memInfo.dwLength = sizeof(MEMORYSTATUSEX);
+        GlobalMemoryStatusEx(&memInfo);
+        return static_cast<uint64_t>(memInfo.ullTotalPageFile - memInfo.ullAvailPageFile);
+#elif defined(__linux__)
+        struct sysinfo memInfo;
+        sysinfo(&memInfo);
+        auto virtualMemUsed = memInfo.totalram - memInfo.freeram;
+
+        virtualMemUsed += memInfo.totalswap - memInfo.freeswap;
+        virtualMemUsed *= memInfo.mem_unit;
+
+        return static_cast<uint64_t>(virtualMemUsed);
+#elif defined(__FreeBSD__)
+        kvm_t *kd;
+        u_int pageSize;
+        u_int pageCnt, freeCnt;
+        size_t pageCntLen = sizeof(pageCnt);
+        size_t freeCntLen = sizeof(freeCnt);
+        struct
kvm_swap kswap; + uint64_t virtualMemUsed; + + pageSize = static_cast<u_int>(getpagesize()); + + sysctlbyname("vm.stats.vm.v_page_count", &pageCnt, &pageCntLen, NULL, 0); + sysctlbyname("vm.stats.vm.v_free_count", &freeCnt, &freeCntLen, NULL, 0); + virtualMemUsed = (pageCnt - freeCnt) * pageSize; + + kd = kvm_open(NULL, _PATH_DEVNULL, NULL, O_RDONLY, "kvm_open"); + kvm_getswapinfo(kd, &kswap, 1, 0); + kvm_close(kd); + virtualMemUsed += kswap.ksw_used * pageSize; + + return virtualMemUsed; +#else + return 0; +#endif + } + + uint64_t GetProcessMemoryUsed() + { +#ifdef SPP_WIN + PROCESS_MEMORY_COUNTERS_EX pmc; + GetProcessMemoryInfo(GetCurrentProcess(), reinterpret_cast<PPROCESS_MEMORY_COUNTERS>(&pmc), sizeof(pmc)); + return static_cast<uint64_t>(pmc.PrivateUsage); +#elif defined(__linux__) + auto parseLine = + [](char* line)->int + { + auto i = strlen(line); + + while(*line < '0' || *line > '9') + { + line++; + } + + line[i-3] = '\0'; + i = atoi(line); + return i; + }; + + auto file = fopen("/proc/self/status", "r"); + auto result = -1; + char line[128]; + + while(fgets(line, 128, file) != nullptr) + { + if(strncmp(line, "VmSize:", 7) == 0) + { + result = parseLine(line); + break; + } + } + + fclose(file); + return static_cast<uint64_t>(result) * 1024; +#elif defined(__FreeBSD__) + struct kinfo_proc info; + size_t infoLen = sizeof(info); + int mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid() }; + + sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &infoLen, NULL, 0); + return static_cast<uint64_t>(info.ki_rssize * getpagesize()); +#else + return 0; +#endif + } + + uint64_t GetPhysicalMemory() + { +#ifdef SPP_WIN + MEMORYSTATUSEX memInfo; + memInfo.dwLength = sizeof(MEMORYSTATUSEX); + GlobalMemoryStatusEx(&memInfo); + return static_cast<uint64_t>(memInfo.ullTotalPhys); +#elif defined(__linux__) + struct sysinfo memInfo; + sysinfo(&memInfo); + + auto totalPhysMem = memInfo.totalram; + + totalPhysMem *= memInfo.mem_unit; + return static_cast<uint64_t>(totalPhysMem); +#elif defined(__FreeBSD__) + u_long physMem; + size_t physMemLen = sizeof(physMem); + int mib[] = { CTL_HW, HW_PHYSMEM }; + + sysctl(mib, sizeof(mib) / sizeof(*mib), &physMem, &physMemLen, NULL, 0); + return physMem; +#else + return 0; +#endif + } + +} + +#endif // spp_memory_h_guard diff --git a/benchmarks/others/sparsepp/spp_smartptr.h b/benchmarks/others/sparsepp/spp_smartptr.h new file mode 100644 index 00000000..fba3acfb --- /dev/null +++ b/benchmarks/others/sparsepp/spp_smartptr.h @@ -0,0 +1,71 @@ +#if !defined(spp_smartptr_h_guard) +#define spp_smartptr_h_guard + + +/* ----------------------------------------------------------------------------------------------- + * quick version of intrusive_ptr + * ----------------------------------------------------------------------------------------------- + */ + +#include <cassert> +#include "spp_config.h" + +// ------------------------------------------------------------------------ +class spp_rc +{ +public: + spp_rc() : _cnt(0) {} + spp_rc(const spp_rc &) : _cnt(0) {} + void increment() const { ++_cnt; } + void decrement() const { assert(_cnt); if (--_cnt == 0) delete this; } + unsigned count() const { return _cnt; } + +protected: + virtual ~spp_rc() {} + +private: + mutable unsigned _cnt; +}; + +// ------------------------------------------------------------------------ +template <class T> +class spp_sptr +{ +public: + spp_sptr() : _p(0) {} + spp_sptr(T *p) : _p(p) { if (_p) _p->increment(); } + spp_sptr(const spp_sptr &o) : _p(o._p) { if (_p) _p->increment(); } +#ifndef 
SPP_NO_CXX11_RVALUE_REFERENCES + spp_sptr(spp_sptr &&o) : _p(o._p) { o._p = (T *)0; } + spp_sptr& operator=(spp_sptr &&o) { this->swap(o); return *this; } +#endif + ~spp_sptr() { if (_p) _p->decrement(); } + spp_sptr& operator=(const spp_sptr &o) { reset(o._p); return *this; } + T* get() const { return _p; } + void swap(spp_sptr &o) { T *tmp = _p; _p = o._p; o._p = tmp; } + void reset(const T *p = 0) + { + if (p == _p) + return; + if (_p) _p->decrement(); + _p = (T *)p; + if (_p) _p->increment(); + } + T* operator->() const { return const_cast<T *>(_p); } + bool operator!() const { return _p == 0; } + +private: + T *_p; +}; + +// ------------------------------------------------------------------------ +namespace std +{ + template <class T> + inline void swap(spp_sptr<T> &a, spp_sptr<T> &b) + { + a.swap(b); + } +} + +#endif // spp_smartptr_h_guard diff --git a/benchmarks/others/sparsepp/spp_stdint.h b/benchmarks/others/sparsepp/spp_stdint.h new file mode 100644 index 00000000..3adced9c --- /dev/null +++ b/benchmarks/others/sparsepp/spp_stdint.h @@ -0,0 +1,16 @@ +#if !defined(spp_stdint_h_guard) +#define spp_stdint_h_guard + +#include "spp_config.h" + +#if defined(SPP_HAS_CSTDINT) && (__cplusplus >= 201103) + #include <cstdint> +#else + #if defined(__FreeBSD__) || defined(__IBMCPP__) || defined(_AIX) + #include <inttypes.h> + #else + #include <stdint.h> + #endif +#endif + +#endif // spp_stdint_h_guard diff --git a/benchmarks/others/sparsepp/spp_timer.h b/benchmarks/others/sparsepp/spp_timer.h new file mode 100644 index 00000000..48180f4d --- /dev/null +++ b/benchmarks/others/sparsepp/spp_timer.h @@ -0,0 +1,58 @@ +/** + Copyright (c) 2016 Mariano Gonzalez + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +#ifndef spp_timer_h_guard +#define spp_timer_h_guard + +#include <chrono> + +namespace spp +{ + template<typename time_unit = std::milli> + class Timer + { + public: + Timer() { reset(); } + void reset() { _start = _snap = clock::now(); } + void snap() { _snap = clock::now(); } + + float get_total() const { return get_diff<float>(_start, clock::now()); } + float get_delta() const { return get_diff<float>(_snap, clock::now()); } + + private: + using clock = std::chrono::high_resolution_clock; + using point = std::chrono::time_point<clock>; + + template<typename T> + static T get_diff(const point& start, const point& end) + { + using duration_t = std::chrono::duration<T, time_unit>; + + return std::chrono::duration_cast<duration_t>(end - start).count(); + } + + point _start; + point _snap; + }; +} + +#endif // spp_timer_h_guard diff --git a/benchmarks/others/sparsepp/spp_traits.h b/benchmarks/others/sparsepp/spp_traits.h new file mode 100644 index 00000000..792f52f2 --- /dev/null +++ b/benchmarks/others/sparsepp/spp_traits.h @@ -0,0 +1,125 @@ +#if !defined(spp_traits_h_guard) +#define spp_traits_h_guard + +#include "spp_config.h" + +template<int S, int H> class HashObject; // for Google's benchmark, not in spp namespace! + +namespace spp_ +{ + +// --------------------------------------------------------------------------- +// type_traits we need +// --------------------------------------------------------------------------- +template<class T, T v> +struct integral_constant { static const T value = v; }; + +template <class T, T v> const T integral_constant<T, v>::value; + +typedef integral_constant<bool, true> true_type; +typedef integral_constant<bool, false> false_type; + +typedef integral_constant<int, 0> zero_type; +typedef integral_constant<int, 1> one_type; +typedef integral_constant<int, 2> two_type; +typedef integral_constant<int, 3> three_type; + +template<typename T, typename U> struct is_same : public false_type { }; +template<typename T> struct is_same<T, T> : public true_type { }; + +template<typename T> struct remove_const { typedef T type; }; +template<typename T> struct remove_const<T const> { typedef T type; }; + +template<typename T> struct remove_volatile { typedef T type; }; +template<typename T> struct remove_volatile<T volatile> { typedef T type; }; + +template<typename T> struct remove_cv +{ + typedef typename remove_const<typename remove_volatile<T>::type>::type type; +}; + +// ---------------- is_integral ---------------------------------------- +template <class T> struct is_integral; +template <class T> struct is_integral : false_type { }; +template<> struct is_integral<bool> : true_type { }; +template<> struct is_integral<char> : true_type { }; +template<> struct is_integral<unsigned char> : true_type { }; +template<> struct is_integral<signed char> : true_type { }; +template<> struct is_integral<short> : true_type { }; +template<> struct is_integral<unsigned short> : true_type { }; +template<> struct is_integral<int> : true_type { }; +template<> struct is_integral<unsigned int> : true_type { }; +template<> struct is_integral<long> : true_type { }; +template<> struct is_integral<unsigned long> : true_type { }; +#ifdef SPP_HAS_LONG_LONG + template<> struct is_integral<long long> : true_type { }; + template<> struct is_integral<unsigned long long> : true_type { }; +#endif +template <class T> struct is_integral<const T> : is_integral<T> { }; +template <class T> struct is_integral<volatile T> : is_integral<T> { }; +template <class T> struct is_integral<const 
volatile T> : is_integral<T> { };
+
+// ---------------- is_floating_point ----------------------------------------
+template <class T> struct is_floating_point;
+template <class T> struct is_floating_point : false_type { };
+template<> struct is_floating_point<float> : true_type { };
+template<> struct is_floating_point<double> : true_type { };
+template<> struct is_floating_point<long double> : true_type { };
+template <class T> struct is_floating_point<const T> : is_floating_point<T> { };
+template <class T> struct is_floating_point<volatile T> : is_floating_point<T> { };
+template <class T> struct is_floating_point<const volatile T> : is_floating_point<T> { };
+
+// ---------------- is_pointer ----------------------------------------
+template <class T> struct is_pointer;
+template <class T> struct is_pointer : false_type { };
+template <class T> struct is_pointer<T*> : true_type { };
+template <class T> struct is_pointer<const T> : is_pointer<T> { };
+template <class T> struct is_pointer<volatile T> : is_pointer<T> { };
+template <class T> struct is_pointer<const volatile T> : is_pointer<T> { };
+
+// ---------------- is_reference ----------------------------------------
+template <class T> struct is_reference;
+template<typename T> struct is_reference : false_type {};
+template<typename T> struct is_reference<T&> : true_type {};
+
+// ---------------- is_relocatable ----------------------------------------
+// relocatable values can be moved around in memory using memcpy and remain
+// correct. Most types are relocatable; an example of a type that is not would
+// be a struct which contains a pointer to a buffer inside itself - this is the
+// case for std::string in gcc 5.
+// ------------------------------------------------------------------------
+template <class T> struct is_relocatable;
+template <class T> struct is_relocatable :
+    integral_constant<bool, (is_integral<T>::value ||
+                             is_floating_point<T>::value ||
+                             is_pointer<T>::value
+                             )>
+{ };
+
+template<int S, int H> struct is_relocatable<HashObject<S, H> > : true_type { };
+
+template <class T> struct is_relocatable<const T> : is_relocatable<T> { };
+template <class T> struct is_relocatable<volatile T> : is_relocatable<T> { };
+template <class T> struct is_relocatable<const volatile T> : is_relocatable<T> { };
+template <class A, int N> struct is_relocatable<A[N]> : is_relocatable<A> { };
+template <class T, class U> struct is_relocatable<std::pair<T, U> > :
+    integral_constant<bool, (is_relocatable<T>::value && is_relocatable<U>::value)>
+{ };
+
+// A template helper used to select A or B based on a condition.
+// ------------------------------------------------------------
+template<bool cond, typename A, typename B>
+struct if_
+{
+    typedef A type;
+};
+
+template<typename A, typename B>
+struct if_<false, A, B>
+{
+    typedef B type;
+};
+
+} // spp_ namespace
+
+#endif // spp_traits_h_guard
diff --git a/benchmarks/others/sparsepp/spp_utils.h b/benchmarks/others/sparsepp/spp_utils.h
new file mode 100644
index 00000000..4f2e9257
--- /dev/null
+++ b/benchmarks/others/sparsepp/spp_utils.h
@@ -0,0 +1,477 @@
+// ----------------------------------------------------------------------
+// Copyright (c) 2016, Steven Gregory Popovitch - [email protected]
+// All rights reserved.
+//
+// Code derived from Boost libraries.
+// Boost software licence reproduced below.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * The name of Steven Gregory Popovitch may not be used to +// endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// ---------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+// --------------------------------------------------------------------------- + +// ---------------------------------------------------------------------- +// H A S H F U N C T I O N S +// ---------------------------- +// +// Implements spp::spp_hash() and spp::hash_combine() +// ---------------------------------------------------------------------- + +#if !defined(spp_utils_h_guard_) +#define spp_utils_h_guard_ + +#if defined(_MSC_VER) + #if (_MSC_VER >= 1600 ) // vs2010 (1900 is vs2015) + #include <functional> + #define SPP_HASH_CLASS std::hash + #else + #include <hash_map> + #define SPP_HASH_CLASS stdext::hash_compare + #endif + #if (_MSC_FULL_VER < 190021730) + #define SPP_NO_CXX11_NOEXCEPT + #endif +#elif defined __clang__ + #if __has_feature(cxx_noexcept) || defined(SPP_CXX11) // define SPP_CXX11 if your compiler has <functional> + #include <functional> + #define SPP_HASH_CLASS std::hash + #else + #include <tr1/functional> + #define SPP_HASH_CLASS std::tr1::hash + #endif + + #if !__has_feature(cxx_noexcept) + #define SPP_NO_CXX11_NOEXCEPT + #endif +#elif defined(__GNUC__) + #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) + #include <functional> + #define SPP_HASH_CLASS std::hash + + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) < 40600 + #define SPP_NO_CXX11_NOEXCEPT + #endif + #else + #include <tr1/unordered_map> + #define SPP_HASH_CLASS std::tr1::hash + #define SPP_NO_CXX11_NOEXCEPT + #endif +#else + #include <functional> + #define SPP_HASH_CLASS std::hash +#endif + +#ifdef SPP_NO_CXX11_NOEXCEPT + #define SPP_NOEXCEPT +#else + #define SPP_NOEXCEPT noexcept +#endif + +#ifdef SPP_NO_CXX11_CONSTEXPR + #define SPP_CONSTEXPR +#else + #define SPP_CONSTEXPR constexpr +#endif + +#ifdef SPP_NO_CXX14_CONSTEXPR + #define SPP_CXX14_CONSTEXPR +#else + #define SPP_CXX14_CONSTEXPR constexpr +#endif + +#define SPP_INLINE + +#ifndef spp_ + #define spp_ spp +#endif + +namespace spp_ +{ + +template <class T> T spp_min(T a, T b) { return a < b ? a : b; } +template <class T> T spp_max(T a, T b) { return a >= b ? a : b; } + +template <class T> +struct spp_hash +{ + SPP_INLINE size_t operator()(const T &__v) const SPP_NOEXCEPT + { + SPP_HASH_CLASS<T> hasher; + return hasher(__v); + } +}; + +template <class T> +struct spp_hash<T *> +{ + static size_t spp_log2 (size_t val) SPP_NOEXCEPT + { + size_t res = 0; + while (val > 1) + { + val >>= 1; + res++; + } + return res; + } + + SPP_INLINE size_t operator()(const T *__v) const SPP_NOEXCEPT + { + static const size_t shift = 3; // spp_log2(1 + sizeof(T)); // T might be incomplete! + const uintptr_t i = (const uintptr_t)__v; + return static_cast<size_t>(i >> shift); + } +}; + +// from http://burtleburtle.net/bob/hash/integer.html +// fast and efficient for power of two table sizes where we always +// consider the last bits. 
+// --------------------------------------------------------------- +inline size_t spp_mix_32(uint32_t a) +{ + a = a ^ (a >> 4); + a = (a ^ 0xdeadbeef) + (a << 5); + a = a ^ (a >> 11); + return static_cast<size_t>(a); +} + +// More thorough scrambling as described in +// https://gist.github.com/badboy/6267743 +// ---------------------------------------- +inline size_t spp_mix_64(uint64_t a) +{ + a = (~a) + (a << 21); // a = (a << 21) - a - 1; + a = a ^ (a >> 24); + a = (a + (a << 3)) + (a << 8); // a * 265 + a = a ^ (a >> 14); + a = (a + (a << 2)) + (a << 4); // a * 21 + a = a ^ (a >> 28); + a = a + (a << 31); + return static_cast<size_t>(a); +} + +template<class ArgumentType, class ResultType> +struct spp_unary_function +{ + typedef ArgumentType argument_type; + typedef ResultType result_type; +}; + +template <> +struct spp_hash<bool> : public spp_unary_function<bool, size_t> +{ + SPP_INLINE size_t operator()(bool __v) const SPP_NOEXCEPT + { return static_cast<size_t>(__v); } +}; + +template <> +struct spp_hash<char> : public spp_unary_function<char, size_t> +{ + SPP_INLINE size_t operator()(char __v) const SPP_NOEXCEPT + { return static_cast<size_t>(__v); } +}; + +template <> +struct spp_hash<signed char> : public spp_unary_function<signed char, size_t> +{ + SPP_INLINE size_t operator()(signed char __v) const SPP_NOEXCEPT + { return static_cast<size_t>(__v); } +}; + +template <> +struct spp_hash<unsigned char> : public spp_unary_function<unsigned char, size_t> +{ + SPP_INLINE size_t operator()(unsigned char __v) const SPP_NOEXCEPT + { return static_cast<size_t>(__v); } +}; + +template <> +struct spp_hash<wchar_t> : public spp_unary_function<wchar_t, size_t> +{ + SPP_INLINE size_t operator()(wchar_t __v) const SPP_NOEXCEPT + { return static_cast<size_t>(__v); } +}; + +template <> +struct spp_hash<int16_t> : public spp_unary_function<int16_t, size_t> +{ + SPP_INLINE size_t operator()(int16_t __v) const SPP_NOEXCEPT + { return spp_mix_32(static_cast<uint32_t>(__v)); } +}; + +template <> +struct spp_hash<uint16_t> : public spp_unary_function<uint16_t, size_t> +{ + SPP_INLINE size_t operator()(uint16_t __v) const SPP_NOEXCEPT + { return spp_mix_32(static_cast<uint32_t>(__v)); } +}; + +template <> +struct spp_hash<int32_t> : public spp_unary_function<int32_t, size_t> +{ + SPP_INLINE size_t operator()(int32_t __v) const SPP_NOEXCEPT + { return spp_mix_32(static_cast<uint32_t>(__v)); } +}; + +template <> +struct spp_hash<uint32_t> : public spp_unary_function<uint32_t, size_t> +{ + SPP_INLINE size_t operator()(uint32_t __v) const SPP_NOEXCEPT + { return spp_mix_32(static_cast<uint32_t>(__v)); } +}; + +template <> +struct spp_hash<int64_t> : public spp_unary_function<int64_t, size_t> +{ + SPP_INLINE size_t operator()(int64_t __v) const SPP_NOEXCEPT + { return spp_mix_64(static_cast<uint64_t>(__v)); } +}; + +template <> +struct spp_hash<uint64_t> : public spp_unary_function<uint64_t, size_t> +{ + SPP_INLINE size_t operator()(uint64_t __v) const SPP_NOEXCEPT + { return spp_mix_64(static_cast<uint64_t>(__v)); } +}; + +template <> +struct spp_hash<float> : public spp_unary_function<float, size_t> +{ + SPP_INLINE size_t operator()(float __v) const SPP_NOEXCEPT + { + // -0.0 and 0.0 should return same hash + uint32_t *as_int = reinterpret_cast<uint32_t *>(&__v); + return (__v == 0) ? 
static_cast<size_t>(0) : spp_mix_32(*as_int); + } +}; + +template <> +struct spp_hash<double> : public spp_unary_function<double, size_t> +{ + SPP_INLINE size_t operator()(double __v) const SPP_NOEXCEPT + { + // -0.0 and 0.0 should return same hash + uint64_t *as_int = reinterpret_cast<uint64_t *>(&__v); + return (__v == 0) ? static_cast<size_t>(0) : spp_mix_64(*as_int); + } +}; + +template <class T, int sz> struct Combiner +{ + inline void operator()(T& seed, T value); +}; + +template <class T> struct Combiner<T, 4> +{ + inline void operator()(T& seed, T value) + { + seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } +}; + +template <class T> struct Combiner<T, 8> +{ + inline void operator()(T& seed, T value) + { + seed ^= value + T(0xc6a4a7935bd1e995) + (seed << 6) + (seed >> 2); + } +}; + +template <class T> +inline void hash_combine(std::size_t& seed, T const& v) +{ + spp_::spp_hash<T> hasher; + Combiner<std::size_t, sizeof(std::size_t)> combiner; + + combiner(seed, hasher(v)); +} + +static inline uint32_t s_spp_popcount_default(uint32_t i) SPP_NOEXCEPT +{ + i = i - ((i >> 1) & 0x55555555); + i = (i & 0x33333333) + ((i >> 2) & 0x33333333); + return (((i + (i >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; +} + +static inline uint32_t s_spp_popcount_default(uint64_t x) SPP_NOEXCEPT +{ + const uint64_t m1 = uint64_t(0x5555555555555555); // binary: 0101... + const uint64_t m2 = uint64_t(0x3333333333333333); // binary: 00110011.. + const uint64_t m4 = uint64_t(0x0f0f0f0f0f0f0f0f); // binary: 4 zeros, 4 ones ... + const uint64_t h01 = uint64_t(0x0101010101010101); // the sum of 256 to the power of 0,1,2,3... + + x -= (x >> 1) & m1; // put count of each 2 bits into those 2 bits + x = (x & m2) + ((x >> 2) & m2); // put count of each 4 bits into those 4 bits + x = (x + (x >> 4)) & m4; // put count of each 8 bits into those 8 bits + return (x * h01)>>56; // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24)+... +} + +#ifdef __APPLE__ + static inline uint32_t count_trailing_zeroes(size_t v) SPP_NOEXCEPT + { + size_t x = (v & -v) - 1; + // sadly sizeof() required to build on macos + return sizeof(size_t) == 8 ? s_spp_popcount_default((uint64_t)x) : s_spp_popcount_default((uint32_t)x); + } + + static inline uint32_t s_popcount(size_t v) SPP_NOEXCEPT + { + // sadly sizeof() required to build on macos + return sizeof(size_t) == 8 ? 
s_spp_popcount_default((uint64_t)v) : s_spp_popcount_default((uint32_t)v);
+    }
+#else
+    static inline uint32_t count_trailing_zeroes(size_t v) SPP_NOEXCEPT
+    {
+        return s_spp_popcount_default((v & -(intptr_t)v) - 1);
+    }
+
+    static inline uint32_t s_popcount(size_t v) SPP_NOEXCEPT
+    {
+        return s_spp_popcount_default(v);
+    }
+#endif
+
+// -----------------------------------------------------------
+// -----------------------------------------------------------
+template<class T>
+class libc_allocator
+{
+public:
+    typedef T value_type;
+    typedef T* pointer;
+    typedef ptrdiff_t difference_type;
+    typedef const T* const_pointer;
+    typedef size_t size_type;
+
+    libc_allocator() {}
+    libc_allocator(const libc_allocator&) {}
+
+    template<class U>
+    libc_allocator(const libc_allocator<U> &) {}
+
+    libc_allocator& operator=(const libc_allocator &) { return *this; }
+
+    template<class U>
+    libc_allocator& operator=(const libc_allocator<U> &) { return *this; }
+
+#ifndef SPP_NO_CXX11_RVALUE_REFERENCES
+    libc_allocator(libc_allocator &&) {}
+    libc_allocator& operator=(libc_allocator &&) { return *this; }
+#endif
+
+    pointer allocate(size_t n, const_pointer /* unused */ = 0)
+    {
+        pointer res = static_cast<pointer>(malloc(n * sizeof(T)));
+        if (!res)
+            throw std::bad_alloc();
+        return res;
+    }
+
+    void deallocate(pointer p, size_t /* unused */)
+    {
+        free(p);
+    }
+
+    pointer reallocate(pointer p, size_t new_size)
+    {
+        pointer res = static_cast<pointer>(realloc(p, new_size * sizeof(T)));
+        if (!res)
+            throw std::bad_alloc();
+        return res;
+    }
+
+    // extra API to match spp_allocator interface
+    pointer reallocate(pointer p, size_t /* old_size */, size_t new_size)
+    {
+        return static_cast<pointer>(realloc(p, new_size * sizeof(T)));
+    }
+
+    size_type max_size() const
+    {
+        return static_cast<size_type>(-1) / sizeof(value_type);
+    }
+
+    void construct(pointer p, const value_type& val)
+    {
+        new(p) value_type(val);
+    }
+
+    void destroy(pointer p) { p->~value_type(); }
+
+    template<class U>
+    struct rebind
+    {
+        typedef spp_::libc_allocator<U> other;
+    };
+
+};
+
+// forward declaration
+// -------------------
+template<class T>
+class spp_allocator;
+
+}
+
+template<class T>
+inline bool operator==(const spp_::libc_allocator<T> &, const spp_::libc_allocator<T> &)
+{
+    return true;
+}
+
+template<class T>
+inline bool operator!=(const spp_::libc_allocator<T> &, const spp_::libc_allocator<T> &)
+{
+    return false;
+}
+
+#endif // spp_utils_h_guard_
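
For orientation, a minimal usage sketch of the hashing utilities that spp_utils.h adds above. This is illustrative only and not part of the commit: the Point struct, the hash_point helper, and the include path are hypothetical, and user types would normally be hashed this way before being used as sparsepp keys.

// Hypothetical example: composing member hashes with spp_::hash_combine,
// which mixes spp_hash<T>(value) into a running seed (Combiner picks the
// 32- or 64-bit mixing constant based on sizeof(std::size_t)).
#include <cstdio>
#include "benchmarks/others/sparsepp/spp_utils.h"   // hypothetical path

struct Point { int x; int y; };                     // hypothetical user type

static size_t hash_point(const Point& p)
{
    size_t seed = 0;
    spp_::hash_combine(seed, p.x);  // dispatches to spp_hash<int32_t>
    spp_::hash_combine(seed, p.y);
    return seed;
}

int main()
{
    Point p = { 3, 7 };
    printf("%zu\n", hash_point(p));
    return 0;
}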
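Likewise, a minimal sketch of how a benchmark might combine the spp Timer from spp_timer.h with the memory helpers from spp_memory.h, in the spirit of the restructured benchmarks in this commit. The insert_workload function and the include paths are placeholders, not part of the commit.

// Hypothetical benchmark scaffold: time a workload and report process memory.
#include <cstdio>
#include <vector>
#include "benchmarks/others/sparsepp/spp_timer.h"   // hypothetical path
#include "benchmarks/others/sparsepp/spp_memory.h"  // hypothetical path

static void insert_workload()                       // placeholder workload
{
    std::vector<int> v;
    for (int i = 0; i < 1000000; ++i)
        v.push_back(i);
}

int main()
{
    spp::Timer<std::milli> timer;                   // starts timing at construction
    insert_workload();
    printf("elapsed: %.2f ms\n", timer.get_total());
    printf("process mem: %llu bytes\n",
           (unsigned long long)spp::GetProcessMemoryUsed());
    return 0;
}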
