diff options
| author | _Tradam <[email protected]> | 2023-09-08 01:29:47 +0000 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-09-08 01:29:47 +0000 |
| commit | 3c76c7f3d5db3f9586a90d03f8fbb02d79de9acd (patch) | |
| tree | afbe4b540967223911f7c5de36559b82154f02f3 /misc/benchmarks | |
| parent | 0841165881871ee01b782129be681209aeed2423 (diff) | |
| parent | 1a72205fe05c2375cfd380dd8381a8460d9ed8d1 (diff) | |
| download | STC-modified-modified.tar.gz STC-modified-modified.zip | |
Diffstat (limited to 'misc/benchmarks')
21 files changed, 638 insertions, 284 deletions
diff --git a/misc/benchmarks/external/ankerl/unordered_dense.h b/misc/benchmarks/external/ankerl/unordered_dense.h index faad051d..b8cacea7 100644 --- a/misc/benchmarks/external/ankerl/unordered_dense.h +++ b/misc/benchmarks/external/ankerl/unordered_dense.h @@ -1,7 +1,7 @@ ///////////////////////// ankerl::unordered_dense::{map, set} ///////////////////////// // A fast & densely stored hashmap and hashset based on robin-hood backward shift deletion. -// Version 3.1.0 +// Version 4.0.1 // https://github.com/martinus/unordered_dense // // Licensed under the MIT License <http://opensource.org/licenses/MIT>. @@ -30,12 +30,15 @@ #define ANKERL_UNORDERED_DENSE_H // see https://semver.org/spec/v2.0.0.html -#define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 3 // NOLINT(cppcoreguidelines-macro-usage) incompatible API changes -#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 1 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible functionality -#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 0 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes +#define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 4 // NOLINT(cppcoreguidelines-macro-usage) incompatible API changes +#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 0 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible functionality +#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 1 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes // API versioning with inline namespace, see https://www.foonathan.net/2018/11/inline-namespaces/ + +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch) v##major##_##minor##_##patch +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define ANKERL_UNORDERED_DENSE_VERSION_CONCAT(major, minor, patch) ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch) #define ANKERL_UNORDERED_DENSE_NAMESPACE \ ANKERL_UNORDERED_DENSE_VERSION_CONCAT( \ @@ -57,9 +60,9 @@ // exceptions #if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND) -# define ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS() 1 +# define ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS() 1 // NOLINT(cppcoreguidelines-macro-usage) #else -# define ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS() 0 +# define ANKERL_UNORDERED_DENSE_HAS_EXCEPTIONS() 0 // NOLINT(cppcoreguidelines-macro-usage) #endif #ifdef _MSC_VER # define ANKERL_UNORDERED_DENSE_NOINLINE __declspec(noinline) @@ -89,20 +92,13 @@ # include <cstdlib> // for abort # endif -# define ANKERL_UNORDERED_DENSE_PMR 0 // NOLINT(cppcoreguidelines-macro-usage) # if defined(__has_include) # if __has_include(<memory_resource>) -# undef ANKERL_UNORDERED_DENSE_PMR -# define ANKERL_UNORDERED_DENSE_PMR 1 // NOLINT(cppcoreguidelines-macro-usage) -# define ANKERL_UNORDERED_DENSE_PMR_ALLOCATOR \ - std::pmr::polymorphic_allocator // NOLINT(cppcoreguidelines-macro-usage) -# include <memory_resource> // for polymorphic_allocator +# define ANKERL_UNORDERED_DENSE_PMR std::pmr // NOLINT(cppcoreguidelines-macro-usage) +# include <memory_resource> // for polymorphic_allocator # elif __has_include(<experimental/memory_resource>) -# undef ANKERL_UNORDERED_DENSE_PMR -# define ANKERL_UNORDERED_DENSE_PMR 1 // NOLINT(cppcoreguidelines-macro-usage) -# define ANKERL_UNORDERED_DENSE_PMR_ALLOCATOR \ - std::experimental::pmr::polymorphic_allocator // NOLINT(cppcoreguidelines-macro-usage) -# include <experimental/memory_resource> // for polymorphic_allocator +# define ANKERL_UNORDERED_DENSE_PMR std::experimental::pmr // NOLINT(cppcoreguidelines-macro-usage) +# include <experimental/memory_resource> // for polymorphic_allocator # endif # endif @@ -428,7 +424,7 @@ constexpr bool is_map_v = !std::is_void_v<Mapped>; // clang-format off template <typename Hash, typename KeyEqual> -constexpr bool is_transparent_v = is_detected_v<detect_is_transparent, Hash>&& is_detected_v<detect_is_transparent, KeyEqual>; +constexpr bool is_transparent_v = is_detected_v<detect_is_transparent, Hash> && is_detected_v<detect_is_transparent, KeyEqual>; // clang-format on template <typename From, typename To1, typename To2> @@ -446,19 +442,320 @@ struct base_table_type_map { // base type for set doesn't have mapped_type struct base_table_type_set {}; +} // namespace detail + +// Very much like std::deque, but faster for indexing (in most cases). As of now this doesn't implement the full std::vector +// API, but merely what's necessary to work as an underlying container for ankerl::unordered_dense::{map, set}. +// It allocates blocks of equal size and puts them into the m_blocks vector. That means it can grow simply by adding a new +// block to the back of m_blocks, and doesn't double its size like an std::vector. The disadvantage is that memory is not +// linear and thus there is one more indirection necessary for indexing. +template <typename T, typename Allocator = std::allocator<T>, size_t MaxSegmentSizeBytes = 4096> +class segmented_vector { + template <bool IsConst> + class iter_t; + +public: + using allocator_type = Allocator; + using pointer = typename std::allocator_traits<allocator_type>::pointer; + using const_pointer = typename std::allocator_traits<allocator_type>::const_pointer; + using difference_type = typename std::allocator_traits<allocator_type>::difference_type; + using value_type = T; + using size_type = std::size_t; + using reference = T&; + using const_reference = T const&; + using iterator = iter_t<false>; + using const_iterator = iter_t<true>; + +private: + using vec_alloc = typename std::allocator_traits<Allocator>::template rebind_alloc<pointer>; + std::vector<pointer, vec_alloc> m_blocks{}; + size_t m_size{}; + + // Calculates the maximum number for x in (s << x) <= max_val + static constexpr auto num_bits_closest(size_t max_val, size_t s) -> size_t { + auto f = size_t{0}; + while (s << (f + 1) <= max_val) { + ++f; + } + return f; + } + + using self_t = segmented_vector<T, Allocator, MaxSegmentSizeBytes>; + static constexpr auto num_bits = num_bits_closest(MaxSegmentSizeBytes, sizeof(T)); + static constexpr auto num_elements_in_block = 1U << num_bits; + static constexpr auto mask = num_elements_in_block - 1U; + + /** + * Iterator class doubles as const_iterator and iterator + */ + template <bool IsConst> + class iter_t { + using ptr_t = typename std::conditional_t<IsConst, segmented_vector::const_pointer const*, segmented_vector::pointer*>; + ptr_t m_data{}; + size_t m_idx{}; + + template <bool B> + friend class iter_t; + + public: + using difference_type = segmented_vector::difference_type; + using value_type = T; + using reference = typename std::conditional_t<IsConst, value_type const&, value_type&>; + using pointer = typename std::conditional_t<IsConst, segmented_vector::const_pointer, segmented_vector::pointer>; + using iterator_category = std::forward_iterator_tag; + + iter_t() noexcept = default; + + template <bool OtherIsConst, typename = typename std::enable_if<IsConst && !OtherIsConst>::type> + // NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions) + constexpr iter_t(iter_t<OtherIsConst> const& other) noexcept + : m_data(other.m_data) + , m_idx(other.m_idx) {} + + constexpr iter_t(ptr_t data, size_t idx) noexcept + : m_data(data) + , m_idx(idx) {} + + template <bool OtherIsConst, typename = typename std::enable_if<IsConst && !OtherIsConst>::type> + constexpr auto operator=(iter_t<OtherIsConst> const& other) noexcept -> iter_t& { + m_data = other.m_data; + m_idx = other.m_idx; + return *this; + } + + constexpr auto operator++() noexcept -> iter_t& { + ++m_idx; + return *this; + } + + constexpr auto operator+(difference_type diff) noexcept -> iter_t { + return {m_data, static_cast<size_t>(static_cast<difference_type>(m_idx) + diff)}; + } + + template <bool OtherIsConst> + constexpr auto operator-(iter_t<OtherIsConst> const& other) noexcept -> difference_type { + return static_cast<difference_type>(m_idx) - static_cast<difference_type>(other.m_idx); + } + + constexpr auto operator*() const noexcept -> reference { + return m_data[m_idx >> num_bits][m_idx & mask]; + } + + constexpr auto operator->() const noexcept -> pointer { + return &m_data[m_idx >> num_bits][m_idx & mask]; + } + + template <bool O> + constexpr auto operator==(iter_t<O> const& o) const noexcept -> bool { + return m_idx == o.m_idx; + } + + template <bool O> + constexpr auto operator!=(iter_t<O> const& o) const noexcept -> bool { + return !(*this == o); + } + }; + + // slow path: need to allocate a new segment every once in a while + void increase_capacity() { + auto ba = Allocator(m_blocks.get_allocator()); + pointer block = std::allocator_traits<Allocator>::allocate(ba, num_elements_in_block); + m_blocks.push_back(block); + } + + // Moves everything from other + void append_everything_from(segmented_vector&& other) { + reserve(size() + other.size()); + for (auto&& o : other) { + emplace_back(std::move(o)); + } + } + + // Copies everything from other + void append_everything_from(segmented_vector const& other) { + reserve(size() + other.size()); + for (auto const& o : other) { + emplace_back(o); + } + } + + void dealloc() { + auto ba = Allocator(m_blocks.get_allocator()); + for (auto ptr : m_blocks) { + std::allocator_traits<Allocator>::deallocate(ba, ptr, num_elements_in_block); + } + } + + [[nodiscard]] static constexpr auto calc_num_blocks_for_capacity(size_t capacity) { + return (capacity + num_elements_in_block - 1U) / num_elements_in_block; + } + +public: + segmented_vector() = default; + + // NOLINTNEXTLINE(google-explicit-constructor,hicpp-explicit-conversions) + segmented_vector(Allocator alloc) + : m_blocks(vec_alloc(alloc)) {} + + segmented_vector(segmented_vector&& other, Allocator alloc) + : m_blocks(vec_alloc(alloc)) { + if (other.get_allocator() == alloc) { + *this = std::move(other); + } else { + // Oh my, allocator is different so we need to copy everything. + append_everything_from(std::move(other)); + } + } + + segmented_vector(segmented_vector&& other) noexcept + : m_blocks(std::move(other.m_blocks)) + , m_size(std::exchange(other.m_size, {})) {} + + segmented_vector(segmented_vector const& other, Allocator alloc) + : m_blocks(vec_alloc(alloc)) { + append_everything_from(other); + } + + segmented_vector(segmented_vector const& other) { + append_everything_from(other); + } + + auto operator=(segmented_vector const& other) -> segmented_vector& { + if (this == &other) { + return *this; + } + clear(); + append_everything_from(other); + return *this; + } + + auto operator=(segmented_vector&& other) noexcept -> segmented_vector& { + clear(); + dealloc(); + m_blocks = std::move(other.m_blocks); + m_size = std::exchange(other.m_size, {}); + return *this; + } + + ~segmented_vector() { + clear(); + dealloc(); + } + + [[nodiscard]] constexpr auto size() const -> size_t { + return m_size; + } + + [[nodiscard]] constexpr auto capacity() const -> size_t { + return m_blocks.size() * num_elements_in_block; + } + + // Indexing is highly performance critical + [[nodiscard]] constexpr auto operator[](size_t i) const noexcept -> T const& { + return m_blocks[i >> num_bits][i & mask]; + } + + [[nodiscard]] constexpr auto operator[](size_t i) noexcept -> T& { + return m_blocks[i >> num_bits][i & mask]; + } + + [[nodiscard]] constexpr auto begin() -> iterator { + return {m_blocks.data(), 0U}; + } + [[nodiscard]] constexpr auto begin() const -> const_iterator { + return {m_blocks.data(), 0U}; + } + [[nodiscard]] constexpr auto cbegin() const -> const_iterator { + return {m_blocks.data(), 0U}; + } + + [[nodiscard]] constexpr auto end() -> iterator { + return {m_blocks.data(), m_size}; + } + [[nodiscard]] constexpr auto end() const -> const_iterator { + return {m_blocks.data(), m_size}; + } + [[nodiscard]] constexpr auto cend() const -> const_iterator { + return {m_blocks.data(), m_size}; + } + + [[nodiscard]] constexpr auto back() -> reference { + return operator[](m_size - 1); + } + [[nodiscard]] constexpr auto back() const -> const_reference { + return operator[](m_size - 1); + } + + void pop_back() { + back().~T(); + --m_size; + } + + [[nodiscard]] auto empty() const { + return 0 == m_size; + } + + void reserve(size_t new_capacity) { + m_blocks.reserve(calc_num_blocks_for_capacity(new_capacity)); + while (new_capacity > capacity()) { + increase_capacity(); + } + } + + [[nodiscard]] auto get_allocator() const -> allocator_type { + return allocator_type{m_blocks.get_allocator()}; + } + + template <class... Args> + auto emplace_back(Args&&... args) -> reference { + if (m_size == capacity()) { + increase_capacity(); + } + auto* ptr = static_cast<void*>(&operator[](m_size)); + auto& ref = *new (ptr) T(std::forward<Args>(args)...); + ++m_size; + return ref; + } + + void clear() { + if constexpr (!std::is_trivially_destructible_v<T>) { + for (size_t i = 0, s = size(); i < s; ++i) { + operator[](i).~T(); + } + } + m_size = 0; + } + + void shrink_to_fit() { + auto ba = Allocator(m_blocks.get_allocator()); + auto num_blocks_required = calc_num_blocks_for_capacity(m_size); + while (m_blocks.size() > num_blocks_required) { + std::allocator_traits<Allocator>::deallocate(ba, m_blocks.back(), num_elements_in_block); + m_blocks.pop_back(); + } + m_blocks.shrink_to_fit(); + } +}; + +namespace detail { + // This is it, the table. Doubles as map and set, and uses `void` for T when its used as a set. template <class Key, class T, // when void, treat it as a set. class Hash, class KeyEqual, class AllocatorOrContainer, - class Bucket> + class Bucket, + bool IsSegmented> class table : public std::conditional_t<is_map_v<T>, base_table_type_map<T>, base_table_type_set> { + using underlying_value_type = typename std::conditional_t<is_map_v<T>, std::pair<Key, T>, Key>; + using underlying_container_type = std::conditional_t<IsSegmented, + segmented_vector<underlying_value_type, AllocatorOrContainer>, + std::vector<underlying_value_type, AllocatorOrContainer>>; + public: - using value_container_type = std::conditional_t< - is_detected_v<detect_iterator, AllocatorOrContainer>, - AllocatorOrContainer, - typename std::vector<typename std::conditional_t<is_map_v<T>, std::pair<Key, T>, Key>, AllocatorOrContainer>>; + using value_container_type = std:: + conditional_t<is_detected_v<detect_iterator, AllocatorOrContainer>, AllocatorOrContainer, underlying_container_type>; private: using bucket_alloc = @@ -492,7 +789,8 @@ private: static_assert(std::is_trivially_copyable_v<Bucket>, "assert we can just memset / memcpy"); value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes. - typename std::allocator_traits<bucket_alloc>::pointer m_buckets{}; + using bucket_pointer = typename std::allocator_traits<bucket_alloc>::pointer; + bucket_pointer m_buckets{}; size_t m_num_buckets = 0; size_t m_max_bucket_capacity = 0; float m_max_load_factor = default_max_load_factor; @@ -507,8 +805,7 @@ private: } // Helper to access bucket through pointer types - [[nodiscard]] static constexpr auto at(typename std::allocator_traits<bucket_alloc>::pointer bucket_ptr, size_t offset) - -> Bucket& { + [[nodiscard]] static constexpr auto at(bucket_pointer bucket_ptr, size_t offset) -> Bucket& { return *(bucket_ptr + static_cast<typename std::allocator_traits<bucket_alloc>::difference_type>(offset)); } @@ -578,7 +875,7 @@ private: } [[nodiscard]] static constexpr auto calc_num_buckets(uint8_t shifts) -> size_t { - return std::min(max_bucket_count(), size_t{1} << (64U - shifts)); + return (std::min)(max_bucket_count(), size_t{1} << (64U - shifts)); } [[nodiscard]] constexpr auto calc_shifts_for_size(size_t s) const -> uint8_t { @@ -983,7 +1280,7 @@ public: } [[nodiscard]] static constexpr auto max_size() noexcept -> size_t { - if constexpr (std::numeric_limits<value_idx_type>::max() == std::numeric_limits<size_t>::max()) { + if constexpr ((std::numeric_limits<value_idx_type>::max)() == (std::numeric_limits<size_t>::max)()) { return size_t{1} << (sizeof(value_idx_type) * 8 - 1); } else { return size_t{1} << (sizeof(value_idx_type) * 8); @@ -1272,7 +1569,7 @@ public: auto const last_to_end = std::distance(last, cend()); // remove elements from left to right which moves elements from the end back - auto const mid = idx_first + std::min(first_to_last, last_to_end); + auto const mid = idx_first + (std::min)(first_to_last, last_to_end); auto idx = idx_first; while (idx != mid) { erase(begin() + idx); @@ -1439,8 +1736,8 @@ public: } void rehash(size_t count) { - count = std::min(count, max_size()); - auto shifts = calc_shifts_for_size(std::max(count, size())); + count = (std::min)(count, max_size()); + auto shifts = calc_shifts_for_size((std::max)(count, size())); if (shifts != m_shifts) { m_shifts = shifts; deallocate_buckets(); @@ -1451,12 +1748,12 @@ public: } void reserve(size_t capa) { - capa = std::min(capa, max_size()); + capa = (std::min)(capa, max_size()); if constexpr (has_reserve<value_container_type>) { // std::deque doesn't have reserve(). Make sure we only call when available m_values.reserve(capa); } - auto shifts = calc_shifts_for_size(std::max(capa, size())); + auto shifts = calc_shifts_for_size((std::max)(capa, size())); if (0 == m_num_buckets || shifts < m_shifts) { m_shifts = shifts; deallocate_buckets(); @@ -1519,16 +1816,31 @@ template <class Key, class KeyEqual = std::equal_to<Key>, class AllocatorOrContainer = std::allocator<std::pair<Key, T>>, class Bucket = bucket_type::standard> -using map = detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket>; +using map = detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket, false>; + +template <class Key, + class T, + class Hash = hash<Key>, + class KeyEqual = std::equal_to<Key>, + class AllocatorOrContainer = std::allocator<std::pair<Key, T>>, + class Bucket = bucket_type::standard> +using segmented_map = detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket, true>; + +template <class Key, + class Hash = hash<Key>, + class KeyEqual = std::equal_to<Key>, + class AllocatorOrContainer = std::allocator<Key>, + class Bucket = bucket_type::standard> +using set = detail::table<Key, void, Hash, KeyEqual, AllocatorOrContainer, Bucket, false>; template <class Key, class Hash = hash<Key>, class KeyEqual = std::equal_to<Key>, class AllocatorOrContainer = std::allocator<Key>, class Bucket = bucket_type::standard> -using set = detail::table<Key, void, Hash, KeyEqual, AllocatorOrContainer, Bucket>; +using segmented_set = detail::table<Key, void, Hash, KeyEqual, AllocatorOrContainer, Bucket, true>; -# if ANKERL_UNORDERED_DENSE_PMR +# if defined(ANKERL_UNORDERED_DENSE_PMR) namespace pmr { @@ -1537,10 +1849,23 @@ template <class Key, class Hash = hash<Key>, class KeyEqual = std::equal_to<Key>, class Bucket = bucket_type::standard> -using map = detail::table<Key, T, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR_ALLOCATOR<std::pair<Key, T>>, Bucket>; +using map = + detail::table<Key, T, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator<std::pair<Key, T>>, Bucket, false>; + +template <class Key, + class T, + class Hash = hash<Key>, + class KeyEqual = std::equal_to<Key>, + class Bucket = bucket_type::standard> +using segmented_map = + detail::table<Key, T, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator<std::pair<Key, T>>, Bucket, true>; + +template <class Key, class Hash = hash<Key>, class KeyEqual = std::equal_to<Key>, class Bucket = bucket_type::standard> +using set = detail::table<Key, void, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator<Key>, Bucket, false>; template <class Key, class Hash = hash<Key>, class KeyEqual = std::equal_to<Key>, class Bucket = bucket_type::standard> -using set = detail::table<Key, void, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR_ALLOCATOR<Key>, Bucket>; +using segmented_set = + detail::table<Key, void, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR::polymorphic_allocator<Key>, Bucket, true>; } // namespace pmr @@ -1558,11 +1883,18 @@ using set = detail::table<Key, void, Hash, KeyEqual, ANKERL_UNORDERED_DENSE_PMR_ namespace std { // NOLINT(cert-dcl58-cpp) -template <class Key, class T, class Hash, class KeyEqual, class AllocatorOrContainer, class Bucket, class Pred> +template <class Key, + class T, + class Hash, + class KeyEqual, + class AllocatorOrContainer, + class Bucket, + class Pred, + bool IsSegmented> // NOLINTNEXTLINE(cert-dcl58-cpp) -auto erase_if(ankerl::unordered_dense::detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket>& map, Pred pred) - -> size_t { - using map_t = ankerl::unordered_dense::detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket>; +auto erase_if(ankerl::unordered_dense::detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket, IsSegmented>& map, + Pred pred) -> size_t { + using map_t = ankerl::unordered_dense::detail::table<Key, T, Hash, KeyEqual, AllocatorOrContainer, Bucket, IsSegmented>; // going back to front because erase() invalidates the end iterator auto const old_size = map.size(); @@ -1575,7 +1907,7 @@ auto erase_if(ankerl::unordered_dense::detail::table<Key, T, Hash, KeyEqual, All } } - return map.size() - old_size; + return old_size - map.size(); } } // namespace std diff --git a/misc/benchmarks/external/emhash/hash_table7.hpp b/misc/benchmarks/external/emhash/hash_table7.hpp index 8f8982f9..41970d8c 100644 --- a/misc/benchmarks/external/emhash/hash_table7.hpp +++ b/misc/benchmarks/external/emhash/hash_table7.hpp @@ -92,7 +92,7 @@ of resizing granularity. Ignoring variance, the expected occurrences of list siz #include "wyhash.h" #endif -#ifdef EMH_KEY +#ifdef EMH_NEW #undef EMH_KEY #undef EMH_VAL #undef EMH_PKV @@ -547,10 +547,10 @@ public: static PairT* alloc_bucket(size_type num_buckets) { -#if _WIN32 - auto* new_pairs = (PairT*)malloc(AllocSize(num_buckets)); -#else +#ifdef EMH_ALLOC auto* new_pairs = (PairT*)aligned_alloc(EMH_MALIGN, AllocSize(num_buckets)); +#else + auto* new_pairs = (PairT*)malloc(AllocSize(num_buckets)); #endif return new_pairs; } @@ -1668,16 +1668,10 @@ private: // key is not in this map. Find a place to put it. size_type find_empty_bucket(const size_type bucket_from, const size_type main_bucket) { -#ifdef EMH_ALIGN64 // only works 64bit - const auto boset = bucket_from % MASK_BIT; - auto* const align = _bitmask + bucket_from / MASK_BIT; - const auto bmask = ((size_t)align[1] << (MASK_BIT - boset)) | (align[0] >> boset); - if (EMH_LIKELY(bmask != 0)) - return bucket_from + CTZ(bmask); -#elif EMH_ITER_SAFE +#if EMH_ITER_SAFE const auto boset = bucket_from % 8; - auto* const start = (uint8_t*)_bitmask + bucket_from / 8; - size_t bmask; memcpy(&bmask, start + 0, sizeof(bmask)); bmask >>= boset;// bmask |= ((size_t)start[8] << (SIZE_BIT - boset)); + auto* const align = (uint8_t*)_bitmask + bucket_from / 8;(void)main_bucket; + size_t bmask; memcpy(&bmask, align + 0, sizeof(bmask)); bmask >>= boset;// bmask |= ((size_t)align[8] << (SIZE_BIT - boset)); if (EMH_LIKELY(bmask != 0)) return bucket_from + CTZ(bmask); #else @@ -1715,21 +1709,15 @@ private: } // key is not in this map. Find a place to put it. - size_type find_unique_empty(const size_type bucket_from, const size_t main_bucket) - { -#ifdef EMH_ALIGN64 - const auto boset = bucket_from % MASK_BIT; - auto* const align = _bitmask + bucket_from / MASK_BIT; - const auto bmask = ((size_t)align[1] << (MASK_BIT - boset)) | (align[0] >> boset); - static_assert(sizeof(size_t) > 4); -#elif EMH_ITER_SAFE + size_type find_unique_empty(const size_type bucket_from) + { const auto boset = bucket_from % 8; - auto* const start = (uint8_t*)_bitmask + bucket_from / 8; - size_t bmask; memcpy(&bmask, start + 0, sizeof(bmask)); bmask >>= boset; -#else - const auto boset = bucket_from % 8; (void)main_bucket; auto* const align = (uint8_t*)_bitmask + bucket_from / 8; - const auto bmask = (*(size_t*)(align) >> boset); //maybe not aligned and warning + +#if EMH_ITER_SAFE + size_t bmask; memcpy(&bmask, align + 0, sizeof(bmask)); bmask >>= boset; +#else + const auto bmask = (*(size_t*)(align) >> boset); //maybe not aligned and warning #endif if (EMH_LIKELY(bmask != 0)) return bucket_from + CTZ(bmask); @@ -1789,12 +1777,12 @@ private: next_bucket = find_last_bucket(next_bucket); //find a new empty and link it to tail - return EMH_BUCKET(_pairs, next_bucket) = find_unique_empty(next_bucket, bucket); + return EMH_BUCKET(_pairs, next_bucket) = find_empty_bucket(next_bucket, bucket); } #if EMH_INT_HASH static constexpr uint64_t KC = UINT64_C(11400714819323198485); - inline uint64_t hash64(uint64_t key) + inline static uint64_t hash64(uint64_t key) { #if __SIZEOF_INT128__ && EMH_INT_HASH == 1 __uint128_t r = key; r *= KC; diff --git a/misc/benchmarks/plotbench/cdeq_benchmark.cpp b/misc/benchmarks/plotbench/cdeq_benchmark.cpp index bb0e28c8..d11b4103 100644 --- a/misc/benchmarks/plotbench/cdeq_benchmark.cpp +++ b/misc/benchmarks/plotbench/cdeq_benchmark.cpp @@ -9,10 +9,10 @@ #endif enum {INSERT, ERASE, FIND, ITER, DESTRUCT, N_TESTS}; -const char* operations[] = {"insert", "erase", "find", "iter", "destruct"}; +const char* operations[] = {"insert", "erase", "access", "iter", "destruct"}; typedef struct { time_t t1, t2; uint64_t sum; float fac; } Range; typedef struct { const char* name; Range test[N_TESTS]; } Sample; -enum {SAMPLES = 2, N = 50000000, S = 0x3ffc, R = 4}; +enum {SAMPLES = 2, N = 60000000, R = 4}; uint64_t seed = 1, mask1 = 0xfffffff, mask2 = 0xffff; static float secs(Range s) { return (float)(s.t2 - s.t1) / CLOCKS_PER_SEC; } @@ -44,14 +44,12 @@ Sample test_std_deque() { c_forrange (N) con.push_back(crand() & mask2); s.test[FIND].t1 = clock(); size_t sum = 0; - // Iteration - not inherent find - skipping - //container::iterator it; - //c_forrange (S) if ((it = std::find(con.begin(), con.end(), crand() & mask2)) != con.end()) sum += *it; + c_forrange (R) c_forrange (i, N) sum += con[i]; s.test[FIND].t2 = clock(); s.test[FIND].sum = sum; s.test[ITER].t1 = clock(); sum = 0; - c_forrange (R) c_forrange (i, N) sum += con[i]; + c_forrange (R) for (const auto i: con) sum += i; s.test[ITER].t2 = clock(); s.test[ITER].sum = sum; s.test[DESTRUCT].t1 = clock(); @@ -89,13 +87,12 @@ Sample test_stc_deque() { c_forrange (N) cdeq_x_push_back(&con, crand() & mask2); s.test[FIND].t1 = clock(); size_t sum = 0; - //cdeq_x_iter it, end = cdeq_x_end(&con); - //c_forrange (S) if ((it = cdeq_x_find(&con, crand() & mask2)).ref != end.ref) sum += *it.ref; + c_forrange (R) c_forrange (i, N) sum += *cdeq_x_at(&con, i); s.test[FIND].t2 = clock(); s.test[FIND].sum = sum; s.test[ITER].t1 = clock(); sum = 0; - c_forrange (R) c_forrange (i, N) sum += con.data[i]; + c_forrange (R) c_foreach (i, cdeq_x, con) sum += *i.ref; s.test[ITER].t2 = clock(); s.test[ITER].sum = sum; s.test[DESTRUCT].t1 = clock(); diff --git a/misc/benchmarks/plotbench/clist_benchmark.cpp b/misc/benchmarks/plotbench/clist_benchmark.cpp index 01bfbf83..8fdfddba 100644 --- a/misc/benchmarks/plotbench/clist_benchmark.cpp +++ b/misc/benchmarks/plotbench/clist_benchmark.cpp @@ -9,10 +9,10 @@ #endif enum {INSERT, ERASE, FIND, ITER, DESTRUCT, N_TESTS}; -const char* operations[] = {"insert", "erase", "find", "iter", "destruct"}; +const char* operations[] = {"insert", "erase", "access", "iter", "destruct"}; typedef struct { time_t t1, t2; uint64_t sum; float fac; } Range; typedef struct { const char* name; Range test[N_TESTS]; } Sample; -enum {SAMPLES = 2, N = 10000000, S = 0x3ffc, R = 4}; +enum {SAMPLES = 2, N = 10000000, R = 4}; uint64_t seed = 1, mask1 = 0xfffffff, mask2 = 0xffff; static float secs(Range s) { return (float)(s.t2 - s.t1) / CLOCKS_PER_SEC; } @@ -45,7 +45,6 @@ Sample test_std_forward_list() { size_t sum = 0; container::iterator it; // Iteration - not inherent find - skipping - //c_forrange (S) if ((it = std::find(con.begin(), con.end(), crand() & mask2)) != con.end()) sum += *it; s.test[FIND].t2 = clock(); s.test[FIND].sum = sum; s.test[ITER].t1 = clock(); @@ -86,8 +85,7 @@ Sample test_stc_forward_list() { c_forrange (N) clist_x_push_front(&con, crand() & mask2); s.test[FIND].t1 = clock(); size_t sum = 0; - //clist_x_iter it, end = clist_x_end(&con); - //c_forrange (S) if ((it = clist_x_find(&con, crand() & mask2)).ref != end.ref) sum += *it.ref; + //clist iteration - skipping s.test[FIND].t2 = clock(); s.test[FIND].sum = sum; s.test[ITER].t1 = clock(); diff --git a/misc/benchmarks/plotbench/cmap_benchmark.cpp b/misc/benchmarks/plotbench/cmap_benchmark.cpp index 6b2edbd7..c7957bb7 100644 --- a/misc/benchmarks/plotbench/cmap_benchmark.cpp +++ b/misc/benchmarks/plotbench/cmap_benchmark.cpp @@ -8,7 +8,7 @@ #endif enum {INSERT, ERASE, FIND, ITER, DESTRUCT, N_TESTS}; -const char* operations[] = {"insert", "erase", "find", "iter", "destruct"}; +const char* operations[] = {"insert", "erase", "access", "iter", "destruct"}; typedef struct { time_t t1, t2; uint64_t sum; float fac; } Range; typedef struct { const char* name; Range test[N_TESTS]; } Sample; enum {SAMPLES = 2, N = 2000000, R = 4}; @@ -47,12 +47,14 @@ Sample test_std_unordered_map() { s.test[FIND].t1 = clock(); size_t sum = 0; container::iterator it; - c_forrange (N) if ((it = con.find(crand() & mask1)) != con.end()) sum += it->second; + c_forrange (N) + if ((it = con.find(crand() & mask1)) != con.end()) + sum += it->second; s.test[FIND].t2 = clock(); s.test[FIND].sum = sum; s.test[ITER].t1 = clock(); sum = 0; - c_forrange (R) for (auto i: con) sum += i.second; + c_forrange (R) for (const auto& i: con) sum += i.second; s.test[ITER].t2 = clock(); s.test[ITER].sum = sum; s.test[DESTRUCT].t1 = clock(); diff --git a/misc/benchmarks/plotbench/cpque_benchmark.cpp b/misc/benchmarks/plotbench/cpque_benchmark.cpp index 2d4c7a28..aafc9eb0 100644 --- a/misc/benchmarks/plotbench/cpque_benchmark.cpp +++ b/misc/benchmarks/plotbench/cpque_benchmark.cpp @@ -37,7 +37,8 @@ void stc_test() { int N = 10000000; - c_auto (cpque_f, pq) + cpque_f pq = {0}; + c_defer(cpque_f_drop(&pq)) { csrand(seed); clock_t start = clock(); @@ -58,7 +59,7 @@ void stc_test() } -int main() +int main(void) { puts("STD P.QUEUE:"); std_test(); diff --git a/misc/benchmarks/plotbench/csmap_benchmark.cpp b/misc/benchmarks/plotbench/csmap_benchmark.cpp index 60f2db49..480163ed 100644 --- a/misc/benchmarks/plotbench/csmap_benchmark.cpp +++ b/misc/benchmarks/plotbench/csmap_benchmark.cpp @@ -8,7 +8,7 @@ #endif enum {INSERT, ERASE, FIND, ITER, DESTRUCT, N_TESTS}; -const char* operations[] = {"insert", "erase", "find", "iter", "destruct"}; +const char* operations[] = {"insert", "erase", "access", "iter", "destruct"}; typedef struct { time_t t1, t2; uint64_t sum; float fac; } Range; typedef struct { const char* name; Range test[N_TESTS]; } Sample; enum {SAMPLES = 2, N = 1000000, R = 4}; @@ -47,12 +47,14 @@ Sample test_std_map() { s.test[FIND].t1 = clock(); size_t sum = 0; container::iterator it; - c_forrange (N) if ((it = con.find(crand() & mask1)) != con.end()) sum += it->second; + c_forrange (N) + if ((it = con.find(crand() & mask1)) != con.end()) + sum += it->second; s.test[FIND].t2 = clock(); s.test[FIND].sum = sum; s.test[ITER].t1 = clock(); sum = 0; - c_forrange (R) for (auto i: con) sum += i.second; + c_forrange (R) for (const auto& i: con) sum += i.second; s.test[ITER].t2 = clock(); s.test[ITER].sum = sum; s.test[DESTRUCT].t1 = clock(); diff --git a/misc/benchmarks/plotbench/cvec_benchmark.cpp b/misc/benchmarks/plotbench/cvec_benchmark.cpp index c488a01c..fb10653a 100644 --- a/misc/benchmarks/plotbench/cvec_benchmark.cpp +++ b/misc/benchmarks/plotbench/cvec_benchmark.cpp @@ -9,10 +9,10 @@ #endif enum {INSERT, ERASE, FIND, ITER, DESTRUCT, N_TESTS}; -const char* operations[] = {"insert", "erase", "find", "iter", "destruct"}; +const char* operations[] = {"insert", "erase", "access", "iter", "destruct"}; typedef struct { time_t t1, t2; uint64_t sum; float fac; } Range; typedef struct { const char* name; Range test[N_TESTS]; } Sample; -enum {SAMPLES = 2, N = 80000000, S = 0x3ffc, R = 4}; +enum {SAMPLES = 2, N = 60000000, R = 4}; uint64_t seed = 1, mask1 = 0xfffffff, mask2 = 0xffff; static float secs(Range s) { return (float)(s.t2 - s.t1) / CLOCKS_PER_SEC; } @@ -42,14 +42,12 @@ Sample test_std_vector() { c_forrange (N) con.push_back(crand() & mask2); s.test[FIND].t1 = clock(); size_t sum = 0; - //container::iterator it; - // Iteration - not inherent find - skipping - //c_forrange (S) if ((it = std::find(con.begin(), con.end(), crand() & mask2)) != con.end()) sum += *it; + c_forrange (R) c_forrange (i, N) sum += con[i]; s.test[FIND].t2 = clock(); s.test[FIND].sum = sum; s.test[ITER].t1 = clock(); sum = 0; - c_forrange (R) c_forrange (i, N) sum += con[i]; + c_forrange (R) for (const auto i: con) sum += i; s.test[ITER].t2 = clock(); s.test[ITER].sum = sum; s.test[DESTRUCT].t1 = clock(); @@ -71,27 +69,26 @@ Sample test_stc_vector() { s.test[INSERT].t1 = clock(); container con = cvec_x_init(); csrand(seed); - c_forrange (N) cvec_x_push_back(&con, crand() & mask1); + c_forrange (N) cvec_x_push(&con, crand() & mask1); s.test[INSERT].t2 = clock(); s.test[INSERT].sum = cvec_x_size(&con); s.test[ERASE].t1 = clock(); - c_forrange (N) { cvec_x_pop_back(&con); } + c_forrange (N) { cvec_x_pop(&con); } s.test[ERASE].t2 = clock(); s.test[ERASE].sum = cvec_x_size(&con); cvec_x_drop(&con); }{ csrand(seed); container con = cvec_x_init(); - c_forrange (N) cvec_x_push_back(&con, crand() & mask2); + c_forrange (N) cvec_x_push(&con, crand() & mask2); s.test[FIND].t1 = clock(); size_t sum = 0; - //cvec_x_iter it, end = cvec_x_end(&con); - //c_forrange (S) if ((it = cvec_x_find(&con, crand() & mask2)).ref != end.ref) sum += *it.ref; + c_forrange (R) c_forrange (i, N) sum += con.data[i]; s.test[FIND].t2 = clock(); s.test[FIND].sum = sum; s.test[ITER].t1 = clock(); sum = 0; - c_forrange (R) c_forrange (i, N) sum += con.data[i]; + c_forrange (R) c_foreach (i, cvec_x, con) sum += *i.ref; s.test[ITER].t2 = clock(); s.test[ITER].sum = sum; s.test[DESTRUCT].t1 = clock(); diff --git a/misc/benchmarks/plotbench/plot.py b/misc/benchmarks/plotbench/plot.py index 0ba92264..4a02c6b2 100644 --- a/misc/benchmarks/plotbench/plot.py +++ b/misc/benchmarks/plotbench/plot.py @@ -4,7 +4,7 @@ import pandas as pd import matplotlib.pyplot as plt #sns.set_theme(style="whitegrid") -comp = ['All compilers', 'Mingw-g++-11.3.0', 'Win-Clang-14.0.1', 'VC-19.28'] +comp = ['All compilers', 'Mingw-g++-13.1.0', 'Win-Clang-16.0.5', 'VC-19.36'] n = int(sys.argv[1]) if len(sys.argv) > 1 else 0 file = sys.argv[2] if len(sys.argv) > 2 else 'plot_win.csv' df = pd.read_csv(file) @@ -12,13 +12,12 @@ df = df[df.Method != 'total'] if n > 0: df = df[df.Compiler == comp[n]] -g = sns.catplot(data=df, x='Method', y='Seconds', hue='Library', col='C', kind='bar', - ci=68, legend=False, col_wrap=2, sharex=False, aspect=1.4, height=3.1) +g = sns.catplot(data=df, x='Method', y='Seconds', hue='Library', col='C', kind='bar', orient='v', + errorbar=('ci', 68), legend=False, col_wrap=2, sharex=False, aspect=1.4, height=3.0) g.set_xlabels('') -g.add_legend(bbox_to_anchor=(0.75, 0.2), borderaxespad=0.) - -g.fig.subplots_adjust(top=0.90, left=0.06, bottom=0.07) +g.add_legend(bbox_to_anchor=(0.75, 0.2), borderaxespad=0) +g.fig.subplots_adjust(top=0.90, left=0.08, right=0.98, bottom=0.04, hspace=0.4) g.fig.suptitle('Benchmark STC vs c++ std containers: %s' % comp[n], fontsize=15, y=0.98) plt.show() diff --git a/misc/benchmarks/plotbench/run_all.bat b/misc/benchmarks/plotbench/run_all.bat index 98913a50..de380531 100644 --- a/misc/benchmarks/plotbench/run_all.bat +++ b/misc/benchmarks/plotbench/run_all.bat @@ -1,7 +1,5 @@ -set out=plot_win.csv +@set out=plot_win.csv echo Compiler,Library,C,Method,Seconds,Ratio> %out% -echo gcc sh run_gcc.sh >> %out% -echo clang sh run_clang.sh >> %out% -REM call run_vc.bat >> %out% +call run_vc.bat >> %out% diff --git a/misc/benchmarks/plotbench/run_clang.sh b/misc/benchmarks/plotbench/run_clang.sh index 096e71be..4f649cbc 100644 --- a/misc/benchmarks/plotbench/run_clang.sh +++ b/misc/benchmarks/plotbench/run_clang.sh @@ -1,12 +1,12 @@ exe='' if [ "$OS" = "Windows_NT" ] ; then exe=".exe" ; fi -clang++ -I../include -O3 -o cdeq_benchmark$exe cdeq_benchmark.cpp -clang++ -I../include -O3 -o clist_benchmark$exe clist_benchmark.cpp -clang++ -I../include -O3 -o cmap_benchmark$exe cmap_benchmark.cpp -clang++ -I../include -O3 -o csmap_benchmark$exe csmap_benchmark.cpp -clang++ -I../include -O3 -o cvec_benchmark$exe cvec_benchmark.cpp +clang -DNDEBUG -I../../include -O3 -o cdeq_benchmark$exe cdeq_benchmark.cpp -lstdc++ +clang -DNDEBUG -I../../include -O3 -o clist_benchmark$exe clist_benchmark.cpp -lstdc++ +clang -DNDEBUG -I../../include -O3 -o cmap_benchmark$exe cmap_benchmark.cpp -lstdc++ +clang -DNDEBUG -I../../include -O3 -o csmap_benchmark$exe csmap_benchmark.cpp -lstdc++ +clang -DNDEBUG -I../../include -O3 -o cvec_benchmark$exe cvec_benchmark.cpp -lstdc++ -c='Win-Clang-14.0.1' +c='Win-Clang-16.0.5' ./cdeq_benchmark$exe $c ./clist_benchmark$exe $c ./cmap_benchmark$exe $c diff --git a/misc/benchmarks/plotbench/run_gcc.sh b/misc/benchmarks/plotbench/run_gcc.sh index 5249ed1e..0bd2d6ee 100644 --- a/misc/benchmarks/plotbench/run_gcc.sh +++ b/misc/benchmarks/plotbench/run_gcc.sh @@ -1,10 +1,10 @@ -g++ -I../include -O3 -o cdeq_benchmark cdeq_benchmark.cpp -g++ -I../include -O3 -o clist_benchmark clist_benchmark.cpp -g++ -I../include -O3 -o cmap_benchmark cmap_benchmark.cpp -g++ -I../include -O3 -o csmap_benchmark csmap_benchmark.cpp -g++ -I../include -O3 -o cvec_benchmark cvec_benchmark.cpp +g++ -DNDEBUG -I../../include -O3 -o cdeq_benchmark cdeq_benchmark.cpp +g++ -DNDEBUG -I../../include -O3 -o clist_benchmark clist_benchmark.cpp +g++ -DNDEBUG -I../../include -O3 -o cmap_benchmark cmap_benchmark.cpp +g++ -DNDEBUG -I../../include -O3 -o csmap_benchmark csmap_benchmark.cpp +g++ -DNDEBUG -I../../include -O3 -o cvec_benchmark cvec_benchmark.cpp -c='Mingw-g++-11.3.0' +c='Mingw-g++-13.1.0' ./cdeq_benchmark $c ./clist_benchmark $c ./cmap_benchmark $c diff --git a/misc/benchmarks/plotbench/run_vc.bat b/misc/benchmarks/plotbench/run_vc.bat index dc4938f8..c00d059b 100644 --- a/misc/benchmarks/plotbench/run_vc.bat +++ b/misc/benchmarks/plotbench/run_vc.bat @@ -1,14 +1,14 @@ @echo off -if "%VSINSTALLDIR%"=="" call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" >nul +if "%VSINSTALLDIR%"=="" call "C:\Program Files (x86)\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat" >nul cl.exe -nologo -EHsc -std:c++latest -I../include -O2 cdeq_benchmark.cpp >nul cl.exe -nologo -EHsc -std:c++latest -I../include -O2 clist_benchmark.cpp >nul cl.exe -nologo -EHsc -std:c++latest -I../include -O2 cmap_benchmark.cpp >nul cl.exe -nologo -EHsc -std:c++latest -I../include -O2 csmap_benchmark.cpp >nul cl.exe -nologo -EHsc -std:c++latest -I../include -O2 cvec_benchmark.cpp >nul -del *.obj >nul +if exist *obj del *.obj -set c=VC-19.28 +set c=VC-19.36 cdeq_benchmark.exe %c% clist_benchmark.exe %c% cmap_benchmark.exe %c% diff --git a/misc/benchmarks/shootout_hashmaps.cpp b/misc/benchmarks/shootout_hashmaps.cpp index bae9a42b..c6a81777 100644 --- a/misc/benchmarks/shootout_hashmaps.cpp +++ b/misc/benchmarks/shootout_hashmaps.cpp @@ -2,7 +2,7 @@ #include <time.h> #include <stc/crand.h> -#define MAX_LOAD_FACTOR 85 +#define MAX_LOAD_FACTOR 80 #ifdef __cplusplus #include <limits> @@ -35,8 +35,8 @@ KHASH_MAP_INIT_INT64(ii, IValue) // cmap template expansion #define i_key IKey #define i_val IValue -#define i_ssize int32_t // enable 2^K buckets like the rest. #define i_tag ii +//#define i_expandby 1 #define i_max_load_factor MAX_LOAD_FACTOR / 100.0f #include <stc/cmap.h> diff --git a/misc/benchmarks/various/csort_bench.c b/misc/benchmarks/various/csort_bench.c index d5d7fa7c..793a0503 100644 --- a/misc/benchmarks/various/csort_bench.c +++ b/misc/benchmarks/various/csort_bench.c @@ -5,8 +5,12 @@ #ifdef __cplusplus #include <algorithm> #endif -#define i_val int -#include <stc/algo/csort.h> +#define NDEBUG +#define i_type Ints +#define i_key int +#define i_more +#include <stc/cvec.h> +#include <stc/algo/sort.h> #define ROTL(d,bits) ((d<<(bits)) | (d>>(8*sizeof(d)-(bits)))) uint64_t romutrio(uint64_t s[3]) { @@ -21,14 +25,14 @@ static int cmp_int(const void* a, const void* b) { return c_default_cmp((const int*)a, (const int*)b); } -void testsort(int *a, int size, const char *desc) { +void testsort(Ints *a, int size, const char *desc) { clock_t t = clock(); #ifdef __cplusplus - printf("std::sort: "); std::sort(a, a + size); + printf("std::sort: "); std::sort(a->data, a->data + size); #elif defined QSORT - printf("qsort: "); qsort(a, size, sizeof *a, cmp_int); + printf("qsort: "); qsort(a->data, size, sizeof *a->data, cmp_int); #else - printf("stc_sort: "); csort_int(a, size); + printf("STC sort_n: "); Ints_sort_n(a, size); #endif t = clock() - t; @@ -41,27 +45,27 @@ int main(int argc, char *argv[]) { size_t i, size = argc > 1 ? strtoull(argv[1], NULL, 0) : 10000000; uint64_t s[3] = {123456789, 3456789123, 789123456}; - int32_t *a = (int32_t*)malloc(sizeof(*a) * size); - if (!a) return -1; + Ints a = Ints_with_capacity(size); for (i = 0; i < size; i++) - a[i] = romutrio(s) & (1U << 30) - 1; - testsort(a, size, "random"); + Ints_push(&a, romutrio(s) & (1U << 30) - 1); + testsort(&a, size, "random"); for (i = 0; i < 20; i++) - printf(" %d", (int)a[i]); + printf(" %d", (int)*Ints_at(&a, i)); puts(""); for (i = 0; i < size; i++) - a[i] = i; - testsort(a, size, "sorted"); + *Ints_at_mut(&a, i) = i; + testsort(&a, size, "sorted"); for (i = 0; i < size; i++) - a[i] = size - i; - testsort(a, size, "reverse sorted"); + *Ints_at_mut(&a, i) = size - i; + testsort(&a, size, "reverse sorted"); for (i = 0; i < size; i++) - a[i] = 126735; - testsort(a, size, "constant"); + *Ints_at_mut(&a, i) = 126735; + testsort(&a, size, "constant"); for (i = 0; i < size; i++) - a[i] = i + 1; - a[size - 1] = 0; - testsort(a, size, "rotated"); - free(a); + *Ints_at_mut(&a, i) = i + 1; + *Ints_at_mut(&a, size - 1) = 0; + testsort(&a, size, "rotated"); + + Ints_drop(&a); } diff --git a/misc/benchmarks/various/cspan_bench.c b/misc/benchmarks/various/cspan_bench.c index 589df13a..bfc0ead3 100644 --- a/misc/benchmarks/various/cspan_bench.c +++ b/misc/benchmarks/various/cspan_bench.c @@ -1,4 +1,5 @@ -#define STC_NDEBUG +// ref: https://stackoverflow.com/questions/74382366/why-is-iterating-over-stdrangesviewsjoin-so-slow +#define NDEBUG #include <stc/cspan.h> #include <stdio.h> #include <time.h> @@ -11,9 +12,9 @@ enum { ny = 64, nz = 64 }; +// subspan 15x5x10: int lx = 15, ly = 10, lz = 5; -int hx = 20, hy = 15, hz = 15; -intptr_t n = 1000000; +int hx = 30, hy = 15, hz = 15; // define the contents of two nx x ny x nz arrays in and out double Vout[nx * ny * nz]; @@ -21,36 +22,15 @@ double Vin[nx * ny * nz]; //, 1.23; // define some slice indices for each dimension -static void MDRanges_setup(intptr_t state) -{ - double sum = 0; - clock_t t = clock(); - - for (intptr_t s = 0; s < state; ++s) - { - MD3 r_in = cspan_md(Vin, nx, ny, nz); - MD3 r_out = cspan_md(Vout, nx, ny, nz); - - r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); - r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - MD3_iter i = MD3_begin(&r_in); // can be iterated "flat". - MD3_iter o = MD3_begin(&r_out); - sum += Vin[s % nx]; - } - t = clock() - t; - printf("setup: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); -} - -static void TraditionalForLoop(intptr_t state) +static void Traditional_for_loop(intptr_t n) { clock_t t = clock(); double sum = 0; - for (int s = 0; s < state; ++s) { + for (int s = 0; s < n; ++s) { for (int x = lx; x < hx; ++x) { for (int y = ly; y < hy; ++y) { - for (int z = lz; z < hz; ++z) - { + for (int z = lz; z < hz; ++z) { double d = Vin[nz*(ny*x + y) + z]; Vout[nz*(ny*x + y) + z] += d; sum += d; @@ -59,68 +39,65 @@ static void TraditionalForLoop(intptr_t state) } } t = clock() - t; - printf("forloop: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("forloop : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -static void MDRanges_nested_loop(intptr_t state) +static void MDRanges_loop_over_joined(intptr_t n) { + clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); MD3 r_out = cspan_md(Vout, nx, ny, nz); r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - - // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; } - clock_t t = clock(); double sum = 0; - for (intptr_t s = 0; s < state; ++s) { - for (int x = 0; x < r_in.shape[0]; ++x) { - for (int y = 0; y < r_in.shape[1]; ++y) { - for (int z = 0; z < r_in.shape[2]; ++z) - { - double d = *cspan_at(&r_in, x, y, z); - *cspan_at(&r_out, x, y, z) += d; - sum += d; - } - } + for (intptr_t s = 0; s < n; ++s) { + MD3_iter i = MD3_begin(&r_in); + MD3_iter o = MD3_begin(&r_out); + + for (; i.ref; MD3_next(&i), MD3_next(&o)) + { + *o.ref += *i.ref; + sum += *i.ref; } } t = clock() - t; - printf("nested: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("joined : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -static void MDRanges_loop_over_joined(intptr_t state) +static void MDRanges_nested_loop(intptr_t n) { + clock_t t = clock(); MD3 r_in = cspan_md(Vin, nx, ny, nz); MD3 r_out = cspan_md(Vout, nx, ny, nz); r_in = cspan_slice(MD3, &r_in, {lx, hx}, {ly, hy}, {lz, hz}); r_out = cspan_slice(MD3, &r_out, {lx, hx}, {ly, hy}, {lz, hz}); - - // C++23: for (auto [o, i] : std::views::zip(flat(r_out), flat(r_in))) { o = i; } double sum = 0; - clock_t t = clock(); - - for (intptr_t s = 0; s < state; ++s) { - MD3_iter i = MD3_begin(&r_in); - MD3_iter o = MD3_begin(&r_out); - for (; i.ref; MD3_next(&i), MD3_next(&o)) - { - *o.ref += *i.ref; - sum += *i.ref; + for (intptr_t s = 0; s < n; ++s) { + for (int x = 0; x < r_in.shape[0]; ++x) { + for (int y = 0; y < r_in.shape[1]; ++y) { + for (int z = 0; z < r_in.shape[2]; ++z) + { + double d = *cspan_at(&r_in, x,y,z); + *cspan_at(&r_out, x,y,z) += d; + sum += d; + } + } } } t = clock() - t; - printf("joined: %.1f ms, %f\n", 1000.0f * t / CLOCKS_PER_SEC, sum); + printf("nested : %.1f ms, %f\n", 1000.0f*t / CLOCKS_PER_SEC, sum); } -int main() + +int main(void) { + intptr_t n = 100000; for (int i = 0; i < nx * ny * nz; ++i) Vin[i] = i + 1.23; - MDRanges_setup(n); - TraditionalForLoop(n); - MDRanges_nested_loop(n); + Traditional_for_loop(n); MDRanges_loop_over_joined(n); + MDRanges_nested_loop(n); } diff --git a/misc/benchmarks/various/prng_bench.cpp b/misc/benchmarks/various/prng_bench.cpp index 234e3805..45c14d18 100644 --- a/misc/benchmarks/various/prng_bench.cpp +++ b/misc/benchmarks/various/prng_bench.cpp @@ -66,7 +66,7 @@ uint32_t pcg32(uint32_t s[2]) { } -/* xoshiro128+ */ +/* xo(ro)shiro */ uint64_t xoroshiro128plus(uint64_t s[2]) { const uint64_t s0 = s[0]; @@ -80,9 +80,6 @@ uint64_t xoroshiro128plus(uint64_t s[2]) { return result; } - -/* xoshiro256** */ - static inline uint64_t xoshiro256starstar(uint64_t s[4]) { const uint64_t result = rotl64(s[1] * 5, 7) * 9; const uint64_t t = s[1] << 17; @@ -95,7 +92,7 @@ static inline uint64_t xoshiro256starstar(uint64_t s[4]) { return result; } -// wyrand - 2020-12-07 +/* wyrand - 2020-12-07 */ static inline void _wymum(uint64_t *A, uint64_t *B){ #if defined(__SIZEOF_INT128__) __uint128_t r = *A; r *= *B; @@ -136,44 +133,44 @@ int main(void) for (size_t ti = 0; ti < 2; ti++) { init_state(rng.state, 12345123); cout << endl << "ROUND " << ti+1 << " ---------" << endl; - +/* beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = romu_trio(rng.state); + recipient[i] = sfc32((uint32_t *)rng.state); end = clock(); - cout << "romu_trio:\t" + cout << "sfc32:\t\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = wyrand64(rng.state); + recipient[i] = stc32((uint32_t *)rng.state); end = clock(); - cout << "wyrand64:\t" + cout << "stc32:\t\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = sfc32((uint32_t *)rng.state); + recipient[i] = pcg32((uint32_t *)rng.state); end = clock(); - cout << "sfc32:\t\t" + cout << "pcg32:\t\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; - +*/ beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = stc32((uint32_t *)rng.state); + recipient[i] = romu_trio(rng.state); end = clock(); - cout << "stc32:\t\t" + cout << "romu_trio:\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; beg = clock(); for (size_t i = 0; i < N; i++) - recipient[i] = pcg32((uint32_t *)rng.state); + recipient[i] = wyrand64(rng.state); end = clock(); - cout << "pcg32:\t\t" + cout << "wyrand64:\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; @@ -189,7 +186,7 @@ int main(void) for (size_t i = 0; i < N; i++) recipient[i] = crand_u64(&rng); end = clock(); - cout << "stc64:\t\t" + cout << "crand64:\t" << (float(end - beg) / CLOCKS_PER_SEC) << "s: " << recipient[312] << endl; diff --git a/misc/benchmarks/various/rust_cmap.c b/misc/benchmarks/various/rust_cmap.c index abdb42b0..97047e0b 100644 --- a/misc/benchmarks/various/rust_cmap.c +++ b/misc/benchmarks/various/rust_cmap.c @@ -22,7 +22,7 @@ uint64_t romu_trio(uint64_t s[3]) { return xp; } -int main() +int main(void) { cmap_u64 m = {0}; diff --git a/misc/benchmarks/various/sso_bench.cpp b/misc/benchmarks/various/sso_bench.cpp index 993ff1bb..244c1291 100644 --- a/misc/benchmarks/various/sso_bench.cpp +++ b/misc/benchmarks/various/sso_bench.cpp @@ -3,65 +3,82 @@ #include <chrono> #include <stc/crand.h> +#define i_static #include <stc/cstr.h> #define i_type StcVec #define i_val_str #include <stc/cstack.h> -#define i_type StcSet -#define i_val_str -#include <stc/csset.h> - #include <vector> using StdVec = std::vector<std::string>; -#include <set> -using StdSet = std::set<std::string>; -static const int BENCHMARK_SIZE = 2000000; -static const int MAX_STRING_SIZE = 50; +#include <unordered_set> +#include "../external/ankerl/robin_hood.h" + +struct string_hash { + using is_transparent = void; + [[nodiscard]] size_t operator()(const char *txt) const { + return std::hash<std::string_view>{}(txt); + } + [[nodiscard]] size_t operator()(std::string_view txt) const { + return std::hash<std::string_view>{}(txt); + } + [[nodiscard]] size_t operator()(const std::string &txt) const { + return std::hash<std::string>{}(txt); + } +}; +using StdSet = robin_hood::unordered_flat_set<std::string, string_hash, std::equal_to<>>; +//using StdSet = std::unordered_set<std::string>; + +#define i_type StcSet +#define i_val_str +//#define i_hash(txtp) std::hash<std::string_view>{}(*txtp) +#include <stc/cset.h> + + +static const int BENCHMARK_SIZE = 250000; +static const int MAX_STRING_SIZE = 100; static const char CHARS[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=+-"; using time_point = std::chrono::high_resolution_clock::time_point; -static inline std::string randomString_STD(int strsize) { - std::string s(strsize, 0); - char* p = &s[0]; +static inline const char* randomString(int strsize) { + static char str[256]; union { uint64_t u8; uint8_t b[8]; } r; for (int i = 0; i < strsize; ++i) { if ((i & 7) == 0) r.u8 = crand() & 0x3f3f3f3f3f3f3f3f; - p[i] = CHARS[r.b[i & 7]]; + str[i] = CHARS[r.b[i & 7]]; } - return s; + str[strsize] = 0; + return str; } -static inline cstr randomString_STC(int strsize) { - cstr s = cstr_with_size(strsize, 0); - char* p = cstr_data(&s); - union { uint64_t u8; uint8_t b[8]; } r; - for (int i = 0; i < strsize; ++i) { - if ((i & 7) == 0) r.u8 = crand() & 0x3f3f3f3f3f3f3f3f; - p[i] = CHARS[r.b[i & 7]]; - } - return s; + + +static inline void addRandomString(StdVec& vec, const char* str) { + vec.push_back(str); } +static inline void addRandomString(StcVec& vec, const char* str) { + StcVec_emplace(&vec, str); +} -void addRandomString(StdVec& vec, int strsize) { - vec.push_back(std::move(randomString_STD(strsize))); +static inline void addRandomString(StdSet& set, const char* str) { + set.insert(str); } -void addRandomString(StcVec& vec, int strsize) { - StcVec_push(&vec, randomString_STC(strsize)); +static inline void addRandomString(StcSet& set, const char* str) { + StcSet_emplace(&set, str); } -void addRandomString(StdSet& set, int strsize) { - set.insert(std::move(randomString_STD(strsize))); +static inline bool getRandomString(const StdSet& set, const char* str) { + return set.find(str) != set.end(); } -void addRandomString(StcSet& set, int strsize) { - StcSet_insert(&set, randomString_STC(strsize)); +static inline bool getRandomString(const StcSet& set, const char* str) { + return StcSet_contains(&set, str); } @@ -70,7 +87,7 @@ int benchmark(C& container, const int n, const int strsize) { time_point t1 = std::chrono::high_resolution_clock::now(); for (int i = 0; i < n; i++) - addRandomString(container, strsize); + addRandomString(container, randomString(strsize)); time_point t2 = std::chrono::high_resolution_clock::now(); const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count(); @@ -78,9 +95,25 @@ int benchmark(C& container, const int n, const int strsize) { return (int)duration; } +template <class C> +int benchmark_lookup(C& container, const int n, const int strsize) { + for (int i = 0; i < n; i++) + addRandomString(container, randomString(strsize)); + + time_point t1 = std::chrono::high_resolution_clock::now(); + int found = 0; + for (int i = 0; i < n; i++) + found += (int)getRandomString(container, randomString(strsize)); + + time_point t2 = std::chrono::high_resolution_clock::now(); + const auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count(); + std::cerr << (strsize ? strsize : 32) << "\t" << duration << '\t' << found; + return (int)duration; +} -int main() { - uint64_t seed = 4321; +#include <time.h> +int main(void) { + uint64_t seed = time(NULL); // 4321; int sum, n; // VECTOR WITH STRINGS @@ -88,48 +121,75 @@ int main() { csrand(seed); sum = 0, n = 0; std::cerr << "\nstrsize\tmsecs\tstd::vector<std::string>, size=" << BENCHMARK_SIZE << "\n"; - for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) { + for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 4) { StdVec vec; vec.reserve(BENCHMARK_SIZE); sum += benchmark(vec, BENCHMARK_SIZE, strsize), ++n; std::cout << '\t' << vec.front() << '\n'; } - std::cout << "Avg:\t" << sum/n << '\n'; + std::cout << "Avg:\t" << sum/n << "ms\n"; csrand(seed); sum = 0, n = 0; std::cerr << "\nstrsize\tmsecs\tcvec<cstr>, size=" << BENCHMARK_SIZE << "\n"; - for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) { + for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 4) { StcVec vec = StcVec_with_capacity(BENCHMARK_SIZE); sum += benchmark(vec, BENCHMARK_SIZE, strsize), ++n; std::cout << '\t' << cstr_str(&vec.data[0]) << '\n'; StcVec_drop(&vec); } - std::cout << "Avg:\t" << sum/n << '\n'; + std::cout << "Avg:\t" << sum/n << "ms\n"; + + // INSERT: SORTED SET WITH STRINGS + + csrand(seed); + sum = 0, n = 0; + std::cerr << "\nstrsize\tmsecs\tinsert: robin_hood::unordered_flat_set<std::string>, size=" << BENCHMARK_SIZE/2 << "\n"; + for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 4) { + StdSet set; set.reserve(BENCHMARK_SIZE/2); + sum += benchmark(set, BENCHMARK_SIZE/2, strsize), ++n; + std::cout << '\t' << *set.begin() << '\n'; + } + std::cout << "Avg:\t" << sum/n << "ms\n"; - // SORTED SET WITH STRINGS csrand(seed); sum = 0, n = 0; - std::cerr << "\nstrsize\tmsecs\tstd::set<std::string>, size=" << BENCHMARK_SIZE/16 << "\n"; + std::cerr << "\nstrsize\tmsecs\tinsert: cset<cstr>, size=" << BENCHMARK_SIZE/2 << "\n"; + for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 4) { + StcSet set = StcSet_with_capacity(BENCHMARK_SIZE/2); + sum += benchmark(set, BENCHMARK_SIZE/2, strsize), ++n; + std::cout << '\t' << cstr_str(StcSet_begin(&set).ref) << '\n'; + StcSet_drop(&set); + } + std::cout << "Avg:\t" << sum/n << "ms\n"; + + // LOOKUP: SORTED SET WITH STRINGS + + csrand(seed); + sum = 0, n = 0; + std::cerr << "\nstrsize\tmsecs\tfind: robin_hood::unordered_flat_set<std::string>, size=" << BENCHMARK_SIZE/2 << "\n"; for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) { - StdSet set; - sum += benchmark(set, BENCHMARK_SIZE/16, strsize), ++n; + StdSet set; set.reserve(BENCHMARK_SIZE/2); + sum += benchmark_lookup(set, BENCHMARK_SIZE/2, strsize), ++n; std::cout << '\t' << *set.begin() << '\n'; } - std::cout << "Avg:\t" << sum/n << '\n'; + std::cout << "Avg:\t" << sum/n << "ms\n"; csrand(seed); sum = 0, n = 0; - std::cerr << "\nstrsize\tmsecs\tcsset<cstr>, size=" << BENCHMARK_SIZE/16 << "\n"; + std::cerr << "\nstrsize\tmsecs\tfind: cset<cstr>, size=" << BENCHMARK_SIZE/2 << "\n"; for (int strsize = 1; strsize <= MAX_STRING_SIZE; strsize += 2) { - StcSet set = StcSet_with_capacity(BENCHMARK_SIZE/16); - sum += benchmark(set, BENCHMARK_SIZE/16, strsize), ++n; - std::cout << '\t' << cstr_str(StcSet_front(&set)) << '\n'; + StcSet set = StcSet_with_capacity(BENCHMARK_SIZE/2); + sum += benchmark_lookup(set, BENCHMARK_SIZE/2, strsize), ++n; + std::cout << '\t' << cstr_str(StcSet_begin(&set).ref) << '\n'; StcSet_drop(&set); } - std::cout << "Avg:\t" << sum/n << '\n'; + std::cout << "Avg:\t" << sum/n << "ms\n"; + std::cerr << "sizeof(std::string) : " << sizeof(std::string) << std::endl - << "sizeof(cstr) : " << sizeof(cstr) << std::endl; + << "sizeof(cstr) : " << sizeof(cstr) << std::endl + << "sizeof(StdSet) : " << sizeof(StdSet) << std::endl + << "sizeof(StcSet) : " << sizeof(StcSet) << std::endl; return 0; } diff --git a/misc/benchmarks/various/string_bench_STC.cpp b/misc/benchmarks/various/string_bench_STC.cpp index ae8e4c38..a5dfd901 100644 --- a/misc/benchmarks/various/string_bench_STC.cpp +++ b/misc/benchmarks/various/string_bench_STC.cpp @@ -4,10 +4,11 @@ #include <iostream> #include <iomanip> #include <chrono> -#define i_static +#define i_implement #include <stc/cstr.h> // string -#define i_static +#define i_implement #include <stc/csview.h> // string_view +#include <stc/algo/raii.h> #define i_key_str #include <stc/cvec.h> // vec of cstr with const char* lookup @@ -183,7 +184,7 @@ void benchmark( //const size_t MAX_LOOP = 1000000; const size_t MAX_LOOP = 2000; -int main() +int main(void) { c_auto (cvec_str, vec_string) c_auto (cvec_sv, vec_stringview) diff --git a/misc/benchmarks/various/string_bench_STD.cpp b/misc/benchmarks/various/string_bench_STD.cpp index 8bb87937..153ac02f 100644 --- a/misc/benchmarks/various/string_bench_STD.cpp +++ b/misc/benchmarks/various/string_bench_STD.cpp @@ -12,6 +12,7 @@ #include <unordered_map> #define i_static #include <stc/cstr.h> +#include <stc/algo/raii.h> std::vector<std::string> read_file(const char* name) { @@ -193,7 +194,7 @@ void benchmark( //const size_t MAX_LOOP = 1000000; const size_t MAX_LOOP = 2000; -int main() +int main(void) { std::vector<std::string> vec_shortstr; std::vector<std::string_view> vec_shortstrview; |
